diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index f4d17b3596..bb4e2d24c9 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,4 +1,4 @@
-blank_issues_enabled: false
+blank_issues_enabled: true
 contact_links:
   - name: Prometheus Community Support
     url: https://prometheus.io/community/
diff --git a/.github/workflows/buf-lint.yml b/.github/workflows/buf-lint.yml
index 3f6cf76e16..d03f190769 100644
--- a/.github/workflows/buf-lint.yml
+++ b/.github/workflows/buf-lint.yml
@@ -12,8 +12,8 @@ jobs:
     name: lint
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-      - uses: bufbuild/buf-setup-action@54abbed4fe8d8d45173eca4798b0c39a53a7b658 # v1.39.0
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: bufbuild/buf-setup-action@9672cee01808979ea1249f81d6d321217b9a10f6 # v1.47.2
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
      - uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1
diff --git a/.github/workflows/buf.yml b/.github/workflows/buf.yml
index 632d38cb00..bf8ae3f6a4 100644
--- a/.github/workflows/buf.yml
+++ b/.github/workflows/buf.yml
@@ -12,8 +12,8 @@ jobs:
     runs-on: ubuntu-latest
     if: github.repository_owner == 'prometheus'
     steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-      - uses: bufbuild/buf-setup-action@54abbed4fe8d8d45173eca4798b0c39a53a7b658 # v1.39.0
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: bufbuild/buf-setup-action@9672cee01808979ea1249f81d6d321217b9a10f6 # v1.47.2
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
      - uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5934a3dafe..f7703d940d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -11,11 +11,13 @@ jobs:
     container:
       # Whenever the Go version is updated here, .promu.yml
       # should also be updated.
-      image: quay.io/prometheus/golang-builder:1.22-base
+      image: quay.io/prometheus/golang-builder:1.23-base
     steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-      - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5
       - uses: ./.github/promci/actions/setup_environment
+        with:
+          enable_npm: true
       - run: make GOOPTS=--tags=stringlabels GO_ONLY=1 SKIP_GOLANGCI_LINT=1
       - run: go test --tags=stringlabels ./tsdb/ -test.tsdb-isolation=false
       - run: make -C documentation/examples/remote_storage
@@ -25,10 +27,10 @@ jobs:
     name: More Go tests
     runs-on: ubuntu-latest
     container:
-      image: quay.io/prometheus/golang-builder:1.22-base
+      image: quay.io/prometheus/golang-builder:1.23-base
     steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-      - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5
       - uses: ./.github/promci/actions/setup_environment
       - run: go test --tags=dedupelabels ./...
       - run: GOARCH=386 go test ./cmd/prometheus
@@ -39,11 +41,14 @@
   test_go_oldest:
     name: Go tests with previous Go version
     runs-on: ubuntu-latest
+    env:
+      # Enforce the Go version.
+      GOTOOLCHAIN: local
     container:
       # The go version in this image should be N-1 wrt test_go.
-      image: quay.io/prometheus/golang-builder:1.21-base
+      image: quay.io/prometheus/golang-builder:1.22-base
     steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       - run: make build
       # Don't run NPM build; don't run race-detector.
       - run: make test GO_ONLY=1 test-flags=""
@@ -54,11 +59,11 @@
     # Whenever the Go version is updated here, .promu.yml
     # should also be updated.
     container:
-      image: quay.io/prometheus/golang-builder:1.22-base
+      image: quay.io/prometheus/golang-builder:1.23-base
     steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-      - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5
       - uses: ./.github/promci/actions/setup_environment
         with:
           enable_go: false
@@ -74,10 +79,10 @@
     name: Go tests on Windows
     runs-on: windows-latest
     steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-      - uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed # v5.1.0
        with:
-          go-version: 1.22.x
+          go-version: 1.23.x
       - run: |
           $TestTargets = go list ./... | Where-Object { $_ -NotMatch "(github.com/prometheus/prometheus/discovery.*|github.com/prometheus/prometheus/config|github.com/prometheus/prometheus/web)"}
           go test $TestTargets -vet=off -v
@@ -89,9 +94,9 @@
     # Whenever the Go version is updated here, .promu.yml
     # should also be updated.
     container:
-      image: quay.io/prometheus/golang-builder:1.22-base
+      image: quay.io/prometheus/golang-builder:1.23-base
     steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       - run: go install ./cmd/promtool/.
       - run: go install github.com/google/go-jsonnet/cmd/jsonnet@latest
       - run: go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
@@ -107,6 +112,8 @@
     if: |
       !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
       &&
+      !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
+      &&
       !(github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-'))
       &&
       !(github.event_name == 'push' && github.event.ref == 'refs/heads/main')
@@ -114,8 +121,8 @@
     matrix:
       thread: [ 0, 1, 2 ]
     steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-      - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5
       - uses: ./.github/promci/actions/build
         with:
           promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386"
@@ -127,6 +134,8 @@
     if: |
       (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
       ||
+      (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
+      ||
       (github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-'))
       ||
       (github.event_name == 'push' && github.event.ref == 'refs/heads/main')
@@ -137,8 +146,8 @@
     # Whenever the Go version is updated here, .promu.yml
     # should also be updated.
     steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-      - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5
       - uses: ./.github/promci/actions/build
         with:
           parallelism: 12
@@ -160,12 +169,12 @@
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       - name: Install Go
-        uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
+        uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed # v5.1.0
        with:
          cache: false
-          go-version: 1.22.x
+          go-version: 1.23.x
       - name: Run goyacc and check for diff
         run: make install-goyacc check-generated-parser
   golangci:
@@ -173,20 +182,20 @@
     name: golangci-lint
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       - name: Install Go
-        uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
+        uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed # v5.1.0
        with:
-          go-version: 1.22.x
+          go-version: 1.23.x
       - name: Install snmp_exporter/generator dependencies
         run: sudo apt-get update && sudo apt-get -y install libsnmp-dev
         if: github.repository == 'prometheus/snmp_exporter'
       - name: Lint
-        uses: golangci/golangci-lint-action@aaa42aa0628b4ae2578232a66b541047968fac86 # v6.1.0
+        uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1
         with:
           args: --verbose
           # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml.
-          version: v1.60.2
+          version: v1.62.0
   fuzzing:
     uses: ./.github/workflows/fuzzing.yml
     if: github.event_name == 'pull_request'
@@ -199,8 +208,8 @@
     needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all]
     if: github.event_name == 'push' && github.event.ref == 'refs/heads/main'
     steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-      - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5
       - uses: ./.github/promci/actions/publish_main
         with:
           docker_hub_login: ${{ secrets.docker_hub_login }}
@@ -211,10 +220,13 @@
     name: Publish release artefacts
     runs-on: ubuntu-latest
     needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all]
-    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
+    if: |
+      (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
+      ||
+      (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
     steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-      - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5
       - uses: ./.github/promci/actions/publish_release
         with:
           docker_hub_login: ${{ secrets.docker_hub_login }}
@@ -228,31 +240,40 @@
     needs: [test_ui, codeql]
     steps:
       - name: Checkout
-        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
-      - uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - uses: prometheus/promci@52c7012f5f0070d7281b8db4a119e21341d43c91 # v0.4.5
       - name: Install nodejs
-        uses: actions/setup-node@1e60f620b9541d16bece96c5465dc8ee9832be0b # v4.0.3
+        uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0
         with:
           node-version-file: "web/ui/.nvmrc"
           registry-url: "https://registry.npmjs.org"
-      - uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
+      - uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
         with:
           path: ~/.npm
           key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
           restore-keys: |
             ${{ runner.os }}-node-
       - name: Check libraries version
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
-        run: ./scripts/ui_release.sh --check-package "$(echo ${{ github.ref_name }}|sed s/v2/v0/)"
+        if: |
+          (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
+          ||
+          (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
+        run: ./scripts/ui_release.sh --check-package "$(./scripts/get_module_version.sh ${{ github.ref_name }})"
       - name: build
         run: make assets
       - name: Copy files before publishing libs
         run: ./scripts/ui_release.sh --copy
       - name: Publish dry-run libraries
-        if: "!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))"
+        if: |
+          !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
+          &&
+          !(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
        run: ./scripts/ui_release.sh --publish dry-run
      - name: Publish libraries
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
+        if: |
+          (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
+          ||
+          (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
         run: ./scripts/ui_release.sh --publish
         env:
           # The setup-node action writes an .npmrc file with this env variable
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 89aa2ba29b..c30d907326 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -24,15 +24,15 @@ jobs:
     steps:
       - name: Checkout repository
-        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6
+        uses: github/codeql-action/init@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3.27.5
         with:
           languages: ${{ matrix.language }}
       - name: Autobuild
-        uses: github/codeql-action/autobuild@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6
+        uses: github/codeql-action/autobuild@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3.27.5
       - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6
+        uses: github/codeql-action/analyze@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # v3.27.5
diff --git a/.github/workflows/container_description.yml b/.github/workflows/container_description.yml
index 8ddbc34aeb..dcca16ff34 100644
--- a/.github/workflows/container_description.yml
+++ b/.github/workflows/container_description.yml
@@ -18,7 +18,7 @@ jobs:
     if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
     steps:
       - name: git checkout
-        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       - name: Set docker hub repo name
         run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV
       - name: Push README to Dockerhub
@@ -40,7 +40,7 @@ jobs:
     if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
     steps:
       - name: git checkout
-        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       - name: Set quay.io org name
         run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV
       - name: Set quay.io repo name
diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml
index f3953cb2a4..5f1b0f25ce 100644
--- a/.github/workflows/fuzzing.yml
+++ b/.github/workflows/fuzzing.yml
@@ -21,7 +21,7 @@ jobs:
         fuzz-seconds: 600
         dry-run: false
     - name: Upload Crash
-      uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4
+      uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
      if: failure() && steps.build.outcome == 'success'
      with:
        name: artifacts
diff --git a/.github/workflows/repo_sync.yml b/.github/workflows/repo_sync.yml
index 537e9abd84..a659d431d0 100644
--- a/.github/workflows/repo_sync.yml
+++ b/.github/workflows/repo_sync.yml
@@ -13,7 +13,7 @@ jobs:
     container:
       image: quay.io/prometheus/golang-builder
     steps:
-      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
       - run: ./scripts/sync_repo_files.sh
         env:
           GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }}
diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 82cccb2bc1..f637e921e5 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -21,7 +21,7 @@ jobs:
     steps:
       - name: "Checkout code"
-        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # tag=v4.1.6
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
         with:
           persist-credentials: false
@@ -37,7 +37,7 @@ jobs:
       # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
       # format to the repository Actions tab.
       - name: "Upload artifact"
-        uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # tag=v4.3.4
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # tag=v4.4.3
         with:
           name: SARIF file
           path: results.sarif
@@ -45,6 +45,6 @@ jobs:
       # Upload the results to GitHub's code scanning dashboard.
       - name: "Upload to code-scanning"
-        uses: github/codeql-action/upload-sarif@4dd16135b69a43b6c8efb853346f8437d92d3c93 # tag=v3.26.6
+        uses: github/codeql-action/upload-sarif@f09c1c0a94de965c15400f5634aa42fac8fb8f88 # tag=v3.27.5
         with:
           sarif_file: results.sarif
diff --git a/.gitignore b/.gitignore
index e85d766b09..0d99305f69 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,7 +22,7 @@ benchmark.txt
 /documentation/examples/remote_storage/example_write_adapter/example_write_adapter
 npm_licenses.tar.bz2

-/web/ui/static/react
+/web/ui/static

 /vendor
 /.build
diff --git a/.golangci.yml b/.golangci.yml
index 303cd33d8b..dfc74139f9 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -23,6 +23,7 @@ linters:
     - usestdlibvars
     - whitespace
     - loggercheck
+    - sloglint

 issues:
   max-issues-per-linter: 0
@@ -100,8 +101,6 @@ linters-settings:
       - (net/http.ResponseWriter).Write
       # No need to check for errors on server's shutdown.
       - (*net/http.Server).Shutdown
-      # Never check for logger errors.
-      - (github.com/go-kit/log.Logger).Log
       # Never check for rollback errors as Rollback() is called when a previous error was detected.
       - (github.com/prometheus/prometheus/storage.Appender).Rollback
   goimports:
@@ -110,7 +109,7 @@
     extra-rules: true
   perfsprint:
     # Optimizes `fmt.Errorf`.
-    errorf: false
+    errorf: true
   revive:
     # By default, revive will enable only the linting rules that are named in the configuration file.
     # So, it's needed to explicitly enable all required rules here.
@@ -153,14 +152,4 @@ linters-settings:
     disable:
       - float-compare
       - go-require
-    enable:
-      - bool-compare
-      - compares
-      - empty
-      - error-is-as
-      - error-nil
-      - expected-actual
-      - len
-      - require-error
-      - suite-dont-use-pkg
-      - suite-extra-assert-call
+    enable-all: true
diff --git a/.promu.yml b/.promu.yml
index 0aa51d6d31..6feaa6ef64 100644
--- a/.promu.yml
+++ b/.promu.yml
@@ -1,7 +1,7 @@
 go:
   # Whenever the Go version is updated here,
   # .github/workflows should also be updated.
-  version: 1.22
+  version: 1.23
 repository:
   path: github.com/prometheus/prometheus
 build:
@@ -28,8 +28,6 @@ tarball:
   # Whenever there are new files to include in the tarball,
   # remember to make sure the new files will be generated after `make build`.
   files:
-    - consoles
-    - console_libraries
     - documentation/examples/prometheus.yml
     - LICENSE
     - NOTICE
diff --git a/.yamllint b/.yamllint
index 1859cb624b..8d09c375fd 100644
--- a/.yamllint
+++ b/.yamllint
@@ -1,7 +1,7 @@
 ---
 extends: default
 ignore: |
-  ui/react-app/node_modules
+  **/node_modules

 rules:
   braces:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 37cbea6ef5..358284c98d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,13 +2,121 @@

 ## unreleased

-* [FEATURE] OTLP receiver: Add new option `otlp.promote_resource_attributes`, for any OTel resource attributes that should be promoted to metric labels. #14200
-* [ENHANCEMENT] OTLP receiver: Warn when encountering exponential histograms with zero count and non-zero sum. #14706
-* [BUGFIX] tsdb/wlog.Watcher.readSegmentForGC: Only count unknown record types against record_decode_failures_total metric. #14042
+* [CHANGE] Notifier: Increment the prometheus_notifications_errors_total metric by the number of affected alerts rather than by one per batch of affected alerts. #15428
+* [ENHANCEMENT] OTLP receiver: Convert also metric metadata. #15416
+
+## 3.0.0 / 2024-11-14
+
+This release includes new features such as a brand new UI and UTF-8 support enabled by default. As this marks the first new major version in seven years, several breaking changes are introduced. The breaking changes are mainly around the removal of deprecated feature flags and CLI arguments, and the full list can be found below. For users that want to upgrade we recommend reading through our [migration guide](https://prometheus.io/docs/prometheus/3.0/migration/).
+
+* [CHANGE] Set the `GOMAXPROCS` variable automatically to match the Linux CPU quota. Use `--no-auto-gomaxprocs` to disable it. The `auto-gomaxprocs` feature flag was removed. #15376
+* [CHANGE] Set the `GOMEMLIMIT` variable automatically to match the Linux container memory limit. Use `--no-auto-gomemlimit` to disable it. The `auto-gomemlimit` feature flag was removed. #15373
+* [CHANGE] Scraping: Remove implicit fallback to the Prometheus text format in case of invalid/missing Content-Type and fail the scrape instead. Add ability to specify a `fallback_scrape_protocol` in the scrape config. #15136
+* [CHANGE] Remote-write: default enable_http2 to false. #15219
+* [CHANGE] Scraping: normalize "le" and "quantile" label values upon ingestion. #15164
+* [CHANGE] Scraping: config `scrape_classic_histograms` was renamed to `always_scrape_classic_histograms`. #15178
+* [CHANGE] Config: remove expand-external-labels flag, expand external labels env vars by default. #14657
+* [CHANGE] Disallow configuring AM with the v1 api. #13883
+* [CHANGE] regexp `.` now matches all characters (performance improvement). #14505
+* [CHANGE] `holt_winters` is now called `double_exponential_smoothing` and moves behind the [experimental-promql-functions feature flag](https://prometheus.io/docs/prometheus/latest/feature_flags/#experimental-promql-functions). #14930
+* [CHANGE] API: The OTLP receiver endpoint can now be enabled using `--web.enable-otlp-receiver` instead of `--enable-feature=otlp-write-receiver`. #14894
+* [CHANGE] Prometheus will not add or remove port numbers from the target address. `no-default-scrape-port` feature flag removed. #14160
+* [CHANGE] Logging: the format of log lines has changed a little, along with the adoption of Go's Structured Logging package. #14906
+* [CHANGE] Don't create extra `_created` timeseries if feature-flag `created-timestamp-zero-ingestion` is enabled. #14738
+* [CHANGE] Float literals and time durations being the same is now a stable feature. #15111
+* [CHANGE] UI: The old web UI has been replaced by a completely new one that is less cluttered and adds a few new features (PromLens-style tree view, better metrics explorer, "Explain" tab). However, it is still missing some features of the old UI (notably, exemplar display and heatmaps). To switch back to the old UI, you can use the feature flag `--enable-feature=old-ui` for the time being. #14872
+* [CHANGE] PromQL: Range selectors and the lookback delta are now left-open, i.e. a sample coinciding with the lower time limit is excluded rather than included. #13904
+* [CHANGE] Kubernetes SD: Remove support for `discovery.k8s.io/v1beta1` API version of EndpointSlice. This version is no longer served as of Kubernetes v1.25. #14365
+* [CHANGE] Kubernetes SD: Remove support for `networking.k8s.io/v1beta1` API version of Ingress. This version is no longer served as of Kubernetes v1.22. #14365
+* [CHANGE] UTF-8: Enable UTF-8 support by default. Prometheus now allows all UTF-8 characters in metric and label names. The corresponding `utf8-name` feature flag has been removed. #14705
+* [CHANGE] Console: Remove example files for the console feature. Users can continue using the console feature by supplying their own JavaScript and templates. #14807
+* [CHANGE] SD: Enable the new service discovery manager by default. This SD manager does not restart unchanged discoveries upon reloading. This makes reloads faster and reduces pressure on service discoveries' sources. The corresponding `new-service-discovery-manager` feature flag has been removed. #14770
+* [CHANGE] Agent mode has been promoted to stable. The feature flag `agent` has been removed. To run Prometheus in Agent mode, use the new `--agent` cmdline arg instead. #14747
+* [CHANGE] Remove deprecated `remote-write-receiver`, `promql-at-modifier`, and `promql-negative-offset` feature flags. #13456, #14526
+* [CHANGE] Remove deprecated `storage.tsdb.allow-overlapping-blocks`, `alertmanager.timeout`, and `storage.tsdb.retention` flags. #14640, #14643
+* [FEATURE] OTLP receiver: Ability to skip UTF-8 normalization using `otlp.translation_strategy = NoUTF8EscapingWithSuffixes` configuration option. #15384
+* [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. #14769
+* [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. #14710
+* [ENHANCEMENT] Tools: add debug printouts to promtool rules unit testing #15196
+* [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694
+* [ENHANCEMENT] UI: Many fixes and improvements. #14898, #14899, #14907, #14908, #14912, #14913, #14914, #14931, #14940, #14945, #14946, #14972, #14981, #14982, #14994, #15096
+* [ENHANCEMENT] UI: Web UI now displays notifications, e.g. when starting up and shutting down. #15082
+* [ENHANCEMENT] PromQL: Introduce exponential interpolation for native histograms. #14677
+* [ENHANCEMENT] TSDB: Add support for ingestion of out-of-order native histogram samples. #14850, #14546
+* [ENHANCEMENT] Alerts: remove metrics for removed Alertmanagers. #13909
+* [ENHANCEMENT] Kubernetes SD: Support sidecar containers in endpoint discovery. #14929
+* [ENHANCEMENT] Consul SD: Support catalog filters. #11224
+* [ENHANCEMENT] Move AM discovery page from "Monitoring status" to "Server status". #14875
+* [PERF] TSDB: Parallelize deletion of postings after head compaction. #14975
+* [PERF] TSDB: Chunk encoding: shorten some write sequences. #14932
+* [PERF] TSDB: Grow postings by doubling. #14721
+* [PERF] Relabeling: Optimize adding a constant label pair. #12180
+* [BUGFIX] Scraping: Don't log errors on empty scrapes. #15357
+* [BUGFIX] UI: fix selector / series formatting for empty metric names. #15341
+* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941
+* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941
+* [BUGFIX] OTLP receiver: Preserve colons when generating metric names in suffix adding mode (this mode is always enabled, unless one uses Prometheus as a library). #15251
+* [BUGFIX] Scraping: Unit was missing when using protobuf format. #15095
+* [BUGFIX] PromQL: Only return "possible non-counter" annotation when `rate` returns points. #14910
+* [BUGFIX] TSDB: Chunks could have one unnecessary zero byte at the end. #14854
+* [BUGFIX] "superfluous response.WriteHeader call" messages in log. #14884
+* [BUGFIX] PromQL: Unary negation of native histograms. #14821
+* [BUGFIX] PromQL: Handle stale marker in native histogram series (e.g. if series goes away and comes back). #15025
+* [BUGFIX] Autoreload: Reload invalid yaml files. #14947
+* [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029
+
+## 2.53.3 / 2024-11-04
+
+* [BUGFIX] Scraping: allow multiple samples on same series, with explicit timestamps. #14685, #14740
+
+## 2.53.2 / 2024-08-09
+
+Fix a bug where Prometheus would crash with a segmentation fault if a remote-read
+request accessed a block on disk at about the same time as TSDB created a new block.
+
+* [BUGFIX] Remote-Read: Resolve occasional segmentation fault on query. #14515, #14523
+
+## 2.55.1 / 2024-11-04
+
+* [BUGFIX] `round()` function did not remove `__name__` label. #15250
+
+## 2.55.0 / 2024-10-22
+
+* [FEATURE] PromQL: Add experimental `info` function. #14495
+* [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727
+* [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817
+* [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815
+* [FEATURE] Scraping: `scrape_failure_log_file` option to log failures to a file. #14734
+* [FEATURE] OTLP receiver: Optional promotion of resource attributes to series labels. #14200
+* [FEATURE] Remote-Write: Support Google Cloud Monitoring authorization. #14346
+* [FEATURE] Promtool: `tsdb create-blocks` new option to add labels. #14403
+* [FEATURE] Promtool: `promtool test` adds `--junit` flag to format results. #14506
+* [FEATURE] TSDB: Add `delayed-compaction` feature flag, for people running many Prometheus to randomize timing. #12532
+* [ENHANCEMENT] OTLP receiver: Warn on exponential histograms with zero count and non-zero sum. #14706
+* [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612
+* [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379
+* [ENHANCEMENT] Remote-Write: Don't reshard if we haven't successfully sent a sample since last update. #14450
+* [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477
+* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655, #14985
+* [ENHANCEMENT] PromQL: Clarify error message logged when Go runtime panic occurs during query evaluation. #14621
+* [ENHANCEMENT] PromQL: Use Kahan summation for better accuracy in `avg` and `avg_over_time`. #14413
+* [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816
+* [ENHANCEMENT] API: Support multiple listening addresses. #14665
+* [ENHANCEMENT] TSDB: Backward compatibility with upcoming index v3. #14934
+* [PERF] TSDB: Query in-order and out-of-order series together. #14354, #14693, #14714, #14831, #14874, #14948, #15120
+* [PERF] TSDB: Streamline reading of overlapping out-of-order head chunks. #14729
+* [BUGFIX] PromQL: make sort_by_label stable. #14985
+* [BUGFIX] SD: Fix dropping targets (with feature flag `new-service-discovery-manager`). #13147
+* [BUGFIX] SD: Stop storing stale targets (with feature flag `new-service-discovery-manager`). #13622
+* [BUGFIX] Scraping: exemplars could be dropped in protobuf scraping. #14810
+* [BUGFIX] Remote-Write: fix metadata sending for experimental Remote-Write V2. #14766
+* [BUGFIX] Remote-Write: Return 4xx not 5xx when timeseries has duplicate label. #14716
+* [BUGFIX] Experimental Native Histograms: many fixes for incorrect results, panics, warnings. #14513, #14575, #14598, #14609, #14611, #14771, #14821
+* [BUGFIX] TSDB: Only count unknown record types in `record_decode_failures_total` metric. #14042

 ## 2.54.1 / 2024-08-27

-* [BUGFIX] Scraping: allow multiple samples on same series, with explicit timestamps. #14685
+* [BUGFIX] Scraping: allow multiple samples on same series, with explicit timestamps (mixing samples of the same series with and without timestamps is still rejected). #14685
 * [BUGFIX] Docker SD: fix crash in `match_first_network` mode when container is reconnected to a new network. #14654
 * [BUGFIX] PromQL: fix experimental native histograms getting corrupted due to vector selector bug in range queries. #14538
 * [BUGFIX] PromQL: fix experimental native histogram counter reset detection on stale samples. #14514
@@ -90,6 +198,7 @@ This release changes the default for GOGC, the Go runtime control for the trade-
 ## 2.52.0 / 2024-05-07

 * [CHANGE] TSDB: Fix the predicate checking for blocks which are beyond the retention period to include the ones right at the retention boundary. #9633
+* [CHANGE] Scrape: Multiple samples (even with different timestamps) are treated as duplicates during one scrape.
 * [FEATURE] Kubernetes SD: Add a new metric `prometheus_sd_kubernetes_failures_total` to track failed requests to Kubernetes API. #13554
 * [FEATURE] Kubernetes SD: Add node and zone metadata labels when using the endpointslice role. #13935
 * [FEATURE] Azure SD/Remote Write: Allow usage of Azure authorization SDK. #13099
@@ -103,7 +212,7 @@
 * [ENHANCEMENT] TSDB: Pause regular block compactions if the head needs to be compacted (prioritize head as it increases memory consumption). #13754
 * [ENHANCEMENT] Observability: Improved logging during signal handling termination. #13772
 * [ENHANCEMENT] Observability: All log lines for drop series use "num_dropped" key consistently. #13823
-* [ENHANCEMENT] Observability: Log chunk snapshot and mmaped chunk replay duration during WAL replay. #13838
+* [ENHANCEMENT] Observability: Log chunk snapshot and mmapped chunk replay duration during WAL replay. #13838
 * [ENHANCEMENT] Observability: Log if the block is being created from WBL during compaction. #13846
 * [BUGFIX] PromQL: Fix inaccurate sample number statistic when querying histograms. #13667
 * [BUGFIX] PromQL: Fix `histogram_stddev` and `histogram_stdvar` for cases where the histogram has negative buckets. #13852
@@ -640,7 +749,7 @@ The binaries published with this release are built with Go1.17.8 to avoid [CVE-2
 ## 2.33.0 / 2022-01-29

-* [CHANGE] PromQL: Promote negative offset and `@` modifer to stable features. #10121
+* [CHANGE] PromQL: Promote negative offset and `@` modifier to stable features. #10121
 * [CHANGE] Web: Promote remote-write-receiver to stable. #10119
 * [FEATURE] Config: Add `stripPort` template function. #10002
 * [FEATURE] Promtool: Add cardinality analysis to `check metrics`, enabled by flag `--extended`. #10045
@@ -877,7 +986,7 @@ This vulnerability has been reported by Aaron Devaney from MDSec.
 * [ENHANCEMENT] Templating: Enable parsing strings in `humanize` functions. #8682
 * [BUGFIX] UI: Provide errors instead of blank page on TSDB Status Page. #8654 #8659
 * [BUGFIX] TSDB: Do not panic when writing very large records to the WAL. #8790
-* [BUGFIX] TSDB: Avoid panic when mmaped memory is referenced after the file is closed. #8723
+* [BUGFIX] TSDB: Avoid panic when mmapped memory is referenced after the file is closed. #8723
 * [BUGFIX] Scaleway Discovery: Fix nil pointer dereference. #8737
 * [BUGFIX] Consul Discovery: Restart no longer required after config update with no targets. #8766
@@ -1803,7 +1912,7 @@ information, read the announcement blog post and migration guide.
 ## 1.7.0 / 2017-06-06

 * [CHANGE] Compress remote storage requests and responses with unframed/raw snappy.
-* [CHANGE] Properly ellide secrets in config.
+* [CHANGE] Properly elide secrets in config.
 * [FEATURE] Add OpenStack service discovery.
 * [FEATURE] Add ability to limit Kubernetes service discovery to certain namespaces.
 * [FEATURE] Add metric for discovered number of Alertmanagers.
diff --git a/Dockerfile b/Dockerfile
index b47f77dcd6..1e46a62f95 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,27 +2,23 @@ ARG ARCH="amd64"
 ARG OS="linux"
 FROM quay.io/prometheus/busybox-${OS}-${ARCH}:latest
 LABEL maintainer="The Prometheus Authors <prometheus-developers@googlegroups.com>"
+LABEL org.opencontainers.image.source="https://github.com/prometheus/prometheus"

 ARG ARCH="amd64"
 ARG OS="linux"
 COPY .build/${OS}-${ARCH}/prometheus /bin/prometheus
 COPY .build/${OS}-${ARCH}/promtool /bin/promtool
 COPY documentation/examples/prometheus.yml /etc/prometheus/prometheus.yml
-COPY console_libraries/ /usr/share/prometheus/console_libraries/
-COPY consoles/ /usr/share/prometheus/consoles/
 COPY LICENSE /LICENSE
 COPY NOTICE /NOTICE
 COPY npm_licenses.tar.bz2 /npm_licenses.tar.bz2

 WORKDIR /prometheus
-RUN ln -s /usr/share/prometheus/console_libraries /usr/share/prometheus/consoles/ /etc/prometheus/ && \
-    chown -R nobody:nobody /etc/prometheus /prometheus
+RUN chown -R nobody:nobody /etc/prometheus /prometheus

 USER       nobody
 EXPOSE     9090
 VOLUME     [ "/prometheus" ]
 ENTRYPOINT [ "/bin/prometheus" ]
 CMD        [ "--config.file=/etc/prometheus/prometheus.yml", \
-             "--storage.tsdb.path=/prometheus", \
-             "--web.console.libraries=/usr/share/prometheus/console_libraries", \
-             "--web.console.templates=/usr/share/prometheus/consoles" ]
+             "--storage.tsdb.path=/prometheus" ]
diff --git a/MAINTAINERS.md b/MAINTAINERS.md
index 3661ddaa0a..de3f3c73b7 100644
--- a/MAINTAINERS.md
+++ b/MAINTAINERS.md
@@ -2,7 +2,6 @@ General maintainers:

 * Bryan Boreham (bjboreham@gmail.com / @bboreham)
-* Levi Harrison (levi@leviharrison.dev / @LeviHarrison)
 * Ayoub Mrini (ayoubmrini424@gmail.com / @machine424)
 * Julien Pivotto (roidelapluie@prometheus.io / @roidelapluie)

@@ -13,13 +12,12 @@ Maintainers for specific parts of the codebase:
 * `k8s`: Frederic Branczyk ( / @brancz)
 * `documentation`
   * `prometheus-mixin`: Matthias Loibl ( / @metalmatze)
-* `model/histogram` and other code related to native histograms: Björn Rabenstein ( / @beorn7),
+* `model/histogram` and other code related to native histograms: Björn Rabenstein ( / @beorn7),
 George Krajcsovits ( / @krajorama)
 * `storage`
   * `remote`: Callum Styan ( / @cstyan), Bartłomiej Płotka ( / @bwplotka), Tom Wilkie (tom.wilkie@gmail.com / @tomwilkie), Nicolás Pazos ( / @npazosmendez), Alex Greenbank ( / @alexgreenbank)
-  * `otlptranslator`: Arve Knudsen ( / @aknuds1), Jesús Vázquez ( / @jesusvazquez)
+  * `otlptranslator`: Arthur Silva Sens ( / @ArthurSens), Arve Knudsen ( / @aknuds1), Jesús Vázquez ( / @jesusvazquez)
   * `tsdb`: Ganesh Vernekar ( / @codesome), Bartłomiej Płotka ( / @bwplotka), Jesús Vázquez ( / @jesusvazquez)
-  * `agent`: Robert Fratto ( / @rfratto)
 * `web`
   * `ui`: Julius Volz ( / @juliusv)
   * `module`: Augustin Husson ( @nexucis)
diff --git a/Makefile b/Makefile
index f2bb3fcb7a..0b5935de00 100644
--- a/Makefile
+++ b/Makefile
@@ -30,6 +30,11 @@ include Makefile.common

 DOCKER_IMAGE_NAME ?= prometheus

+# Only build UI if PREBUILT_ASSETS_STATIC_DIR is not set
+ifdef PREBUILT_ASSETS_STATIC_DIR
+	SKIP_UI_BUILD = true
+endif
+
 .PHONY: update-npm-deps
 update-npm-deps:
 	@echo ">> updating npm dependencies"
@@ -42,13 +47,17 @@ upgrade-npm-deps:

 .PHONY: ui-bump-version
 ui-bump-version:
-	version=$$(sed s/2/0/ < VERSION) && ./scripts/ui_release.sh --bump-version "$${version}"
+	version=$$(./scripts/get_module_version.sh) && ./scripts/ui_release.sh --bump-version "$${version}"
 	cd web/ui && npm install
 	git add "./web/ui/package-lock.json" "./**/package.json"

 .PHONY: ui-install
 ui-install:
 	cd $(UI_PATH) && npm install
+	# The old React app has been separated from the npm workspaces setup to avoid
+	# issues with conflicting dependencies. This is a temporary solution until the
+	# new Mantine-based UI is fully integrated and the old app can be removed.
+	cd $(UI_PATH)/react-app && npm install

 .PHONY: ui-build
 ui-build:
@@ -65,10 +74,30 @@ ui-test:

 .PHONY: ui-lint
 ui-lint:
 	cd $(UI_PATH) && npm run lint
+	# The old React app has been separated from the npm workspaces setup to avoid
+	# issues with conflicting dependencies. This is a temporary solution until the
+	# new Mantine-based UI is fully integrated and the old app can be removed.
+	cd $(UI_PATH)/react-app && npm run lint

 .PHONY: assets
+ifndef SKIP_UI_BUILD
 assets: ui-install ui-build

+.PHONY: npm_licenses
+npm_licenses: ui-install
+	@echo ">> bundling npm licenses"
+	rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses
+	ln -s . npm_licenses
+	find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=-
+	rm -f npm_licenses
+else
+assets:
+	@echo '>> skipping assets build, pre-built assets provided'
+
+npm_licenses:
+	@echo '>> skipping assets npm licenses, pre-built assets provided'
+endif
+
 .PHONY: assets-compress
 assets-compress: assets
 	@echo '>> compressing assets'
@@ -117,14 +146,6 @@ else
 test: check-generated-parser common-test ui-build-module ui-test ui-lint check-go-mod-version
 endif

-.PHONY: npm_licenses
-npm_licenses: ui-install
-	@echo ">> bundling npm licenses"
-	rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses
-	ln -s . npm_licenses
-	find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=-
-	rm -f npm_licenses
-
 .PHONY: tarball
 tarball: npm_licenses common-tarball
diff --git a/Makefile.common b/Makefile.common
index 34d65bb56d..fc47bdbb21 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -61,7 +61,7 @@ PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_
 SKIP_GOLANGCI_LINT :=
 GOLANGCI_LINT :=
 GOLANGCI_LINT_OPTS ?=
-GOLANGCI_LINT_VERSION ?= v1.60.2
+GOLANGCI_LINT_VERSION ?= v1.62.0
 # golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64.
 # windows isn't included here because of the path separator being different.
 ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))
@@ -275,3 +275,9 @@ $(1)_precheck:
 		exit 1; \
 	fi
 endef
+
+govulncheck: install-govulncheck
+	govulncheck ./...
+
+install-govulncheck:
+	command -v govulncheck > /dev/null || go install golang.org/x/vuln/cmd/govulncheck@latest
diff --git a/README.md b/README.md
index df974e1097..63e5b13ba1 100644
--- a/README.md
+++ b/README.md
@@ -115,7 +115,7 @@ The Makefile provides several targets:
 Prometheus is bundled with many service discovery plugins.
 When building Prometheus from source, you can edit the [plugins.yml](./plugins.yml)
-file to disable some service discoveries. The file is a yaml-formated list of go
+file to disable some service discoveries. The file is a yaml-formatted list of go
 import path that will be built into the Prometheus binary.

 After you have changed the file, you
@@ -158,8 +158,19 @@ This is experimental.
 ### Prometheus code base

 In order to comply with [go mod](https://go.dev/ref/mod#versions) rules,
-Prometheus release number do not exactly match Go module releases. For the
-Prometheus v2.y.z releases, we are publishing equivalent v0.y.z tags.
+Prometheus release numbers do not exactly match Go module releases.
+
+For the
+Prometheus v3.y.z releases, we are publishing equivalent v0.3y.z tags. The y in v0.3y.z is always padded to two digits, with a leading zero if needed.
+
+Therefore, a user that would want to use Prometheus v3.0.0 as a library could do:
+
+```shell
+go get github.com/prometheus/prometheus@v0.300.0
+```
+
+For the
+Prometheus v2.y.z releases, we published the equivalent v0.y.z tags.

 Therefore, a user that would want to use Prometheus v2.35.0 as a library could do:

@@ -177,7 +188,7 @@ For more information on building, running, and developing on the React-based UI,

 ## More information

-* Godoc documentation is available via [pkg.go.dev](https://pkg.go.dev/github.com/prometheus/prometheus). Due to peculiarities of Go Modules, v2.x.y will be displayed as v0.x.y.
+* Godoc documentation is available via [pkg.go.dev](https://pkg.go.dev/github.com/prometheus/prometheus). Due to peculiarities of Go Modules, v3.y.z will be displayed as v0.3y.z (the y in v0.3y.z is always padded to two digits, with a leading zero if needed), while v2.y.z will be displayed as v0.y.z.
 * See the [Community page](https://prometheus.io/community) for how to reach the Prometheus developers and users on various communication channels.

 ## Contributing
diff --git a/RELEASE.md b/RELEASE.md
index 0d3f7456cd..8e78a6a3ec 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -59,6 +59,7 @@ Release cadence of first pre-releases being cut is 6 weeks.
 | v2.52     | 2024-04-22 | Arthur Silva Sens (GitHub: @ArthurSens) |
 | v2.53 LTS | 2024-06-03 | George Krajcsovits (GitHub: @krajorama) |
 | v2.54     | 2024-07-17 | Bryan Boreham (GitHub: @bboreham)       |
+| v2.55     | 2024-09-17 | Bryan Boreham (GitHub: @bboreham)       |

 If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.
@@ -187,7 +188,7 @@ the Prometheus server, we use major version zero releases for the libraries.
 Tag the new library release via the following commands:

 ```bash
-tag="v$(sed s/2/0/ < VERSION)"
+tag="v$(./scripts/get_module_version.sh)"
 git tag -s "${tag}" -m "${tag}"
 git push origin "${tag}"
 ```
diff --git a/VERSION b/VERSION
index 3a40665f50..4a36342fca 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.54.1
+3.0.0
diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go
index 5528358337..f8a4ecd8c9 100644
--- a/cmd/prometheus/main.go
+++ b/cmd/prometheus/main.go
@@ -18,15 +18,16 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"log/slog"
 	"math"
 	"math/bits"
 	"net"
 	"net/http"
-	_ "net/http/pprof" // Comment this line to disable pprof endpoint.
 	"net/url"
 	"os"
 	"os/signal"
 	"path/filepath"
+	goregexp "regexp" //nolint:depguard // The Prometheus client library requires us to pass a regexp from this package.
"runtime" "runtime/debug" "strconv" @@ -38,8 +39,6 @@ import ( "github.com/KimMachineGun/automemlimit/memlimit" "github.com/alecthomas/kingpin/v2" "github.com/alecthomas/units" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/regexp" "github.com/mwitkow/go-conntrack" "github.com/oklog/run" @@ -47,8 +46,8 @@ import ( "github.com/prometheus/client_golang/prometheus/collectors" versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version" "github.com/prometheus/common/model" - "github.com/prometheus/common/promlog" - promlogflag "github.com/prometheus/common/promlog/flag" + "github.com/prometheus/common/promslog" + promslogflag "github.com/prometheus/common/promslog/flag" "github.com/prometheus/common/version" toolkit_web "github.com/prometheus/exporter-toolkit/web" "go.uber.org/atomic" @@ -58,8 +57,6 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/discovery/legacymanager" - "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -79,10 +76,50 @@ import ( "github.com/prometheus/prometheus/tsdb/wlog" "github.com/prometheus/prometheus/util/documentcli" "github.com/prometheus/prometheus/util/logging" + "github.com/prometheus/prometheus/util/notifications" prom_runtime "github.com/prometheus/prometheus/util/runtime" "github.com/prometheus/prometheus/web" ) +// klogv1OutputCallDepth is the stack depth where we can find the origin of this call. +const klogv1OutputCallDepth = 6 + +// klogv1DefaultPrefixLength is the length of the log prefix that we have to strip out. +const klogv1DefaultPrefixLength = 53 + +// klogv1Writer is used in SetOutputBySeverity call below to redirect any calls +// to klogv1 to end up in klogv2. +// This is a hack to support klogv1 without use of go-kit/log. It is inspired +// by klog's upstream klogv1/v2 coexistence example: +// https://github.com/kubernetes/klog/blob/main/examples/coexist_klog_v1_and_v2/coexist_klog_v1_and_v2.go +type klogv1Writer struct{} + +// Write redirects klogv1 calls to klogv2. +// This is a hack to support klogv1 without use of go-kit/log. It is inspired +// by klog's upstream klogv1/v2 coexistence example: +// https://github.com/kubernetes/klog/blob/main/examples/coexist_klog_v1_and_v2/coexist_klog_v1_and_v2.go +func (kw klogv1Writer) Write(p []byte) (n int, err error) { + if len(p) < klogv1DefaultPrefixLength { + klogv2.InfoDepth(klogv1OutputCallDepth, string(p)) + return len(p), nil + } + + switch p[0] { + case 'I': + klogv2.InfoDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + case 'W': + klogv2.WarningDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + case 'E': + klogv2.ErrorDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + case 'F': + klogv2.FatalDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + default: + klogv2.InfoDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:])) + } + + return len(p), nil +} + var ( appName = "prometheus" @@ -103,6 +140,8 @@ var ( ) func init() { + // This can be removed when the default validation scheme in common is updated. 
+	model.NameValidationScheme = model.UTF8Validation
 	prometheus.MustRegister(versioncollector.NewCollector(strings.ReplaceAll(appName, "-", "_")))

 	var err error
@@ -135,123 +174,108 @@ func agentOnlyFlag(app *kingpin.Application, name, help string) *kingpin.FlagCla
 type flagConfig struct {
 	configFile string

-	agentStoragePath    string
-	serverStoragePath   string
-	notifier            notifier.Options
-	forGracePeriod      model.Duration
-	outageTolerance     model.Duration
-	resendDelay         model.Duration
-	maxConcurrentEvals  int64
-	web                 web.Options
-	scrape              scrape.Options
-	tsdb                tsdbOptions
-	agent               agentOptions
-	lookbackDelta       model.Duration
-	webTimeout          model.Duration
-	queryTimeout        model.Duration
-	queryConcurrency    int
-	queryMaxSamples     int
-	RemoteFlushDeadline model.Duration
-	nameEscapingScheme  string
+	agentStoragePath            string
+	serverStoragePath           string
+	notifier                    notifier.Options
+	forGracePeriod              model.Duration
+	outageTolerance             model.Duration
+	resendDelay                 model.Duration
+	maxConcurrentEvals          int64
+	web                         web.Options
+	scrape                      scrape.Options
+	tsdb                        tsdbOptions
+	agent                       agentOptions
+	lookbackDelta               model.Duration
+	webTimeout                  model.Duration
+	queryTimeout                model.Duration
+	queryConcurrency            int
+	queryMaxSamples             int
+	RemoteFlushDeadline         model.Duration
+	maxNotificationsSubscribers int

-	featureList   []string
-	memlimitRatio float64
+	enableAutoReload   bool
+	autoReloadInterval model.Duration
+
+	maxprocsEnable bool
+	memlimitEnable bool
+	memlimitRatio  float64
+
+	featureList []string
 	// These options are extracted from featureList
 	// for ease of use.
-	enableExpandExternalLabels bool
-	enableNewSDManager         bool
-	enablePerStepStats         bool
-	enableAutoGOMAXPROCS       bool
-	enableAutoGOMEMLIMIT       bool
-	enableConcurrentRuleEval   bool
+	enablePerStepStats       bool
+	enableConcurrentRuleEval bool

 	prometheusURL   string
 	corsRegexString string

-	promlogConfig promlog.Config
-
 	promqlEnableDelayedNameRemoval bool
+
+	promslogConfig promslog.Config
 }

 // setFeatureListOptions sets the corresponding options from the featureList.
-func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
+func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error {
 	for _, f := range c.featureList {
 		opts := strings.Split(f, ",")
 		for _, o := range opts {
 			switch o {
-			case "remote-write-receiver":
-				c.web.EnableRemoteWriteReceiver = true
-				level.Warn(logger).Log("msg", "Remote write receiver enabled via feature flag remote-write-receiver. This is DEPRECATED. Use --web.enable-remote-write-receiver.")
-			case "otlp-write-receiver":
-				c.web.EnableOTLPWriteReceiver = true
-				level.Info(logger).Log("msg", "Experimental OTLP write receiver enabled")
-			case "expand-external-labels":
-				c.enableExpandExternalLabels = true
-				level.Info(logger).Log("msg", "Experimental expand-external-labels enabled")
 			case "exemplar-storage":
 				c.tsdb.EnableExemplarStorage = true
-				level.Info(logger).Log("msg", "Experimental in-memory exemplar storage enabled")
+				logger.Info("Experimental in-memory exemplar storage enabled")
 			case "memory-snapshot-on-shutdown":
 				c.tsdb.EnableMemorySnapshotOnShutdown = true
-				level.Info(logger).Log("msg", "Experimental memory snapshot on shutdown enabled")
+				logger.Info("Experimental memory snapshot on shutdown enabled")
 			case "extra-scrape-metrics":
 				c.scrape.ExtraMetrics = true
-				level.Info(logger).Log("msg", "Experimental additional scrape metrics enabled")
+				logger.Info("Experimental additional scrape metrics enabled")
 			case "metadata-wal-records":
 				c.scrape.AppendMetadata = true
-				level.Info(logger).Log("msg", "Experimental metadata records in WAL enabled, required for remote write 2.0")
-			case "new-service-discovery-manager":
-				c.enableNewSDManager = true
-				level.Info(logger).Log("msg", "Experimental service discovery manager")
-			case "agent":
-				agentMode = true
-				level.Info(logger).Log("msg", "Experimental agent mode enabled.")
+				logger.Info("Experimental metadata records in WAL enabled, required for remote write 2.0")
 			case "promql-per-step-stats":
 				c.enablePerStepStats = true
-				level.Info(logger).Log("msg", "Experimental per-step statistics reporting")
-			case "auto-gomaxprocs":
-				c.enableAutoGOMAXPROCS = true
-				level.Info(logger).Log("msg", "Automatically set GOMAXPROCS to match Linux container CPU quota")
-			case "auto-gomemlimit":
-				c.enableAutoGOMEMLIMIT = true
-				level.Info(logger).Log("msg", "Automatically set GOMEMLIMIT to match Linux container or system memory limit")
+				logger.Info("Experimental per-step statistics reporting")
+			case "auto-reload-config":
+				c.enableAutoReload = true
+				if s := time.Duration(c.autoReloadInterval).Seconds(); s > 0 && s < 1 {
+					c.autoReloadInterval, _ = model.ParseDuration("1s")
+				}
+				logger.Info("Enabled automatic configuration file reloading. Checking for configuration changes every", "interval", c.autoReloadInterval)
 			case "concurrent-rule-eval":
 				c.enableConcurrentRuleEval = true
-				level.Info(logger).Log("msg", "Experimental concurrent rule evaluation enabled.")
-			case "no-default-scrape-port":
-				c.scrape.NoDefaultPort = true
-				level.Info(logger).Log("msg", "No default port will be appended to scrape targets' addresses.")
+				logger.Info("Experimental concurrent rule evaluation enabled.")
 			case "promql-experimental-functions":
 				parser.EnableExperimentalFunctions = true
-				level.Info(logger).Log("msg", "Experimental PromQL functions enabled.")
+				logger.Info("Experimental PromQL functions enabled.")
 			case "native-histograms":
 				c.tsdb.EnableNativeHistograms = true
 				c.scrape.EnableNativeHistogramsIngestion = true
 				// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
 				config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
 				config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
-				level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
+				logger.Info("Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
+			case "ooo-native-histograms":
+				c.tsdb.EnableOOONativeHistograms = true
+				logger.Info("Experimental out-of-order native histogram ingestion enabled. This will only take effect if OutOfOrderTimeWindow is > 0 and if EnableNativeHistograms = true")
 			case "created-timestamp-zero-ingestion":
 				c.scrape.EnableCreatedTimestampZeroIngestion = true
 				// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
 				config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
 				config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
-				level.Info(logger).Log("msg", "Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
+				logger.Info("Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
 			case "delayed-compaction":
 				c.tsdb.EnableDelayedCompaction = true
-				level.Info(logger).Log("msg", "Experimental delayed compaction is enabled.")
+				logger.Info("Experimental delayed compaction is enabled.")
 			case "promql-delayed-name-removal":
 				c.promqlEnableDelayedNameRemoval = true
-				level.Info(logger).Log("msg", "Experimental PromQL delayed name removal enabled.")
-			case "utf8-names":
-				model.NameValidationScheme = model.UTF8Validation
-				level.Info(logger).Log("msg", "Experimental UTF-8 support enabled")
+				logger.Info("Experimental PromQL delayed name removal enabled.")
 			case "":
 				continue
-			case "promql-at-modifier", "promql-negative-offset":
-				level.Warn(logger).Log("msg", "This option for --enable-feature is now permanently enabled and therefore a no-op.", "option", o)
+			case "old-ui":
+				c.web.UseOldUI = true
+				logger.Info("Serving previous version of the Prometheus web UI.")
 			default:
-				level.Warn(logger).Log("msg", "Unknown option for --enable-feature", "option", o)
+				logger.Warn("Unknown option for --enable-feature", "option", o)
 			}
 		}
 	}
@@ -265,11 +289,6 @@ func main() {
 		runtime.SetMutexProfileFraction(20)
 	}

-	var (
-		oldFlagRetentionDuration model.Duration
-		newFlagRetentionDuration model.Duration
-	)
-
 	// Unregister the default GoCollector, and reregister with our defaults.
 	if prometheus.Unregister(collectors.NewGoCollector()) {
 		prometheus.MustRegister(
@@ -277,6 +296,7 @@
 			collectors.WithGoCollectorRuntimeMetrics(
 				collectors.MetricsGC,
 				collectors.MetricsScheduler,
+				collectors.GoRuntimeMetricsRule{Matcher: goregexp.MustCompile(`^/sync/mutex/wait/total:seconds$`)},
 			),
 		),
 	)
@@ -290,7 +310,7 @@
 			Registerer: prometheus.DefaultRegisterer,
 			Gatherer:   prometheus.DefaultGatherer,
 		},
-		promlogConfig: promlog.Config{},
+		promslogConfig: promslog.Config{},
 	}

 	a := kingpin.New(filepath.Base(os.Args[0]), "The Prometheus monitoring server").UsageWriter(os.Stdout)
@@ -302,9 +322,16 @@
 	a.Flag("config.file", "Prometheus configuration file path.").
Default("prometheus.yml").StringVar(&cfg.configFile) + a.Flag("config.auto-reload-interval", "Specifies the interval for checking and automatically reloading the Prometheus configuration file upon detecting changes."). + Default("30s").SetValue(&cfg.autoReloadInterval) + a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry. Can be repeated."). Default("0.0.0.0:9090").StringsVar(&cfg.web.ListenAddresses) + a.Flag("auto-gomaxprocs", "Automatically set GOMAXPROCS to match Linux container CPU quota"). + Default("true").BoolVar(&cfg.maxprocsEnable) + a.Flag("auto-gomemlimit", "Automatically set GOMEMLIMIT to match Linux container or system memory limit"). + Default("true").BoolVar(&cfg.memlimitEnable) a.Flag("auto-gomemlimit.ratio", "The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory"). Default("0.9").FloatVar(&cfg.memlimitRatio) @@ -320,6 +347,9 @@ func main() { a.Flag("web.max-connections", "Maximum number of simultaneous connections across all listeners."). Default("512").IntVar(&cfg.web.MaxConnections) + a.Flag("web.max-notifications-subscribers", "Limits the maximum number of subscribers that can concurrently receive live notifications. If the limit is reached, new subscription requests will be denied until existing connections close."). + Default("16").IntVar(&cfg.maxNotificationsSubscribers) + a.Flag("web.external-url", "The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically."). PlaceHolder("").StringVar(&cfg.prometheusURL) @@ -346,6 +376,9 @@ func main() { a.Flag("web.remote-write-receiver.accepted-protobuf-messages", fmt.Sprintf("List of the remote write protobuf messages to accept when receiving the remote writes. Supported values: %v", supportedRemoteWriteProtoMsgs.String())). Default(supportedRemoteWriteProtoMsgs.Strings()...).SetValue(rwProtoMsgFlagValue(&cfg.web.AcceptRemoteWriteProtoMsgs)) + a.Flag("web.enable-otlp-receiver", "Enable API endpoint accepting OTLP write requests."). + Default("false").BoolVar(&cfg.web.EnableOTLPWriteReceiver) + a.Flag("web.console.templates", "Path to the console template directory, available at /consoles."). Default("consoles").StringVar(&cfg.web.ConsoleTemplatesPath) @@ -376,11 +409,8 @@ func main() { "Size at which to split the tsdb WAL segment files. Example: 100MB"). Hidden().PlaceHolder("").BytesVar(&cfg.tsdb.WALSegmentSize) - serverOnlyFlag(a, "storage.tsdb.retention", "[DEPRECATED] How long to retain samples in storage. This flag has been deprecated, use \"storage.tsdb.retention.time\" instead."). - SetValue(&oldFlagRetentionDuration) - - serverOnlyFlag(a, "storage.tsdb.retention.time", "How long to retain samples in storage. When this flag is set it overrides \"storage.tsdb.retention\". If neither this flag nor \"storage.tsdb.retention\" nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+". Units Supported: y, w, d, h, m, s, ms."). - SetValue(&newFlagRetentionDuration) + serverOnlyFlag(a, "storage.tsdb.retention.time", "How long to retain samples in storage. If neither this flag nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+". Units Supported: y, w, d, h, m, s, ms."). 
+ SetValue(&cfg.tsdb.RetentionDuration) serverOnlyFlag(a, "storage.tsdb.retention.size", "Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: \"512MB\". Based on powers-of-2, so 1KB is 1024B."). BytesVar(&cfg.tsdb.MaxBytes) @@ -388,11 +418,6 @@ func main() { serverOnlyFlag(a, "storage.tsdb.no-lockfile", "Do not create lockfile in data directory."). Default("false").BoolVar(&cfg.tsdb.NoLockfile) - // TODO: Remove in Prometheus 3.0. - var b bool - serverOnlyFlag(a, "storage.tsdb.allow-overlapping-blocks", "[DEPRECATED] This flag has no effect. Overlapping blocks are enabled by default now."). - Default("true").Hidden().BoolVar(&b) - serverOnlyFlag(a, "storage.tsdb.allow-overlapping-compaction", "Allow compaction of overlapping blocks. If set to false, TSDB stops vertical compaction and leaves overlapping blocks there. The use case is to let another component handle the compaction of overlapping blocks."). Default("true").Hidden().BoolVar(&cfg.tsdb.EnableOverlappingCompaction) @@ -408,6 +433,9 @@ func main() { serverOnlyFlag(a, "storage.tsdb.samples-per-chunk", "Target number of samples per chunk."). Default("120").Hidden().IntVar(&cfg.tsdb.SamplesPerChunk) + serverOnlyFlag(a, "storage.tsdb.delayed-compaction.max-percent", "Sets the upper limit for the random compaction delay, specified as a percentage of the head chunk range. 100 means the compaction can be delayed by up to the entire head chunk range. Only effective when the delayed-compaction feature flag is enabled."). + Default("10").Hidden().IntVar(&cfg.tsdb.CompactionDelayMaxPercent) + agentOnlyFlag(a, "storage.agent.path", "Base path for metrics storage."). Default("data-agent/").StringVar(&cfg.agentStoragePath) @@ -472,9 +500,6 @@ func main() { serverOnlyFlag(a, "alertmanager.drain-notification-queue-on-shutdown", "Send any outstanding Alertmanager notifications when shutting down. If false, any outstanding Alertmanager notifications will be dropped when shutting down."). Default("true").BoolVar(&cfg.notifier.DrainOnShutdown) - // TODO: Remove in Prometheus 3.0. - alertmanagerTimeout := a.Flag("alertmanager.timeout", "[DEPRECATED] This flag has no effect.").Hidden().String() - serverOnlyFlag(a, "query.lookback-delta", "The maximum lookback duration for retrieving metrics during expression evaluations and federation."). Default("5m").SetValue(&cfg.lookbackDelta) @@ -490,12 +515,12 @@ func main() { a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates."). Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval) - a.Flag("scrape.name-escaping-scheme", `Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots".`).Default(scrape.DefaultNameEscapingScheme.String()).StringVar(&cfg.nameEscapingScheme) - - a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomaxprocs, auto-gomemlimit, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). 
+ a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details."). Default("").StringsVar(&cfg.featureList) - promlogflag.AddFlags(a, &cfg.promlogConfig) + a.Flag("agent", "Run Prometheus in 'Agent mode'.").BoolVar(&agentMode) + + promslogflag.AddFlags(a, &cfg.promslogConfig) a.Flag("write-documentation", "Generate command line documentation. Internal use.").Hidden().Action(func(ctx *kingpin.ParseContext) error { if err := documentcli.GenerateMarkdown(a.Model(), os.Stdout); err != nil { @@ -513,22 +538,19 @@ func main() { os.Exit(2) } - logger := promlog.New(&cfg.promlogConfig) + logger := promslog.New(&cfg.promslogConfig) + slog.SetDefault(logger) + + notifs := notifications.NewNotifications(cfg.maxNotificationsSubscribers, prometheus.DefaultRegisterer) + cfg.web.NotificationsSub = notifs.Sub + cfg.web.NotificationsGetter = notifs.Get + notifs.AddNotification(notifications.StartingUp) if err := cfg.setFeatureListOptions(logger); err != nil { fmt.Fprintln(os.Stderr, fmt.Errorf("Error parsing feature list: %w", err)) os.Exit(1) } - if cfg.nameEscapingScheme != "" { - scheme, err := model.ToEscapingScheme(cfg.nameEscapingScheme) - if err != nil { - fmt.Fprintf(os.Stderr, `Invalid name escaping scheme: %q; Needs to be one of "values", "underscores", or "dots"`, cfg.nameEscapingScheme) - os.Exit(1) - } - model.NameEscapingScheme = scheme - } - if agentMode && len(serverOnlyFlags) > 0 { fmt.Fprintf(os.Stderr, "The following flag(s) can not be used in agent mode: %q", serverOnlyFlags) os.Exit(3) @@ -561,18 +583,14 @@ func main() { os.Exit(2) } - if *alertmanagerTimeout != "" { - level.Warn(logger).Log("msg", "The flag --alertmanager.timeout has no effect and will be removed in the future.") - } - // Throw error for invalid config before starting other components. var cfgFile *config.Config - if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, false, log.NewNopLogger()); err != nil { + if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, promslog.NewNopLogger()); err != nil { absPath, pathErr := filepath.Abs(cfg.configFile) if pathErr != nil { absPath = cfg.configFile } - level.Error(logger).Log("msg", fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "file", absPath, "err", err) + logger.Error(fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "file", absPath, "err", err) os.Exit(2) } if _, err := cfgFile.GetScrapeConfigs(); err != nil { @@ -580,7 +598,7 @@ func main() { if pathErr != nil { absPath = cfg.configFile } - level.Error(logger).Log("msg", fmt.Sprintf("Error loading scrape config files from config (--config.file=%q)", cfg.configFile), "file", absPath, "err", err) + logger.Error(fmt.Sprintf("Error loading scrape config files from config (--config.file=%q)", cfg.configFile), "file", absPath, "err", err) os.Exit(2) } if cfg.tsdb.EnableExemplarStorage { @@ -611,20 +629,9 @@ func main() { cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/") if !agentMode { - // Time retention settings. 
- if oldFlagRetentionDuration != 0 { - level.Warn(logger).Log("deprecation_notice", "'storage.tsdb.retention' flag is deprecated use 'storage.tsdb.retention.time' instead.") - cfg.tsdb.RetentionDuration = oldFlagRetentionDuration - } - - // When the new flag is set it takes precedence. - if newFlagRetentionDuration != 0 { - cfg.tsdb.RetentionDuration = newFlagRetentionDuration - } - if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 { cfg.tsdb.RetentionDuration = defaultRetentionDuration - level.Info(logger).Log("msg", "No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration) + logger.Info("No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration) } // Check for overflows. This limits our max retention to 100y. @@ -634,7 +641,7 @@ func main() { panic(err) } cfg.tsdb.RetentionDuration = y - level.Warn(logger).Log("msg", "Time retention value is too high. Limiting to: "+y.String()) + logger.Warn("Time retention value is too high. Limiting to: " + y.String()) } // Max block size settings. @@ -650,16 +657,19 @@ func main() { cfg.tsdb.MaxBlockDuration = maxBlockDuration } + + // Delayed compaction checks + if cfg.tsdb.EnableDelayedCompaction && (cfg.tsdb.CompactionDelayMaxPercent > 100 || cfg.tsdb.CompactionDelayMaxPercent <= 0) { + logger.Warn("The --storage.tsdb.delayed-compaction.max-percent should have a value between 1 and 100. Using default", "default", tsdb.DefaultCompactionDelayMaxPercent) + cfg.tsdb.CompactionDelayMaxPercent = tsdb.DefaultCompactionDelayMaxPercent + } } noStepSubqueryInterval := &safePromQLNoStepSubqueryInterval{} noStepSubqueryInterval.Set(config.DefaultGlobalConfig.EvaluationInterval) - // Above level 6, the k8s client would log bearer tokens in clear-text. - klog.ClampLevel(6) - klog.SetLogger(log.With(logger, "component", "k8s_client_runtime")) - klogv2.ClampLevel(6) - klogv2.SetLogger(log.With(logger, "component", "k8s_client_runtime")) + klogv2.SetSlogLogger(logger.With("component", "k8s_client_runtime")) + klog.SetOutputBySeverity("INFO", klogv1Writer{}) modeAppName := "Prometheus Server" mode := "server" @@ -668,20 +678,22 @@ func main() { mode = "agent" } - level.Info(logger).Log("msg", "Starting "+modeAppName, "mode", mode, "version", version.Info()) + logger.Info("Starting "+modeAppName, "mode", mode, "version", version.Info()) if bits.UintSize < 64 { - level.Warn(logger).Log("msg", "This Prometheus binary has not been compiled for a 64-bit architecture. Due to virtual memory constraints of 32-bit systems, it is highly recommended to switch to a 64-bit binary of Prometheus.", "GOARCH", runtime.GOARCH) + logger.Warn("This Prometheus binary has not been compiled for a 64-bit architecture. 
Due to virtual memory constraints of 32-bit systems, it is highly recommended to switch to a 64-bit binary of Prometheus.", "GOARCH", runtime.GOARCH) } - level.Info(logger).Log("build_context", version.BuildContext()) - level.Info(logger).Log("host_details", prom_runtime.Uname()) - level.Info(logger).Log("fd_limits", prom_runtime.FdLimits()) - level.Info(logger).Log("vm_limits", prom_runtime.VMLimits()) + logger.Info("operational information", + "build_context", version.BuildContext(), + "host_details", prom_runtime.Uname(), + "fd_limits", prom_runtime.FdLimits(), + "vm_limits", prom_runtime.VMLimits(), + ) var ( localStorage = &readyStorage{stats: tsdb.NewDBStats()} scraper = &readyScrapeManager{} - remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper, cfg.scrape.AppendMetadata) + remoteStorage = remote.NewStorage(logger.With("component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper, cfg.scrape.AppendMetadata) fanoutStorage = storage.NewFanout(logger, localStorage, remoteStorage) ) @@ -689,12 +701,12 @@ func main() { ctxWeb, cancelWeb = context.WithCancel(context.Background()) ctxRule = context.Background() - notifierManager = notifier.NewManager(&cfg.notifier, log.With(logger, "component", "notifier")) + notifierManager = notifier.NewManager(&cfg.notifier, logger.With("component", "notifier")) ctxScrape, cancelScrape = context.WithCancel(context.Background()) ctxNotify, cancelNotify = context.WithCancel(context.Background()) - discoveryManagerScrape discoveryManager - discoveryManagerNotify discoveryManager + discoveryManagerScrape *discovery.Manager + discoveryManagerNotify *discovery.Manager ) // Kubernetes client metrics are used by Kubernetes SD. @@ -704,62 +716,37 @@ func main() { // they are not specific to an SD instance. 
err = discovery.RegisterK8sClientMetricsWithPrometheus(prometheus.DefaultRegisterer) if err != nil { - level.Error(logger).Log("msg", "failed to register Kubernetes client metrics", "err", err) + logger.Error("failed to register Kubernetes client metrics", "err", err) os.Exit(1) } sdMetrics, err := discovery.CreateAndRegisterSDMetrics(prometheus.DefaultRegisterer) if err != nil { - level.Error(logger).Log("msg", "failed to register service discovery metrics", "err", err) + logger.Error("failed to register service discovery metrics", "err", err) os.Exit(1) } - if cfg.enableNewSDManager { - { - discMgr := discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape")) - if discMgr == nil { - level.Error(logger).Log("msg", "failed to create a discovery manager scrape") - os.Exit(1) - } - discoveryManagerScrape = discMgr - } + discoveryManagerScrape = discovery.NewManager(ctxScrape, logger.With("component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape")) + if discoveryManagerScrape == nil { + logger.Error("failed to create a discovery manager scrape") + os.Exit(1) + } - { - discMgr := discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify")) - if discMgr == nil { - level.Error(logger).Log("msg", "failed to create a discovery manager notify") - os.Exit(1) - } - discoveryManagerNotify = discMgr - } - } else { - { - discMgr := legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, legacymanager.Name("scrape")) - if discMgr == nil { - level.Error(logger).Log("msg", "failed to create a discovery manager scrape") - os.Exit(1) - } - discoveryManagerScrape = discMgr - } - - { - discMgr := legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, legacymanager.Name("notify")) - if discMgr == nil { - level.Error(logger).Log("msg", "failed to create a discovery manager notify") - os.Exit(1) - } - discoveryManagerNotify = discMgr - } + discoveryManagerNotify = discovery.NewManager(ctxNotify, logger.With("component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify")) + if discoveryManagerNotify == nil { + logger.Error("failed to create a discovery manager notify") + os.Exit(1) } scrapeManager, err := scrape.NewManager( &cfg.scrape, - log.With(logger, "component", "scrape manager"), + logger.With("component", "scrape manager"), + logging.NewJSONFileLogger, fanoutStorage, prometheus.DefaultRegisterer, ) if err != nil { - level.Error(logger).Log("msg", "failed to create a scrape manager", "err", err) + logger.Error("failed to create a scrape manager", "err", err) os.Exit(1) } @@ -770,16 +757,16 @@ func main() { ruleManager *rules.Manager ) - if cfg.enableAutoGOMAXPROCS { + if cfg.maxprocsEnable { l := func(format string, a ...interface{}) { - level.Info(logger).Log("component", "automaxprocs", "msg", fmt.Sprintf(strings.TrimPrefix(format, "maxprocs: "), a...)) + logger.Info(fmt.Sprintf(strings.TrimPrefix(format, "maxprocs: "), a...), "component", "automaxprocs") } if _, err := maxprocs.Set(maxprocs.Logger(l)); err != nil { - level.Warn(logger).Log("component", "automaxprocs", "msg", "Failed to set GOMAXPROCS automatically", "err", err) + logger.Warn("Failed to set 
GOMAXPROCS automatically", "component", "automaxprocs", "err", err) } } - if cfg.enableAutoGOMEMLIMIT { + if cfg.memlimitEnable { if _, err := memlimit.SetGoMemLimitWithOpts( memlimit.WithRatio(cfg.memlimitRatio), memlimit.WithProvider( @@ -789,17 +776,17 @@ func main() { ), ), ); err != nil { - level.Warn(logger).Log("component", "automemlimit", "msg", "Failed to set GOMEMLIMIT automatically", "err", err) + logger.Warn("automemlimit", "msg", "Failed to set GOMEMLIMIT automatically", "err", err) } } if !agentMode { opts := promql.EngineOpts{ - Logger: log.With(logger, "component", "query engine"), + Logger: logger.With("component", "query engine"), Reg: prometheus.DefaultRegisterer, MaxSamples: cfg.queryMaxSamples, Timeout: time.Duration(cfg.queryTimeout), - ActiveQueryTracker: promql.NewActiveQueryTracker(localStoragePath, cfg.queryConcurrency, log.With(logger, "component", "activeQueryTracker")), + ActiveQueryTracker: promql.NewActiveQueryTracker(localStoragePath, cfg.queryConcurrency, logger.With("component", "activeQueryTracker")), LookbackDelta: time.Duration(cfg.lookbackDelta), NoStepSubqueryIntervalFn: noStepSubqueryInterval.Get, // EnableAtModifier and EnableNegativeOffset have to be @@ -820,7 +807,7 @@ func main() { Context: ctxRule, ExternalURL: cfg.web.ExternalURL, Registerer: prometheus.DefaultRegisterer, - Logger: log.With(logger, "component", "rule manager"), + Logger: logger.With("component", "rule manager"), OutageTolerance: time.Duration(cfg.outageTolerance), ForGracePeriod: time.Duration(cfg.forGracePeriod), ResendDelay: time.Duration(cfg.resendDelay), @@ -871,7 +858,7 @@ func main() { } // Depends on cfg.web.ScrapeManager so needs to be after cfg.web.ScrapeManager = scrapeManager. - webHandler := web.New(log.With(logger, "component", "web"), &cfg.web) + webHandler := web.New(logger.With("component", "web"), &cfg.web) // Monitor outgoing connections on default transport with conntrack. http.DefaultTransport.(*http.Transport).DialContext = conntrack.NewDialContextFunc( @@ -998,18 +985,18 @@ func main() { listeners, err := webHandler.Listeners() if err != nil { - level.Error(logger).Log("msg", "Unable to start web listeners", "err", err) + logger.Error("Unable to start web listener", "err", err) if err := queryEngine.Close(); err != nil { - level.Warn(logger).Log("msg", "Closing query engine failed", "err", err) + logger.Warn("Closing query engine failed", "err", err) } os.Exit(1) } err = toolkit_web.Validate(*webConfig) if err != nil { - level.Error(logger).Log("msg", "Unable to validate web configuration file", "err", err) + logger.Error("Unable to validate web configuration file", "err", err) if err := queryEngine.Close(); err != nil { - level.Warn(logger).Log("msg", "Closing query engine failed", "err", err) + logger.Warn("Closing query engine failed", "err", err) } os.Exit(1) } @@ -1025,21 +1012,22 @@ func main() { // Don't forget to release the reloadReady channel so that waiting blocks can exit normally. 
select { case sig := <-term: - level.Warn(logger).Log("msg", "Received an OS signal, exiting gracefully...", "signal", sig.String()) + logger.Warn("Received an OS signal, exiting gracefully...", "signal", sig.String()) reloadReady.Close() case <-webHandler.Quit(): - level.Warn(logger).Log("msg", "Received termination request via web service, exiting gracefully...") + logger.Warn("Received termination request via web service, exiting gracefully...") case <-cancel: reloadReady.Close() } if err := queryEngine.Close(); err != nil { - level.Warn(logger).Log("msg", "Closing query engine failed", "err", err) + logger.Warn("Closing query engine failed", "err", err) } return nil }, func(err error) { close(cancel) - webHandler.SetReady(false) + webHandler.SetReady(web.Stopping) + notifs.AddNotification(notifications.ShuttingDown) }, ) } @@ -1048,11 +1036,11 @@ func main() { g.Add( func() error { err := discoveryManagerScrape.Run() - level.Info(logger).Log("msg", "Scrape discovery manager stopped") + logger.Info("Scrape discovery manager stopped") return err }, func(err error) { - level.Info(logger).Log("msg", "Stopping scrape discovery manager...") + logger.Info("Stopping scrape discovery manager...") cancelScrape() }, ) @@ -1062,11 +1050,11 @@ func main() { g.Add( func() error { err := discoveryManagerNotify.Run() - level.Info(logger).Log("msg", "Notify discovery manager stopped") + logger.Info("Notify discovery manager stopped") return err }, func(err error) { - level.Info(logger).Log("msg", "Stopping notify discovery manager...") + logger.Info("Stopping notify discovery manager...") cancelNotify() }, ) @@ -1095,7 +1083,7 @@ func main() { <-reloadReady.C err := scrapeManager.Run(discoveryManagerScrape.SyncCh()) - level.Info(logger).Log("msg", "Scrape manager stopped") + logger.Info("Scrape manager stopped") return err }, func(err error) { @@ -1103,7 +1091,7 @@ func main() { // so that it doesn't try to write samples to a closed storage. // We should also wait for rule manager to be fully stopped to ensure // we don't trigger any false positive alerts for rules using absent(). 
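// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: the g.Add pairs throughout
// main() follow the oklog/run actor convention, which the surrounding hunks
// keep intact while swapping the logger. A minimal self-contained example of
// the contract, under the assumption that only the run.Group API shown here
// is needed: execute blocks until done, and interrupt must make it return.
package main

import (
	"context"

	"github.com/oklog/run"
)

func runGroupSketch(ctx context.Context) error {
	var g run.Group
	ctx, cancel := context.WithCancel(ctx)
	g.Add(
		func() error { <-ctx.Done(); return ctx.Err() }, // execute: block until cancelled
		func(error) { cancel() },                        // interrupt: unblock execute
	)
	// Run starts all actors; when the first returns, it interrupts the rest
	// and waits for them before returning the first error.
	return g.Run()
}
// ---------------------------------------------------------------------------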
- level.Info(logger).Log("msg", "Stopping scrape manager...") + logger.Info("Stopping scrape manager...") scrapeManager.Stop() }, ) @@ -1129,6 +1117,23 @@ func main() { hup := make(chan os.Signal, 1) signal.Notify(hup, syscall.SIGHUP) cancel := make(chan struct{}) + + var checksum string + if cfg.enableAutoReload { + checksum, err = config.GenerateChecksum(cfg.configFile) + if err != nil { + logger.Error("Failed to generate initial checksum for configuration file", "err", err) + } + } + + callback := func(success bool) { + if success { + notifs.DeleteNotification(notifications.ConfigurationUnsuccessful) + return + } + notifs.AddNotification(notifications.ConfigurationUnsuccessful) + } + g.Add( func() error { <-reloadReady.C @@ -1136,15 +1141,44 @@ func main() { for { select { case <-hup: - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { - level.Error(logger).Log("msg", "Error reloading config", "err", err) + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + logger.Error("Error reloading config", "err", err) + } else if cfg.enableAutoReload { + checksum, err = config.GenerateChecksum(cfg.configFile) + if err != nil { + logger.Error("Failed to generate checksum during configuration reload", "err", err) + } } case rc := <-webHandler.Reload(): - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { - level.Error(logger).Log("msg", "Error reloading config", "err", err) + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + logger.Error("Error reloading config", "err", err) rc <- err } else { rc <- nil + if cfg.enableAutoReload { + checksum, err = config.GenerateChecksum(cfg.configFile) + if err != nil { + logger.Error("Failed to generate checksum during configuration reload", "err", err) + } + } + } + case <-time.Tick(time.Duration(cfg.autoReloadInterval)): + if !cfg.enableAutoReload { + continue + } + currentChecksum, err := config.GenerateChecksum(cfg.configFile) + if err != nil { + checksum = currentChecksum + logger.Error("Failed to generate checksum during configuration reload", "err", err) + } else if currentChecksum == checksum { + continue + } + logger.Info("Configuration file change detected, reloading the configuration.") + + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil { + logger.Error("Error reloading config", "err", err) + } else { + checksum = currentChecksum } case <-cancel: return nil @@ -1171,14 +1205,15 @@ func main() { return nil } - if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil { + if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, func(bool) {}, reloaders...); err != nil { return fmt.Errorf("error loading config from %q: %w", cfg.configFile, err) } reloadReady.Close() - webHandler.SetReady(true) - level.Info(logger).Log("msg", "Server is ready to receive web requests.") + webHandler.SetReady(web.Ready) + notifs.DeleteNotification(notifications.StartingUp) + logger.Info("Server is ready to receive web requests.") 
<-cancel return nil }, @@ -1193,7 +1228,7 @@ func main() { cancel := make(chan struct{}) g.Add( func() error { - level.Info(logger).Log("msg", "Starting TSDB ...") + logger.Info("Starting TSDB ...") if cfg.tsdb.WALSegmentSize != 0 { if cfg.tsdb.WALSegmentSize < 10*1024*1024 || cfg.tsdb.WALSegmentSize > 256*1024*1024 { return errors.New("flag 'storage.tsdb.wal-segment-size' must be set between 10MB and 256MB") @@ -1212,13 +1247,13 @@ func main() { switch fsType := prom_runtime.Statfs(localStoragePath); fsType { case "NFS_SUPER_MAGIC": - level.Warn(logger).Log("fs_type", fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.") + logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType) default: - level.Info(logger).Log("fs_type", fsType) + logger.Info("filesystem information", "fs_type", fsType) } - level.Info(logger).Log("msg", "TSDB started") - level.Debug(logger).Log("msg", "TSDB options", + logger.Info("TSDB started") + logger.Debug("TSDB options", "MinBlockDuration", cfg.tsdb.MinBlockDuration, "MaxBlockDuration", cfg.tsdb.MaxBlockDuration, "MaxBytes", cfg.tsdb.MaxBytes, @@ -1237,7 +1272,7 @@ func main() { }, func(err error) { if err := fanoutStorage.Close(); err != nil { - level.Error(logger).Log("msg", "Error stopping storage", "err", err) + logger.Error("Error stopping storage", "err", err) } close(cancel) }, @@ -1249,7 +1284,7 @@ func main() { cancel := make(chan struct{}) g.Add( func() error { - level.Info(logger).Log("msg", "Starting WAL storage ...") + logger.Info("Starting WAL storage ...") if cfg.agent.WALSegmentSize != 0 { if cfg.agent.WALSegmentSize < 10*1024*1024 || cfg.agent.WALSegmentSize > 256*1024*1024 { return errors.New("flag 'storage.agent.wal-segment-size' must be set between 10MB and 256MB") @@ -1268,13 +1303,13 @@ func main() { switch fsType := prom_runtime.Statfs(localStoragePath); fsType { case "NFS_SUPER_MAGIC": - level.Warn(logger).Log("fs_type", fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. + logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. 
Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType) default: - level.Info(logger).Log("fs_type", fsType) + logger.Info("filesystem information", "fs_type", fsType) } - level.Info(logger).Log("msg", "Agent WAL storage started") - level.Debug(logger).Log("msg", "Agent WAL storage options", + logger.Info("Agent WAL storage started") + logger.Debug("Agent WAL storage options", "WALSegmentSize", cfg.agent.WALSegmentSize, "WALCompression", cfg.agent.WALCompression, "StripeSize", cfg.agent.StripeSize, @@ -1292,7 +1327,7 @@ func main() { }, func(e error) { if err := fanoutStorage.Close(); err != nil { - level.Error(logger).Log("msg", "Error stopping storage", "err", err) + logger.Error("Error stopping storage", "err", err) } close(cancel) }, @@ -1326,7 +1361,7 @@ func main() { <-reloadReady.C notifierManager.Run(discoveryManagerNotify.SyncCh()) - level.Info(logger).Log("msg", "Notifier manager stopped") + logger.Info("Notifier manager stopped") return nil }, func(err error) { @@ -1334,17 +1369,19 @@ func main() { }, ) } - if err := g.Run(); err != nil { - level.Error(logger).Log("err", err) - os.Exit(1) - } - level.Info(logger).Log("msg", "See you next time!") + func() { // This function exists so the top of the stack is named 'main.main.funcxxx' and not 'oklog'. + if err := g.Run(); err != nil { + logger.Error("Fatal error", "err", err) + os.Exit(1) + } + }() + logger.Info("See you next time!") } -func openDBWithMetrics(dir string, logger log.Logger, reg prometheus.Registerer, opts *tsdb.Options, stats *tsdb.DBStats) (*tsdb.DB, error) { +func openDBWithMetrics(dir string, logger *slog.Logger, reg prometheus.Registerer, opts *tsdb.Options, stats *tsdb.DBStats) (*tsdb.DB, error) { db, err := tsdb.Open( dir, - log.With(logger, "component", "tsdb"), + logger.With("component", "tsdb"), reg, opts, stats, @@ -1397,21 +1434,23 @@ type reloader struct { reloader func(*config.Config) error } -func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage bool, logger log.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, rls ...reloader) (err error) { +func reloadConfig(filename string, enableExemplarStorage bool, logger *slog.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, callback func(bool), rls ...reloader) (err error) { start := time.Now() - timings := []interface{}{} - level.Info(logger).Log("msg", "Loading configuration file", "filename", filename) + timingsLogger := logger + logger.Info("Loading configuration file", "filename", filename) defer func() { if err == nil { configSuccess.Set(1) configSuccessTime.SetToCurrentTime() + callback(true) } else { configSuccess.Set(0) + callback(false) } }() - conf, err := config.LoadFile(filename, agentMode, expandExternalLabels, logger) + conf, err := config.LoadFile(filename, agentMode, logger) if err != nil { return fmt.Errorf("couldn't load configuration (--config.file=%q): %w", filename, err) } @@ -1426,10 +1465,10 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b for _, rl := range rls { rstart := time.Now() if err := rl.reloader(conf); err != nil { - level.Error(logger).Log("msg", "Failed to apply configuration", "err", err) + logger.Error("Failed to apply configuration", "err", err) failed = true } - timings = append(timings, rl.name, time.Since(rstart)) + timingsLogger = timingsLogger.With(rl.name, time.Since(rstart)) } if failed { return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", 
filename) @@ -1437,7 +1476,7 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b oldGoGC := debug.SetGCPercent(conf.Runtime.GoGC) if oldGoGC != conf.Runtime.GoGC { - level.Info(logger).Log("msg", "updated GOGC", "old", oldGoGC, "new", conf.Runtime.GoGC) + logger.Info("updated GOGC", "old", oldGoGC, "new", conf.Runtime.GoGC) } // Write the new setting out to the ENV var for runtime API output. if conf.Runtime.GoGC >= 0 { @@ -1447,8 +1486,7 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b } noStepSuqueryInterval.Set(conf.GlobalConfig.EvaluationInterval) - l := []interface{}{"msg", "Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start)} - level.Info(logger).Log(append(l, timings...)...) + timingsLogger.Info("Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start)) return nil } @@ -1602,6 +1640,9 @@ func (s *readyStorage) Appender(ctx context.Context) storage.Appender { type notReadyAppender struct{} +// SetOptions does nothing in this appender implementation. +func (n notReadyAppender) SetOptions(opts *storage.AppendOptions) {} + func (n notReadyAppender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { return 0, tsdb.ErrNotReady } @@ -1614,6 +1655,10 @@ func (n notReadyAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels return 0, tsdb.ErrNotReady } +func (n notReadyAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + return 0, tsdb.ErrNotReady +} + func (n notReadyAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { return 0, tsdb.ErrNotReady } @@ -1752,7 +1797,9 @@ type tsdbOptions struct { EnableMemorySnapshotOnShutdown bool EnableNativeHistograms bool EnableDelayedCompaction bool + CompactionDelayMaxPercent int EnableOverlappingCompaction bool + EnableOOONativeHistograms bool } func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { @@ -1772,8 +1819,10 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { MaxExemplars: opts.MaxExemplars, EnableMemorySnapshotOnShutdown: opts.EnableMemorySnapshotOnShutdown, EnableNativeHistograms: opts.EnableNativeHistograms, + EnableOOONativeHistograms: opts.EnableOOONativeHistograms, OutOfOrderTimeWindow: opts.OutOfOrderTimeWindow, EnableDelayedCompaction: opts.EnableDelayedCompaction, + CompactionDelayMaxPercent: opts.CompactionDelayMaxPercent, EnableOverlappingCompaction: opts.EnableOverlappingCompaction, } } @@ -1807,15 +1856,6 @@ func (opts agentOptions) ToAgentOptions(outOfOrderTimeWindow int64) agent.Option } } -// discoveryManager interfaces the discovery manager. This is used to keep using -// the manager that restarts SD's on reload for a few releases until we feel -// the new manager can be enabled for all users. -type discoveryManager interface { - ApplyConfig(cfg map[string]discovery.Configs) error - Run() error - SyncCh() <-chan map[string][]*targetgroup.Group -} - // rwProtoMsgFlagParser is a custom parser for config.RemoteWriteProtoMsg enum. 
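// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: rwProtoMsgFlagParser below
// plugs into kingpin's Value interface (Set is called once per flag
// occurrence, String renders the default in help output). A simplified
// self-contained version with the enum reduced to strings; protoMsgsValue is
// a hypothetical name, and the message identifiers are the values used by
// config.RemoteWriteProtoMsgV1/V2.
package main

import "fmt"

type protoMsgsValue struct {
	msgs *[]string
}

func (p *protoMsgsValue) Set(opt string) error {
	switch opt {
	case "prometheus.WriteRequest", "io.prometheus.write.v2.Request":
		*p.msgs = append(*p.msgs, opt)
		return nil
	}
	return fmt.Errorf("unknown remote write protobuf message %q", opt)
}

func (p *protoMsgsValue) String() string { return fmt.Sprintf("%v", *p.msgs) }
// ---------------------------------------------------------------------------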
type rwProtoMsgFlagParser struct { msgs *[]config.RemoteWriteProtoMsg diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index c827812e60..4bd1c71b2d 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -31,9 +31,9 @@ import ( "time" "github.com/alecthomas/kingpin/v2" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/config" @@ -42,6 +42,11 @@ import ( "github.com/prometheus/prometheus/rules" ) +func init() { + // This can be removed when the default validation scheme in common is updated. + model.NameValidationScheme = model.UTF8Validation +} + const startupTime = 10 * time.Second var ( @@ -120,6 +125,7 @@ func TestFailedStartupExitCode(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() fakeInputFile := "fake-input-file" expectedExitStatus := 2 @@ -206,83 +212,125 @@ func TestWALSegmentSizeBounds(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() - for size, expectedExitStatus := range map[string]int{"9MB": 1, "257MB": 1, "10": 2, "1GB": 1, "12MB": 0} { - prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) + for _, tc := range []struct { + size string + exitCode int + }{ + { + size: "9MB", + exitCode: 1, + }, + { + size: "257MB", + exitCode: 1, + }, + { + size: "10", + exitCode: 2, + }, + { + size: "1GB", + exitCode: 1, + }, + { + size: "12MB", + exitCode: 0, + }, + } { + t.Run(tc.size, func(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) - // Log stderr in case of failure. - stderr, err := prom.StderrPipe() - require.NoError(t, err) - go func() { - slurp, _ := io.ReadAll(stderr) - t.Log(string(slurp)) - }() + // Log stderr in case of failure. 
+ stderr, err := prom.StderrPipe() + require.NoError(t, err) + go func() { + slurp, _ := io.ReadAll(stderr) + t.Log(string(slurp)) + }() - err = prom.Start() - require.NoError(t, err) + err = prom.Start() + require.NoError(t, err) - if expectedExitStatus == 0 { - done := make(chan error, 1) - go func() { done <- prom.Wait() }() - select { - case err := <-done: - require.Fail(t, "prometheus should be still running: %v", err) - case <-time.After(startupTime): - prom.Process.Kill() - <-done + if tc.exitCode == 0 { + done := make(chan error, 1) + go func() { done <- prom.Wait() }() + select { + case err := <-done: + require.Fail(t, "prometheus should be still running: %v", err) + case <-time.After(startupTime): + prom.Process.Kill() + <-done + } + return } - continue - } - err = prom.Wait() - require.Error(t, err) - var exitError *exec.ExitError - require.ErrorAs(t, err, &exitError) - status := exitError.Sys().(syscall.WaitStatus) - require.Equal(t, expectedExitStatus, status.ExitStatus()) + err = prom.Wait() + require.Error(t, err) + var exitError *exec.ExitError + require.ErrorAs(t, err, &exitError) + status := exitError.Sys().(syscall.WaitStatus) + require.Equal(t, tc.exitCode, status.ExitStatus()) + }) } } func TestMaxBlockChunkSegmentSizeBounds(t *testing.T) { - t.Parallel() - if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() - for size, expectedExitStatus := range map[string]int{"512KB": 1, "1MB": 0} { - prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) + for _, tc := range []struct { + size string + exitCode int + }{ + { + size: "512KB", + exitCode: 1, + }, + { + size: "1MB", + exitCode: 0, + }, + } { + t.Run(tc.size, func(t *testing.T) { + t.Parallel() + prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data")) - // Log stderr in case of failure. - stderr, err := prom.StderrPipe() - require.NoError(t, err) - go func() { - slurp, _ := io.ReadAll(stderr) - t.Log(string(slurp)) - }() + // Log stderr in case of failure. 
+ stderr, err := prom.StderrPipe() + require.NoError(t, err) + go func() { + slurp, _ := io.ReadAll(stderr) + t.Log(string(slurp)) + }() - err = prom.Start() - require.NoError(t, err) + err = prom.Start() + require.NoError(t, err) - if expectedExitStatus == 0 { - done := make(chan error, 1) - go func() { done <- prom.Wait() }() - select { - case err := <-done: - require.Fail(t, "prometheus should be still running: %v", err) - case <-time.After(startupTime): - prom.Process.Kill() - <-done + if tc.exitCode == 0 { + done := make(chan error, 1) + go func() { done <- prom.Wait() }() + select { + case err := <-done: + require.Fail(t, "prometheus should be still running: %v", err) + case <-time.After(startupTime): + prom.Process.Kill() + <-done + } + return } - continue - } - err = prom.Wait() - require.Error(t, err) - var exitError *exec.ExitError - require.ErrorAs(t, err, &exitError) - status := exitError.Sys().(syscall.WaitStatus) - require.Equal(t, expectedExitStatus, status.ExitStatus()) + err = prom.Wait() + require.Error(t, err) + var exitError *exec.ExitError + require.ErrorAs(t, err, &exitError) + status := exitError.Sys().(syscall.WaitStatus) + require.Equal(t, tc.exitCode, status.ExitStatus()) + }) } } @@ -290,7 +338,7 @@ func TestTimeMetrics(t *testing.T) { tmpDir := t.TempDir() reg := prometheus.NewRegistry() - db, err := openDBWithMetrics(tmpDir, log.NewNopLogger(), reg, nil, nil) + db, err := openDBWithMetrics(tmpDir, promslog.NewNopLogger(), reg, nil, nil) require.NoError(t, err) defer func() { require.NoError(t, db.Close()) @@ -348,7 +396,9 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames } func TestAgentSuccessfulStartup(t *testing.T) { - prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig) + t.Parallel() + + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig) require.NoError(t, prom.Start()) actualExitStatus := 0 @@ -366,7 +416,9 @@ func TestAgentSuccessfulStartup(t *testing.T) { } func TestAgentFailedStartupWithServerFlag(t *testing.T) { - prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) + t.Parallel() + + prom := exec.Command(promPath, "-test.main", "--agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) output := bytes.Buffer{} prom.Stderr = &output @@ -393,7 +445,9 @@ func TestAgentFailedStartupWithServerFlag(t *testing.T) { } func TestAgentFailedStartupWithInvalidConfig(t *testing.T) { - prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) + t.Parallel() + + prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig) require.NoError(t, prom.Start()) actualExitStatus := 0 @@ -414,6 +468,7 @@ func TestModeSpecificFlags(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() testcases := []struct { mode string @@ -428,10 +483,11 @@ func TestModeSpecificFlags(t *testing.T) { for _, tc := range testcases { t.Run(fmt.Sprintf("%s mode with option %s", tc.mode, tc.arg), func(t *testing.T) { + t.Parallel() args := []string{"-test.main", tc.arg, t.TempDir(), "--web.listen-address=0.0.0.0:0"} if tc.mode == "agent" { - args = append(args, "--enable-feature=agent", 
"--config.file="+agentConfig) + args = append(args, "--agent", "--config.file="+agentConfig) } else { args = append(args, "--config.file="+promConfig) } @@ -479,6 +535,8 @@ func TestDocumentation(t *testing.T) { if runtime.GOOS == "windows" { t.SkipNow() } + t.Parallel() + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() @@ -503,6 +561,8 @@ func TestDocumentation(t *testing.T) { } func TestRwProtoMsgFlagParser(t *testing.T) { + t.Parallel() + defaultOpts := config.RemoteWriteProtoMsgs{ config.RemoteWriteProtoMsgV1, config.RemoteWriteProtoMsgV2, } diff --git a/cmd/prometheus/main_unix_test.go b/cmd/prometheus/main_unix_test.go index 2011fb123f..94eec27e79 100644 --- a/cmd/prometheus/main_unix_test.go +++ b/cmd/prometheus/main_unix_test.go @@ -34,6 +34,7 @@ func TestStartupInterrupt(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t)) diff --git a/cmd/prometheus/query_log_test.go b/cmd/prometheus/query_log_test.go index 62e317bf8b..25abf5e965 100644 --- a/cmd/prometheus/query_log_test.go +++ b/cmd/prometheus/query_log_test.go @@ -125,12 +125,61 @@ func (p *queryLogTest) query(t *testing.T) { require.NoError(t, err) require.Equal(t, 200, r.StatusCode) case ruleOrigin: - time.Sleep(2 * time.Second) + // Poll the /api/v1/rules endpoint until a new rule evaluation is detected. + var lastEvalTime time.Time + for { + r, err := http.Get(fmt.Sprintf("http://%s:%d/api/v1/rules", p.host, p.port)) + require.NoError(t, err) + + rulesBody, err := io.ReadAll(r.Body) + require.NoError(t, err) + defer r.Body.Close() + + // Parse the rules response to find the last evaluation time. + newEvalTime := parseLastEvaluation(rulesBody) + if newEvalTime.After(lastEvalTime) { + if !lastEvalTime.IsZero() { + break + } + lastEvalTime = newEvalTime + } + + time.Sleep(100 * time.Millisecond) + } default: panic("can't query this origin") } } +// parseLastEvaluation extracts the last evaluation timestamp from the /api/v1/rules response. +func parseLastEvaluation(rulesBody []byte) time.Time { + var ruleResponse struct { + Status string `json:"status"` + Data struct { + Groups []struct { + Rules []struct { + LastEvaluation string `json:"lastEvaluation"` + } `json:"rules"` + } `json:"groups"` + } `json:"data"` + } + + err := json.Unmarshal(rulesBody, &ruleResponse) + if err != nil { + return time.Time{} + } + + for _, group := range ruleResponse.Data.Groups { + for _, rule := range group.Rules { + if evalTime, err := time.Parse(time.RFC3339Nano, rule.LastEvaluation); err == nil { + return evalTime + } + } + } + + return time.Time{} +} + // queryString returns the expected queryString of a this test. 
func (p *queryLogTest) queryString() string { switch p.origin { @@ -322,7 +371,7 @@ func (p *queryLogTest) run(t *testing.T) { if p.exactQueryCount() { require.Len(t, ql, qc) } else { - require.Greater(t, len(ql), qc, "no queries logged") + require.GreaterOrEqual(t, len(ql), qc, "no queries logged") } p.validateLastQuery(t, ql) qc = len(ql) @@ -353,7 +402,7 @@ func (p *queryLogTest) run(t *testing.T) { if p.exactQueryCount() { require.Len(t, ql, qc) } else { - require.Greater(t, len(ql), qc, "no queries logged") + require.GreaterOrEqual(t, len(ql), qc, "no queries logged") } p.validateLastQuery(t, ql) @@ -393,6 +442,7 @@ func readQueryLog(t *testing.T, path string) []queryLogLine { file, err := os.Open(path) require.NoError(t, err) defer file.Close() + scanner := bufio.NewScanner(file) for scanner.Scan() { var q queryLogLine @@ -406,6 +456,7 @@ func TestQueryLog(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() cwd, err := os.Getwd() require.NoError(t, err) @@ -424,6 +475,7 @@ func TestQueryLog(t *testing.T) { } t.Run(p.String(), func(t *testing.T) { + t.Parallel() p.run(t) }) } diff --git a/cmd/prometheus/reload_test.go b/cmd/prometheus/reload_test.go new file mode 100644 index 0000000000..18a7ff2ad1 --- /dev/null +++ b/cmd/prometheus/reload_test.go @@ -0,0 +1,229 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "bufio" + "encoding/json" + "io" + "net/http" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/util/testutil" +) + +const configReloadMetric = "prometheus_config_last_reload_successful" + +func TestAutoReloadConfig_ValidToValid(t *testing.T) { + steps := []struct { + configText string + expectedInterval string + expectedMetric float64 + }{ + { + configText: ` +global: + scrape_interval: 30s +`, + expectedInterval: "30s", + expectedMetric: 1, + }, + { + configText: ` +global: + scrape_interval: 15s +`, + expectedInterval: "15s", + expectedMetric: 1, + }, + { + configText: ` +global: + scrape_interval: 30s +`, + expectedInterval: "30s", + expectedMetric: 1, + }, + } + + runTestSteps(t, steps) +} + +func TestAutoReloadConfig_ValidToInvalidToValid(t *testing.T) { + steps := []struct { + configText string + expectedInterval string + expectedMetric float64 + }{ + { + configText: ` +global: + scrape_interval: 30s +`, + expectedInterval: "30s", + expectedMetric: 1, + }, + { + configText: ` +global: + scrape_interval: 15s +invalid_syntax +`, + expectedInterval: "30s", + expectedMetric: 0, + }, + { + configText: ` +global: + scrape_interval: 30s +`, + expectedInterval: "30s", + expectedMetric: 1, + }, + } + + runTestSteps(t, steps) +} + +func runTestSteps(t *testing.T, steps []struct { + configText string + expectedInterval string + expectedMetric float64 +}, +) { + configDir := t.TempDir() + configFilePath := filepath.Join(configDir, "prometheus.yml") + + t.Logf("Config file path: %s", configFilePath) + + require.NoError(t, os.WriteFile(configFilePath, []byte(steps[0].configText), 0o644), "Failed to write initial config file") + + port := testutil.RandomUnprivilegedPort(t) + runPrometheusWithLogging(t, configFilePath, port) + + baseURL := "http://localhost:" + strconv.Itoa(port) + require.Eventually(t, func() bool { + resp, err := http.Get(baseURL + "/-/ready") + if err != nil { + return false + } + defer resp.Body.Close() + return resp.StatusCode == http.StatusOK + }, 5*time.Second, 100*time.Millisecond, "Prometheus didn't become ready in time") + + for i, step := range steps { + t.Logf("Step %d", i) + require.NoError(t, os.WriteFile(configFilePath, []byte(step.configText), 0o644), "Failed to write config file for step") + + require.Eventually(t, func() bool { + return verifyScrapeInterval(t, baseURL, step.expectedInterval) && + verifyConfigReloadMetric(t, baseURL, step.expectedMetric) + }, 10*time.Second, 500*time.Millisecond, "Prometheus config reload didn't happen in time") + } +} + +func verifyScrapeInterval(t *testing.T, baseURL, expectedInterval string) bool { + resp, err := http.Get(baseURL + "/api/v1/status/config") + require.NoError(t, err) + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + config := struct { + Data struct { + YAML string `json:"yaml"` + } `json:"data"` + }{} + + require.NoError(t, json.Unmarshal(body, &config)) + return strings.Contains(config.Data.YAML, "scrape_interval: "+expectedInterval) +} + +func verifyConfigReloadMetric(t *testing.T, baseURL string, expectedValue float64) bool { + resp, err := http.Get(baseURL + "/metrics") + require.NoError(t, err) + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + lines := string(body) + var actualValue float64 + found := false + + for _, line := range strings.Split(lines, "\n") { + if 
strings.HasPrefix(line, configReloadMetric) { + parts := strings.Fields(line) + if len(parts) >= 2 { + actualValue, err = strconv.ParseFloat(parts[1], 64) + require.NoError(t, err) + found = true + break + } + } + } + + return found && actualValue == expectedValue +} + +func captureLogsToTLog(t *testing.T, r io.Reader) { + scanner := bufio.NewScanner(r) + for scanner.Scan() { + t.Log(scanner.Text()) + } + if err := scanner.Err(); err != nil { + t.Logf("Error reading logs: %v", err) + } +} + +func runPrometheusWithLogging(t *testing.T, configFilePath string, port int) { + stdoutPipe, stdoutWriter := io.Pipe() + stderrPipe, stderrWriter := io.Pipe() + + var wg sync.WaitGroup + wg.Add(2) + + prom := exec.Command(promPath, "-test.main", "--enable-feature=auto-reload-config", "--config.file="+configFilePath, "--config.auto-reload-interval=1s", "--web.listen-address=0.0.0.0:"+strconv.Itoa(port)) + prom.Stdout = stdoutWriter + prom.Stderr = stderrWriter + + go func() { + defer wg.Done() + captureLogsToTLog(t, stdoutPipe) + }() + go func() { + defer wg.Done() + captureLogsToTLog(t, stderrPipe) + }() + + t.Cleanup(func() { + prom.Process.Kill() + prom.Wait() + stdoutWriter.Close() + stderrWriter.Close() + wg.Wait() + }) + + require.NoError(t, prom.Start()) +} diff --git a/cmd/prometheus/scrape_failure_log_test.go b/cmd/prometheus/scrape_failure_log_test.go new file mode 100644 index 0000000000..8d86d719f9 --- /dev/null +++ b/cmd/prometheus/scrape_failure_log_test.go @@ -0,0 +1,193 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "bytes" + "fmt" + "net/http" + "net/http/httptest" + "net/url" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.uber.org/atomic" + + "github.com/prometheus/prometheus/util/testutil" +) + +func TestScrapeFailureLogFile(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + // Tracks the number of requests made to the mock server. + var requestCount atomic.Int32 + + // Starts a server that always returns HTTP 500 errors. + mockServerAddress := startGarbageServer(t, &requestCount) + + // Create a temporary directory for Prometheus configuration and logs. + tempDir := t.TempDir() + + // Define file paths for the scrape failure log and Prometheus configuration. + // Like other files, the scrape failure log file should be relative to the + // config file. Therefore, we split the name we put in the file and the full + // path used to check the content of the file. + scrapeFailureLogFileName := "scrape_failure.log" + scrapeFailureLogFile := filepath.Join(tempDir, scrapeFailureLogFileName) + promConfigFile := filepath.Join(tempDir, "prometheus.yml") + + // Step 1: Set up an initial Prometheus configuration that globally + // specifies a scrape failure log file. 
+ promConfig := fmt.Sprintf(` +global: + scrape_interval: 500ms + scrape_failure_log_file: %s + +scrape_configs: + - job_name: 'test_job' + static_configs: + - targets: ['%s'] +`, scrapeFailureLogFileName, mockServerAddress) + + err := os.WriteFile(promConfigFile, []byte(promConfig), 0o644) + require.NoError(t, err, "Failed to write Prometheus configuration file") + + // Start Prometheus with the generated configuration and a random port, enabling the lifecycle API. + port := testutil.RandomUnprivilegedPort(t) + params := []string{ + "-test.main", + "--config.file=" + promConfigFile, + "--storage.tsdb.path=" + filepath.Join(tempDir, "data"), + fmt.Sprintf("--web.listen-address=127.0.0.1:%d", port), + "--web.enable-lifecycle", + } + prometheusProcess := exec.Command(promPath, params...) + prometheusProcess.Stdout = os.Stdout + prometheusProcess.Stderr = os.Stderr + + err = prometheusProcess.Start() + require.NoError(t, err, "Failed to start Prometheus") + defer prometheusProcess.Process.Kill() + + // Wait until the mock server receives at least two requests from Prometheus. + require.Eventually(t, func() bool { + return requestCount.Load() >= 2 + }, 30*time.Second, 500*time.Millisecond, "Expected at least two requests to the mock server") + + // Verify that the scrape failures have been logged to the specified file. + content, err := os.ReadFile(scrapeFailureLogFile) + require.NoError(t, err, "Failed to read scrape failure log") + require.Contains(t, string(content), "server returned HTTP status 500 Internal Server Error", "Expected scrape failure log entry not found") + + // Step 2: Update the Prometheus configuration to remove the scrape failure + // log file setting. + promConfig = fmt.Sprintf(` +global: + scrape_interval: 1s + +scrape_configs: + - job_name: 'test_job' + static_configs: + - targets: ['%s'] +`, mockServerAddress) + + err = os.WriteFile(promConfigFile, []byte(promConfig), 0o644) + require.NoError(t, err, "Failed to update Prometheus configuration file") + + // Reload Prometheus with the updated configuration. + reloadPrometheus(t, port) + + // Count the number of lines in the scrape failure log file before any + // further requests. + preReloadLogLineCount := countLinesInFile(scrapeFailureLogFile) + + // Wait for at least two more requests to the mock server to ensure + // Prometheus continues scraping. + requestsBeforeReload := requestCount.Load() + require.Eventually(t, func() bool { + return requestCount.Load() >= requestsBeforeReload+2 + }, 30*time.Second, 500*time.Millisecond, "Expected two more requests to the mock server after configuration reload") + + // Ensure that no new lines were added to the scrape failure log file after + // the configuration change. + require.Equal(t, preReloadLogLineCount, countLinesInFile(scrapeFailureLogFile), "No new lines should be added to the scrape failure log file after removing the log setting") + + // Step 3: Re-add the scrape failure log file setting, but this time under + // scrape_configs, and reload Prometheus. + promConfig = fmt.Sprintf(` +global: + scrape_interval: 1s + +scrape_configs: + - job_name: 'test_job' + scrape_failure_log_file: %s + static_configs: + - targets: ['%s'] +`, scrapeFailureLogFileName, mockServerAddress) + + err = os.WriteFile(promConfigFile, []byte(promConfig), 0o644) + require.NoError(t, err, "Failed to update Prometheus configuration file") + + // Reload Prometheus with the updated configuration. 
+ reloadPrometheus(t, port) + + // Wait for at least two more requests to the mock server and verify that + // new log entries are created. + postReloadLogLineCount := countLinesInFile(scrapeFailureLogFile) + requestsBeforeReAddingLog := requestCount.Load() + require.Eventually(t, func() bool { + return requestCount.Load() >= requestsBeforeReAddingLog+2 + }, 30*time.Second, 500*time.Millisecond, "Expected two additional requests after re-adding the log setting") + + // Confirm that new lines were added to the scrape failure log file. + require.Greater(t, countLinesInFile(scrapeFailureLogFile), postReloadLogLineCount, "New lines should be added to the scrape failure log file after re-adding the log setting") +} + +// reloadPrometheus sends a reload request to the Prometheus server to apply +// updated configurations. +func reloadPrometheus(t *testing.T, port int) { + resp, err := http.Post(fmt.Sprintf("http://127.0.0.1:%d/-/reload", port), "", nil) + require.NoError(t, err, "Failed to reload Prometheus") + require.Equal(t, http.StatusOK, resp.StatusCode, "Unexpected status code when reloading Prometheus") +} + +// startGarbageServer sets up a mock server that returns a 500 Internal Server Error +// for all requests. It also increments the request count each time it's hit. +func startGarbageServer(t *testing.T, requestCount *atomic.Int32) string { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + requestCount.Inc() + w.WriteHeader(http.StatusInternalServerError) + })) + t.Cleanup(server.Close) + + parsedURL, err := url.Parse(server.URL) + require.NoError(t, err, "Failed to parse mock server URL") + + return parsedURL.Host +} + +// countLinesInFile counts and returns the number of lines in the specified file. +func countLinesInFile(filePath string) int { + data, err := os.ReadFile(filePath) + if err != nil { + return 0 // Return 0 if the file doesn't exist or can't be read. + } + return bytes.Count(data, []byte{'\n'}) +} diff --git a/cmd/promtool/analyze.go b/cmd/promtool/analyze.go index c1f523de52..26e6f2188c 100644 --- a/cmd/promtool/analyze.go +++ b/cmd/promtool/analyze.go @@ -34,8 +34,8 @@ import ( ) var ( - errNotNativeHistogram = fmt.Errorf("not a native histogram") - errNotEnoughData = fmt.Errorf("not enough data") + errNotNativeHistogram = errors.New("not a native histogram") + errNotEnoughData = errors.New("not enough data") outputHeader = `Bucket stats for each histogram series over time ------------------------------------------------ @@ -169,7 +169,7 @@ func querySamples(ctx context.Context, api v1.API, query string, end time.Time) matrix, ok := values.(model.Matrix) if !ok { - return nil, fmt.Errorf("query of buckets resulted in non-Matrix") + return nil, errors.New("query of buckets resulted in non-Matrix") } return matrix, nil @@ -259,7 +259,7 @@ func getBucketCountsAtTime(matrix model.Matrix, numBuckets, timeIdx int) ([]int, prev := matrix[i].Values[timeIdx] // Assume the results are nicely aligned. 
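Several hunks in this file, and many more throughout the patch, swap fmt.Errorf for errors.New wherever the message is a constant. That is the idiomatic split: errors.New for static strings, fmt.Errorf only when there are formatting verbs, ideally wrapping a sentinel or cause with %w. A compact illustration:

```go
package example

import (
	"errors"
	"fmt"
)

// Static message: no formatting machinery needed.
var errNotAligned = errors.New("matrix result is not time aligned")

func checkAligned(curr, prev int64) error {
	if curr != prev {
		// Dynamic message: keep fmt.Errorf and wrap the sentinel with %w
		// so callers can still match it with errors.Is.
		return fmt.Errorf("%w: %d vs %d", errNotAligned, curr, prev)
	}
	return nil
}
```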
if curr.Timestamp != prev.Timestamp { - return counts, fmt.Errorf("matrix result is not time aligned") + return counts, errors.New("matrix result is not time aligned") } counts[i+1] = int(curr.Value - prev.Value) } diff --git a/cmd/promtool/analyze_test.go b/cmd/promtool/analyze_test.go index 83d2ac4a3d..1e16a22d35 100644 --- a/cmd/promtool/analyze_test.go +++ b/cmd/promtool/analyze_test.go @@ -109,6 +109,7 @@ func init() { } func TestGetBucketCountsAtTime(t *testing.T) { + t.Parallel() cases := []struct { matrix model.Matrix length int @@ -137,6 +138,7 @@ func TestGetBucketCountsAtTime(t *testing.T) { for _, c := range cases { t.Run(fmt.Sprintf("exampleMatrix@%d", c.timeIdx), func(t *testing.T) { + t.Parallel() res, err := getBucketCountsAtTime(c.matrix, c.length, c.timeIdx) require.NoError(t, err) require.Equal(t, c.expected, res) @@ -145,6 +147,7 @@ func TestGetBucketCountsAtTime(t *testing.T) { } func TestCalcClassicBucketStatistics(t *testing.T) { + t.Parallel() cases := []struct { matrix model.Matrix expected *statistics @@ -162,6 +165,7 @@ func TestCalcClassicBucketStatistics(t *testing.T) { for i, c := range cases { t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) { + t.Parallel() res, err := calcClassicBucketStatistics(c.matrix) require.NoError(t, err) require.Equal(t, c.expected, res) diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go index 16491f0416..125c9a08e6 100644 --- a/cmd/promtool/backfill.go +++ b/cmd/promtool/backfill.go @@ -21,9 +21,10 @@ import ( "math" "time" - "github.com/go-kit/log" "github.com/oklog/ulid" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/textparse" "github.com/prometheus/prometheus/tsdb" @@ -48,7 +49,7 @@ func getMinAndMaxTimestamps(p textparse.Parser) (int64, int64, error) { _, ts, _ := p.Series() if ts == nil { - return 0, 0, fmt.Errorf("expected timestamp for series got none") + return 0, 0, errors.New("expected timestamp for series got none") } if *ts > maxt { @@ -120,7 +121,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn // also need to append samples throughout the whole block range. To allow that, we // pretend that the block is twice as large here, but only really add sample in the // original interval later. 
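The comment above explains why the backfiller opens the writer with twice the block duration: samples may be appended anywhere in the target range without racing compaction. A hedged sketch of driving tsdb.BlockWriter end to end under that convention; the output path and sample are illustrative:

```go
package example

import (
	"context"
	"fmt"

	"github.com/prometheus/common/promslog"

	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/tsdb"
)

// writeOneSampleBlock appends a single sample and flushes it as a block,
// using the doubled block size discussed above.
func writeOneSampleBlock(outputDir string, blockDuration int64) error {
	w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), outputDir, 2*blockDuration)
	if err != nil {
		return fmt.Errorf("block writer: %w", err)
	}
	defer w.Close()

	app := w.Appender(context.Background())
	lbls := labels.FromStrings("__name__", "backfilled_metric", "job", "example")
	if _, err := app.Append(0, lbls, 1700000000000, 42); err != nil {
		return err
	}
	if err := app.Commit(); err != nil {
		return err
	}
	_, err = w.Flush(context.Background()) // returns the new block's ULID
	return err
}
```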
- w, err := tsdb.NewBlockWriter(log.NewNopLogger(), outputDir, 2*blockDuration) + w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), outputDir, 2*blockDuration) if err != nil { return fmt.Errorf("block writer: %w", err) } diff --git a/cmd/promtool/backfill_test.go b/cmd/promtool/backfill_test.go index b818194e86..da64b5dca1 100644 --- a/cmd/promtool/backfill_test.go +++ b/cmd/promtool/backfill_test.go @@ -86,6 +86,7 @@ func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime, exp } func TestBackfill(t *testing.T) { + t.Parallel() tests := []struct { ToParse string IsOk bool @@ -729,6 +730,7 @@ after_eof 1 2 } for _, test := range tests { t.Run(test.Description, func(t *testing.T) { + t.Parallel() t.Logf("Test:%s", test.Description) outputDir := t.TempDir() diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index e713a177fd..b52fe7cdbb 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -32,13 +32,13 @@ import ( "time" "github.com/alecthomas/kingpin/v2" - "github.com/go-kit/log" "github.com/google/pprof/profile" "github.com/prometheus/client_golang/api" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil/promlint" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" "github.com/prometheus/exporter-toolkit/web" "gopkg.in/yaml.v2" @@ -58,10 +58,16 @@ import ( _ "github.com/prometheus/prometheus/plugins" // Register plugins. "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/promqltest" + "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/util/documentcli" ) +func init() { + // This can be removed when the default validation scheme in common is updated. + model.NameValidationScheme = model.UTF8Validation +} + const ( successExitCode = 0 failureExitCode = 1 @@ -211,6 +217,7 @@ func main() { "test-rule-file", "The unit test file.", ).Required().ExistingFiles() + testRulesDebug := testRulesCmd.Flag("debug", "Enable unit test debugging.").Default("false").Bool() testRulesDiff := testRulesCmd.Flag("diff", "[Experimental] Print colored differential output between expected & received output.").Default("false").Bool() defaultDBPath := "data/" @@ -236,14 +243,14 @@ func main() { tsdbDumpCmd := tsdbCmd.Command("dump", "Dump samples from a TSDB.") dumpPath := tsdbDumpCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String() - dumpSandboxDirRoot := tsdbDumpCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String() + dumpSandboxDirRoot := tsdbDumpCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory will be created, this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end.").String() dumpMinTime := tsdbDumpCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64() dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64() dumpMatch := tsdbDumpCmd.Flag("match", "Series selector. 
Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings() tsdbDumpOpenMetricsCmd := tsdbCmd.Command("dump-openmetrics", "[Experimental] Dump samples from a TSDB into OpenMetrics text format, excluding native histograms and staleness markers, which are not representable in OpenMetrics.") dumpOpenMetricsPath := tsdbDumpOpenMetricsCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String() - dumpOpenMetricsSandboxDirRoot := tsdbDumpOpenMetricsCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String() + dumpOpenMetricsSandboxDirRoot := tsdbDumpOpenMetricsCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory will be created, this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end.").String() dumpOpenMetricsMinTime := tsdbDumpOpenMetricsCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64() dumpOpenMetricsMaxTime := tsdbDumpOpenMetricsCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64() dumpOpenMetricsMatch := tsdbDumpOpenMetricsCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings() @@ -286,7 +293,7 @@ func main() { promQLLabelsDeleteQuery := promQLLabelsDeleteCmd.Arg("query", "PromQL query.").Required().String() promQLLabelsDeleteName := promQLLabelsDeleteCmd.Arg("name", "Name of the label to delete.").Required().String() - featureList := app.Flag("enable-feature", "Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details.").Default("").Strings() + featureList := app.Flag("enable-feature", "Comma separated feature names to enable. Currently unused.").Default("").Strings() documentationCmd := app.Command("write-documentation", "Generate command line documentation. 
Internal use.").Hidden() @@ -316,26 +323,21 @@ func main() { } } - var noDefaultScrapePort bool for _, f := range *featureList { opts := strings.Split(f, ",") for _, o := range opts { switch o { - case "no-default-scrape-port": - noDefaultScrapePort = true case "": continue - case "promql-at-modifier", "promql-negative-offset": - fmt.Printf(" WARNING: Option for --enable-feature is a no-op after promotion to a stable feature: %q\n", o) default: - fmt.Printf(" WARNING: Unknown option for --enable-feature: %q\n", o) + fmt.Printf(" WARNING: --enable-feature is currently a no-op") } } } switch parsedCmd { case sdCheckCmd.FullCommand(): - os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort, prometheus.DefaultRegisterer)) + os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, prometheus.DefaultRegisterer)) case checkConfigCmd.FullCommand(): os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...)) @@ -391,6 +393,7 @@ func main() { }, *testRulesRun, *testRulesDiff, + *testRulesDebug, *testRulesFiles...), ) @@ -441,7 +444,7 @@ func checkExperimental(f bool) { } } -var errLint = fmt.Errorf("lint error") +var errLint = errors.New("lint error") type lintConfig struct { all bool @@ -575,7 +578,7 @@ func checkFileExists(fn string) error { func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, error) { fmt.Println("Checking", filename) - cfg, err := config.LoadFile(filename, agentMode, false, log.NewNopLogger()) + cfg, err := config.LoadFile(filename, agentMode, promslog.NewNopLogger()) if err != nil { return nil, err } @@ -895,30 +898,30 @@ func compare(a, b compareRuleType) int { func checkDuplicates(groups []rulefmt.RuleGroup) []compareRuleType { var duplicates []compareRuleType - var rules compareRuleTypes + var cRules compareRuleTypes for _, group := range groups { for _, rule := range group.Rules { - rules = append(rules, compareRuleType{ + cRules = append(cRules, compareRuleType{ metric: ruleMetric(rule), - label: labels.FromMap(rule.Labels), + label: rules.FromMaps(group.Labels, rule.Labels), }) } } - if len(rules) < 2 { + if len(cRules) < 2 { return duplicates } - sort.Sort(rules) + sort.Sort(cRules) - last := rules[0] - for i := 1; i < len(rules); i++ { - if compare(last, rules[i]) == 0 { + last := cRules[0] + for i := 1; i < len(cRules); i++ { + if compare(last, cRules[i]) == 0 { // Don't add a duplicated rule multiple times. 
if len(duplicates) == 0 || compare(last, duplicates[len(duplicates)-1]) != 0 { - duplicates = append(duplicates, rules[i]) + duplicates = append(duplicates, cRules[i]) } } - last = rules[i] + last = cRules[i] } return duplicates @@ -1182,7 +1185,7 @@ func importRules(url *url.URL, roundTripper http.RoundTripper, start, end, outpu return fmt.Errorf("new api client error: %w", err) } - ruleImporter := newRuleImporter(log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), cfg, api) + ruleImporter := newRuleImporter(promslog.New(&promslog.Config{}), cfg, api) errs := ruleImporter.loadGroups(ctx, files) for _, err := range errs { if err != nil { @@ -1216,7 +1219,7 @@ func checkTargetGroupsForScrapeConfig(targetGroups []*targetgroup.Group, scfg *c lb := labels.NewBuilder(labels.EmptyLabels()) for _, tg := range targetGroups { var failures []error - targets, failures = scrape.TargetsFromGroup(tg, scfg, false, targets, lb) + targets, failures = scrape.TargetsFromGroup(tg, scfg, targets, lb) if len(failures) > 0 { first := failures[0] return first diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index 78500fe937..9a07269188 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -31,12 +31,19 @@ import ( "testing" "time" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/rulefmt" + "github.com/prometheus/prometheus/promql/promqltest" ) +func init() { + // This can be removed when the default validation scheme in common is updated. + model.NameValidationScheme = model.UTF8Validation +} + var promtoolPath = os.Args[0] func TestMain(m *testing.M) { @@ -53,6 +60,7 @@ func TestMain(m *testing.M) { } func TestQueryRange(t *testing.T) { + t.Parallel() s, getRequest := mockServer(200, `{"status": "success", "data": {"resultType": "matrix", "result": []}}`) defer s.Close() @@ -76,6 +84,7 @@ func TestQueryRange(t *testing.T) { } func TestQueryInstant(t *testing.T) { + t.Parallel() s, getRequest := mockServer(200, `{"status": "success", "data": {"resultType": "vector", "result": []}}`) defer s.Close() @@ -107,6 +116,7 @@ func mockServer(code int, body string) (*httptest.Server, func() *http.Request) } func TestCheckSDFile(t *testing.T) { + t.Parallel() cases := []struct { name string file string @@ -137,9 +147,10 @@ func TestCheckSDFile(t *testing.T) { } for _, test := range cases { t.Run(test.name, func(t *testing.T) { + t.Parallel() _, err := checkSDFile(test.file) if test.err != "" { - require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error()) + require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error()) return } require.NoError(t, err) @@ -148,6 +159,7 @@ func TestCheckSDFile(t *testing.T) { } func TestCheckDuplicates(t *testing.T) { + t.Parallel() cases := []struct { name string ruleFile string @@ -172,6 +184,7 @@ func TestCheckDuplicates(t *testing.T) { for _, test := range cases { c := test t.Run(c.name, func(t *testing.T) { + t.Parallel() rgs, err := rulefmt.ParseFile(c.ruleFile) require.Empty(t, err) dups := checkDuplicates(rgs.Groups) @@ -191,6 +204,7 @@ func BenchmarkCheckDuplicates(b *testing.B) { } func TestCheckTargetConfig(t *testing.T) { + t.Parallel() cases := []struct { name string file string @@ -219,9 +233,10 @@ func TestCheckTargetConfig(t *testing.T) { } for _, test := range cases { t.Run(test.name, func(t *testing.T) { + t.Parallel() _, err := checkConfig(false, 
"testdata/"+test.file, false) if test.err != "" { - require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error()) + require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error()) return } require.NoError(t, err) @@ -230,6 +245,7 @@ func TestCheckTargetConfig(t *testing.T) { } func TestCheckConfigSyntax(t *testing.T) { + t.Parallel() cases := []struct { name string file string @@ -302,13 +318,14 @@ func TestCheckConfigSyntax(t *testing.T) { } for _, test := range cases { t.Run(test.name, func(t *testing.T) { + t.Parallel() _, err := checkConfig(false, "testdata/"+test.file, test.syntaxOnly) expectedErrMsg := test.err if strings.Contains(runtime.GOOS, "windows") { expectedErrMsg = test.errWindows } if expectedErrMsg != "" { - require.Equalf(t, expectedErrMsg, err.Error(), "Expected error %q, got %q", test.err, err.Error()) + require.EqualErrorf(t, err, expectedErrMsg, "Expected error %q, got %q", test.err, err.Error()) return } require.NoError(t, err) @@ -317,6 +334,7 @@ func TestCheckConfigSyntax(t *testing.T) { } func TestAuthorizationConfig(t *testing.T) { + t.Parallel() cases := []struct { name string file string @@ -336,9 +354,10 @@ func TestAuthorizationConfig(t *testing.T) { for _, test := range cases { t.Run(test.name, func(t *testing.T) { + t.Parallel() _, err := checkConfig(false, "testdata/"+test.file, false) if test.err != "" { - require.Contains(t, err.Error(), test.err, "Expected error to contain %q, got %q", test.err, err.Error()) + require.ErrorContains(t, err, test.err, "Expected error to contain %q, got %q", test.err, err.Error()) return } require.NoError(t, err) @@ -350,6 +369,7 @@ func TestCheckMetricsExtended(t *testing.T) { if runtime.GOOS == "windows" { t.Skip("Skipping on windows") } + t.Parallel() f, err := os.Open("testdata/metrics-test.prom") require.NoError(t, err) @@ -386,6 +406,7 @@ func TestExitCodes(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode.") } + t.Parallel() for _, c := range []struct { file string @@ -410,8 +431,10 @@ func TestExitCodes(t *testing.T) { }, } { t.Run(c.file, func(t *testing.T) { + t.Parallel() for _, lintFatal := range []bool{true, false} { t.Run(strconv.FormatBool(lintFatal), func(t *testing.T) { + t.Parallel() args := []string{"-test.main", "check", "config", "testdata/" + c.file} if lintFatal { args = append(args, "--lint-fatal") @@ -442,6 +465,7 @@ func TestDocumentation(t *testing.T) { if runtime.GOOS == "windows" { t.SkipNow() } + t.Parallel() ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() @@ -535,17 +559,65 @@ func TestCheckRules(t *testing.T) { func TestCheckRulesWithRuleFiles(t *testing.T) { t.Run("rules-good", func(t *testing.T) { + t.Parallel() exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules.yml") require.Equal(t, successExitCode, exitCode, "") }) t.Run("rules-bad", func(t *testing.T) { + t.Parallel() exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules-bad.yml") require.Equal(t, failureExitCode, exitCode, "") }) t.Run("rules-lint-fatal", func(t *testing.T) { + t.Parallel() exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true), "./testdata/prometheus-rules.lint.yml") require.Equal(t, lintErrExitCode, exitCode, "") }) } + +func TestTSDBDumpCommand(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + t.Parallel() + + storage := promqltest.LoadedStorage(t, ` + load 1m + 
metric{foo="bar"} 1 2 3 + `) + t.Cleanup(func() { storage.Close() }) + + for _, c := range []struct { + name string + subCmd string + sandboxDirRoot string + }{ + { + name: "dump", + subCmd: "dump", + }, + { + name: "dump with sandbox dir root", + subCmd: "dump", + sandboxDirRoot: t.TempDir(), + }, + { + name: "dump-openmetrics", + subCmd: "dump-openmetrics", + }, + { + name: "dump-openmetrics with sandbox dir root", + subCmd: "dump-openmetrics", + sandboxDirRoot: t.TempDir(), + }, + } { + t.Run(c.name, func(t *testing.T) { + t.Parallel() + args := []string{"-test.main", "tsdb", c.subCmd, storage.Dir()} + cmd := exec.Command(promtoolPath, args...) + require.NoError(t, cmd.Run()) + }) + } +} diff --git a/cmd/promtool/rules.go b/cmd/promtool/rules.go index 5a18644842..adb214b812 100644 --- a/cmd/promtool/rules.go +++ b/cmd/promtool/rules.go @@ -16,12 +16,12 @@ package main import ( "context" "fmt" + "log/slog" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" v1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" @@ -38,7 +38,7 @@ type queryRangeAPI interface { } type ruleImporter struct { - logger log.Logger + logger *slog.Logger config ruleImporterConfig apiClient queryRangeAPI @@ -57,8 +57,8 @@ type ruleImporterConfig struct { // newRuleImporter creates a new rule importer that can be used to parse and evaluate recording rule files and create new series // written to disk in blocks. -func newRuleImporter(logger log.Logger, config ruleImporterConfig, apiClient queryRangeAPI) *ruleImporter { - level.Info(logger).Log("backfiller", "new rule importer", "start", config.start.Format(time.RFC822), "end", config.end.Format(time.RFC822)) +func newRuleImporter(logger *slog.Logger, config ruleImporterConfig, apiClient queryRangeAPI) *ruleImporter { + logger.Info("new rule importer", "component", "backfiller", "start", config.start.Format(time.RFC822), "end", config.end.Format(time.RFC822)) return &ruleImporter{ logger: logger, config: config, @@ -80,10 +80,10 @@ func (importer *ruleImporter) loadGroups(_ context.Context, filenames []string) // importAll evaluates all the recording rules and creates new time series and writes them to disk in blocks. 
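The logging hunks in this file show the migration pattern used across the whole patch: go-kit's level.X(logger).Log(k, v, ...) becomes a *slog.Logger method taking a message plus alternating key/value pairs, with promslog supplying the configured logger. In miniature:

```go
package main

import "github.com/prometheus/common/promslog"

func main() {
	// promslog.New returns a ready-made *log/slog.Logger (stderr by default);
	// promslog.NewNopLogger() is the drop-in replacement for go-kit's
	// log.NewNopLogger() in tests.
	logger := promslog.New(&promslog.Config{})

	// Before: level.Info(logger).Log("backfiller", "processing group", "name", g)
	// After: a message first, then key/value pairs.
	logger.Info("processing group", "component", "backfiller", "name", "example-group")
}
```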
func (importer *ruleImporter) importAll(ctx context.Context) (errs []error) { for name, group := range importer.groups { - level.Info(importer.logger).Log("backfiller", "processing group", "name", name) + importer.logger.Info("processing group", "component", "backfiller", "name", name) for i, r := range group.Rules() { - level.Info(importer.logger).Log("backfiller", "processing rule", "id", i, "name", r.Name()) + importer.logger.Info("processing rule", "component", "backfiller", "id", i, "name", r.Name()) if err := importer.importRule(ctx, r.Query().String(), r.Name(), r.Labels(), importer.config.start, importer.config.end, int64(importer.config.maxBlockDuration/time.Millisecond), group); err != nil { errs = append(errs, err) } @@ -124,7 +124,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName return fmt.Errorf("query range: %w", err) } if warnings != nil { - level.Warn(importer.logger).Log("msg", "Range query returned warnings.", "warnings", warnings) + importer.logger.Warn("Range query returned warnings.", "warnings", warnings) } // To prevent races with compaction, a block writer only allows appending samples @@ -133,7 +133,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName // also need to append samples throughout the whole block range. To allow that, we // pretend that the block is twice as large here, but only really add sample in the // original interval later. - w, err := tsdb.NewBlockWriter(log.NewNopLogger(), importer.config.outputDir, 2*blockDuration) + w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), importer.config.outputDir, 2*blockDuration) if err != nil { return fmt.Errorf("new block writer: %w", err) } diff --git a/cmd/promtool/rules_test.go b/cmd/promtool/rules_test.go index d55fb0c896..e22b8a556b 100644 --- a/cmd/promtool/rules_test.go +++ b/cmd/promtool/rules_test.go @@ -21,9 +21,9 @@ import ( "testing" "time" - "github.com/go-kit/log" v1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" @@ -43,6 +43,7 @@ const defaultBlockDuration = time.Duration(tsdb.DefaultBlockDuration) * time.Mil // TestBackfillRuleIntegration is an integration test that runs all the rule importer code to confirm the parts work together. func TestBackfillRuleIntegration(t *testing.T) { + t.Parallel() const ( testMaxSampleCount = 50 testValue = 123 @@ -72,6 +73,7 @@ func TestBackfillRuleIntegration(t *testing.T) { } for _, tt := range testCases { t.Run(tt.name, func(t *testing.T) { + t.Parallel() tmpDir := t.TempDir() ctx := context.Background() @@ -161,7 +163,7 @@ func TestBackfillRuleIntegration(t *testing.T) { } func newTestRuleImporter(_ context.Context, start time.Time, tmpDir string, testSamples model.Matrix, maxBlockDuration time.Duration) (*ruleImporter, error) { - logger := log.NewNopLogger() + logger := promslog.NewNopLogger() cfg := ruleImporterConfig{ outputDir: tmpDir, start: start.Add(-10 * time.Hour), @@ -210,6 +212,7 @@ func createMultiRuleTestFiles(path string) error { // TestBackfillLabels confirms that the labels in the rule file override the labels from the metrics // received from Prometheus Query API, including the __name__ label. 
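importRule above drives the v1 HTTP API's QueryRange and type-asserts the result to a matrix. A self-contained sketch of that client call; the server address and error text are placeholders:

```go
package example

import (
	"context"
	"fmt"
	"time"

	"github.com/prometheus/client_golang/api"
	v1 "github.com/prometheus/client_golang/api/prometheus/v1"
	"github.com/prometheus/common/model"
)

// queryMatrix evaluates expr over [start, end] at the given step.
func queryMatrix(ctx context.Context, expr string, start, end time.Time, step time.Duration) (model.Matrix, error) {
	client, err := api.NewClient(api.Config{Address: "http://localhost:9090"})
	if err != nil {
		return nil, err
	}
	val, warnings, err := v1.NewAPI(client).QueryRange(ctx, expr, v1.Range{Start: start, End: end, Step: step})
	if err != nil {
		return nil, fmt.Errorf("query range: %w", err)
	}
	if len(warnings) > 0 {
		fmt.Println("warnings:", warnings)
	}
	matrix, ok := val.(model.Matrix)
	if !ok {
		return nil, fmt.Errorf("expected a matrix, got %v", val.Type())
	}
	return matrix, nil
}
```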
func TestBackfillLabels(t *testing.T) { + t.Parallel() tmpDir := t.TempDir() ctx := context.Background() @@ -251,6 +254,7 @@ func TestBackfillLabels(t *testing.T) { require.NoError(t, err) t.Run("correct-labels", func(t *testing.T) { + t.Parallel() selectedSeries := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "", ".*")) for selectedSeries.Next() { series := selectedSeries.At() diff --git a/cmd/promtool/sd.go b/cmd/promtool/sd.go index e65262d439..5e005bca8b 100644 --- a/cmd/promtool/sd.go +++ b/cmd/promtool/sd.go @@ -20,9 +20,9 @@ import ( "os" "time" - "github.com/go-kit/log" "github.com/google/go-cmp/cmp" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" @@ -38,10 +38,10 @@ type sdCheckResult struct { } // CheckSD performs service discovery for the given job name and reports the results. -func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool, registerer prometheus.Registerer) int { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) +func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, registerer prometheus.Registerer) int { + logger := promslog.New(&promslog.Config{}) - cfg, err := config.LoadFile(sdConfigFiles, false, false, logger) + cfg, err := config.LoadFile(sdConfigFiles, false, logger) if err != nil { fmt.Fprintln(os.Stderr, "Cannot load config", err) return failureExitCode @@ -114,7 +114,7 @@ outerLoop: } results := []sdCheckResult{} for _, tgs := range sdCheckResults { - results = append(results, getSDCheckResult(tgs, scrapeConfig, noDefaultScrapePort)...) + results = append(results, getSDCheckResult(tgs, scrapeConfig)...) 
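CheckSD and getSDCheckResult lose their noDefaultScrapePort parameter here because appending default ports is no longer a toggle, so scrape.PopulateLabels now takes just the builder and the scrape config. A sketch of the two-argument call; the wrapper name is hypothetical:

```go
package example

import (
	"github.com/prometheus/common/model"

	"github.com/prometheus/prometheus/config"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/scrape"
)

// resolveTarget applies the scrape config's relabeling to one discovered
// address and returns the final and pre-relabeling label sets.
func resolveTarget(cfg *config.ScrapeConfig, addr string) (res, orig labels.Labels, err error) {
	lb := labels.NewBuilder(labels.EmptyLabels())
	lb.Set(model.AddressLabel, addr)
	return scrape.PopulateLabels(lb, cfg)
}
```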
} res, err := json.MarshalIndent(results, "", " ") @@ -127,7 +127,7 @@ outerLoop: return successExitCode } -func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.ScrapeConfig, noDefaultScrapePort bool) []sdCheckResult { +func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.ScrapeConfig) []sdCheckResult { sdCheckResults := []sdCheckResult{} lb := labels.NewBuilder(labels.EmptyLabels()) for _, targetGroup := range targetGroups { @@ -144,7 +144,7 @@ func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.Sc } } - res, orig, err := scrape.PopulateLabels(lb, scrapeConfig, noDefaultScrapePort) + res, orig, err := scrape.PopulateLabels(lb, scrapeConfig) result := sdCheckResult{ DiscoveredLabels: orig, Labels: res, diff --git a/cmd/promtool/sd_test.go b/cmd/promtool/sd_test.go index cb65ee72aa..90318ba983 100644 --- a/cmd/promtool/sd_test.go +++ b/cmd/promtool/sd_test.go @@ -29,6 +29,7 @@ import ( ) func TestSDCheckResult(t *testing.T) { + t.Parallel() targetGroups := []*targetgroup.Group{{ Targets: []model.LabelSet{ map[model.LabelName]model.LabelValue{"__address__": "localhost:8080", "foo": "bar"}, @@ -70,5 +71,5 @@ func TestSDCheckResult(t *testing.T) { }, } - testutil.RequireEqual(t, expectedSDCheckResult, getSDCheckResult(targetGroups, scrapeConfig, true)) + testutil.RequireEqual(t, expectedSDCheckResult, getSDCheckResult(targetGroups, scrapeConfig)) } diff --git a/cmd/promtool/testdata/config_with_service_discovery_files.yml b/cmd/promtool/testdata/config_with_service_discovery_files.yml index 13b6d7faff..6a550a8403 100644 --- a/cmd/promtool/testdata/config_with_service_discovery_files.yml +++ b/cmd/promtool/testdata/config_with_service_discovery_files.yml @@ -6,7 +6,7 @@ scrape_configs: alerting: alertmanagers: - scheme: http - api_version: v1 + api_version: v2 file_sd_configs: - files: - nonexistent_file.yml diff --git a/cmd/promtool/testdata/unittest.yml b/cmd/promtool/testdata/unittest.yml index ff511729ba..e2a8230902 100644 --- a/cmd/promtool/testdata/unittest.yml +++ b/cmd/promtool/testdata/unittest.yml @@ -69,13 +69,13 @@ tests: eval_time: 2m exp_samples: - labels: "test_histogram_repeat" - histogram: "{{count:2 sum:3 buckets:[2]}}" + histogram: "{{count:2 sum:3 counter_reset_hint:not_reset buckets:[2]}}" - expr: test_histogram_increase eval_time: 2m exp_samples: - labels: "test_histogram_increase" - histogram: "{{count:4 sum:5.6 buckets:[4]}}" + histogram: "{{count:4 sum:5.6 counter_reset_hint:not_reset buckets:[4]}}" # Ensure a value is stale as soon as it is marked as such. - expr: test_stale @@ -89,11 +89,11 @@ tests: # Ensure lookback delta is respected, when a value is missing. - expr: timestamp(test_missing) - eval_time: 5m + eval_time: 4m59s exp_samples: - value: 0 - expr: timestamp(test_missing) - eval_time: 5m1s + eval_time: 5m exp_samples: [] # Minimal test case to check edge case of a single sample. 
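A note on the expectation changes in this unit-test file, the lookback tweaks just above (5m to 4m59s) and the count_over_time changes below (61 to 60): both track Prometheus 3.x making range selectors and the lookback window left-open, so a sample falling exactly on the window's lower bound no longer counts. Worked example under that semantics: with a 1m scrape interval starting at t=0, samples sit at 0s, 60s, ..., 3600s; the old closed window [0h, 1h] evaluated at 1h held 61 samples, while the left-open window (0h, 1h] excludes the t=0 sample and holds 60.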
@@ -113,7 +113,7 @@ tests: - expr: count_over_time(fixed_data[1h]) eval_time: 1h exp_samples: - - value: 61 + - value: 60 - expr: timestamp(fixed_data) eval_time: 1h exp_samples: @@ -183,7 +183,7 @@ tests: - expr: job:test:count_over_time1m eval_time: 1m exp_samples: - - value: 61 + - value: 60 labels: 'job:test:count_over_time1m{job="test"}' - expr: timestamp(job:test:count_over_time1m) eval_time: 1m10s @@ -194,7 +194,7 @@ tests: - expr: job:test:count_over_time1m eval_time: 2m exp_samples: - - value: 61 + - value: 60 labels: 'job:test:count_over_time1m{job="test"}' - expr: timestamp(job:test:count_over_time1m) eval_time: 2m59s999ms diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index 971ea8ab00..cefacfec28 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "runtime" @@ -32,9 +33,10 @@ import ( "time" "github.com/alecthomas/units" - "github.com/go-kit/log" "go.uber.org/atomic" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/storage" @@ -60,7 +62,7 @@ type writeBenchmark struct { memprof *os.File blockprof *os.File mtxprof *os.File - logger log.Logger + logger *slog.Logger } func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) error { @@ -68,7 +70,7 @@ func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) err outPath: outPath, samplesFile: samplesFile, numMetrics: numMetrics, - logger: log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), + logger: promslog.New(&promslog.Config{}), } if b.outPath == "" { dir, err := os.MkdirTemp("", "tsdb_bench") @@ -87,9 +89,7 @@ func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) err dir := filepath.Join(b.outPath, "storage") - l := log.With(b.logger, "ts", log.DefaultTimestampUTC, "caller", log.DefaultCaller) - - st, err := tsdb.Open(dir, l, nil, &tsdb.Options{ + st, err := tsdb.Open(dir, b.logger, nil, &tsdb.Options{ RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond), MinBlockDuration: int64(2 * time.Hour / time.Millisecond), }, tsdb.NewDBStats()) @@ -367,25 +367,25 @@ func printBlocks(blocks []tsdb.BlockReader, writeHeader, humanReadable bool) { fmt.Fprintf(tw, "%v\t%v\t%v\t%v\t%v\t%v\t%v\t%v\n", meta.ULID, - getFormatedTime(meta.MinTime, humanReadable), - getFormatedTime(meta.MaxTime, humanReadable), + getFormattedTime(meta.MinTime, humanReadable), + getFormattedTime(meta.MaxTime, humanReadable), time.Duration(meta.MaxTime-meta.MinTime)*time.Millisecond, meta.Stats.NumSamples, meta.Stats.NumChunks, meta.Stats.NumSeries, - getFormatedBytes(b.Size(), humanReadable), + getFormattedBytes(b.Size(), humanReadable), ) } } -func getFormatedTime(timestamp int64, humanReadable bool) string { +func getFormattedTime(timestamp int64, humanReadable bool) string { if humanReadable { return time.Unix(timestamp/1000, 0).UTC().String() } return strconv.FormatInt(timestamp, 10) } -func getFormatedBytes(bytes int64, humanReadable bool) string { +func getFormattedBytes(bytes int64, humanReadable bool) string { if humanReadable { return units.Base2Bytes(bytes).String() } @@ -405,7 +405,7 @@ func openBlock(path, blockID string) (*tsdb.DBReadOnly, tsdb.BlockReader, error) } } - b, err := db.Block(blockID) + b, err := db.Block(blockID, tsdb.DefaultPostingsDecoderFactory) if err != nil { return nil, nil, err } @@ -589,7 +589,10 @@ func analyzeBlock(ctx context.Context, 
path, blockID string, limit int, runExten if err != nil { return err } - postings = index.Intersect(postings, index.NewListPostings(refs)) + // Only intersect postings if matchers are specified. + if len(matchers) > 0 { + postings = index.Intersect(postings, index.NewListPostings(refs)) + } count := 0 for postings.Next() { count++ @@ -662,7 +665,7 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb. histogramChunkSize = append(histogramChunkSize, len(chk.Bytes())) fhchk, ok := chk.(*chunkenc.FloatHistogramChunk) if !ok { - return fmt.Errorf("chunk is not FloatHistogramChunk") + return errors.New("chunk is not FloatHistogramChunk") } it := fhchk.Iterator(nil) bucketCount := 0 @@ -677,7 +680,7 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb. histogramChunkSize = append(histogramChunkSize, len(chk.Bytes())) hchk, ok := chk.(*chunkenc.HistogramChunk) if !ok { - return fmt.Errorf("chunk is not HistogramChunk") + return errors.New("chunk is not HistogramChunk") } it := hchk.Iterator(nil) bucketCount := 0 @@ -733,7 +736,7 @@ func dumpSamples(ctx context.Context, dbDir, sandboxDirRoot string, mint, maxt i for _, mset := range matcherSets { sets = append(sets, q.Select(ctx, true, nil, mset...)) } - ss = storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge) + ss = storage.NewMergeSeriesSet(sets, 0, storage.ChainedSeriesMerge) } else { ss = q.Select(ctx, false, nil, matcherSets[0]...) } diff --git a/cmd/promtool/tsdb_test.go b/cmd/promtool/tsdb_test.go index ffc5467b47..e745a3fe7a 100644 --- a/cmd/promtool/tsdb_test.go +++ b/cmd/promtool/tsdb_test.go @@ -32,6 +32,7 @@ import ( ) func TestGenerateBucket(t *testing.T) { + t.Parallel() tcs := []struct { min, max int start, end, step int @@ -55,7 +56,7 @@ func TestGenerateBucket(t *testing.T) { } // getDumpedSamples dumps samples and returns them. 
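The analyzeBlock fix earlier in this file matters because the refs list is only populated from matchers: with no matchers it is empty, and intersecting the full postings with an empty list postings would wrongly report zero series. The guard in isolation; the function name is hypothetical:

```go
package example

import (
	"github.com/prometheus/prometheus/storage"
	"github.com/prometheus/prometheus/tsdb/index"
)

// countSeries narrows the postings to refs only when matchers were given;
// intersecting with an empty list would drop every series.
func countSeries(p index.Postings, refs []storage.SeriesRef, haveMatchers bool) (int, error) {
	if haveMatchers {
		p = index.Intersect(p, index.NewListPostings(refs))
	}
	n := 0
	for p.Next() {
		n++
	}
	return n, p.Err()
}
```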
-func getDumpedSamples(t *testing.T, path string, mint, maxt int64, match []string, formatter SeriesSetFormatter) string { +func getDumpedSamples(t *testing.T, databasePath, sandboxDirRoot string, mint, maxt int64, match []string, formatter SeriesSetFormatter) string { t.Helper() oldStdout := os.Stdout @@ -64,8 +65,8 @@ func getDumpedSamples(t *testing.T, path string, mint, maxt int64, match []strin err := dumpSamples( context.Background(), - path, - t.TempDir(), + databasePath, + sandboxDirRoot, mint, maxt, match, @@ -96,13 +97,15 @@ func TestTSDBDump(t *testing.T) { heavy_metric{foo="bar"} 5 4 3 2 1 heavy_metric{foo="foo"} 5 4 3 2 1 `) + t.Cleanup(func() { storage.Close() }) tests := []struct { - name string - mint int64 - maxt int64 - match []string - expectedDump string + name string + mint int64 + maxt int64 + sandboxDirRoot string + match []string + expectedDump string }{ { name: "default match", @@ -111,6 +114,14 @@ func TestTSDBDump(t *testing.T) { match: []string{"{__name__=~'(?s:.*)'}"}, expectedDump: "testdata/dump-test-1.prom", }, + { + name: "default match with sandbox dir root set", + mint: math.MinInt64, + maxt: math.MaxInt64, + sandboxDirRoot: t.TempDir(), + match: []string{"{__name__=~'(?s:.*)'}"}, + expectedDump: "testdata/dump-test-1.prom", + }, { name: "same matcher twice", mint: math.MinInt64, @@ -149,7 +160,7 @@ func TestTSDBDump(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.mint, tt.maxt, tt.match, formatSeriesSet) + dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.sandboxDirRoot, tt.mint, tt.maxt, tt.match, formatSeriesSet) expectedMetrics, err := os.ReadFile(tt.expectedDump) require.NoError(t, err) expectedMetrics = normalizeNewLine(expectedMetrics) @@ -171,12 +182,29 @@ func TestTSDBDumpOpenMetrics(t *testing.T) { my_counter{foo="bar", baz="abc"} 1 2 3 4 5 my_gauge{bar="foo", abc="baz"} 9 8 0 4 7 `) + t.Cleanup(func() { storage.Close() }) - expectedMetrics, err := os.ReadFile("testdata/dump-openmetrics-test.prom") - require.NoError(t, err) - expectedMetrics = normalizeNewLine(expectedMetrics) - dumpedMetrics := getDumpedSamples(t, storage.Dir(), math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics) - require.Equal(t, sortLines(string(expectedMetrics)), sortLines(dumpedMetrics)) + tests := []struct { + name string + sandboxDirRoot string + }{ + { + name: "default match", + }, + { + name: "default match with sandbox dir root set", + sandboxDirRoot: t.TempDir(), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + expectedMetrics, err := os.ReadFile("testdata/dump-openmetrics-test.prom") + require.NoError(t, err) + expectedMetrics = normalizeNewLine(expectedMetrics) + dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.sandboxDirRoot, math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics) + require.Equal(t, sortLines(string(expectedMetrics)), sortLines(dumpedMetrics)) + }) + } } func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) { @@ -195,7 +223,7 @@ func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) { }) // Dump the blocks into OM format - dumpedMetrics := getDumpedSamples(t, dbDir, math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics) + dumpedMetrics := getDumpedSamples(t, dbDir, "", math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics) // Should get back the initial metrics. 
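getDumpedSamples works by temporarily swapping os.Stdout for one end of a pipe and reading the other. The same technique as a reusable helper; a sketch, with a name of our own choosing:

```go
package example

import (
	"io"
	"os"
	"testing"

	"github.com/stretchr/testify/require"
)

// captureStdout runs fn with os.Stdout redirected into a pipe and returns
// everything fn printed.
func captureStdout(t *testing.T, fn func()) string {
	t.Helper()
	old := os.Stdout
	r, w, err := os.Pipe()
	require.NoError(t, err)
	os.Stdout = w
	defer func() { os.Stdout = old }()

	fn()

	// Close the write end so ReadAll sees EOF.
	require.NoError(t, w.Close())
	out, err := io.ReadAll(r)
	require.NoError(t, err)
	return string(out)
}
```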
require.Equal(t, string(initialMetrics), dumpedMetrics) diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index 7030635d1c..78dacdc569 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -26,13 +26,13 @@ import ( "strings" "time" - "github.com/go-kit/log" "github.com/google/go-cmp/cmp" "github.com/grafana/regexp" "github.com/nsf/jsondiff" "gopkg.in/yaml.v2" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -46,11 +46,11 @@ import ( // RulesUnitTest does unit testing of rules based on the unit testing files provided. // More info about the file format can be found in the docs. -func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { - return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, files...) +func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int { + return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, debug, files...) } -func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int { +func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int { failed := false junit := &junitxml.JUnitXML{} @@ -60,7 +60,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, } for _, f := range files { - if errs := ruleUnitTest(f, queryOpts, run, diffFlag, junit.Suite(f)); errs != nil { + if errs := ruleUnitTest(f, queryOpts, run, diffFlag, debug, junit.Suite(f)); errs != nil { fmt.Fprintln(os.Stderr, " FAILED:") for _, e := range errs { fmt.Fprintln(os.Stderr, e.Error()) @@ -82,7 +82,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, return successExitCode } -func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool, ts *junitxml.TestSuite) []error { +func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag, debug bool, ts *junitxml.TestSuite) []error { b, err := os.ReadFile(filename) if err != nil { ts.Abort(err) @@ -131,7 +131,7 @@ func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *reg if t.Interval == 0 { t.Interval = unitTestInp.EvaluationInterval } - ers := t.test(evalInterval, groupOrderMap, queryOpts, diffFlag, unitTestInp.RuleFiles...) + ers := t.test(testname, evalInterval, groupOrderMap, queryOpts, diffFlag, debug, unitTestInp.RuleFiles...) if ers != nil { for _, e := range ers { tc.Fail(e.Error()) @@ -198,7 +198,14 @@ type testGroup struct { } // test performs the unit tests. -func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) (outErr []error) { +func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag, debug bool, ruleFiles ...string) (outErr []error) { + if debug { + testStart := time.Now() + fmt.Printf("DEBUG: Starting test %s\n", testname) + defer func() { + fmt.Printf("DEBUG: Test %s finished, took %v\n", testname, time.Since(testStart)) + }() + } // Setup testing suite. 
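The new --debug path above brackets each test group with wall-clock timing via a deferred closure. The shape of that idiom extracted into a hypothetical helper, including the easy-to-miss trailing call pair:

```go
package main

import (
	"fmt"
	"time"
)

// trackDuration prints a start line immediately and returns a func that,
// when deferred, prints the elapsed time on exit.
func trackDuration(name string) func() {
	start := time.Now()
	fmt.Printf("DEBUG: Starting test %s\n", name)
	return func() {
		fmt.Printf("DEBUG: Test %s finished, took %v\n", name, time.Since(start))
	}
}

func main() {
	// The trailing () evaluates trackDuration now and defers only the
	// returned closure.
	defer trackDuration("example")()
	time.Sleep(10 * time.Millisecond)
}
```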
suite, err := promqltest.NewLazyLoader(tg.seriesLoadingString(), queryOpts) if err != nil { @@ -218,7 +225,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i Appendable: suite.Storage(), Context: context.Background(), NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {}, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } m := rules.NewManager(opts) groupsMap, ers := m.LoadGroups(time.Duration(tg.Interval), tg.ExternalLabels, tg.ExternalURL, nil, ruleFiles...) @@ -482,6 +489,32 @@ Outer: } } + if debug { + ts := tg.maxEvalTime() + // Potentially a test can be specified at a time with fractional seconds, + // which PromQL cannot represent, so round up to the next whole second. + ts = (ts + time.Second).Truncate(time.Second) + expr := fmt.Sprintf(`{__name__=~".+"}[%v]`, ts) + q, err := suite.QueryEngine().NewInstantQuery(context.Background(), suite.Queryable(), nil, expr, mint.Add(ts)) + if err != nil { + fmt.Printf("DEBUG: Failed querying, expr: %q, err: %v\n", expr, err) + return errs + } + res := q.Exec(suite.Context()) + if res.Err != nil { + fmt.Printf("DEBUG: Failed query exec, expr: %q, err: %v\n", expr, res.Err) + return errs + } + switch v := res.Value.(type) { + case promql.Matrix: + fmt.Printf("DEBUG: Dump of all data (input_series and rules) at %v:\n", ts) + fmt.Println(v.String()) + default: + fmt.Printf("DEBUG: Got unexpected type %T\n", v) + return errs + } + } + if len(errs) > 0 { return errs } diff --git a/cmd/promtool/unittest_test.go b/cmd/promtool/unittest_test.go index 9bbac28e9f..ec34ad3185 100644 --- a/cmd/promtool/unittest_test.go +++ b/cmd/promtool/unittest_test.go @@ -26,6 +26,7 @@ import ( ) func TestRulesUnitTest(t *testing.T) { + t.Parallel() type args struct { files []string } @@ -141,14 +142,16 @@ func TestRulesUnitTest(t *testing.T) { reuseCount[tt.want] += len(tt.args.files) } t.Run(tt.name, func(t *testing.T) { - if got := RulesUnitTest(tt.queryOpts, nil, false, tt.args.files...); got != tt.want { + t.Parallel() + if got := RulesUnitTest(tt.queryOpts, nil, false, false, tt.args.files...); got != tt.want { t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want) } }) } t.Run("Junit xml output ", func(t *testing.T) { + t.Parallel() var buf bytes.Buffer - if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, reuseFiles...); got != 1 { + if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, false, reuseFiles...); got != 1 { t.Errorf("RulesUnitTestResults() = %v, want 1", got) } var test junitxml.JUnitXML @@ -185,6 +188,7 @@ func TestRulesUnitTest(t *testing.T) { } func TestRulesUnitTestRun(t *testing.T) { + t.Parallel() type args struct { run []string files []string @@ -230,7 +234,8 @@ func TestRulesUnitTestRun(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := RulesUnitTest(tt.queryOpts, tt.args.run, false, tt.args.files...) + t.Parallel() + got := RulesUnitTest(tt.queryOpts, tt.args.run, false, false, tt.args.files...) 
require.Equal(t, tt.want, got) }) } diff --git a/config/config.go b/config/config.go index c9e8efbf3e..86d8563536 100644 --- a/config/config.go +++ b/config/config.go @@ -16,6 +16,8 @@ package config import ( "errors" "fmt" + "log/slog" + "mime" "net/url" "os" "path/filepath" @@ -25,12 +27,10 @@ import ( "time" "github.com/alecthomas/units" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/regexp" "github.com/prometheus/common/config" "github.com/prometheus/common/model" - "github.com/prometheus/common/sigv4" + "github.com/prometheus/sigv4" "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/discovery" @@ -73,7 +73,7 @@ const ( ) // Load parses the YAML input s into a Config. -func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, error) { +func Load(s string, logger *slog.Logger) (*Config, error) { cfg := &Config{} // If the entire config body is empty the UnmarshalYAML method is // never called. We thus have to set the DefaultConfig at the entry @@ -85,10 +85,6 @@ func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, erro return nil, err } - if !expandExternalLabels { - return cfg, nil - } - b := labels.NewScratchBuilder(0) cfg.GlobalConfig.ExternalLabels.Range(func(v labels.Label) { newV := os.Expand(v.Value, func(s string) string { @@ -98,26 +94,40 @@ func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, erro if v := os.Getenv(s); v != "" { return v } - level.Warn(logger).Log("msg", "Empty environment variable", "name", s) + logger.Warn("Empty environment variable", "name", s) return "" }) if newV != v.Value { - level.Debug(logger).Log("msg", "External label replaced", "label", v.Name, "input", v.Value, "output", newV) + logger.Debug("External label replaced", "label", v.Name, "input", v.Value, "output", newV) } // Note newV can be blank. https://github.com/prometheus/prometheus/issues/11024 b.Add(v.Name, newV) }) - cfg.GlobalConfig.ExternalLabels = b.Labels() + if !b.Labels().IsEmpty() { + cfg.GlobalConfig.ExternalLabels = b.Labels() + } + + switch cfg.OTLPConfig.TranslationStrategy { + case UnderscoreEscapingWithSuffixes: + case "": + case NoUTF8EscapingWithSuffixes: + if cfg.GlobalConfig.MetricNameValidationScheme == LegacyValidationConfig { + return nil, errors.New("OTLP translation strategy NoUTF8EscapingWithSuffixes is not allowed when UTF8 is disabled") + } + default: + return nil, fmt.Errorf("unsupported OTLP translation strategy %q", cfg.OTLPConfig.TranslationStrategy) + } + return cfg, nil } // LoadFile parses the given YAML file into a Config. -func LoadFile(filename string, agentMode, expandExternalLabels bool, logger log.Logger) (*Config, error) { +func LoadFile(filename string, agentMode bool, logger *slog.Logger) (*Config, error) { content, err := os.ReadFile(filename) if err != nil { return nil, err } - cfg, err := Load(string(content), expandExternalLabels, logger) + cfg, err := Load(string(content), logger) if err != nil { return nil, fmt.Errorf("parsing YAML file %s: %w", filename, err) } @@ -166,13 +176,13 @@ var ( // DefaultScrapeConfig is the default scrape configuration. DefaultScrapeConfig = ScrapeConfig{ // ScrapeTimeout, ScrapeInterval and ScrapeProtocols default to the configured globals. 
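Load above now always expands environment references in external label values; the expandExternalLabels parameter is gone. A standalone sketch of those expansion rules; the function name is ours, and the $$-escape branch reflects our reading of the upstream helper rather than a line visible in this hunk:

```go
package example

import (
	"log/slog"
	"os"
)

// expandLabelValue resolves ${VAR} and $VAR from the environment, treats
// "$$" as a literal "$", and expands unset variables to "" with a warning.
func expandLabelValue(logger *slog.Logger, value string) string {
	return os.Expand(value, func(name string) string {
		if name == "$" { // os.Expand passes "$" to the mapping for "$$"
			return "$"
		}
		if v := os.Getenv(name); v != "" {
			return v
		}
		logger.Warn("Empty environment variable", "name", name)
		return ""
	})
}
```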
- ScrapeClassicHistograms: false, - MetricsPath: "/metrics", - Scheme: "http", - HonorLabels: false, - HonorTimestamps: true, - HTTPClientConfig: config.DefaultHTTPClientConfig, - EnableCompression: true, + AlwaysScrapeClassicHistograms: false, + MetricsPath: "/metrics", + Scheme: "http", + HonorLabels: false, + HonorTimestamps: true, + HTTPClientConfig: config.DefaultHTTPClientConfig, + EnableCompression: true, } // DefaultAlertmanagerConfig is the default alertmanager configuration. @@ -183,13 +193,18 @@ var ( HTTPClientConfig: config.DefaultHTTPClientConfig, } + DefaultRemoteWriteHTTPClientConfig = config.HTTPClientConfig{ + FollowRedirects: true, + EnableHTTP2: false, + } + // DefaultRemoteWriteConfig is the default remote write configuration. DefaultRemoteWriteConfig = RemoteWriteConfig{ RemoteTimeout: model.Duration(30 * time.Second), ProtobufMessage: RemoteWriteProtoMsgV1, QueueConfig: DefaultQueueConfig, MetadataConfig: DefaultMetadataConfig, - HTTPClientConfig: config.DefaultHTTPClientConfig, + HTTPClientConfig: DefaultRemoteWriteHTTPClientConfig, } // DefaultQueueConfig is the default remote queue configuration. @@ -236,7 +251,9 @@ var ( } // DefaultOTLPConfig is the default OTLP configuration. - DefaultOTLPConfig = OTLPConfig{} + DefaultOTLPConfig = OTLPConfig{ + TranslationStrategy: UnderscoreEscapingWithSuffixes, + } ) // Config is the top-level configuration for Prometheus's config files. @@ -429,6 +446,8 @@ type GlobalConfig struct { RuleQueryOffset model.Duration `yaml:"rule_query_offset,omitempty"` // File to which PromQL queries are logged. QueryLogFile string `yaml:"query_log_file,omitempty"` + // File to which scrape failures are logged. + ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"` // The labels to add to any timeseries that this Prometheus instance scrapes. ExternalLabels labels.Labels `yaml:"external_labels,omitempty"` // An uncompressed response body larger than this many bytes will cause the @@ -474,9 +493,22 @@ func (s ScrapeProtocol) Validate() error { return nil } +// HeaderMediaType returns the MIME mediaType for a particular ScrapeProtocol. 
+func (s ScrapeProtocol) HeaderMediaType() string { + if _, ok := ScrapeProtocolsHeaders[s]; !ok { + return "" + } + mediaType, _, err := mime.ParseMediaType(ScrapeProtocolsHeaders[s]) + if err != nil { + return "" + } + return mediaType +} + var ( PrometheusProto ScrapeProtocol = "PrometheusProto" PrometheusText0_0_4 ScrapeProtocol = "PrometheusText0.0.4" + PrometheusText1_0_0 ScrapeProtocol = "PrometheusText1.0.0" OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1" OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0" UTF8NamesHeader string = model.EscapingKey + "=" + model.AllowUTF8 @@ -484,6 +516,7 @@ var ( ScrapeProtocolsHeaders = map[ScrapeProtocol]string{ PrometheusProto: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited", PrometheusText0_0_4: "text/plain;version=0.0.4", + PrometheusText1_0_0: "text/plain;version=1.0.0;escaping=allow-utf-8", OpenMetricsText0_0_1: "application/openmetrics-text;version=0.0.1", OpenMetricsText1_0_0: "application/openmetrics-text;version=1.0.0", } @@ -493,6 +526,7 @@ var ( DefaultScrapeProtocols = []ScrapeProtocol{ OpenMetricsText1_0_0, OpenMetricsText0_0_1, + PrometheusText1_0_0, PrometheusText0_0_4, } @@ -504,6 +538,7 @@ var ( PrometheusProto, OpenMetricsText1_0_0, OpenMetricsText0_0_1, + PrometheusText1_0_0, PrometheusText0_0_4, } ) @@ -529,6 +564,7 @@ func validateAcceptScrapeProtocols(sps []ScrapeProtocol) error { // SetDirectory joins any relative file paths with dir. func (c *GlobalConfig) SetDirectory(dir string) { c.QueryLogFile = config.JoinDir(dir, c.QueryLogFile) + c.ScrapeFailureLogFile = config.JoinDir(dir, c.ScrapeFailureLogFile) } // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -591,6 +627,7 @@ func (c *GlobalConfig) isZero() bool { c.EvaluationInterval == 0 && c.RuleQueryOffset == 0 && c.QueryLogFile == "" && + c.ScrapeFailureLogFile == "" && c.ScrapeProtocols == nil } @@ -628,10 +665,19 @@ type ScrapeConfig struct { // The protocols to negotiate during a scrape. It tells clients what // protocol are accepted by Prometheus and with what preference (most wanted is first). // Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, - // OpenMetricsText1.0.0, PrometheusText0.0.4. + // OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4. ScrapeProtocols []ScrapeProtocol `yaml:"scrape_protocols,omitempty"` - // Whether to scrape a classic histogram that is also exposed as a native histogram. - ScrapeClassicHistograms bool `yaml:"scrape_classic_histograms,omitempty"` + // The fallback protocol to use if the Content-Type provided by the target + // is not provided, blank, or not one of the expected values. + // Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1, + // OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4. + ScrapeFallbackProtocol ScrapeProtocol `yaml:"fallback_scrape_protocol,omitempty"` + // Whether to scrape a classic histogram, even if it is also exposed as a native histogram. + AlwaysScrapeClassicHistograms bool `yaml:"always_scrape_classic_histograms,omitempty"` + // Whether to convert all scraped classic histograms into a native histogram with custom buckets. + ConvertClassicHistogramsToNHCB bool `yaml:"convert_classic_histograms_to_nhcb,omitempty"` + // File to which scrape failures are logged. + ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"` // The HTTP resource path on which to fetch metrics from targets. 
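HeaderMediaType above leans on the standard library's mime.ParseMediaType, which splits a registered Accept value into the bare media type and its parameters. For the new PrometheusText1.0.0 header:

```go
package main

import (
	"fmt"
	"mime"
)

func main() {
	// The header value advertised for PrometheusText1.0.0 above.
	mediaType, params, err := mime.ParseMediaType("text/plain;version=1.0.0;escaping=allow-utf-8")
	if err != nil {
		panic(err)
	}
	fmt.Println(mediaType) // text/plain
	fmt.Println(params)    // map[escaping:allow-utf-8 version:1.0.0]
}
```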
MetricsPath string `yaml:"metrics_path,omitempty"` // The URL scheme with which to fetch metrics from targets. @@ -684,6 +730,7 @@ type ScrapeConfig struct { func (c *ScrapeConfig) SetDirectory(dir string) { c.ServiceDiscoveryConfigs.SetDirectory(dir) c.HTTPClientConfig.SetDirectory(dir) + c.ScrapeFailureLogFile = config.JoinDir(dir, c.ScrapeFailureLogFile) } // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -765,6 +812,9 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error { if c.KeepDroppedTargets == 0 { c.KeepDroppedTargets = globalConfig.KeepDroppedTargets } + if c.ScrapeFailureLogFile == "" { + c.ScrapeFailureLogFile = globalConfig.ScrapeFailureLogFile + } if c.ScrapeProtocols == nil { c.ScrapeProtocols = globalConfig.ScrapeProtocols @@ -773,11 +823,17 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error { return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName) } + if c.ScrapeFallbackProtocol != "" { + if err := c.ScrapeFallbackProtocol.Validate(); err != nil { + return fmt.Errorf("invalid fallback_scrape_protocol for scrape config with job name %q: %w", c.JobName, err) + } + } + switch globalConfig.MetricNameValidationScheme { - case "", LegacyValidationConfig: - case UTF8ValidationConfig: + case LegacyValidationConfig: + case "", UTF8ValidationConfig: if model.NameValidationScheme != model.UTF8Validation { - return fmt.Errorf("utf8 name validation requested but feature not enabled via --enable-feature=utf8-names") + panic("utf8 name validation requested but model.NameValidationScheme is not set to UTF8") } default: return fmt.Errorf("unknown name validation method specified, must be either 'legacy' or 'utf8', got %s", globalConfig.MetricNameValidationScheme) @@ -948,6 +1004,7 @@ func (a AlertmanagerConfigs) ToMap() map[string]*AlertmanagerConfig { // AlertmanagerAPIVersion represents a version of the // github.com/prometheus/alertmanager/api, e.g. 'v1' or 'v2'. +// 'v1' is no longer supported. type AlertmanagerAPIVersion string // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -977,7 +1034,7 @@ const ( ) var SupportedAlertmanagerAPIVersions = []AlertmanagerAPIVersion{ - AlertmanagerAPIVersionV1, AlertmanagerAPIVersionV2, + AlertmanagerAPIVersionV2, } // AlertmanagerConfig configures how Alertmanagers can be discovered and communicated with. @@ -1029,7 +1086,7 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er c.HTTPClientConfig.Authorization != nil || c.HTTPClientConfig.OAuth2 != nil if httpClientConfigAuthEnabled && c.SigV4Config != nil { - return fmt.Errorf("at most one of basic_auth, authorization, oauth2, & sigv4 must be configured") + return errors.New("at most one of basic_auth, authorization, oauth2, & sigv4 must be configured") } // Check for users putting URLs in target groups. 
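SetDirectory above is what makes a relative scrape_failure_log_file resolve against the config file's directory, the behavior the scrape-failure test depends on. As we read config.JoinDir, absolute paths pass through untouched; a small demonstration:

```go
package main

import (
	"fmt"
	"path/filepath"

	"github.com/prometheus/common/config"
)

func main() {
	dir := filepath.Dir("/etc/prometheus/prometheus.yml")
	// Relative paths are joined onto the config file's directory...
	fmt.Println(config.JoinDir(dir, "scrape_failure.log")) // /etc/prometheus/scrape_failure.log
	// ...while absolute paths are returned unchanged.
	fmt.Println(config.JoinDir(dir, "/var/log/fail.log")) // /var/log/fail.log
}
```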
@@ -1138,6 +1195,7 @@ type RemoteWriteConfig struct { Name string `yaml:"name,omitempty"` SendExemplars bool `yaml:"send_exemplars,omitempty"` SendNativeHistograms bool `yaml:"send_native_histograms,omitempty"` + RoundRobinDNS bool `yaml:"round_robin_dns,omitempty"` // ProtobufMessage specifies the protobuf message to use against the remote // receiver as specified in https://prometheus.io/docs/specs/remote_write_spec_2_0/ ProtobufMessage RemoteWriteProtoMsg `yaml:"protobuf_message,omitempty"` @@ -1359,9 +1417,21 @@ func getGoGCEnv() int { return DefaultRuntimeConfig.GoGC } +type translationStrategyOption string + +var ( + // NoUTF8EscapingWithSuffixes will keep UTF-8 characters as they are, units and type suffixes will still be added. + NoUTF8EscapingWithSuffixes translationStrategyOption = "NoUTF8EscapingWithSuffixes" + // UnderscoreEscapingWithSuffixes is the default option for translating OTLP to Prometheus. + // This option will translate all UTF-8 characters to underscores, while adding units and type suffixes. + UnderscoreEscapingWithSuffixes translationStrategyOption = "UnderscoreEscapingWithSuffixes" +) + // OTLPConfig is the configuration for writing to the OTLP endpoint. type OTLPConfig struct { - PromoteResourceAttributes []string `yaml:"promote_resource_attributes,omitempty"` + PromoteResourceAttributes []string `yaml:"promote_resource_attributes,omitempty"` + TranslationStrategy translationStrategyOption `yaml:"translation_strategy,omitempty"` + KeepIdentifyingResourceAttributes bool `yaml:"keep_identifying_resource_attributes,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -1377,7 +1447,7 @@ func (c *OTLPConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { for i, attr := range c.PromoteResourceAttributes { attr = strings.TrimSpace(attr) if attr == "" { - err = errors.Join(err, fmt.Errorf("empty promoted OTel resource attribute")) + err = errors.Join(err, errors.New("empty promoted OTel resource attribute")) continue } if _, exists := seen[attr]; exists { diff --git a/config/config_test.go b/config/config_test.go index 2219061823..4b5b11a9fd 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -24,10 +24,10 @@ import ( "time" "github.com/alecthomas/units" - "github.com/go-kit/log" "github.com/grafana/regexp" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -62,6 +62,11 @@ import ( "github.com/prometheus/prometheus/util/testutil" ) +func init() { + // This can be removed when the default validation scheme in common is updated. 
+ model.NameValidationScheme = model.UTF8Validation +} + func mustParseURL(u string) *config.URL { parsed, err := url.Parse(u) if err != nil { @@ -78,14 +83,16 @@ const ( globLabelNameLengthLimit = 200 globLabelValueLengthLimit = 200 globalGoGC = 42 + globScrapeFailureLogFile = "testdata/fail.log" ) var expectedConf = &Config{ GlobalConfig: GlobalConfig{ - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, - EvaluationInterval: model.Duration(30 * time.Second), - QueryLogFile: "", + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EvaluationInterval: model.Duration(30 * time.Second), + QueryLogFile: "testdata/query.log", + ScrapeFailureLogFile: globScrapeFailureLogFile, ExternalLabels: labels.FromStrings("foo", "bar", "monitor", "codelab"), @@ -135,7 +142,7 @@ var expectedConf = &Config{ }, }, FollowRedirects: true, - EnableHTTP2: true, + EnableHTTP2: false, }, }, { @@ -151,7 +158,7 @@ var expectedConf = &Config{ KeyFile: filepath.FromSlash("testdata/valid_key_file"), }, FollowRedirects: true, - EnableHTTP2: true, + EnableHTTP2: false, }, Headers: map[string]string{"name": "value"}, }, @@ -161,6 +168,7 @@ var expectedConf = &Config{ PromoteResourceAttributes: []string{ "k8s.cluster.name", "k8s.job.name", "k8s.namespace.name", }, + TranslationStrategy: UnderscoreEscapingWithSuffixes, }, RemoteReadConfigs: []*RemoteReadConfig{ @@ -199,18 +207,20 @@ var expectedConf = &Config{ { JobName: "prometheus", - HonorLabels: true, - HonorTimestamps: true, - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, - EnableCompression: true, - BodySizeLimit: globBodySizeLimit, - SampleLimit: globSampleLimit, - TargetLimit: globTargetLimit, - LabelLimit: globLabelLimit, - LabelNameLengthLimit: globLabelNameLengthLimit, - LabelValueLengthLimit: globLabelValueLengthLimit, - ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + HonorLabels: true, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + EnableCompression: true, + BodySizeLimit: globBodySizeLimit, + SampleLimit: globSampleLimit, + TargetLimit: globTargetLimit, + LabelLimit: globLabelLimit, + LabelNameLengthLimit: globLabelNameLengthLimit, + LabelValueLengthLimit: globLabelValueLengthLimit, + ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFallbackProtocol: PrometheusText0_0_4, + ScrapeFailureLogFile: "testdata/fail_prom.log", MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -225,6 +235,15 @@ var expectedConf = &Config{ TLSConfig: config.TLSConfig{ MinVersion: config.TLSVersion(tls.VersionTLS10), }, + HTTPHeaders: &config.Headers{ + Headers: map[string]config.Header{ + "foo": { + Values: []string{"foobar"}, + Secrets: []config.Secret{"bar", "foo"}, + Files: []string{filepath.FromSlash("testdata/valid_password_file")}, + }, + }, + }, }, ServiceDiscoveryConfigs: discovery.Configs{ @@ -314,6 +333,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: 210, LabelValueLengthLimit: 210, ScrapeProtocols: []ScrapeProtocol{PrometheusText0_0_4}, + ScrapeFailureLogFile: globScrapeFailureLogFile, HTTPClientConfig: config.HTTPClientConfig{ BasicAuth: &config.BasicAuth{ @@ -411,6 +431,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + 
ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -466,6 +487,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: "/metrics", Scheme: "http", @@ -499,6 +521,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -538,6 +561,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -577,6 +601,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -606,6 +631,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -643,6 +669,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -677,6 +704,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -718,6 +746,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -749,6 +778,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -783,6 +813,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -810,6 +841,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, 
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -840,6 +872,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: "/federate", Scheme: DefaultScrapeConfig.Scheme, @@ -870,6 +903,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -900,6 +934,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -927,6 +962,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -962,6 +998,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -996,6 +1033,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1027,6 +1065,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1057,6 +1096,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1091,6 +1131,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1128,6 +1169,7 @@ var expectedConf = &Config{ LabelNameLengthLimit: globLabelNameLengthLimit, LabelValueLengthLimit: globLabelValueLengthLimit, ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols, + ScrapeFailureLogFile: globScrapeFailureLogFile, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -1184,6 +1226,7 @@ var expectedConf = &Config{ 
 				LabelNameLengthLimit:  globLabelNameLengthLimit,
 				LabelValueLengthLimit: globLabelValueLengthLimit,
 				ScrapeProtocols:       DefaultGlobalConfig.ScrapeProtocols,
+				ScrapeFailureLogFile:  globScrapeFailureLogFile,
 
 				MetricsPath: DefaultScrapeConfig.MetricsPath,
 				Scheme:      DefaultScrapeConfig.Scheme,
@@ -1211,6 +1254,7 @@ var expectedConf = &Config{
 				LabelNameLengthLimit:  globLabelNameLengthLimit,
 				LabelValueLengthLimit: globLabelValueLengthLimit,
 				ScrapeProtocols:       DefaultGlobalConfig.ScrapeProtocols,
+				ScrapeFailureLogFile:  globScrapeFailureLogFile,
 
 				HTTPClientConfig: config.DefaultHTTPClientConfig,
 				MetricsPath:      DefaultScrapeConfig.MetricsPath,
@@ -1249,6 +1293,7 @@ var expectedConf = &Config{
 				LabelNameLengthLimit:  globLabelNameLengthLimit,
 				LabelValueLengthLimit: globLabelValueLengthLimit,
 				ScrapeProtocols:       DefaultGlobalConfig.ScrapeProtocols,
+				ScrapeFailureLogFile:  globScrapeFailureLogFile,
 
 				HTTPClientConfig: config.DefaultHTTPClientConfig,
 				MetricsPath:      DefaultScrapeConfig.MetricsPath,
@@ -1293,6 +1338,7 @@ var expectedConf = &Config{
 				LabelNameLengthLimit:  globLabelNameLengthLimit,
 				LabelValueLengthLimit: globLabelValueLengthLimit,
 				ScrapeProtocols:       DefaultGlobalConfig.ScrapeProtocols,
+				ScrapeFailureLogFile:  globScrapeFailureLogFile,
 
 				MetricsPath: DefaultScrapeConfig.MetricsPath,
 				Scheme:      DefaultScrapeConfig.Scheme,
@@ -1328,6 +1374,7 @@ var expectedConf = &Config{
 				LabelNameLengthLimit:  globLabelNameLengthLimit,
 				LabelValueLengthLimit: globLabelValueLengthLimit,
 				ScrapeProtocols:       DefaultGlobalConfig.ScrapeProtocols,
+				ScrapeFailureLogFile:  globScrapeFailureLogFile,
 
 				HTTPClientConfig: config.DefaultHTTPClientConfig,
 				MetricsPath:      DefaultScrapeConfig.MetricsPath,
@@ -1357,6 +1404,7 @@ var expectedConf = &Config{
 				LabelNameLengthLimit:  globLabelNameLengthLimit,
 				LabelValueLengthLimit: globLabelValueLengthLimit,
 				ScrapeProtocols:       DefaultGlobalConfig.ScrapeProtocols,
+				ScrapeFailureLogFile:  globScrapeFailureLogFile,
 
 				MetricsPath: DefaultScrapeConfig.MetricsPath,
 				Scheme:      DefaultScrapeConfig.Scheme,
@@ -1389,6 +1437,7 @@ var expectedConf = &Config{
 				LabelNameLengthLimit:  globLabelNameLengthLimit,
 				LabelValueLengthLimit: globLabelValueLengthLimit,
 				ScrapeProtocols:       DefaultGlobalConfig.ScrapeProtocols,
+				ScrapeFailureLogFile:  globScrapeFailureLogFile,
 
 				MetricsPath: DefaultScrapeConfig.MetricsPath,
 				Scheme:      DefaultScrapeConfig.Scheme,
@@ -1453,8 +1502,13 @@ var expectedConf = &Config{
 	},
 }
 
+func TestYAMLNoLongerSupportedAMApi(t *testing.T) {
+	_, err := LoadFile("testdata/config_with_no_longer_supported_am_api_config.yml", false, promslog.NewNopLogger())
+	require.Error(t, err)
+}
+
 func TestYAMLRoundtrip(t *testing.T) {
-	want, err := LoadFile("testdata/roundtrip.good.yml", false, false, log.NewNopLogger())
+	want, err := LoadFile("testdata/roundtrip.good.yml", false, promslog.NewNopLogger())
 	require.NoError(t, err)
 
 	out, err := yaml.Marshal(want)
@@ -1467,7 +1521,7 @@ func TestYAMLRoundtrip(t *testing.T) {
 }
 
 func TestRemoteWriteRetryOnRateLimit(t *testing.T) {
-	want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, false, log.NewNopLogger())
+	want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, promslog.NewNopLogger())
 	require.NoError(t, err)
 
 	out, err := yaml.Marshal(want)
@@ -1482,7 +1536,7 @@ func TestRemoteWriteRetryOnRateLimit(t *testing.T) {
 
 func TestOTLPSanitizeResourceAttributes(t *testing.T) {
 	t.Run("good config", func(t *testing.T) {
-		want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml"), false, false, log.NewNopLogger())
+
want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml"), false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -1494,25 +1548,101 @@ func TestOTLPSanitizeResourceAttributes(t *testing.T) { }) t.Run("bad config", func(t *testing.T) { - _, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml"), false, false, log.NewNopLogger()) + _, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml"), false, promslog.NewNopLogger()) require.ErrorContains(t, err, `duplicated promoted OTel resource attribute "k8s.job.name"`) require.ErrorContains(t, err, `empty promoted OTel resource attribute`) }) } +func TestOTLPAllowServiceNameInTargetInfo(t *testing.T) { + t.Run("good config", func(t *testing.T) { + want, err := LoadFile(filepath.Join("testdata", "otlp_allow_keep_identifying_resource_attributes.good.yml"), false, promslog.NewNopLogger()) + require.NoError(t, err) + + out, err := yaml.Marshal(want) + require.NoError(t, err) + var got Config + require.NoError(t, yaml.UnmarshalStrict(out, &got)) + + require.True(t, got.OTLPConfig.KeepIdentifyingResourceAttributes) + }) +} + +func TestOTLPAllowUTF8(t *testing.T) { + t.Run("good config", func(t *testing.T) { + fpath := filepath.Join("testdata", "otlp_allow_utf8.good.yml") + verify := func(t *testing.T, conf *Config, err error) { + t.Helper() + require.NoError(t, err) + require.Equal(t, NoUTF8EscapingWithSuffixes, conf.OTLPConfig.TranslationStrategy) + } + + t.Run("LoadFile", func(t *testing.T) { + conf, err := LoadFile(fpath, false, promslog.NewNopLogger()) + verify(t, conf, err) + }) + t.Run("Load", func(t *testing.T) { + content, err := os.ReadFile(fpath) + require.NoError(t, err) + conf, err := Load(string(content), promslog.NewNopLogger()) + verify(t, conf, err) + }) + }) + + t.Run("incompatible config", func(t *testing.T) { + fpath := filepath.Join("testdata", "otlp_allow_utf8.incompatible.yml") + verify := func(t *testing.T, err error) { + t.Helper() + require.ErrorContains(t, err, `OTLP translation strategy NoUTF8EscapingWithSuffixes is not allowed when UTF8 is disabled`) + } + + t.Run("LoadFile", func(t *testing.T) { + _, err := LoadFile(fpath, false, promslog.NewNopLogger()) + verify(t, err) + }) + t.Run("Load", func(t *testing.T) { + content, err := os.ReadFile(fpath) + require.NoError(t, err) + _, err = Load(string(content), promslog.NewNopLogger()) + t.Log("err", err) + verify(t, err) + }) + }) + + t.Run("bad config", func(t *testing.T) { + fpath := filepath.Join("testdata", "otlp_allow_utf8.bad.yml") + verify := func(t *testing.T, err error) { + t.Helper() + require.ErrorContains(t, err, `unsupported OTLP translation strategy "Invalid"`) + } + + t.Run("LoadFile", func(t *testing.T) { + _, err := LoadFile(fpath, false, promslog.NewNopLogger()) + verify(t, err) + }) + t.Run("Load", func(t *testing.T) { + content, err := os.ReadFile(fpath) + require.NoError(t, err) + _, err = Load(string(content), promslog.NewNopLogger()) + verify(t, err) + }) + }) +} + func TestLoadConfig(t *testing.T) { // Parse a valid file that sets a global scrape timeout. This tests whether parsing // an overwritten default field in the global config permanently changes the default. 
- _, err := LoadFile("testdata/global_timeout.good.yml", false, false, log.NewNopLogger()) + _, err := LoadFile("testdata/global_timeout.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - c, err := LoadFile("testdata/conf.good.yml", false, false, log.NewNopLogger()) + c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger()) + require.NoError(t, err) require.Equal(t, expectedConf, c) } func TestScrapeIntervalLarger(t *testing.T) { - c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, false, log.NewNopLogger()) + c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) require.Len(t, c.ScrapeConfigs, 1) for _, sc := range c.ScrapeConfigs { @@ -1522,7 +1652,7 @@ func TestScrapeIntervalLarger(t *testing.T) { // YAML marshaling must not reveal authentication credentials. func TestElideSecrets(t *testing.T) { - c, err := LoadFile("testdata/conf.good.yml", false, false, log.NewNopLogger()) + c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) secretRe := regexp.MustCompile(`\\u003csecret\\u003e|`) @@ -1532,38 +1662,38 @@ func TestElideSecrets(t *testing.T) { yamlConfig := string(config) matches := secretRe.FindAllStringIndex(yamlConfig, -1) - require.Len(t, matches, 22, "wrong number of secret matches found") + require.Len(t, matches, 24, "wrong number of secret matches found") require.NotContains(t, yamlConfig, "mysecret", "yaml marshal reveals authentication credentials.") } func TestLoadConfigRuleFilesAbsolutePath(t *testing.T) { // Parse a valid file that sets a rule files with an absolute path - c, err := LoadFile(ruleFilesConfigFile, false, false, log.NewNopLogger()) + c, err := LoadFile(ruleFilesConfigFile, false, promslog.NewNopLogger()) require.NoError(t, err) require.Equal(t, ruleFilesExpectedConf, c) } func TestKubernetesEmptyAPIServer(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, false, log.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } func TestKubernetesWithKubeConfig(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, false, log.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } func TestKubernetesSelectors(t *testing.T) { - _, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, false, log.NewNopLogger()) + _, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, false, log.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, false, log.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, false, log.NewNopLogger()) + _, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) - _, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, false, log.NewNopLogger()) + _, 
err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) } @@ -2033,24 +2163,39 @@ var expectedErrors = []struct { }, { filename: "scrape_config_files_scrape_protocols.bad.yml", - errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4] for scrape config with job name "node"`, + errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0] for scrape config with job name "node"`, }, { filename: "scrape_config_files_scrape_protocols2.bad.yml", errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols2.bad.yml: duplicated protocol in scrape_protocols, got [OpenMetricsText1.0.0 PrometheusProto OpenMetricsText1.0.0] for scrape config with job name "node"`, }, + { + filename: "scrape_config_files_fallback_scrape_protocol1.bad.yml", + errMsg: `parsing YAML file testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml: invalid fallback_scrape_protocol for scrape config with job name "node": unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0]`, + }, + { + filename: "scrape_config_files_fallback_scrape_protocol2.bad.yml", + errMsg: `unmarshal errors`, + }, } func TestBadConfigs(t *testing.T) { + model.NameValidationScheme = model.LegacyValidation + defer func() { + model.NameValidationScheme = model.UTF8Validation + }() for _, ee := range expectedErrors { - _, err := LoadFile("testdata/"+ee.filename, false, false, log.NewNopLogger()) - require.Error(t, err, "%s", ee.filename) - require.Contains(t, err.Error(), ee.errMsg, + _, err := LoadFile("testdata/"+ee.filename, false, promslog.NewNopLogger()) + require.ErrorContains(t, err, ee.errMsg, "Expected error for %s to contain %q but got: %s", ee.filename, ee.errMsg, err) } } func TestBadStaticConfigsJSON(t *testing.T) { + model.NameValidationScheme = model.LegacyValidation + defer func() { + model.NameValidationScheme = model.UTF8Validation + }() content, err := os.ReadFile("testdata/static_config.bad.json") require.NoError(t, err) var tg targetgroup.Group @@ -2059,6 +2204,10 @@ func TestBadStaticConfigsJSON(t *testing.T) { } func TestBadStaticConfigsYML(t *testing.T) { + model.NameValidationScheme = model.LegacyValidation + defer func() { + model.NameValidationScheme = model.UTF8Validation + }() content, err := os.ReadFile("testdata/static_config.bad.yml") require.NoError(t, err) var tg targetgroup.Group @@ -2067,7 +2216,7 @@ func TestBadStaticConfigsYML(t *testing.T) { } func TestEmptyConfig(t *testing.T) { - c, err := Load("", false, log.NewNopLogger()) + c, err := Load("", promslog.NewNopLogger()) require.NoError(t, err) exp := DefaultConfig require.Equal(t, exp, *c) @@ -2077,38 +2226,34 @@ func TestExpandExternalLabels(t *testing.T) { // Cleanup ant TEST env variable that could exist on the system. 
os.Setenv("TEST", "") - c, err := LoadFile("testdata/external_labels.good.yml", false, false, log.NewNopLogger()) - require.NoError(t, err) - testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foo${TEST}bar", "foo", "${TEST}", "qux", "foo$${TEST}", "xyz", "foo$$bar"), c.GlobalConfig.ExternalLabels) - - c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger()) + c, err := LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foobar", "foo", "", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels) os.Setenv("TEST", "TestValue") - c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger()) + c, err = LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "fooTestValuebar", "foo", "TestValue", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels) } func TestAgentMode(t *testing.T) { - _, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, false, log.NewNopLogger()) + _, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field alerting is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, false, log.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field alerting is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, false, log.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field rule_files is not allowed in agent mode") - _, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, false, log.NewNopLogger()) + _, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, promslog.NewNopLogger()) require.ErrorContains(t, err, "field remote_read is not allowed in agent mode") - c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, false, log.NewNopLogger()) + c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, promslog.NewNopLogger()) require.NoError(t, err) require.Empty(t, c.RemoteWriteConfigs) - c, err = LoadFile("testdata/agent_mode.good.yml", true, false, log.NewNopLogger()) + c, err = LoadFile("testdata/agent_mode.good.yml", true, promslog.NewNopLogger()) require.NoError(t, err) require.Len(t, c.RemoteWriteConfigs, 1) require.Equal( @@ -2119,7 +2264,7 @@ func TestAgentMode(t *testing.T) { } func TestEmptyGlobalBlock(t *testing.T) { - c, err := Load("global:\n", false, log.NewNopLogger()) + c, err := Load("global:\n", promslog.NewNopLogger()) require.NoError(t, err) exp := DefaultConfig exp.Runtime = DefaultRuntimeConfig @@ -2274,7 +2419,7 @@ func TestGetScrapeConfigs(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - c, err := LoadFile(tc.configFile, false, false, log.NewNopLogger()) + c, err := LoadFile(tc.configFile, false, promslog.NewNopLogger()) require.NoError(t, err) scfgs, err := c.GetScrapeConfigs() @@ -2292,7 +2437,7 @@ func kubernetesSDHostURL() config.URL { } func TestScrapeConfigDisableCompression(t *testing.T) { - want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", 
false, false, log.NewNopLogger()) + want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -2323,23 +2468,23 @@ func TestScrapeConfigNameValidationSettings(t *testing.T) { { name: "global setting implies local settings", inputFile: "scrape_config_global_validation_mode", - expectScheme: "utf8", + expectScheme: "legacy", }, { name: "local setting", inputFile: "scrape_config_local_validation_mode", - expectScheme: "utf8", + expectScheme: "legacy", }, { name: "local setting overrides global setting", inputFile: "scrape_config_local_global_validation_mode", - expectScheme: "legacy", + expectScheme: "utf8", }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, false, log.NewNopLogger()) + want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, promslog.NewNopLogger()) require.NoError(t, err) out, err := yaml.Marshal(want) @@ -2352,3 +2497,54 @@ func TestScrapeConfigNameValidationSettings(t *testing.T) { }) } } + +func TestScrapeProtocolHeader(t *testing.T) { + tests := []struct { + name string + proto ScrapeProtocol + expectedValue string + }{ + { + name: "blank", + proto: ScrapeProtocol(""), + expectedValue: "", + }, + { + name: "invalid", + proto: ScrapeProtocol("invalid"), + expectedValue: "", + }, + { + name: "prometheus protobuf", + proto: PrometheusProto, + expectedValue: "application/vnd.google.protobuf", + }, + { + name: "prometheus text 0.0.4", + proto: PrometheusText0_0_4, + expectedValue: "text/plain", + }, + { + name: "prometheus text 1.0.0", + proto: PrometheusText1_0_0, + expectedValue: "text/plain", + }, + { + name: "openmetrics 0.0.1", + proto: OpenMetricsText0_0_1, + expectedValue: "application/openmetrics-text", + }, + { + name: "openmetrics 1.0.0", + proto: OpenMetricsText1_0_0, + expectedValue: "application/openmetrics-text", + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + mediaType := tc.proto.HeaderMediaType() + + require.Equal(t, tc.expectedValue, mediaType) + }) + } +} diff --git a/config/reload.go b/config/reload.go new file mode 100644 index 0000000000..8be1b28d8a --- /dev/null +++ b/config/reload.go @@ -0,0 +1,92 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "os" + "path/filepath" + + "gopkg.in/yaml.v2" +) + +type ExternalFilesConfig struct { + RuleFiles []string `yaml:"rule_files"` + ScrapeConfigFiles []string `yaml:"scrape_config_files"` +} + +// GenerateChecksum generates a checksum of the YAML file and the files it references. 
+func GenerateChecksum(yamlFilePath string) (string, error) { + hash := sha256.New() + + yamlContent, err := os.ReadFile(yamlFilePath) + if err != nil { + return "", fmt.Errorf("error reading YAML file: %w", err) + } + _, err = hash.Write(yamlContent) + if err != nil { + return "", fmt.Errorf("error writing YAML file to hash: %w", err) + } + + var config ExternalFilesConfig + if err := yaml.Unmarshal(yamlContent, &config); err != nil { + return "", fmt.Errorf("error unmarshalling YAML: %w", err) + } + + dir := filepath.Dir(yamlFilePath) + + for i, file := range config.RuleFiles { + config.RuleFiles[i] = filepath.Join(dir, file) + } + for i, file := range config.ScrapeConfigFiles { + config.ScrapeConfigFiles[i] = filepath.Join(dir, file) + } + + files := map[string][]string{ + "r": config.RuleFiles, // "r" for rule files + "s": config.ScrapeConfigFiles, // "s" for scrape config files + } + + for _, prefix := range []string{"r", "s"} { + for _, pattern := range files[prefix] { + matchingFiles, err := filepath.Glob(pattern) + if err != nil { + return "", fmt.Errorf("error finding files with pattern %q: %w", pattern, err) + } + + for _, file := range matchingFiles { + // Write prefix to the hash ("r" or "s") followed by \0, then + // the file path. + _, err = hash.Write([]byte(prefix + "\x00" + file + "\x00")) + if err != nil { + return "", fmt.Errorf("error writing %q path to hash: %w", file, err) + } + + // Read and hash the content of the file. + content, err := os.ReadFile(file) + if err != nil { + return "", fmt.Errorf("error reading file %s: %w", file, err) + } + _, err = hash.Write(append(content, []byte("\x00")...)) + if err != nil { + return "", fmt.Errorf("error writing %q content to hash: %w", file, err) + } + } + } + } + + return hex.EncodeToString(hash.Sum(nil)), nil +} diff --git a/config/reload_test.go b/config/reload_test.go new file mode 100644 index 0000000000..f0f44f3588 --- /dev/null +++ b/config/reload_test.go @@ -0,0 +1,222 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestGenerateChecksum(t *testing.T) { + tmpDir := t.TempDir() + + // Define paths for the temporary files. + yamlFilePath := filepath.Join(tmpDir, "test.yml") + ruleFilePath := filepath.Join(tmpDir, "rule_file.yml") + scrapeConfigFilePath := filepath.Join(tmpDir, "scrape_config.yml") + + // Define initial and modified content for the files. + originalRuleContent := "groups:\n- name: example\n rules:\n - alert: ExampleAlert" + modifiedRuleContent := "groups:\n- name: example\n rules:\n - alert: ModifiedAlert" + + originalScrapeConfigContent := "scrape_configs:\n- job_name: example" + modifiedScrapeConfigContent := "scrape_configs:\n- job_name: modified_example" + + // Define YAML content referencing the rule and scrape config files. 
+ yamlContent := ` +rule_files: + - rule_file.yml +scrape_config_files: + - scrape_config.yml +` + + // Write initial content to files. + require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644)) + require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644)) + require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644)) + + // Generate the original checksum. + originalChecksum := calculateChecksum(t, yamlFilePath) + + t.Run("Rule File Change", func(t *testing.T) { + // Modify the rule file. + require.NoError(t, os.WriteFile(ruleFilePath, []byte(modifiedRuleContent), 0o644)) + + // Checksum should change. + modifiedChecksum := calculateChecksum(t, yamlFilePath) + require.NotEqual(t, originalChecksum, modifiedChecksum) + + // Revert the rule file. + require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644)) + + // Checksum should return to the original. + revertedChecksum := calculateChecksum(t, yamlFilePath) + require.Equal(t, originalChecksum, revertedChecksum) + }) + + t.Run("Scrape Config Change", func(t *testing.T) { + // Modify the scrape config file. + require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(modifiedScrapeConfigContent), 0o644)) + + // Checksum should change. + modifiedChecksum := calculateChecksum(t, yamlFilePath) + require.NotEqual(t, originalChecksum, modifiedChecksum) + + // Revert the scrape config file. + require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644)) + + // Checksum should return to the original. + revertedChecksum := calculateChecksum(t, yamlFilePath) + require.Equal(t, originalChecksum, revertedChecksum) + }) + + t.Run("Rule File Deletion", func(t *testing.T) { + // Delete the rule file. + require.NoError(t, os.Remove(ruleFilePath)) + + // Checksum should change. + deletedChecksum := calculateChecksum(t, yamlFilePath) + require.NotEqual(t, originalChecksum, deletedChecksum) + + // Restore the rule file. + require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644)) + + // Checksum should return to the original. + revertedChecksum := calculateChecksum(t, yamlFilePath) + require.Equal(t, originalChecksum, revertedChecksum) + }) + + t.Run("Scrape Config Deletion", func(t *testing.T) { + // Delete the scrape config file. + require.NoError(t, os.Remove(scrapeConfigFilePath)) + + // Checksum should change. + deletedChecksum := calculateChecksum(t, yamlFilePath) + require.NotEqual(t, originalChecksum, deletedChecksum) + + // Restore the scrape config file. + require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644)) + + // Checksum should return to the original. + revertedChecksum := calculateChecksum(t, yamlFilePath) + require.Equal(t, originalChecksum, revertedChecksum) + }) + + t.Run("Main File Change", func(t *testing.T) { + // Modify the main YAML file. + modifiedYamlContent := ` +global: + scrape_interval: 3s +rule_files: + - rule_file.yml +scrape_config_files: + - scrape_config.yml +` + require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644)) + + // Checksum should change. + modifiedChecksum := calculateChecksum(t, yamlFilePath) + require.NotEqual(t, originalChecksum, modifiedChecksum) + + // Revert the main YAML file. + require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644)) + + // Checksum should return to the original. 
+ revertedChecksum := calculateChecksum(t, yamlFilePath) + require.Equal(t, originalChecksum, revertedChecksum) + }) + + t.Run("Rule File Removed from YAML Config", func(t *testing.T) { + // Modify the YAML content to remove the rule file. + modifiedYamlContent := ` +scrape_config_files: + - scrape_config.yml +` + require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644)) + + // Checksum should change. + modifiedChecksum := calculateChecksum(t, yamlFilePath) + require.NotEqual(t, originalChecksum, modifiedChecksum) + + // Revert the YAML content. + require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644)) + + // Checksum should return to the original. + revertedChecksum := calculateChecksum(t, yamlFilePath) + require.Equal(t, originalChecksum, revertedChecksum) + }) + + t.Run("Scrape Config Removed from YAML Config", func(t *testing.T) { + // Modify the YAML content to remove the scrape config file. + modifiedYamlContent := ` +rule_files: + - rule_file.yml +` + require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644)) + + // Checksum should change. + modifiedChecksum := calculateChecksum(t, yamlFilePath) + require.NotEqual(t, originalChecksum, modifiedChecksum) + + // Revert the YAML content. + require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644)) + + // Checksum should return to the original. + revertedChecksum := calculateChecksum(t, yamlFilePath) + require.Equal(t, originalChecksum, revertedChecksum) + }) + + t.Run("Empty Rule File", func(t *testing.T) { + // Write an empty rule file. + require.NoError(t, os.WriteFile(ruleFilePath, []byte(""), 0o644)) + + // Checksum should change. + emptyChecksum := calculateChecksum(t, yamlFilePath) + require.NotEqual(t, originalChecksum, emptyChecksum) + + // Restore the rule file. + require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644)) + + // Checksum should return to the original. + revertedChecksum := calculateChecksum(t, yamlFilePath) + require.Equal(t, originalChecksum, revertedChecksum) + }) + + t.Run("Empty Scrape Config File", func(t *testing.T) { + // Write an empty scrape config file. + require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(""), 0o644)) + + // Checksum should change. + emptyChecksum := calculateChecksum(t, yamlFilePath) + require.NotEqual(t, originalChecksum, emptyChecksum) + + // Restore the scrape config file. + require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644)) + + // Checksum should return to the original. + revertedChecksum := calculateChecksum(t, yamlFilePath) + require.Equal(t, originalChecksum, revertedChecksum) + }) +} + +// calculateChecksum generates a checksum for the given YAML file path. +func calculateChecksum(t *testing.T, yamlFilePath string) string { + checksum, err := GenerateChecksum(yamlFilePath) + require.NoError(t, err) + require.NotEmpty(t, checksum) + return checksum +} diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index 56741822c2..2501652d5b 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -8,6 +8,8 @@ global: label_limit: 30 label_name_length_limit: 200 label_value_length_limit: 200 + query_log_file: query.log + scrape_failure_log_file: fail.log # scrape_timeout is set to the global default (10s). external_labels: @@ -72,6 +74,9 @@ scrape_configs: # metrics_path defaults to '/metrics' # scheme defaults to 'http'. 
+ fallback_scrape_protocol: PrometheusText0.0.4 + + scrape_failure_log_file: fail_prom.log file_sd_configs: - files: - foo/*.slow.json @@ -87,6 +92,12 @@ scrape_configs: my: label your: label + http_headers: + foo: + values: ["foobar"] + secrets: ["bar", "foo"] + files: ["valid_password_file"] + relabel_configs: - source_labels: [job, __meta_dns_name] regex: (.*)some-[regex] diff --git a/config/testdata/config_with_deprecated_am_api_config.yml b/config/testdata/config_with_deprecated_am_api_config.yml new file mode 100644 index 0000000000..ac89537ff1 --- /dev/null +++ b/config/testdata/config_with_deprecated_am_api_config.yml @@ -0,0 +1,7 @@ +alerting: + alertmanagers: + - scheme: http + api_version: v1 + file_sd_configs: + - files: + - nonexistent_file.yml diff --git a/config/testdata/jobname_dup.bad.yml b/config/testdata/jobname_dup.bad.yml index 0265493c30..d03cb0cf97 100644 --- a/config/testdata/jobname_dup.bad.yml +++ b/config/testdata/jobname_dup.bad.yml @@ -1,4 +1,6 @@ # Two scrape configs with the same job names are not allowed. +global: + metric_name_validation_scheme: legacy scrape_configs: - job_name: prometheus - job_name: service-x diff --git a/config/testdata/lowercase.bad.yml b/config/testdata/lowercase.bad.yml index 9bc9583341..6dd72e6476 100644 --- a/config/testdata/lowercase.bad.yml +++ b/config/testdata/lowercase.bad.yml @@ -1,3 +1,5 @@ +global: + metric_name_validation_scheme: legacy scrape_configs: - job_name: prometheus relabel_configs: diff --git a/config/testdata/otlp_allow_keep_identifying_resource_attributes.good.yml b/config/testdata/otlp_allow_keep_identifying_resource_attributes.good.yml new file mode 100644 index 0000000000..63151e2a77 --- /dev/null +++ b/config/testdata/otlp_allow_keep_identifying_resource_attributes.good.yml @@ -0,0 +1,2 @@ +otlp: + keep_identifying_resource_attributes: true diff --git a/config/testdata/otlp_allow_utf8.bad.yml b/config/testdata/otlp_allow_utf8.bad.yml new file mode 100644 index 0000000000..488e4b0558 --- /dev/null +++ b/config/testdata/otlp_allow_utf8.bad.yml @@ -0,0 +1,4 @@ +global: + metric_name_validation_scheme: legacy +otlp: + translation_strategy: Invalid diff --git a/config/testdata/otlp_allow_utf8.good.yml b/config/testdata/otlp_allow_utf8.good.yml new file mode 100644 index 0000000000..f3069d2fdd --- /dev/null +++ b/config/testdata/otlp_allow_utf8.good.yml @@ -0,0 +1,2 @@ +otlp: + translation_strategy: NoUTF8EscapingWithSuffixes diff --git a/config/testdata/otlp_allow_utf8.incompatible.yml b/config/testdata/otlp_allow_utf8.incompatible.yml new file mode 100644 index 0000000000..2625c24131 --- /dev/null +++ b/config/testdata/otlp_allow_utf8.incompatible.yml @@ -0,0 +1,4 @@ +global: + metric_name_validation_scheme: legacy +otlp: + translation_strategy: NoUTF8EscapingWithSuffixes diff --git a/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml b/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml new file mode 100644 index 0000000000..07cfe47594 --- /dev/null +++ b/config/testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml @@ -0,0 +1,5 @@ +scrape_configs: + - job_name: node + fallback_scrape_protocol: "prometheusproto" + static_configs: + - targets: ['localhost:8080'] diff --git a/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml b/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml new file mode 100644 index 0000000000..c5d133f9c4 --- /dev/null +++ b/config/testdata/scrape_config_files_fallback_scrape_protocol2.bad.yml @@ -0,0 +1,5 @@ 
+scrape_configs: + - job_name: node + fallback_scrape_protocol: ["OpenMetricsText1.0.0", "PrometheusText0.0.4"] + static_configs: + - targets: ['localhost:8080'] diff --git a/config/testdata/scrape_config_global_validation_mode.yml b/config/testdata/scrape_config_global_validation_mode.yml index 1548554397..e9b0618c70 100644 --- a/config/testdata/scrape_config_global_validation_mode.yml +++ b/config/testdata/scrape_config_global_validation_mode.yml @@ -1,4 +1,4 @@ global: - metric_name_validation_scheme: utf8 + metric_name_validation_scheme: legacy scrape_configs: - job_name: prometheus diff --git a/config/testdata/scrape_config_local_global_validation_mode.yml b/config/testdata/scrape_config_local_global_validation_mode.yml index d13605e21d..30b54834a5 100644 --- a/config/testdata/scrape_config_local_global_validation_mode.yml +++ b/config/testdata/scrape_config_local_global_validation_mode.yml @@ -1,5 +1,5 @@ global: - metric_name_validation_scheme: utf8 + metric_name_validation_scheme: legacy scrape_configs: - job_name: prometheus - metric_name_validation_scheme: legacy + metric_name_validation_scheme: utf8 diff --git a/config/testdata/scrape_config_local_validation_mode.yml b/config/testdata/scrape_config_local_validation_mode.yml index fad4235806..90279ff081 100644 --- a/config/testdata/scrape_config_local_validation_mode.yml +++ b/config/testdata/scrape_config_local_validation_mode.yml @@ -1,3 +1,3 @@ scrape_configs: - job_name: prometheus - metric_name_validation_scheme: utf8 + metric_name_validation_scheme: legacy diff --git a/console_libraries/menu.lib b/console_libraries/menu.lib deleted file mode 100644 index 199ebf9f48..0000000000 --- a/console_libraries/menu.lib +++ /dev/null @@ -1,82 +0,0 @@ -{{/* vim: set ft=html: */}} - -{{/* Navbar, should be passed . */}} -{{ define "navbar" }} - -{{ end }} - -{{/* LHS menu, should be passed . */}} -{{ define "menu" }} -
- -
-{{ end }} - -{{/* Helper, pass (args . path name) */}} -{{ define "_menuItem" }} - -{{ end }} - diff --git a/console_libraries/prom.lib b/console_libraries/prom.lib deleted file mode 100644 index d7d436f947..0000000000 --- a/console_libraries/prom.lib +++ /dev/null @@ -1,138 +0,0 @@ -{{/* vim: set ft=html: */}} -{{/* Load Prometheus console library JS/CSS. Should go in */}} -{{ define "prom_console_head" }} - - - - - - - - - - - - - -{{ end }} - -{{/* Top of all pages. */}} -{{ define "head" -}} - - - -{{ template "prom_console_head" }} - - -{{ template "navbar" . }} - -{{ template "menu" . }} -{{ end }} - -{{ define "__prom_query_drilldown_noop" }}{{ . }}{{ end }} -{{ define "humanize" }}{{ humanize . }}{{ end }} -{{ define "humanizeNoSmallPrefix" }}{{ if and (lt . 1.0) (gt . -1.0) }}{{ printf "%.3g" . }}{{ else }}{{ humanize . }}{{ end }}{{ end }} -{{ define "humanize1024" }}{{ humanize1024 . }}{{ end }} -{{ define "humanizeDuration" }}{{ humanizeDuration . }}{{ end }} -{{ define "humanizePercentage" }}{{ humanizePercentage . }}{{ end }} -{{ define "humanizeTimestamp" }}{{ humanizeTimestamp . }}{{ end }} -{{ define "printf.1f" }}{{ printf "%.1f" . }}{{ end }} -{{ define "printf.3g" }}{{ printf "%.3g" . }}{{ end }} - -{{/* prom_query_drilldown (args expr suffix? renderTemplate?) -Displays the result of the expression, with a link to /graph for it. - -renderTemplate is the name of the template to use to render the value. -*/}} -{{ define "prom_query_drilldown" }} -{{ $expr := .arg0 }}{{ $suffix := (or .arg1 "") }}{{ $renderTemplate := (or .arg2 "__prom_query_drilldown_noop") }} -{{ with query $expr }}{{tmpl $renderTemplate ( . | first | value )}}{{ $suffix }}{{ else }}-{{ end }} -{{ end }} - -{{ define "prom_path" }}/consoles/{{ .Path }}?{{ range $param, $value := .Params }}{{ $param }}={{ $value }}&{{ end }}{{ end }}" - -{{ define "prom_right_table_head" }} -
- -{{ end }} -{{ define "prom_right_table_tail" }} -
-
-{{ end }} - -{{/* RHS table head, pass job name. Should be used after prom_right_table_head. */}} -{{ define "prom_right_table_job_head" }} - - {{ . }} - {{ template "prom_query_drilldown" (args (printf "sum(up{job='%s'})" .)) }} / {{ template "prom_query_drilldown" (args (printf "count(up{job='%s'})" .)) }} - - - CPU - {{ template "prom_query_drilldown" (args (printf "avg by(job)(irate(process_cpu_seconds_total{job='%s'}[5m]))" .) "s/s" "humanizeNoSmallPrefix") }} - - - Memory - {{ template "prom_query_drilldown" (args (printf "avg by(job)(process_resident_memory_bytes{job='%s'})" .) "B" "humanize1024") }} - -{{ end }} - - -{{ define "prom_content_head" }} -
-
-{{ template "prom_graph_timecontrol" . }} -{{ end }} -{{ define "prom_content_tail" }} -
-
-{{ end }} - -{{ define "prom_graph_timecontrol" }} -
-
-
- -
-
- -
-
-
- - - -
-
-
- -
-{{ end }} - -{{/* Bottom of all pages. */}} -{{ define "tail" }} - - -{{ end }} diff --git a/consoles/index.html.example b/consoles/index.html.example deleted file mode 100644 index c725d30dea..0000000000 --- a/consoles/index.html.example +++ /dev/null @@ -1,28 +0,0 @@ -{{ template "head" . }} - -{{ template "prom_right_table_head" }} -{{ template "prom_right_table_tail" }} - -{{ template "prom_content_head" . }} -

Overview

-

These are example consoles for Prometheus.

- -

These consoles expect exporters to have the following job labels:

- - - - - - - - - - - - - -
ExporterJob label
Node Exporternode
Prometheusprometheus
- -{{ template "prom_content_tail" . }} - -{{ template "tail" }} diff --git a/consoles/node-cpu.html b/consoles/node-cpu.html deleted file mode 100644 index 284ad738f2..0000000000 --- a/consoles/node-cpu.html +++ /dev/null @@ -1,60 +0,0 @@ -{{ template "head" . }} - -{{ template "prom_right_table_head" }} - - CPU(s): {{ template "prom_query_drilldown" (args (printf "scalar(count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'})))" .Params.instance)) }} - -{{ range printf "sum by (mode)(irate(node_cpu_seconds_total{job='node',instance='%s'}[5m])) * 100 / scalar(count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'})))" .Params.instance .Params.instance | query | sortByLabel "mode" }} - - {{ .Labels.mode | title }} CPU - {{ .Value | printf "%.1f" }}% - -{{ end }} - Misc - - Processes Running - {{ template "prom_query_drilldown" (args (printf "node_procs_running{job='node',instance='%s'}" .Params.instance) "" "humanize") }} - - - Processes Blocked - {{ template "prom_query_drilldown" (args (printf "node_procs_blocked{job='node',instance='%s'}" .Params.instance) "" "humanize") }} - - - Forks - {{ template "prom_query_drilldown" (args (printf "irate(node_forks_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }} - - - Context Switches - {{ template "prom_query_drilldown" (args (printf "irate(node_context_switches_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }} - - - Interrupts - {{ template "prom_query_drilldown" (args (printf "irate(node_intr_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }} - - - 1m Loadavg - {{ template "prom_query_drilldown" (args (printf "node_load1{job='node',instance='%s'}" .Params.instance)) }} - - - -{{ template "prom_right_table_tail" }} - -{{ template "prom_content_head" . }} -

Node CPU - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}

- -

CPU Usage

-
- -{{ template "prom_content_tail" . }} - -{{ template "tail" }} diff --git a/consoles/node-disk.html b/consoles/node-disk.html deleted file mode 100644 index ffff41b797..0000000000 --- a/consoles/node-disk.html +++ /dev/null @@ -1,78 +0,0 @@ -{{ template "head" . }} - -{{ template "prom_right_table_head" }} - - Disks - -{{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s'}" .Params.instance | query | sortByLabel "device" }} - {{ .Labels.device }} - - Utilization - {{ template "prom_query_drilldown" (args (printf "irate(node_disk_io_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) * 100" .Labels.instance .Labels.device) "%" "printf.1f") }} - - - Throughput - {{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_bytes_total{job='node',instance='%s',device='%s'}[5m]) + irate(node_disk_written_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "B/s" "humanize") }} - - - Avg Read Time - {{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) / irate(node_disk_reads_completed_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "s" "humanize") }} - - - Avg Write Time - {{ template "prom_query_drilldown" (args (printf "irate(node_disk_write_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) / irate(node_disk_writes_completed_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "s" "humanize") }} - -{{ end }} - - Filesystem Fullness - -{{ define "roughlyNearZero" }} -{{ if gt .1 . }}~0{{ else }}{{ printf "%.1f" . }}{{ end }} -{{ end }} -{{ range printf "node_filesystem_size_bytes{job='node',instance='%s'}" .Params.instance | query | sortByLabel "mountpoint" }} - - {{ .Labels.mountpoint }} - {{ template "prom_query_drilldown" (args (printf "100 - node_filesystem_avail_bytes{job='node',instance='%s',mountpoint='%s'} / node_filesystem_size_bytes{job='node'} * 100" .Labels.instance .Labels.mountpoint) "%" "roughlyNearZero") }} - -{{ end }} - - -{{ template "prom_right_table_tail" }} - -{{ template "prom_content_head" . }} -

-<h1>Node Disk - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</h1>
-
-<h3>Disk I/O Utilization</h3>
-[disk I/O utilization graph script]
-
-<h3>Filesystem Usage</h3>
-[filesystem usage graph script]
- -{{ template "prom_content_tail" . }} - -{{ template "tail" }} diff --git a/consoles/node-overview.html b/consoles/node-overview.html deleted file mode 100644 index 4ae8984b99..0000000000 --- a/consoles/node-overview.html +++ /dev/null @@ -1,121 +0,0 @@ -{{ template "head" . }} - -{{ template "prom_right_table_head" }} - Overview - - User CPU - {{ template "prom_query_drilldown" (args (printf "sum(irate(node_cpu_seconds_total{job='node',instance='%s',mode='user'}[5m])) * 100 / count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance .Params.instance) "%" "printf.1f") }} - - - System CPU - {{ template "prom_query_drilldown" (args (printf "sum(irate(node_cpu_seconds_total{job='node',instance='%s',mode='system'}[5m])) * 100 / count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance .Params.instance) "%" "printf.1f") }} - - - Memory Total - {{ template "prom_query_drilldown" (args (printf "node_memory_MemTotal_bytes{job='node',instance='%s'}" .Params.instance) "B" "humanize1024") }} - - - Memory Free - {{ template "prom_query_drilldown" (args (printf "node_memory_MemFree_bytes{job='node',instance='%s'}" .Params.instance) "B" "humanize1024") }} - - - Network - -{{ range printf "node_network_receive_bytes_total{job='node',instance='%s',device!='lo'}" .Params.instance | query | sortByLabel "device" }} - - {{ .Labels.device }} Received - {{ template "prom_query_drilldown" (args (printf "irate(node_network_receive_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device) "B/s" "humanize") }} - - - {{ .Labels.device }} Transmitted - {{ template "prom_query_drilldown" (args (printf "irate(node_network_transmit_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device) "B/s" "humanize") }} - -{{ end }} - - Disks - -{{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s',device!~'^(md\\\\d+$|dm-)'}" .Params.instance | query | sortByLabel "device" }} - - {{ .Labels.device }} Utilization - {{ template "prom_query_drilldown" (args (printf "irate(node_disk_io_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) * 100" .Labels.instance .Labels.device) "%" "printf.1f") }} - -{{ end }} -{{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s'}" .Params.instance | query | sortByLabel "device" }} - - {{ .Labels.device }} Throughput - {{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_bytes_total{job='node',instance='%s',device='%s'}[5m]) + irate(node_disk_written_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "B/s" "humanize") }} - -{{ end }} - - Filesystem Fullness - -{{ define "roughlyNearZero" }} -{{ if gt .1 . }}~0{{ else }}{{ printf "%.1f" . }}{{ end }} -{{ end }} -{{ range printf "node_filesystem_size_bytes{job='node',instance='%s'}" .Params.instance | query | sortByLabel "mountpoint" }} - - {{ .Labels.mountpoint }} - {{ template "prom_query_drilldown" (args (printf "100 - node_filesystem_avail_bytes{job='node',instance='%s',mountpoint='%s'} / node_filesystem_size_bytes{job='node'} * 100" .Labels.instance .Labels.mountpoint) "%" "roughlyNearZero") }} - -{{ end }} - -{{ template "prom_right_table_tail" }} - -{{ template "prom_content_head" . }} -

-<h1>Node Overview - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</h1>
-
-<h3>CPU Usage</h3>
-[CPU usage graph script]
-
-<h3>Disk I/O Utilization</h3>
-[disk I/O utilization graph script]
-
-<h3>Memory</h3>
-[memory graph script]
- - -{{ template "prom_content_tail" . }} - -{{ template "tail" }} diff --git a/consoles/node.html b/consoles/node.html deleted file mode 100644 index c1dfc1a891..0000000000 --- a/consoles/node.html +++ /dev/null @@ -1,35 +0,0 @@ -{{ template "head" . }} - -{{ template "prom_right_table_head" }} - - Node - {{ template "prom_query_drilldown" (args "sum(up{job='node'})") }} / {{ template "prom_query_drilldown" (args "count(up{job='node'})") }} - -{{ template "prom_right_table_tail" }} - -{{ template "prom_content_head" . }} -

-<h1>Node</h1>
-
-<table>
-  <tr>
-    <th>Node</th>
-    <th>Up</th>
-    <th>CPU<br/>Used</th>
-    <th>Memory<br/>Available</th>
-  </tr>
-{{ range query "up{job='node'}" | sortByLabel "instance" }}
-  <tr>
-    <td>{{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Labels.instance }}</td>
-    <td{{ if eq (. | value) 1.0 }}>Yes{{ else }} class="alert-danger">No{{ end }}</td>
-    <td>{{ template "prom_query_drilldown" (args (printf "100 * (1 - avg by(instance) (sum without(mode) (irate(node_cpu_seconds_total{job='node',mode=~'idle|iowait|steal',instance='%s'}[5m]))))" .Labels.instance) "%" "printf.1f") }}</td>
-    <td>{{ template "prom_query_drilldown" (args (printf "node_memory_MemFree_bytes{job='node',instance='%s'} + node_memory_Cached_bytes{job='node',instance='%s'} + node_memory_Buffers_bytes{job='node',instance='%s'}" .Labels.instance .Labels.instance .Labels.instance) "B" "humanize1024") }}</td>
-  </tr>
-{{ else }}
-  <tr><td colspan="4">No nodes found.</td></tr>
-{{ end }}
-</table>
- - -{{ template "prom_content_tail" . }} - -{{ template "tail" }} diff --git a/consoles/prometheus-overview.html b/consoles/prometheus-overview.html deleted file mode 100644 index 08e027de06..0000000000 --- a/consoles/prometheus-overview.html +++ /dev/null @@ -1,96 +0,0 @@ -{{ template "head" . }} - -{{ template "prom_right_table_head" }} - - Overview - - - CPU - {{ template "prom_query_drilldown" (args (printf "irate(process_cpu_seconds_total{job='prometheus',instance='%s'}[5m])" .Params.instance) "s/s" "humanizeNoSmallPrefix") }} - - - Memory - {{ template "prom_query_drilldown" (args (printf "process_resident_memory_bytes{job='prometheus',instance='%s'}" .Params.instance) "B" "humanize1024") }} - - - Version - {{ with query (printf "prometheus_build_info{job='prometheus',instance='%s'}" .Params.instance) }}{{. | first | label "version"}}{{end}} - - - - Storage - - - Ingested Samples - {{ template "prom_query_drilldown" (args (printf "irate(prometheus_tsdb_head_samples_appended_total{job='prometheus',instance='%s'}[5m])" .Params.instance) "/s" "humanizeNoSmallPrefix") }} - - - Head Series - {{ template "prom_query_drilldown" (args (printf "prometheus_tsdb_head_series{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }} - - - Blocks Loaded - {{ template "prom_query_drilldown" (args (printf "prometheus_tsdb_blocks_loaded{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }} - - - Rules - - - Evaluation Duration - {{ template "prom_query_drilldown" (args (printf "irate(prometheus_evaluator_duration_seconds_sum{job='prometheus',instance='%s'}[5m]) / irate(prometheus_evaluator_duration_seconds_count{job='prometheus',instance='%s'}[5m])" .Params.instance .Params.instance) "" "humanizeDuration") }} - - - Notification Latency - {{ template "prom_query_drilldown" (args (printf "irate(prometheus_notifications_latency_seconds_sum{job='prometheus',instance='%s'}[5m]) / irate(prometheus_notifications_latency_seconds_count{job='prometheus',instance='%s'}[5m])" .Params.instance .Params.instance) "" "humanizeDuration") }} - - - Notification Queue - {{ template "prom_query_drilldown" (args (printf "prometheus_notifications_queue_length{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }} - - - HTTP Server - -{{ range printf "prometheus_http_request_duration_seconds_count{job='prometheus',instance='%s'}" .Params.instance | query | sortByLabel "handler" }} - - {{ .Labels.handler }} - {{ template "prom_query_drilldown" (args (printf "irate(prometheus_http_request_duration_seconds_count{job='prometheus',instance='%s',handler='%s'}[5m])" .Labels.instance .Labels.handler) "/s" "humanizeNoSmallPrefix") }} - -{{ end }} - -{{ template "prom_right_table_tail" }} - -{{ template "prom_content_head" . }} -
-<h1>Prometheus Overview - {{ .Params.instance }}</h1>
-
-<h3>Ingested Samples</h3>
-[ingested samples graph script]
-
-<h3>HTTP Server</h3>
-[HTTP request rate graph script]
-{{ template "prom_content_tail" . }} - -{{ template "tail" }} diff --git a/consoles/prometheus.html b/consoles/prometheus.html deleted file mode 100644 index e0d026376d..0000000000 --- a/consoles/prometheus.html +++ /dev/null @@ -1,34 +0,0 @@ -{{ template "head" . }} - -{{ template "prom_right_table_head" }} - - Prometheus - {{ template "prom_query_drilldown" (args "sum(up{job='prometheus'})") }} / {{ template "prom_query_drilldown" (args "count(up{job='prometheus'})") }} - -{{ template "prom_right_table_tail" }} - -{{ template "prom_content_head" . }} -

-<h1>Prometheus</h1>
-
-<table>
-  <tr>
-    <th>Prometheus</th>
-    <th>Up</th>
-    <th>Ingested Samples</th>
-    <th>Memory</th>
-  </tr>
-{{ range query "up{job='prometheus'}" | sortByLabel "instance" }}
-  <tr>
-    <td>{{ .Labels.instance }}</td>
-    <td{{ if eq (. | value) 1.0 }}>Yes{{ else }} class="alert-danger">No{{ end }}</td>
-    <td>{{ template "prom_query_drilldown" (args (printf "irate(prometheus_tsdb_head_samples_appended_total{job='prometheus',instance='%s'}[5m])" .Labels.instance) "/s" "humanizeNoSmallPrefix") }}</td>
-    <td>{{ template "prom_query_drilldown" (args (printf "process_resident_memory_bytes{job='prometheus',instance='%s'}" .Labels.instance) "B" "humanize1024") }}</td>
-  </tr>
-{{ else }}
-  <tr><td colspan="4">No devices found.</td></tr>
-{{ end }}
-</table>
- -{{ template "prom_content_tail" . }} - -{{ template "tail" }} diff --git a/discovery/README.md b/discovery/README.md index 4c06608625..d5418e7fb1 100644 --- a/discovery/README.md +++ b/discovery/README.md @@ -233,7 +233,7 @@ type Config interface { } type DiscovererOptions struct { - Logger log.Logger + Logger *slog.Logger // A registerer for the Discoverer's metrics. Registerer prometheus.Registerer diff --git a/discovery/aws/ec2.go b/discovery/aws/ec2.go index a44912481a..0f35c401e6 100644 --- a/discovery/aws/ec2.go +++ b/discovery/aws/ec2.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" @@ -29,11 +30,11 @@ import ( "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/ec2" - "github.com/go-kit/log" - "github.com/go-kit/log/level" + "github.com/aws/aws-sdk-go/service/ec2/ec2iface" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/refresh" @@ -146,9 +147,9 @@ func (c *EC2SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // the Discoverer interface. type EC2Discovery struct { *refresh.Discovery - logger log.Logger + logger *slog.Logger cfg *EC2SDConfig - ec2 *ec2.EC2 + ec2 ec2iface.EC2API // azToAZID maps this account's availability zones to their underlying AZ // ID, e.g. eu-west-2a -> euw2-az2. Refreshes are performed sequentially, so @@ -157,14 +158,14 @@ type EC2Discovery struct { } // NewEC2Discovery returns a new EC2Discovery which periodically refreshes its targets. -func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) { +func NewEC2Discovery(conf *EC2SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) { m, ok := metrics.(*ec2Metrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } d := &EC2Discovery{ logger: logger, @@ -182,7 +183,7 @@ func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, metrics discovery.Dis return d, nil } -func (d *EC2Discovery) ec2Client(context.Context) (*ec2.EC2, error) { +func (d *EC2Discovery) ec2Client(context.Context) (ec2iface.EC2API, error) { if d.ec2 != nil { return d.ec2, nil } @@ -254,8 +255,8 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error // Prometheus requires a reload if AWS adds a new AZ to the region. 
if d.azToAZID == nil { if err := d.refreshAZIDs(ctx); err != nil { - level.Debug(d.logger).Log( - "msg", "Unable to describe availability zones", + d.logger.Debug( + "Unable to describe availability zones", "err", err) } } @@ -296,8 +297,8 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone) azID, ok := d.azToAZID[*inst.Placement.AvailabilityZone] if !ok && d.azToAZID != nil { - level.Debug(d.logger).Log( - "msg", "Availability zone ID not found", + d.logger.Debug( + "Availability zone ID not found", "az", *inst.Placement.AvailabilityZone) } labels[ec2LabelAZID] = model.LabelValue(azID) diff --git a/discovery/aws/ec2_test.go b/discovery/aws/ec2_test.go new file mode 100644 index 0000000000..f34065c23e --- /dev/null +++ b/discovery/aws/ec2_test.go @@ -0,0 +1,434 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package aws + +import ( + "context" + "errors" + "testing" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/request" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ec2/ec2iface" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + "go.uber.org/goleak" + + "github.com/prometheus/prometheus/discovery/targetgroup" +) + +// Helper function to get pointers on literals. +// NOTE: this is common between a few tests. In the future it might worth to move this out into a separate package. +func strptr(str string) *string { + return &str +} + +func boolptr(b bool) *bool { + return &b +} + +func int64ptr(i int64) *int64 { + return &i +} + +// Struct for test data. +type ec2DataStore struct { + region string + + azToAZID map[string]string + + ownerID string + + instances []*ec2.Instance +} + +// The tests itself. 
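The ec2.go hunks above replace go-kit's level-wrapped logging with log/slog and fall back to a no-op logger when none is supplied. A minimal sketch of the resulting call pattern (the error value is illustrative):

    package main

    import (
        "errors"
        "log/slog"

        "github.com/prometheus/common/promslog"
    )

    func describeAZs(logger *slog.Logger) {
        if logger == nil {
            logger = promslog.NewNopLogger() // same nil-guard the discoverers use
        }
        // Alternating key/value arguments replace the old
        // level.Debug(logger).Log("msg", ..., "err", err) style.
        logger.Debug("Unable to describe availability zones", "err", errors.New("example"))
    }

    func main() { describeAZs(nil) }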
+func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} + +func TestEC2DiscoveryRefreshAZIDs(t *testing.T) { + ctx := context.Background() + + // iterate through the test cases + for _, tt := range []struct { + name string + shouldFail bool + ec2Data *ec2DataStore + }{ + { + name: "Normal", + shouldFail: false, + ec2Data: &ec2DataStore{ + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + }, + }, + { + name: "HandleError", + shouldFail: true, + ec2Data: &ec2DataStore{}, + }, + } { + t.Run(tt.name, func(t *testing.T) { + client := newMockEC2Client(tt.ec2Data) + + d := &EC2Discovery{ + ec2: client, + } + + err := d.refreshAZIDs(ctx) + if tt.shouldFail { + require.Error(t, err) + } else { + require.NoError(t, err) + require.Equal(t, client.ec2Data.azToAZID, d.azToAZID) + } + }) + } +} + +func TestEC2DiscoveryRefresh(t *testing.T) { + ctx := context.Background() + + // iterate through the test cases + for _, tt := range []struct { + name string + ec2Data *ec2DataStore + expected []*targetgroup.Group + }{ + { + name: "NoPrivateIp", + ec2Data: &ec2DataStore{ + region: "region-noprivateip", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + InstanceId: strptr("instance-id-noprivateip"), + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-noprivateip", + }, + }, + }, + { + name: "NoVpc", + ec2Data: &ec2DataStore{ + region: "region-novpc", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + ownerID: "owner-id-novpc", + instances: []*ec2.Instance{ + { + // set every possible options and test them here + Architecture: strptr("architecture-novpc"), + ImageId: strptr("ami-novpc"), + InstanceId: strptr("instance-id-novpc"), + InstanceLifecycle: strptr("instance-lifecycle-novpc"), + InstanceType: strptr("instance-type-novpc"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")}, + Platform: strptr("platform-novpc"), + PrivateDnsName: strptr("private-dns-novpc"), + PrivateIpAddress: strptr("1.2.3.4"), + PublicDnsName: strptr("public-dns-novpc"), + PublicIpAddress: strptr("42.42.42.2"), + State: &ec2.InstanceState{Name: strptr("running")}, + // test tags once and for all + Tags: []*ec2.Tag{ + {Key: strptr("tag-1-key"), Value: strptr("tag-1-value")}, + {Key: strptr("tag-2-key"), Value: strptr("tag-2-value")}, + nil, + {Value: strptr("tag-4-value")}, + {Key: strptr("tag-5-key")}, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-novpc", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("1.2.3.4:4242"), + "__meta_ec2_ami": model.LabelValue("ami-novpc"), + "__meta_ec2_architecture": model.LabelValue("architecture-novpc"), + "__meta_ec2_availability_zone": model.LabelValue("azname-b"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-2"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-novpc"), + "__meta_ec2_instance_lifecycle": model.LabelValue("instance-lifecycle-novpc"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-novpc"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_owner_id": model.LabelValue("owner-id-novpc"), + "__meta_ec2_platform": model.LabelValue("platform-novpc"), + "__meta_ec2_private_dns_name": model.LabelValue("private-dns-novpc"), + "__meta_ec2_private_ip": model.LabelValue("1.2.3.4"), + "__meta_ec2_public_dns_name": 
model.LabelValue("public-dns-novpc"), + "__meta_ec2_public_ip": model.LabelValue("42.42.42.2"), + "__meta_ec2_region": model.LabelValue("region-novpc"), + "__meta_ec2_tag_tag_1_key": model.LabelValue("tag-1-value"), + "__meta_ec2_tag_tag_2_key": model.LabelValue("tag-2-value"), + }, + }, + }, + }, + }, + { + name: "Ipv4", + ec2Data: &ec2DataStore{ + region: "region-ipv4", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + // just the minimum needed for the refresh work + ImageId: strptr("ami-ipv4"), + InstanceId: strptr("instance-id-ipv4"), + InstanceType: strptr("instance-type-ipv4"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-c")}, + PrivateIpAddress: strptr("5.6.7.8"), + State: &ec2.InstanceState{Name: strptr("running")}, + SubnetId: strptr("azid-3"), + VpcId: strptr("vpc-ipv4"), + // network intefaces + NetworkInterfaces: []*ec2.InstanceNetworkInterface{ + // interface without subnet -> should be ignored + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:1::1"), + IsPrimaryIpv6: boolptr(true), + }, + }, + }, + // interface with subnet, no IPv6 + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-3"), + }, + // interface with another subnet, no IPv6 + { + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-1"), + }, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-ipv4", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("5.6.7.8:4242"), + "__meta_ec2_ami": model.LabelValue("ami-ipv4"), + "__meta_ec2_availability_zone": model.LabelValue("azname-c"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-3"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-ipv4"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-ipv4"), + "__meta_ec2_owner_id": model.LabelValue(""), + "__meta_ec2_primary_subnet_id": model.LabelValue("azid-3"), + "__meta_ec2_private_ip": model.LabelValue("5.6.7.8"), + "__meta_ec2_region": model.LabelValue("region-ipv4"), + "__meta_ec2_subnet_id": model.LabelValue(",azid-3,azid-1,"), + "__meta_ec2_vpc_id": model.LabelValue("vpc-ipv4"), + }, + }, + }, + }, + }, + { + name: "Ipv6", + ec2Data: &ec2DataStore{ + region: "region-ipv6", + azToAZID: map[string]string{ + "azname-a": "azid-1", + "azname-b": "azid-2", + "azname-c": "azid-3", + }, + instances: []*ec2.Instance{ + { + // just the minimum needed for the refresh work + ImageId: strptr("ami-ipv6"), + InstanceId: strptr("instance-id-ipv6"), + InstanceType: strptr("instance-type-ipv6"), + Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")}, + PrivateIpAddress: strptr("9.10.11.12"), + State: &ec2.InstanceState{Name: strptr("running")}, + SubnetId: strptr("azid-2"), + VpcId: strptr("vpc-ipv6"), + // network intefaces + NetworkInterfaces: []*ec2.InstanceNetworkInterface{ + // interface without primary IPv6, index 2 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(3), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::1:1"), + IsPrimaryIpv6: boolptr(false), + }, + }, + SubnetId: strptr("azid-2"), + }, + // interface with primary IPv6, index 1 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(1), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::2:1"), + 
IsPrimaryIpv6: boolptr(false), + }, + { + Ipv6Address: strptr("2001:db8:2::2:2"), + IsPrimaryIpv6: boolptr(true), + }, + }, + SubnetId: strptr("azid-2"), + }, + // interface with primary IPv6, index 3 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(3), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{ + { + Ipv6Address: strptr("2001:db8:2::3:1"), + IsPrimaryIpv6: boolptr(true), + }, + }, + SubnetId: strptr("azid-1"), + }, + // interface without primary IPv6, index 0 + { + Attachment: &ec2.InstanceNetworkInterfaceAttachment{ + DeviceIndex: int64ptr(0), + }, + Ipv6Addresses: []*ec2.InstanceIpv6Address{}, + SubnetId: strptr("azid-3"), + }, + }, + }, + }, + }, + expected: []*targetgroup.Group{ + { + Source: "region-ipv6", + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue("9.10.11.12:4242"), + "__meta_ec2_ami": model.LabelValue("ami-ipv6"), + "__meta_ec2_availability_zone": model.LabelValue("azname-b"), + "__meta_ec2_availability_zone_id": model.LabelValue("azid-2"), + "__meta_ec2_instance_id": model.LabelValue("instance-id-ipv6"), + "__meta_ec2_instance_state": model.LabelValue("running"), + "__meta_ec2_instance_type": model.LabelValue("instance-type-ipv6"), + "__meta_ec2_ipv6_addresses": model.LabelValue(",2001:db8:2::1:1,2001:db8:2::2:1,2001:db8:2::2:2,2001:db8:2::3:1,"), + "__meta_ec2_owner_id": model.LabelValue(""), + "__meta_ec2_primary_ipv6_addresses": model.LabelValue(",,2001:db8:2::2:2,,2001:db8:2::3:1,"), + "__meta_ec2_primary_subnet_id": model.LabelValue("azid-2"), + "__meta_ec2_private_ip": model.LabelValue("9.10.11.12"), + "__meta_ec2_region": model.LabelValue("region-ipv6"), + "__meta_ec2_subnet_id": model.LabelValue(",azid-2,azid-1,azid-3,"), + "__meta_ec2_vpc_id": model.LabelValue("vpc-ipv6"), + }, + }, + }, + }, + }, + } { + t.Run(tt.name, func(t *testing.T) { + client := newMockEC2Client(tt.ec2Data) + + d := &EC2Discovery{ + ec2: client, + cfg: &EC2SDConfig{ + Port: 4242, + Region: client.ec2Data.region, + }, + } + + g, err := d.refresh(ctx) + require.NoError(t, err) + require.Equal(t, tt.expected, g) + }) + } +} + +// EC2 client mock. 
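The expected targets above encode multi-valued metadata such as `__meta_ec2_subnet_id` as a separator-wrapped list (`,azid-2,azid-1,azid-3,`). The leading and trailing commas let a relabel regex match a single element without substring false positives. A small sketch using the fixture's value (the `azid-10` value is a hypothetical counterexample):

    package main

    import (
        "fmt"
        "regexp"
    )

    func main() {
        subnets := ",azid-2,azid-1,azid-3,"
        // A typical relabel_config regex for "target is in subnet azid-1":
        re := regexp.MustCompile(`.*,azid-1,.*`)
        fmt.Println(re.MatchString(subnets)) // true
        // Substring-safe: "azid-1" does not match a hypothetical ",azid-10,".
        fmt.Println(re.MatchString(",azid-10,")) // false
    }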
+type mockEC2Client struct { + ec2iface.EC2API + ec2Data ec2DataStore +} + +func newMockEC2Client(ec2Data *ec2DataStore) *mockEC2Client { + client := mockEC2Client{ + ec2Data: *ec2Data, + } + return &client +} + +func (m *mockEC2Client) DescribeAvailabilityZonesWithContext(ctx aws.Context, input *ec2.DescribeAvailabilityZonesInput, opts ...request.Option) (*ec2.DescribeAvailabilityZonesOutput, error) { + if len(m.ec2Data.azToAZID) == 0 { + return nil, errors.New("No AZs found") + } + + azs := make([]*ec2.AvailabilityZone, len(m.ec2Data.azToAZID)) + + i := 0 + for k, v := range m.ec2Data.azToAZID { + azs[i] = &ec2.AvailabilityZone{ + ZoneName: strptr(k), + ZoneId: strptr(v), + } + i++ + } + + return &ec2.DescribeAvailabilityZonesOutput{ + AvailabilityZones: azs, + }, nil +} + +func (m *mockEC2Client) DescribeInstancesPagesWithContext(ctx aws.Context, input *ec2.DescribeInstancesInput, fn func(*ec2.DescribeInstancesOutput, bool) bool, opts ...request.Option) error { + r := ec2.Reservation{} + r.SetInstances(m.ec2Data.instances) + r.SetOwnerId(m.ec2Data.ownerID) + + o := ec2.DescribeInstancesOutput{} + o.SetReservations([]*ec2.Reservation{&r}) + + _ = fn(&o, true) + + return nil +} diff --git a/discovery/aws/lightsail.go b/discovery/aws/lightsail.go index 0ad7f2d541..b892867f1b 100644 --- a/discovery/aws/lightsail.go +++ b/discovery/aws/lightsail.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" @@ -29,10 +30,10 @@ import ( "github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/lightsail" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/refresh" @@ -130,14 +131,14 @@ type LightsailDiscovery struct { } // NewLightsailDiscovery returns a new LightsailDiscovery which periodically refreshes its targets. 
-func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) { +func NewLightsailDiscovery(conf *LightsailSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) { m, ok := metrics.(*lightsailMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } d := &LightsailDiscovery{ diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 70d95b9f3a..ec1c51ace9 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "math/rand" "net" "net/http" @@ -35,10 +36,9 @@ import ( "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4" cache "github.com/Code-Hex/go-generics-cache" "github.com/Code-Hex/go-generics-cache/policy/lru" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -175,7 +175,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { type Discovery struct { *refresh.Discovery - logger log.Logger + logger *slog.Logger cfg *SDConfig port int cache *cache.Cache[string, *armnetwork.Interface] @@ -183,14 +183,14 @@ type Discovery struct { } // NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets. -func NewDiscovery(cfg *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(cfg *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*azureMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } l := cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5000))) d := &Discovery{ @@ -228,26 +228,26 @@ type azureClient struct { vm *armcompute.VirtualMachinesClient vmss *armcompute.VirtualMachineScaleSetsClient vmssvm *armcompute.VirtualMachineScaleSetVMsClient - logger log.Logger + logger *slog.Logger } var _ client = &azureClient{} -// createAzureClient is a helper function for creating an Azure compute client to ARM. -func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) { - cloudConfiguration, err := CloudConfigurationFromName(cfg.Environment) +// createAzureClient is a helper method for creating an Azure compute client to ARM. 
+func (d *Discovery) createAzureClient() (client, error) { + cloudConfiguration, err := CloudConfigurationFromName(d.cfg.Environment) if err != nil { return &azureClient{}, err } var c azureClient - c.logger = logger + c.logger = d.logger telemetry := policy.TelemetryOptions{ ApplicationID: userAgent, } - credential, err := newCredential(cfg, policy.ClientOptions{ + credential, err := newCredential(*d.cfg, policy.ClientOptions{ Cloud: cloudConfiguration, Telemetry: telemetry, }) @@ -255,7 +255,7 @@ func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) { return &azureClient{}, err } - client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "azure_sd") + client, err := config_util.NewClientFromConfig(d.cfg.HTTPClientConfig, "azure_sd") if err != nil { return &azureClient{}, err } @@ -267,22 +267,22 @@ func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) { }, } - c.vm, err = armcompute.NewVirtualMachinesClient(cfg.SubscriptionID, credential, options) + c.vm, err = armcompute.NewVirtualMachinesClient(d.cfg.SubscriptionID, credential, options) if err != nil { return &azureClient{}, err } - c.nic, err = armnetwork.NewInterfacesClient(cfg.SubscriptionID, credential, options) + c.nic, err = armnetwork.NewInterfacesClient(d.cfg.SubscriptionID, credential, options) if err != nil { return &azureClient{}, err } - c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(cfg.SubscriptionID, credential, options) + c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(d.cfg.SubscriptionID, credential, options) if err != nil { return &azureClient{}, err } - c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(cfg.SubscriptionID, credential, options) + c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(d.cfg.SubscriptionID, credential, options) if err != nil { return &azureClient{}, err } @@ -337,35 +337,27 @@ type virtualMachine struct { } // Create a new azureResource object from an ID string. -func newAzureResourceFromID(id string, logger log.Logger) (*arm.ResourceID, error) { +func newAzureResourceFromID(id string, logger *slog.Logger) (*arm.ResourceID, error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } resourceID, err := arm.ParseResourceID(id) if err != nil { err := fmt.Errorf("invalid ID '%s': %w", id, err) - level.Error(logger).Log("err", err) + logger.Error("Failed to parse resource ID", "err", err) return &arm.ResourceID{}, err } return resourceID, nil } -func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { - defer level.Debug(d.logger).Log("msg", "Azure discovery completed") - - client, err := createAzureClient(*d.cfg, d.logger) - if err != nil { - d.metrics.failuresCount.Inc() - return nil, fmt.Errorf("could not create Azure client: %w", err) - } - +func (d *Discovery) refreshAzureClient(ctx context.Context, client client) ([]*targetgroup.Group, error) { machines, err := client.getVMs(ctx, d.cfg.ResourceGroup) if err != nil { d.metrics.failuresCount.Inc() return nil, fmt.Errorf("could not get virtual machines: %w", err) } - level.Debug(d.logger).Log("msg", "Found virtual machines during Azure discovery.", "count", len(machines)) + d.logger.Debug("Found virtual machines during Azure discovery.", "count", len(machines)) // Load the vms managed by scale sets. 
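The azure.go hunks here split target refresh into a thin refresh() that builds the real client and a refreshAzureClient(ctx, client) that does the work, so tests can inject the fake servers shown further below. A condensed sketch of the same dependency-injection shape, with stand-in names (client, fakeClient, refreshWithClient are all illustrative, not the diff's types):

    package main

    import (
        "context"
        "fmt"
    )

    // Hypothetical stand-in for the diff's client interface.
    type client interface {
        getVMs(ctx context.Context) ([]string, error)
    }

    type fakeClient struct{}

    func (fakeClient) getVMs(context.Context) ([]string, error) { return []string{"vm1"}, nil }

    type Discovery struct{}

    // Production code builds the real client and delegates here; tests
    // call this directly with a fake, mirroring refreshAzureClient above.
    func (d *Discovery) refreshWithClient(ctx context.Context, c client) ([]string, error) {
        return c.getVMs(ctx)
    }

    func main() {
        var d Discovery
        targets, _ := d.refreshWithClient(context.Background(), fakeClient{})
        fmt.Println(targets)
    }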
scaleSets, err := client.getScaleSets(ctx, d.cfg.ResourceGroup) @@ -418,6 +410,18 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { return []*targetgroup.Group{&tg}, nil } +func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { + defer d.logger.Debug("Azure discovery completed") + + client, err := d.createAzureClient() + if err != nil { + d.metrics.failuresCount.Inc() + return nil, fmt.Errorf("could not create Azure client: %w", err) + } + + return d.refreshAzureClient(ctx, client) +} + func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualMachine) (model.LabelSet, error) { r, err := newAzureResourceFromID(vm.ID, d.logger) if err != nil { @@ -459,7 +463,7 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM } if err != nil { if errors.Is(err, errorNotFound) { - level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err) + d.logger.Warn("Network interface does not exist", "name", nicID, "err", err) } else { return nil, err } @@ -480,7 +484,7 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM // yet support this. On deallocated machines, this value happens to be nil so it // is a cheap and easy way to determine if a machine is allocated or not. if networkInterface.Properties.Primary == nil { - level.Debug(d.logger).Log("msg", "Skipping deallocated virtual machine", "machine", vm.Name) + d.logger.Debug("Skipping deallocated virtual machine", "machine", vm.Name) return nil, nil } @@ -724,7 +728,7 @@ func (d *Discovery) addToCache(nicID string, netInt *armnetwork.Interface) { rs := time.Duration(random) * time.Second exptime := time.Duration(d.cfg.RefreshInterval*10) + rs d.cache.Set(nicID, netInt, cache.WithExpiration(exptime)) - level.Debug(d.logger).Log("msg", "Adding nic", "nic", nicID, "time", exptime.Seconds()) + d.logger.Debug("Adding nic", "nic", nicID, "time", exptime.Seconds()) } // getFromCache will get the network Interface for the specified nicID diff --git a/discovery/azure/azure_test.go b/discovery/azure/azure_test.go index 32dab66c8c..b905e9fcef 100644 --- a/discovery/azure/azure_test.go +++ b/discovery/azure/azure_test.go @@ -15,19 +15,34 @@ package azure import ( "context" - "fmt" + "log/slog" + "net/http" + "slices" + "strings" "testing" + "github.com/Azure/azure-sdk-for-go/sdk/azcore" "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" + azfake "github.com/Azure/azure-sdk-for-go/sdk/azcore/fake" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/to" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5" + fake "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5/fake" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4" + fakenetwork "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4/fake" cache "github.com/Code-Hex/go-generics-cache" "github.com/Code-Hex/go-generics-cache/policy/lru" - "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/goleak" + + "github.com/prometheus/prometheus/discovery" + "github.com/prometheus/prometheus/discovery/targetgroup" ) +const defaultMockNetworkID string = "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Network/networkInterfaces/{networkInterfaceName}" + func TestMain(m 
*testing.M) { goleak.VerifyTestMain(m, goleak.IgnoreTopFunction("github.com/Code-Hex/go-generics-cache.(*janitor).run.func1"), @@ -96,13 +111,12 @@ func TestVMToLabelSet(t *testing.T) { vmType := "type" location := "westeurope" computerName := "computer_name" - networkID := "/subscriptions/00000000-0000-0000-0000-000000000000/network1" ipAddress := "10.20.30.40" primary := true networkProfile := armcompute.NetworkProfile{ NetworkInterfaces: []*armcompute.NetworkInterfaceReference{ { - ID: &networkID, + ID: to.Ptr(defaultMockNetworkID), Properties: &armcompute.NetworkInterfaceReferenceProperties{Primary: &primary}, }, }, @@ -139,7 +153,7 @@ func TestVMToLabelSet(t *testing.T) { Location: location, OsType: "Linux", Tags: map[string]*string{}, - NetworkInterfaces: []string{networkID}, + NetworkInterfaces: []string{defaultMockNetworkID}, Size: size, } @@ -150,11 +164,12 @@ func TestVMToLabelSet(t *testing.T) { cfg := DefaultSDConfig d := &Discovery{ cfg: &cfg, - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), cache: cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5))), } network := armnetwork.Interface{ - Name: &networkID, + Name: to.Ptr(defaultMockNetworkID), + ID: to.Ptr(defaultMockNetworkID), Properties: &armnetwork.InterfacePropertiesFormat{ Primary: &primary, IPConfigurations: []*armnetwork.InterfaceIPConfiguration{ @@ -164,9 +179,9 @@ func TestVMToLabelSet(t *testing.T) { }, }, } - client := &mockAzureClient{ - networkInterface: &network, - } + + client := createMockAzureClient(t, nil, nil, nil, network, nil) + labelSet, err := d.vmToLabelSet(context.Background(), client, actualVM) require.NoError(t, err) require.Len(t, labelSet, 11) @@ -475,34 +490,372 @@ func TestNewAzureResourceFromID(t *testing.T) { } } +func TestAzureRefresh(t *testing.T) { + tests := []struct { + scenario string + vmResp []armcompute.VirtualMachinesClientListAllResponse + vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse + vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse + interfacesResp armnetwork.Interface + expectedTG []*targetgroup.Group + }{ + { + scenario: "VMs, VMSS and VMSSVMs in Multiple Responses", + vmResp: []armcompute.VirtualMachinesClientListAllResponse{ + { + VirtualMachineListResult: armcompute.VirtualMachineListResult{ + Value: []*armcompute.VirtualMachine{ + defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm1"), to.Ptr("vm1")), + defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm2"), to.Ptr("vm2")), + }, + }, + }, + { + VirtualMachineListResult: armcompute.VirtualMachineListResult{ + Value: []*armcompute.VirtualMachine{ + defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm3"), to.Ptr("vm3")), + defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm4"), to.Ptr("vm4")), + }, + }, + }, + }, + vmssResp: []armcompute.VirtualMachineScaleSetsClientListAllResponse{ + { + VirtualMachineScaleSetListWithLinkResult: armcompute.VirtualMachineScaleSetListWithLinkResult{ + Value: []*armcompute.VirtualMachineScaleSet{ + { + ID: 
to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1"), + Name: to.Ptr("vmScaleSet1"), + Location: to.Ptr("australiaeast"), + Type: to.Ptr("Microsoft.Compute/virtualMachineScaleSets"), + }, + }, + }, + }, + }, + vmssvmResp: []armcompute.VirtualMachineScaleSetVMsClientListResponse{ + { + VirtualMachineScaleSetVMListResult: armcompute.VirtualMachineScaleSetVMListResult{ + Value: []*armcompute.VirtualMachineScaleSetVM{ + defaultVMSSVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm1"), to.Ptr("vmScaleSet1_vm1")), + defaultVMSSVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm2"), to.Ptr("vmScaleSet1_vm2")), + }, + }, + }, + }, + interfacesResp: armnetwork.Interface{ + ID: to.Ptr(defaultMockNetworkID), + Properties: &armnetwork.InterfacePropertiesFormat{ + Primary: to.Ptr(true), + IPConfigurations: []*armnetwork.InterfaceIPConfiguration{ + {Properties: &armnetwork.InterfaceIPConfigurationPropertiesFormat{ + PrivateIPAddress: to.Ptr("10.0.0.1"), + }}, + }, + }, + }, + expectedTG: []*targetgroup.Group{ + { + Targets: []model.LabelSet{ + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm1", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vm1", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm2", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vm2", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm3", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vm3", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": 
"/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm4", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vm4", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm1", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vmScaleSet1_vm1", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_scale_set": "vmScaleSet1", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + { + "__address__": "10.0.0.1:80", + "__meta_azure_machine_computer_name": "computer_name", + "__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm2", + "__meta_azure_machine_location": "australiaeast", + "__meta_azure_machine_name": "vmScaleSet1_vm2", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_private_ip": "10.0.0.1", + "__meta_azure_machine_resource_group": "{resourceGroup}", + "__meta_azure_machine_scale_set": "vmScaleSet1", + "__meta_azure_machine_size": "size", + "__meta_azure_machine_tag_prometheus": "", + "__meta_azure_subscription_id": "", + "__meta_azure_tenant_id": "", + }, + }, + }, + }, + }, + } + for _, tc := range tests { + t.Run(tc.scenario, func(t *testing.T) { + t.Parallel() + azureSDConfig := &DefaultSDConfig + + azureClient := createMockAzureClient(t, tc.vmResp, tc.vmssResp, tc.vmssvmResp, tc.interfacesResp, nil) + + reg := prometheus.NewRegistry() + refreshMetrics := discovery.NewRefreshMetrics(reg) + metrics := azureSDConfig.NewDiscovererMetrics(reg, refreshMetrics) + + sd, err := NewDiscovery(azureSDConfig, nil, metrics) + require.NoError(t, err) + + tg, err := sd.refreshAzureClient(context.Background(), azureClient) + require.NoError(t, err) + + sortTargetsByID(tg[0].Targets) + require.Equal(t, tc.expectedTG, tg) + }) + } +} + type mockAzureClient struct { - networkInterface *armnetwork.Interface + azureClient } -var _ client = &mockAzureClient{} +func createMockAzureClient(t *testing.T, vmResp []armcompute.VirtualMachinesClientListAllResponse, vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse, vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse, interfaceResp armnetwork.Interface, logger *slog.Logger) client { + t.Helper() + mockVMServer := defaultMockVMServer(vmResp) + mockVMSSServer := defaultMockVMSSServer(vmssResp) + mockVMScaleSetVMServer := defaultMockVMSSVMServer(vmssvmResp) + mockInterfaceServer := defaultMockInterfaceServer(interfaceResp) -func (*mockAzureClient) getVMs(ctx context.Context, resourceGroup string) ([]virtualMachine, error) { - return nil, nil -} + vmClient, err 
:= armcompute.NewVirtualMachinesClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{ + ClientOptions: azcore.ClientOptions{ + Transport: fake.NewVirtualMachinesServerTransport(&mockVMServer), + }, + }) + require.NoError(t, err) -func (*mockAzureClient) getScaleSets(ctx context.Context, resourceGroup string) ([]armcompute.VirtualMachineScaleSet, error) { - return nil, nil -} + vmssClient, err := armcompute.NewVirtualMachineScaleSetsClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{ + ClientOptions: azcore.ClientOptions{ + Transport: fake.NewVirtualMachineScaleSetsServerTransport(&mockVMSSServer), + }, + }) + require.NoError(t, err) -func (*mockAzureClient) getScaleSetVMs(ctx context.Context, scaleSet armcompute.VirtualMachineScaleSet) ([]virtualMachine, error) { - return nil, nil -} + vmssvmClient, err := armcompute.NewVirtualMachineScaleSetVMsClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{ + ClientOptions: azcore.ClientOptions{ + Transport: fake.NewVirtualMachineScaleSetVMsServerTransport(&mockVMScaleSetVMServer), + }, + }) + require.NoError(t, err) -func (m *mockAzureClient) getVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) { - if networkInterfaceID == "" { - return nil, fmt.Errorf("parameter networkInterfaceID cannot be empty") + interfacesClient, err := armnetwork.NewInterfacesClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{ + ClientOptions: azcore.ClientOptions{ + Transport: fakenetwork.NewInterfacesServerTransport(&mockInterfaceServer), + }, + }) + require.NoError(t, err) + + return &mockAzureClient{ + azureClient: azureClient{ + vm: vmClient, + vmss: vmssClient, + vmssvm: vmssvmClient, + nic: interfacesClient, + logger: logger, + }, } - return m.networkInterface, nil } -func (m *mockAzureClient) getVMScaleSetVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID, scaleSetName, instanceID string) (*armnetwork.Interface, error) { - if scaleSetName == "" { - return nil, fmt.Errorf("parameter virtualMachineScaleSetName cannot be empty") +func defaultMockInterfaceServer(interfaceResp armnetwork.Interface) fakenetwork.InterfacesServer { + return fakenetwork.InterfacesServer{ + Get: func(ctx context.Context, resourceGroupName, networkInterfaceName string, options *armnetwork.InterfacesClientGetOptions) (resp azfake.Responder[armnetwork.InterfacesClientGetResponse], errResp azfake.ErrorResponder) { + resp.SetResponse(http.StatusOK, armnetwork.InterfacesClientGetResponse{Interface: interfaceResp}, nil) + return + }, + GetVirtualMachineScaleSetNetworkInterface: func(ctx context.Context, resourceGroupName, virtualMachineScaleSetName, virtualmachineIndex, networkInterfaceName string, options *armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceOptions) (resp azfake.Responder[armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceResponse], errResp azfake.ErrorResponder) { + resp.SetResponse(http.StatusOK, armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceResponse{Interface: interfaceResp}, nil) + return + }, } - return m.networkInterface, nil +} + +func defaultMockVMServer(vmResp []armcompute.VirtualMachinesClientListAllResponse) fake.VirtualMachinesServer { + return fake.VirtualMachinesServer{ + NewListAllPager: func(options *armcompute.VirtualMachinesClientListAllOptions) (resp azfake.PagerResponder[armcompute.VirtualMachinesClientListAllResponse]) { + for _, page := range 
vmResp { + resp.AddPage(http.StatusOK, page, nil) + } + return + }, + } +} + +func defaultMockVMSSServer(vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse) fake.VirtualMachineScaleSetsServer { + return fake.VirtualMachineScaleSetsServer{ + NewListAllPager: func(options *armcompute.VirtualMachineScaleSetsClientListAllOptions) (resp azfake.PagerResponder[armcompute.VirtualMachineScaleSetsClientListAllResponse]) { + for _, page := range vmssResp { + resp.AddPage(http.StatusOK, page, nil) + } + return + }, + } +} + +func defaultMockVMSSVMServer(vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse) fake.VirtualMachineScaleSetVMsServer { + return fake.VirtualMachineScaleSetVMsServer{ + NewListPager: func(resourceGroupName, virtualMachineScaleSetName string, options *armcompute.VirtualMachineScaleSetVMsClientListOptions) (resp azfake.PagerResponder[armcompute.VirtualMachineScaleSetVMsClientListResponse]) { + for _, page := range vmssvmResp { + resp.AddPage(http.StatusOK, page, nil) + } + return + }, + } +} + +func defaultVMWithIDAndName(id, name *string) *armcompute.VirtualMachine { + vmSize := armcompute.VirtualMachineSizeTypes("size") + osType := armcompute.OperatingSystemTypesLinux + defaultID := "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/testVM" + defaultName := "testVM" + + if id == nil { + id = &defaultID + } + if name == nil { + name = &defaultName + } + + return &armcompute.VirtualMachine{ + ID: id, + Name: name, + Type: to.Ptr("Microsoft.Compute/virtualMachines"), + Location: to.Ptr("australiaeast"), + Properties: &armcompute.VirtualMachineProperties{ + OSProfile: &armcompute.OSProfile{ + ComputerName: to.Ptr("computer_name"), + }, + StorageProfile: &armcompute.StorageProfile{ + OSDisk: &armcompute.OSDisk{ + OSType: &osType, + }, + }, + NetworkProfile: &armcompute.NetworkProfile{ + NetworkInterfaces: []*armcompute.NetworkInterfaceReference{ + { + ID: to.Ptr(defaultMockNetworkID), + }, + }, + }, + HardwareProfile: &armcompute.HardwareProfile{ + VMSize: &vmSize, + }, + }, + Tags: map[string]*string{ + "prometheus": new(string), + }, + } +} + +func defaultVMSSVMWithIDAndName(id, name *string) *armcompute.VirtualMachineScaleSetVM { + vmSize := armcompute.VirtualMachineSizeTypes("size") + osType := armcompute.OperatingSystemTypesLinux + defaultID := "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/testVMScaleSet/virtualMachines/testVM" + defaultName := "testVM" + + if id == nil { + id = &defaultID + } + if name == nil { + name = &defaultName + } + + return &armcompute.VirtualMachineScaleSetVM{ + ID: id, + Name: name, + Type: to.Ptr("Microsoft.Compute/virtualMachines"), + InstanceID: to.Ptr("123"), + Location: to.Ptr("australiaeast"), + Properties: &armcompute.VirtualMachineScaleSetVMProperties{ + OSProfile: &armcompute.OSProfile{ + ComputerName: to.Ptr("computer_name"), + }, + StorageProfile: &armcompute.StorageProfile{ + OSDisk: &armcompute.OSDisk{ + OSType: &osType, + }, + }, + NetworkProfile: &armcompute.NetworkProfile{ + NetworkInterfaces: []*armcompute.NetworkInterfaceReference{ + {ID: to.Ptr(defaultMockNetworkID)}, + }, + }, + HardwareProfile: &armcompute.HardwareProfile{ + VMSize: &vmSize, + }, + }, + Tags: map[string]*string{ + "prometheus": new(string), + }, + } +} + +func sortTargetsByID(targets []model.LabelSet) { + slices.SortFunc(targets, func(a, b model.LabelSet) int { + return 
strings.Compare(string(a["__meta_azure_machine_id"]), string(b["__meta_azure_machine_id"])) + }) } diff --git a/discovery/consul/consul.go b/discovery/consul/consul.go index bdc1fc8dce..33b82d23a4 100644 --- a/discovery/consul/consul.go +++ b/discovery/consul/consul.go @@ -17,17 +17,17 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" consul "github.com/hashicorp/consul/api" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -113,8 +113,11 @@ type SDConfig struct { Services []string `yaml:"services,omitempty"` // A list of tags used to filter instances inside a service. Services must contain all tags in the list. ServiceTags []string `yaml:"tags,omitempty"` - // Desired node metadata. + // Desired node metadata. As of Consul 1.14, consider `filter` instead. NodeMeta map[string]string `yaml:"node_meta,omitempty"` + // Consul filter string + // See https://www.consul.io/api-docs/catalog#filtering-1, for syntax + Filter string `yaml:"filter,omitempty"` HTTPClientConfig config.HTTPClientConfig `yaml:",inline"` } @@ -174,22 +177,23 @@ type Discovery struct { watchedServices []string // Set of services which will be discovered. watchedTags []string // Tags used to filter instances of a service. watchedNodeMeta map[string]string + watchedFilter string allowStale bool refreshInterval time.Duration finalizer func() - logger log.Logger + logger *slog.Logger metrics *consulMetrics } // NewDiscovery returns a new Discovery for the given config. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*consulMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } wrapper, err := config.NewClientFromConfig(conf.HTTPClientConfig, "consul_sd", config.WithIdleConnTimeout(2*watchTimeout)) @@ -218,6 +222,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere watchedServices: conf.Services, watchedTags: conf.ServiceTags, watchedNodeMeta: conf.NodeMeta, + watchedFilter: conf.Filter, allowStale: conf.AllowStale, refreshInterval: time.Duration(conf.RefreshInterval), clientDatacenter: conf.Datacenter, @@ -236,7 +241,7 @@ func (d *Discovery) shouldWatch(name string, tags []string) bool { return d.shouldWatchFromName(name) && d.shouldWatchFromTags(tags) } -// shouldWatch returns whether the service of the given name should be watched based on its name. +// shouldWatchFromName returns whether the service of the given name should be watched based on its name. func (d *Discovery) shouldWatchFromName(name string) bool { // If there's no fixed set of watched services, we watch everything. if len(d.watchedServices) == 0 { @@ -251,7 +256,7 @@ func (d *Discovery) shouldWatchFromName(name string) bool { return false } -// shouldWatch returns whether the service of the given name should be watched based on its tags. +// shouldWatchFromTags returns whether the service of the given name should be watched based on its tags. 
// This gets called when the user doesn't specify a list of services in order to avoid watching // *all* services. Details in https://github.com/prometheus/prometheus/pull/3814 func (d *Discovery) shouldWatchFromTags(tags []string) bool { @@ -282,7 +287,7 @@ func (d *Discovery) getDatacenter() error { info, err := d.client.Agent().Self() if err != nil { - level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err) + d.logger.Error("Error retrieving datacenter name", "err", err) d.metrics.rpcFailuresCount.Inc() return err } @@ -290,12 +295,12 @@ func (d *Discovery) getDatacenter() error { dc, ok := info["Config"]["Datacenter"].(string) if !ok { err := fmt.Errorf("invalid value '%v' for Config.Datacenter", info["Config"]["Datacenter"]) - level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err) + d.logger.Error("Error retrieving datacenter name", "err", err) return err } d.clientDatacenter = dc - d.logger = log.With(d.logger, "datacenter", dc) + d.logger = d.logger.With("datacenter", dc) return nil } @@ -361,13 +366,14 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { // entire list of services. func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.Group, lastIndex *uint64, services map[string]func()) { catalog := d.client.Catalog() - level.Debug(d.logger).Log("msg", "Watching services", "tags", strings.Join(d.watchedTags, ",")) + d.logger.Debug("Watching services", "tags", strings.Join(d.watchedTags, ","), "filter", d.watchedFilter) opts := &consul.QueryOptions{ WaitIndex: *lastIndex, WaitTime: watchTimeout, AllowStale: d.allowStale, NodeMeta: d.watchedNodeMeta, + Filter: d.watchedFilter, } t0 := time.Now() srvs, meta, err := catalog.Services(opts.WithContext(ctx)) @@ -382,7 +388,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup. } if err != nil { - level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err) + d.logger.Error("Error refreshing service list", "err", err) d.metrics.rpcFailuresCount.Inc() time.Sleep(retryInterval) return @@ -445,7 +451,7 @@ type consulService struct { discovery *Discovery client *consul.Client tagSeparator string - logger log.Logger + logger *slog.Logger rpcFailuresCount prometheus.Counter serviceRPCDuration prometheus.Observer } @@ -490,7 +496,7 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G // Get updates for a service. 
func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Group, health *consul.Health, lastIndex *uint64) { - level.Debug(srv.logger).Log("msg", "Watching service", "service", srv.name, "tags", strings.Join(srv.tags, ",")) + srv.logger.Debug("Watching service", "service", srv.name, "tags", strings.Join(srv.tags, ",")) opts := &consul.QueryOptions{ WaitIndex: *lastIndex, @@ -513,7 +519,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr } if err != nil { - level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err) + srv.logger.Error("Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err) srv.rpcFailuresCount.Inc() time.Sleep(retryInterval) return diff --git a/discovery/consul/consul_test.go b/discovery/consul/consul_test.go index e3bc7938f5..cdbb80baba 100644 --- a/discovery/consul/consul_test.go +++ b/discovery/consul/consul_test.go @@ -21,10 +21,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/goleak" "gopkg.in/yaml.v2" @@ -252,6 +252,8 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) { case "/v1/catalog/services?index=1&wait=120000ms": time.Sleep(5 * time.Second) response = ServicesTestAnswer + case "/v1/catalog/services?filter=NodeMeta.rack_name+%3D%3D+%222304%22&index=1&wait=120000ms": + response = ServicesTestAnswer default: t.Errorf("Unhandled consul call: %s", r.URL) } @@ -270,7 +272,7 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) { } func newDiscovery(t *testing.T, config *SDConfig) *Discovery { - logger := log.NewNopLogger() + logger := promslog.NewNopLogger() metrics := NewTestMetrics(t, config, prometheus.NewRegistry()) @@ -369,6 +371,27 @@ func TestAllOptions(t *testing.T) { <-ch } +// Watch the test service with a specific tag and node-meta via Filter parameter. +func TestFilterOption(t *testing.T) { + stub, config := newServer(t) + defer stub.Close() + + config.Services = []string{"test"} + config.Filter = `NodeMeta.rack_name == "2304"` + config.Token = "fake-token" + + d := newDiscovery(t, config) + + ctx, cancel := context.WithCancel(context.Background()) + ch := make(chan []*targetgroup.Group) + go func() { + d.Run(ctx, ch) + close(ch) + }() + checkOneTarget(t, <-ch) + cancel() +} + func TestGetDatacenterShouldReturnError(t *testing.T) { for _, tc := range []struct { handler func(http.ResponseWriter, *http.Request) @@ -407,7 +430,7 @@ func TestGetDatacenterShouldReturnError(t *testing.T) { err = d.getDatacenter() // An error should be returned. - require.Equal(t, tc.errMessage, err.Error()) + require.EqualError(t, err, tc.errMessage) // Should still be empty. 
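(Editor's sketch, not part of the patch: the consul.go and consul_test.go hunks above add a server-side `filter` option next to the existing `node_meta` filtering, and `TestFilterOption` exercises it. A minimal illustration of how the new YAML key lands in `SDConfig.Filter`; the `Example` wrapper and printed output are illustrative assumptions, only the `filter` field and its syntax reference come from the diff.)

```go
package consul_test

import (
	"fmt"

	"gopkg.in/yaml.v2"

	"github.com/prometheus/prometheus/discovery/consul"
)

// Sketch only: round-trips the new `filter` key into SDConfig.Filter.
// The expression syntax is Consul's (>= 1.14), per
// https://www.consul.io/api-docs/catalog#filtering-1.
func Example_filterConfig() {
	input := `
server: localhost:8500
services: [test]
filter: 'NodeMeta.rack_name == "2304"'
`
	var cfg consul.SDConfig
	if err := yaml.UnmarshalStrict([]byte(input), &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.Filter)
	// Output: NodeMeta.rack_name == "2304"
}
```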
require.Equal(t, "", d.clientDatacenter) } diff --git a/discovery/digitalocean/digitalocean.go b/discovery/digitalocean/digitalocean.go index ecee60cb1f..fce8d1a354 100644 --- a/discovery/digitalocean/digitalocean.go +++ b/discovery/digitalocean/digitalocean.go @@ -15,7 +15,9 @@ package digitalocean import ( "context" + "errors" "fmt" + "log/slog" "net" "net/http" "strconv" @@ -23,7 +25,6 @@ import ( "time" "github.com/digitalocean/godo" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -111,10 +112,10 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*digitaloceanMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &Discovery{ diff --git a/discovery/digitalocean/digitalocean_test.go b/discovery/digitalocean/digitalocean_test.go index 841b5ef977..a282225ac2 100644 --- a/discovery/digitalocean/digitalocean_test.go +++ b/discovery/digitalocean/digitalocean_test.go @@ -19,9 +19,9 @@ import ( "net/url" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -57,7 +57,7 @@ func TestDigitalOceanSDRefresh(t *testing.T) { defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) endpoint, err := url.Parse(sdmock.Mock.Endpoint()) require.NoError(t, err) diff --git a/discovery/discovery.go b/discovery/discovery.go index a91faf6c86..c400de3632 100644 --- a/discovery/discovery.go +++ b/discovery/discovery.go @@ -15,9 +15,9 @@ package discovery import ( "context" + "log/slog" "reflect" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -47,7 +47,7 @@ type DiscovererMetrics interface { // DiscovererOptions provides options for a Discoverer. type DiscovererOptions struct { - Logger log.Logger + Logger *slog.Logger Metrics DiscovererMetrics @@ -109,7 +109,7 @@ func (c *Configs) SetDirectory(dir string) { // UnmarshalYAML implements yaml.Unmarshaler. func (c *Configs) UnmarshalYAML(unmarshal func(interface{}) error) error { - cfgTyp := getConfigType(configsType) + cfgTyp := reflect.StructOf(configFields) cfgPtr := reflect.New(cfgTyp) cfgVal := cfgPtr.Elem() @@ -124,7 +124,7 @@ func (c *Configs) UnmarshalYAML(unmarshal func(interface{}) error) error { // MarshalYAML implements yaml.Marshaler. 
func (c Configs) MarshalYAML() (interface{}, error) { - cfgTyp := getConfigType(configsType) + cfgTyp := reflect.StructOf(configFields) cfgPtr := reflect.New(cfgTyp) cfgVal := cfgPtr.Elem() diff --git a/util/testutil/logging.go b/discovery/discovery_test.go similarity index 57% rename from util/testutil/logging.go rename to discovery/discovery_test.go index db096ea234..af327195f2 100644 --- a/util/testutil/logging.go +++ b/discovery/discovery_test.go @@ -1,4 +1,4 @@ -// Copyright 2019 The Prometheus Authors +// Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -11,25 +11,26 @@ // See the License for the specific language governing permissions and // limitations under the License. -package testutil +package discovery import ( "testing" - "github.com/go-kit/log" + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v2" ) -type logger struct { - t *testing.T -} +func TestConfigsCustomUnMarshalMarshal(t *testing.T) { + input := `static_configs: +- targets: + - foo:1234 + - bar:4321 +` + cfg := &Configs{} + err := yaml.UnmarshalStrict([]byte(input), cfg) + require.NoError(t, err) -// NewLogger returns a gokit compatible Logger which calls t.Log. -func NewLogger(t *testing.T) log.Logger { - return logger{t: t} -} - -// Log implements log.Logger. -func (t logger) Log(keyvals ...interface{}) error { - t.t.Log(keyvals...) - return nil + output, err := yaml.Marshal(cfg) + require.NoError(t, err) + require.Equal(t, input, string(output)) } diff --git a/discovery/dns/dns.go b/discovery/dns/dns.go index 314c3d38cd..405dba44f7 100644 --- a/discovery/dns/dns.go +++ b/discovery/dns/dns.go @@ -17,17 +17,17 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/miekg/dns" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/refresh" @@ -111,21 +111,21 @@ type Discovery struct { names []string port int qtype uint16 - logger log.Logger + logger *slog.Logger metrics *dnsMetrics - lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) + lookupFn func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*dnsMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } qtype := dns.TypeSRV @@ -174,7 +174,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { for _, name := range d.names { go func(n string) { if err := d.refreshOne(ctx, n, ch); err != nil && !errors.Is(err, context.Canceled) { - level.Error(d.logger).Log("msg", "Error refreshing DNS targets", "err", err) + d.logger.Error("Error refreshing DNS targets", "err", err) } wg.Done() }(name) @@ -238,7 +238,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ // CNAME responses can occur with "Type: A" dns_sd_config requests. continue default: - level.Warn(d.logger).Log("msg", "Invalid record", "record", record) + d.logger.Warn("Invalid record", "record", record) continue } tg.Targets = append(tg.Targets, model.LabelSet{ @@ -288,7 +288,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ // error will be generic-looking, because trying to return all the errors // returned by the combination of all name permutations and servers is a // nightmare. -func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { +func lookupWithSearchPath(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { conf, err := dns.ClientConfigFromFile(resolvConf) if err != nil { return nil, fmt.Errorf("could not load resolv.conf: %w", err) @@ -337,14 +337,14 @@ func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Ms // A non-viable answer is "anything else", which encompasses both various // system-level problems (like network timeouts) and also // valid-but-unexpected DNS responses (SERVFAIL, REFUSED, etc). 
-func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logger log.Logger) (*dns.Msg, error) { +func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logger *slog.Logger) (*dns.Msg, error) { client := &dns.Client{} for _, server := range conf.Servers { servAddr := net.JoinHostPort(server, conf.Port) msg, err := askServerForName(name, qtype, client, servAddr, true) if err != nil { - level.Warn(logger).Log("msg", "DNS resolution failed", "server", server, "name", name, "err", err) + logger.Warn("DNS resolution failed", "server", server, "name", name, "err", err) continue } diff --git a/discovery/dns/dns_test.go b/discovery/dns/dns_test.go index 33a976827d..f01a075c45 100644 --- a/discovery/dns/dns_test.go +++ b/discovery/dns/dns_test.go @@ -15,12 +15,12 @@ package dns import ( "context" - "fmt" + "errors" + "log/slog" "net" "testing" "time" - "github.com/go-kit/log" "github.com/miekg/dns" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" @@ -40,7 +40,7 @@ func TestDNS(t *testing.T) { testCases := []struct { name string config SDConfig - lookup func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) + lookup func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) expected []*targetgroup.Group }{ @@ -52,8 +52,8 @@ func TestDNS(t *testing.T) { Port: 80, Type: "A", }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { - return nil, fmt.Errorf("some error") + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { + return nil, errors.New("some error") }, expected: []*targetgroup.Group{}, }, @@ -65,7 +65,7 @@ func TestDNS(t *testing.T) { Port: 80, Type: "A", }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.A{A: net.IPv4(192, 0, 2, 2)}, @@ -97,7 +97,7 @@ func TestDNS(t *testing.T) { Port: 80, Type: "AAAA", }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.AAAA{AAAA: net.IPv6loopback}, @@ -128,7 +128,7 @@ func TestDNS(t *testing.T) { Type: "SRV", RefreshInterval: model.Duration(time.Minute), }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.SRV{Port: 3306, Target: "db1.example.com."}, @@ -167,7 +167,7 @@ func TestDNS(t *testing.T) { Names: []string{"_mysql._tcp.db.example.com."}, RefreshInterval: model.Duration(time.Minute), }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.SRV{Port: 3306, Target: "db1.example.com."}, @@ -198,7 +198,7 @@ func TestDNS(t *testing.T) { Names: []string{"_mysql._tcp.db.example.com."}, RefreshInterval: model.Duration(time.Minute), }, - lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{}, nil }, expected: []*targetgroup.Group{ @@ -215,7 +215,7 @@ func TestDNS(t *testing.T) { Port: 25, RefreshInterval: model.Duration(time.Minute), }, - lookup: func(name string, qtype 
uint16, logger log.Logger) (*dns.Msg, error) { + lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) { return &dns.Msg{ Answer: []dns.RR{ &dns.MX{Preference: 0, Mx: "smtp1.example.com."}, diff --git a/discovery/eureka/eureka.go b/discovery/eureka/eureka.go index 779c081aee..3cac667f85 100644 --- a/discovery/eureka/eureka.go +++ b/discovery/eureka/eureka.go @@ -16,14 +16,13 @@ package eureka import ( "context" "errors" - "fmt" + "log/slog" "net" "net/http" "net/url" "strconv" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -126,10 +125,10 @@ type Discovery struct { } // NewDiscovery creates a new Eureka discovery for the given role. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*eurekaMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "eureka_sd") diff --git a/discovery/file/file.go b/discovery/file/file.go index e7e9d0870f..beea03222b 100644 --- a/discovery/file/file.go +++ b/discovery/file/file.go @@ -19,6 +19,7 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "strings" @@ -26,12 +27,11 @@ import ( "time" "github.com/fsnotify/fsnotify" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/discovery" @@ -175,20 +175,20 @@ type Discovery struct { // and how many target groups they contained. // This is used to detect deleted target groups. lastRefresh map[string]int - logger log.Logger + logger *slog.Logger metrics *fileMetrics } // NewDiscovery returns a new file discovery for the given paths. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { fm, ok := metrics.(*fileMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } disc := &Discovery{ @@ -210,7 +210,7 @@ func (d *Discovery) listFiles() []string { for _, p := range d.paths { files, err := filepath.Glob(p) if err != nil { - level.Error(d.logger).Log("msg", "Error expanding glob", "glob", p, "err", err) + d.logger.Error("Error expanding glob", "glob", p, "err", err) continue } paths = append(paths, files...) 
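(Editor's sketch, not part of the patch: the file.go hunks here follow the same mechanical pattern as every file in this diff — go-kit's `level.X(logger).Log("msg", ...)` becomes `logger.X(...)` on a standard-library `*slog.Logger`, with `promslog.NewNopLogger()` as the nil fallback. A minimal before/after illustration; the glob path and error value are made up.)

```go
package main

import (
	"log/slog"

	"github.com/prometheus/common/promslog"
)

func logGlobError(logger *slog.Logger) *slog.Logger {
	if logger == nil {
		// Replaces go-kit's log.NewNopLogger().
		logger = promslog.NewNopLogger()
	}
	// Before: level.Error(logger).Log("msg", "Error expanding glob", "glob", p, "err", err)
	// After: the message is positional, followed by key/value pairs.
	logger.Error("Error expanding glob", "glob", "/etc/sd/*.json", "err", "permission denied")
	// log.With(logger, "datacenter", dc) becomes logger.With("datacenter", dc).
	return logger.With("discovery", "file")
}

func main() {
	logGlobError(nil)
}
```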
@@ -231,7 +231,7 @@ func (d *Discovery) watchFiles() { p = "./" } if err := d.watcher.Add(p); err != nil { - level.Error(d.logger).Log("msg", "Error adding file watch", "path", p, "err", err) + d.logger.Error("Error adding file watch", "path", p, "err", err) } } } @@ -240,7 +240,7 @@ func (d *Discovery) watchFiles() { func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { watcher, err := fsnotify.NewWatcher() if err != nil { - level.Error(d.logger).Log("msg", "Error adding file watcher", "err", err) + d.logger.Error("Error adding file watcher", "err", err) d.metrics.fileWatcherErrorsCount.Inc() return } @@ -280,7 +280,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { case err := <-d.watcher.Errors: if err != nil { - level.Error(d.logger).Log("msg", "Error watching file", "err", err) + d.logger.Error("Error watching file", "err", err) } } } @@ -300,7 +300,7 @@ func (d *Discovery) deleteTimestamp(filename string) { // stop shuts down the file watcher. func (d *Discovery) stop() { - level.Debug(d.logger).Log("msg", "Stopping file discovery...", "paths", fmt.Sprintf("%v", d.paths)) + d.logger.Debug("Stopping file discovery...", "paths", fmt.Sprintf("%v", d.paths)) done := make(chan struct{}) defer close(done) @@ -320,10 +320,10 @@ func (d *Discovery) stop() { } }() if err := d.watcher.Close(); err != nil { - level.Error(d.logger).Log("msg", "Error closing file watcher", "paths", fmt.Sprintf("%v", d.paths), "err", err) + d.logger.Error("Error closing file watcher", "paths", fmt.Sprintf("%v", d.paths), "err", err) } - level.Debug(d.logger).Log("msg", "File discovery stopped") + d.logger.Debug("File discovery stopped") } // refresh reads all files matching the discovery's patterns and sends the respective @@ -339,7 +339,7 @@ func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group) if err != nil { d.metrics.fileSDReadErrorsCount.Inc() - level.Error(d.logger).Log("msg", "Error reading file", "path", p, "err", err) + d.logger.Error("Error reading file", "path", p, "err", err) // Prevent deletion down below. ref[p] = d.lastRefresh[p] continue @@ -356,7 +356,7 @@ func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group) for f, n := range d.lastRefresh { m, ok := ref[f] if !ok || n > m { - level.Debug(d.logger).Log("msg", "file_sd refresh found file that should be removed", "file", f) + d.logger.Debug("file_sd refresh found file that should be removed", "file", f) d.deleteTimestamp(f) for i := m; i < n; i++ { select { diff --git a/discovery/gce/gce.go b/discovery/gce/gce.go index 15f32dd247..9a5b0e856e 100644 --- a/discovery/gce/gce.go +++ b/discovery/gce/gce.go @@ -17,12 +17,12 @@ import ( "context" "errors" "fmt" + "log/slog" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "golang.org/x/oauth2/google" @@ -129,10 +129,10 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*gceMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &Discovery{ diff --git a/discovery/hetzner/hcloud.go b/discovery/hetzner/hcloud.go index df56f94c5f..ba64250c0f 100644 --- a/discovery/hetzner/hcloud.go +++ b/discovery/hetzner/hcloud.go @@ -15,12 +15,12 @@ package hetzner import ( "context" + "log/slog" "net" "net/http" "strconv" "time" - "github.com/go-kit/log" "github.com/hetznercloud/hcloud-go/v2/hcloud" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -58,7 +58,7 @@ type hcloudDiscovery struct { } // newHcloudDiscovery returns a new hcloudDiscovery which periodically refreshes its targets. -func newHcloudDiscovery(conf *SDConfig, _ log.Logger) (*hcloudDiscovery, error) { +func newHcloudDiscovery(conf *SDConfig, _ *slog.Logger) (*hcloudDiscovery, error) { d := &hcloudDiscovery{ port: conf.Port, } diff --git a/discovery/hetzner/hcloud_test.go b/discovery/hetzner/hcloud_test.go index 10b799037a..fa8291625a 100644 --- a/discovery/hetzner/hcloud_test.go +++ b/discovery/hetzner/hcloud_test.go @@ -18,8 +18,8 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" ) @@ -43,7 +43,7 @@ func TestHCloudSDRefresh(t *testing.T) { cfg.HTTPClientConfig.BearerToken = hcloudTestToken cfg.hcloudEndpoint = suite.Mock.Endpoint() - d, err := newHcloudDiscovery(&cfg, log.NewNopLogger()) + d, err := newHcloudDiscovery(&cfg, promslog.NewNopLogger()) require.NoError(t, err) targetGroups, err := d.refresh(context.Background()) diff --git a/discovery/hetzner/hetzner.go b/discovery/hetzner/hetzner.go index 69c823d382..02e2272999 100644 --- a/discovery/hetzner/hetzner.go +++ b/discovery/hetzner/hetzner.go @@ -17,9 +17,9 @@ import ( "context" "errors" "fmt" + "log/slog" "time" - "github.com/go-kit/log" "github.com/hetznercloud/hcloud-go/v2/hcloud" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -135,10 +135,10 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*hetznerMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } r, err := newRefresher(conf, logger) @@ -157,7 +157,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere ), nil } -func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) { +func newRefresher(conf *SDConfig, l *slog.Logger) (refresher, error) { switch conf.Role { case HetznerRoleHcloud: if conf.hcloudEndpoint == "" { diff --git a/discovery/hetzner/robot.go b/discovery/hetzner/robot.go index 516470b05a..958f8f710f 100644 --- a/discovery/hetzner/robot.go +++ b/discovery/hetzner/robot.go @@ -18,13 +18,13 @@ import ( "encoding/json" "fmt" "io" + "log/slog" "net" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -51,7 +51,7 @@ type robotDiscovery struct { } // newRobotDiscovery returns a new robotDiscovery which periodically refreshes its targets. -func newRobotDiscovery(conf *SDConfig, _ log.Logger) (*robotDiscovery, error) { +func newRobotDiscovery(conf *SDConfig, _ *slog.Logger) (*robotDiscovery, error) { d := &robotDiscovery{ port: conf.Port, endpoint: conf.robotEndpoint, diff --git a/discovery/hetzner/robot_test.go b/discovery/hetzner/robot_test.go index abee5fea90..2618bd097c 100644 --- a/discovery/hetzner/robot_test.go +++ b/discovery/hetzner/robot_test.go @@ -18,9 +18,9 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" ) @@ -42,7 +42,7 @@ func TestRobotSDRefresh(t *testing.T) { cfg.HTTPClientConfig.BasicAuth = &config.BasicAuth{Username: robotTestUsername, Password: robotTestPassword} cfg.robotEndpoint = suite.Mock.Endpoint() - d, err := newRobotDiscovery(&cfg, log.NewNopLogger()) + d, err := newRobotDiscovery(&cfg, promslog.NewNopLogger()) require.NoError(t, err) targetGroups, err := d.refresh(context.Background()) @@ -91,12 +91,11 @@ func TestRobotSDRefreshHandleError(t *testing.T) { cfg := DefaultSDConfig cfg.robotEndpoint = suite.Mock.Endpoint() - d, err := newRobotDiscovery(&cfg, log.NewNopLogger()) + d, err := newRobotDiscovery(&cfg, promslog.NewNopLogger()) require.NoError(t, err) targetGroups, err := d.refresh(context.Background()) - require.Error(t, err) - require.Equal(t, "non 2xx status '401' response during hetzner service discovery with role robot", err.Error()) + require.EqualError(t, err, "non 2xx status '401' response during hetzner service discovery with role robot") require.Empty(t, targetGroups) } diff --git a/discovery/http/http.go b/discovery/http/http.go index ff76fd7627..667fd36f6c 100644 --- a/discovery/http/http.go +++ b/discovery/http/http.go @@ -19,17 +19,18 @@ import ( "errors" "fmt" "io" + "log/slog" "net/http" "net/url" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" 
"github.com/prometheus/prometheus/discovery" @@ -40,8 +41,8 @@ import ( var ( // DefaultSDConfig is the default HTTP SD configuration. DefaultSDConfig = SDConfig{ - RefreshInterval: model.Duration(60 * time.Second), HTTPClientConfig: config.DefaultHTTPClientConfig, + RefreshInterval: model.Duration(60 * time.Second), } userAgent = fmt.Sprintf("Prometheus/%s", version.Version) matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`) @@ -85,17 +86,17 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { return err } if c.URL == "" { - return fmt.Errorf("URL is missing") + return errors.New("URL is missing") } parsedURL, err := url.Parse(c.URL) if err != nil { return err } if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" { - return fmt.Errorf("URL scheme must be 'http' or 'https'") + return errors.New("URL scheme must be 'http' or 'https'") } if parsedURL.Host == "" { - return fmt.Errorf("host is missing in URL") + return errors.New("host is missing in URL") } return c.HTTPClientConfig.Validate() } @@ -114,14 +115,14 @@ type Discovery struct { } // NewDiscovery returns a new HTTP discovery for the given config. -func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPClientOption, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, clientOpts []config.HTTPClientOption, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*httpMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } client, err := config.NewClientFromConfig(conf.HTTPClientConfig, "http", clientOpts...) 
diff --git a/discovery/http/http_test.go b/discovery/http/http_test.go index 0cafe035dc..9d3a3fb5e7 100644 --- a/discovery/http/http_test.go +++ b/discovery/http/http_test.go @@ -21,11 +21,11 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -49,7 +49,7 @@ func TestHTTPValidRefresh(t *testing.T) { require.NoError(t, metrics.Register()) defer metrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics) require.NoError(t, err) ctx := context.Background() @@ -94,7 +94,7 @@ func TestHTTPInvalidCode(t *testing.T) { require.NoError(t, metrics.Register()) defer metrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics) require.NoError(t, err) ctx := context.Background() @@ -123,7 +123,7 @@ func TestHTTPInvalidFormat(t *testing.T) { require.NoError(t, metrics.Register()) defer metrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics) require.NoError(t, err) ctx := context.Background() @@ -442,7 +442,7 @@ func TestSourceDisappeared(t *testing.T) { require.NoError(t, metrics.Register()) defer metrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics) require.NoError(t, err) for _, test := range cases { ctx := context.Background() diff --git a/discovery/ionos/ionos.go b/discovery/ionos/ionos.go index c8b4f7f8e5..1badda48cb 100644 --- a/discovery/ionos/ionos.go +++ b/discovery/ionos/ionos.go @@ -15,10 +15,9 @@ package ionos import ( "errors" - "fmt" + "log/slog" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -43,10 +42,10 @@ func init() { type Discovery struct{} // NewDiscovery returns a new refresh.Discovery for IONOS Cloud. 
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*ionosMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if conf.ionosEndpoint == "" { diff --git a/discovery/ionos/server.go b/discovery/ionos/server.go index a850fbbfb4..18e89b1d43 100644 --- a/discovery/ionos/server.go +++ b/discovery/ionos/server.go @@ -16,13 +16,13 @@ package ionos import ( "context" "fmt" + "log/slog" "net" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" ionoscloud "github.com/ionos-cloud/sdk-go/v6" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -60,7 +60,7 @@ type serverDiscovery struct { datacenterID string } -func newServerDiscovery(conf *SDConfig, _ log.Logger) (*serverDiscovery, error) { +func newServerDiscovery(conf *SDConfig, _ *slog.Logger) (*serverDiscovery, error) { d := &serverDiscovery{ port: conf.Port, datacenterID: conf.DatacenterID, diff --git a/discovery/kubernetes/endpoints.go b/discovery/kubernetes/endpoints.go index c7a60ae6d3..14d3bc7a99 100644 --- a/discovery/kubernetes/endpoints.go +++ b/discovery/kubernetes/endpoints.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -33,7 +33,7 @@ import ( // Endpoints discovers new endpoint targets. type Endpoints struct { - logger log.Logger + logger *slog.Logger endpointsInf cache.SharedIndexInformer serviceInf cache.SharedInformer @@ -49,9 +49,9 @@ type Endpoints struct { } // NewEndpoints returns a new endpoints discovery. 
-func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints { +func NewEndpoints(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } epAddCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleAdd) @@ -92,26 +92,23 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca }, }) if err != nil { - level.Error(l).Log("msg", "Error adding endpoints event handler.", "err", err) + l.Error("Error adding endpoints event handler.", "err", err) } serviceUpdate := func(o interface{}) { svc, err := convertToService(o) if err != nil { - level.Error(e.logger).Log("msg", "converting to Service object failed", "err", err) + e.logger.Error("converting to Service object failed", "err", err) return } - ep := &apiv1.Endpoints{} - ep.Namespace = svc.Namespace - ep.Name = svc.Name - obj, exists, err := e.endpointsStore.Get(ep) + obj, exists, err := e.endpointsStore.GetByKey(namespacedName(svc.Namespace, svc.Name)) if exists && err == nil { e.enqueue(obj.(*apiv1.Endpoints)) } if err != nil { - level.Error(e.logger).Log("msg", "retrieving endpoints failed", "err", err) + e.logger.Error("retrieving endpoints failed", "err", err) } } _, err = e.serviceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -131,7 +128,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca }, }) if err != nil { - level.Error(l).Log("msg", "Error adding services event handler.", "err", err) + l.Error("Error adding services event handler.", "err", err) } _, err = e.podInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ UpdateFunc: func(old, cur interface{}) { @@ -154,7 +151,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca }, }) if err != nil { - level.Error(l).Log("msg", "Error adding pods event handler.", "err", err) + l.Error("Error adding pods event handler.", "err", err) } if e.withNodeMetadata { _, err = e.nodeInf.AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -167,12 +164,15 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca e.enqueueNode(node.Name) }, DeleteFunc: func(o interface{}) { - node := o.(*apiv1.Node) - e.enqueueNode(node.Name) + nodeName, err := nodeName(o) + if err != nil { + l.Error("Error getting Node name", "err", err) + } + e.enqueueNode(nodeName) }, }) if err != nil { - level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err) + l.Error("Error adding nodes event handler.", "err", err) } } @@ -182,7 +182,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca func (e *Endpoints) enqueueNode(nodeName string) { endpoints, err := e.endpointsInf.GetIndexer().ByIndex(nodeIndex, nodeName) if err != nil { - level.Error(e.logger).Log("msg", "Error getting endpoints for node", "node", nodeName, "err", err) + e.logger.Error("Error getting endpoints for node", "node", nodeName, "err", err) return } @@ -194,7 +194,7 @@ func (e *Endpoints) enqueueNode(nodeName string) { func (e *Endpoints) enqueuePod(podNamespacedName string) { endpoints, err := e.endpointsInf.GetIndexer().ByIndex(podIndex, podNamespacedName) if err != nil { - level.Error(e.logger).Log("msg", "Error getting endpoints for pod", "pod", podNamespacedName, "err", err) + e.logger.Error("Error getting endpoints for pod", "pod", 
podNamespacedName, "err", err) return } @@ -223,7 +223,7 @@ func (e *Endpoints) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(e.logger).Log("msg", "endpoints informer unable to sync cache") + e.logger.Error("endpoints informer unable to sync cache") } return } @@ -247,13 +247,13 @@ func (e *Endpoints) process(ctx context.Context, ch chan<- []*targetgroup.Group) namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { - level.Error(e.logger).Log("msg", "splitting key failed", "key", key) + e.logger.Error("splitting key failed", "key", key) return true } o, exists, err := e.endpointsStore.GetByKey(key) if err != nil { - level.Error(e.logger).Log("msg", "getting object from store failed", "key", key) + e.logger.Error("getting object from store failed", "key", key) return true } if !exists { @@ -262,7 +262,7 @@ func (e *Endpoints) process(ctx context.Context, ch chan<- []*targetgroup.Group) } eps, err := convertToEndpoints(o) if err != nil { - level.Error(e.logger).Log("msg", "converting to Endpoints object failed", "err", err) + e.logger.Error("converting to Endpoints object failed", "err", err) return true } send(ctx, ch, e.buildEndpoints(eps)) @@ -361,16 +361,19 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { target = target.Merge(podLabels(pod)) // Attach potential container port labels matching the endpoint port. - for _, c := range pod.Spec.Containers { + containers := append(pod.Spec.Containers, pod.Spec.InitContainers...) + for i, c := range containers { for _, cport := range c.Ports { if port.Port == cport.ContainerPort { ports := strconv.FormatUint(uint64(port.Port), 10) + isInit := i >= len(pod.Spec.Containers) target[podContainerNameLabel] = lv(c.Name) target[podContainerImageLabel] = lv(c.Image) target[podContainerPortNameLabel] = lv(cport.Name) target[podContainerPortNumberLabel] = lv(ports) target[podContainerPortProtocolLabel] = lv(string(port.Protocol)) + target[podContainerIsInit] = lv(strconv.FormatBool(isInit)) break } } @@ -397,10 +400,10 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { v := eps.Labels[apiv1.EndpointsOverCapacity] if v == "truncated" { - level.Warn(e.logger).Log("msg", "Number of endpoints in one Endpoints object exceeds 1000 and has been truncated, please use \"role: endpointslice\" instead", "endpoint", eps.Name) + e.logger.Warn("Number of endpoints in one Endpoints object exceeds 1000 and has been truncated, please use \"role: endpointslice\" instead", "endpoint", eps.Name) } if v == "warning" { - level.Warn(e.logger).Log("msg", "Number of endpoints in one Endpoints object exceeds 1000, please use \"role: endpointslice\" instead", "endpoint", eps.Name) + e.logger.Warn("Number of endpoints in one Endpoints object exceeds 1000, please use \"role: endpointslice\" instead", "endpoint", eps.Name) } // For all seen pods, check all container ports. If they were not covered @@ -411,7 +414,8 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { continue } - for _, c := range pe.pod.Spec.Containers { + containers := append(pe.pod.Spec.Containers, pe.pod.Spec.InitContainers...) 
+ for i, c := range containers { for _, cport := range c.Ports { hasSeenPort := func() bool { for _, eport := range pe.servicePorts { @@ -428,6 +432,7 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10)) ports := strconv.FormatUint(uint64(cport.ContainerPort), 10) + isInit := i >= len(pe.pod.Spec.Containers) target := model.LabelSet{ model.AddressLabel: lv(a), podContainerNameLabel: lv(c.Name), @@ -435,6 +440,7 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group { podContainerPortNameLabel: lv(cport.Name), podContainerPortNumberLabel: lv(ports), podContainerPortProtocolLabel: lv(string(cport.Protocol)), + podContainerIsInit: lv(strconv.FormatBool(isInit)), } tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod))) } @@ -448,13 +454,10 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { if ref == nil || ref.Kind != "Pod" { return nil } - p := &apiv1.Pod{} - p.Namespace = ref.Namespace - p.Name = ref.Name - obj, exists, err := e.podStore.Get(p) + obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name)) if err != nil { - level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err) + e.logger.Error("resolving pod ref failed", "err", err) return nil } if !exists { @@ -464,31 +467,27 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { } func (e *Endpoints) addServiceLabels(ns, name string, tg *targetgroup.Group) { - svc := &apiv1.Service{} - svc.Namespace = ns - svc.Name = name - - obj, exists, err := e.serviceStore.Get(svc) + obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name)) if err != nil { - level.Error(e.logger).Log("msg", "retrieving service failed", "err", err) + e.logger.Error("retrieving service failed", "err", err) return } if !exists { return } - svc = obj.(*apiv1.Service) + svc := obj.(*apiv1.Service) tg.Labels = tg.Labels.Merge(serviceLabels(svc)) } -func addNodeLabels(tg model.LabelSet, nodeInf cache.SharedInformer, logger log.Logger, nodeName *string) model.LabelSet { +func addNodeLabels(tg model.LabelSet, nodeInf cache.SharedInformer, logger *slog.Logger, nodeName *string) model.LabelSet { if nodeName == nil { return tg } obj, exists, err := nodeInf.GetStore().GetByKey(*nodeName) if err != nil { - level.Error(logger).Log("msg", "Error getting node", "node", *nodeName, "err", err) + logger.Error("Error getting node", "node", *nodeName, "err", err) return tg } diff --git a/discovery/kubernetes/endpoints_test.go b/discovery/kubernetes/endpoints_test.go index 3ea98c5db9..a1ac6e5d48 100644 --- a/discovery/kubernetes/endpoints_test.go +++ b/discovery/kubernetes/endpoints_test.go @@ -18,10 +18,12 @@ import ( "testing" "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/cache" "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -244,6 +246,7 @@ func TestEndpointsDiscoveryAdd(t *testing.T) { "__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, { "__address__": "1.2.3.4:9001", @@ -259,6 +262,7 @@ func TestEndpointsDiscoveryAdd(t *testing.T) { 
"__meta_kubernetes_pod_container_port_number": "9001", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -821,6 +825,7 @@ func TestEndpointsDiscoveryNamespaces(t *testing.T) { "__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -1078,6 +1083,7 @@ func TestEndpointsDiscoveryUpdatePod(t *testing.T) { "__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -1089,3 +1095,186 @@ func TestEndpointsDiscoveryUpdatePod(t *testing.T) { }, }.Run(t) } + +func TestEndpointsDiscoverySidecarContainer(t *testing.T) { + objs := []runtime.Object{ + &v1.Endpoints{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testsidecar", + Namespace: "default", + }, + Subsets: []v1.EndpointSubset{ + { + Addresses: []v1.EndpointAddress{ + { + IP: "4.3.2.1", + TargetRef: &v1.ObjectReference{ + Kind: "Pod", + Name: "testpod", + Namespace: "default", + }, + }, + }, + Ports: []v1.EndpointPort{ + { + Name: "testport", + Port: 9000, + Protocol: v1.ProtocolTCP, + }, + { + Name: "initport", + Port: 9111, + Protocol: v1.ProtocolTCP, + }, + }, + }, + }, + }, + &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testpod", + Namespace: "default", + UID: types.UID("deadbeef"), + }, + Spec: v1.PodSpec{ + NodeName: "testnode", + InitContainers: []v1.Container{ + { + Name: "ic1", + Image: "ic1:latest", + Ports: []v1.ContainerPort{ + { + Name: "initport", + ContainerPort: 1111, + Protocol: v1.ProtocolTCP, + }, + }, + }, + { + Name: "ic2", + Image: "ic2:latest", + Ports: []v1.ContainerPort{ + { + Name: "initport", + ContainerPort: 9111, + Protocol: v1.ProtocolTCP, + }, + }, + }, + }, + Containers: []v1.Container{ + { + Name: "c1", + Image: "c1:latest", + Ports: []v1.ContainerPort{ + { + Name: "mainport", + ContainerPort: 9000, + Protocol: v1.ProtocolTCP, + }, + }, + }, + }, + }, + Status: v1.PodStatus{ + HostIP: "2.3.4.5", + PodIP: "4.3.2.1", + }, + }, + } + + n, _ := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, objs...) 
+ + k8sDiscoveryTest{ + discovery: n, + expectedMaxItems: 1, + expectedRes: map[string]*targetgroup.Group{ + "endpoints/default/testsidecar": { + Targets: []model.LabelSet{ + { + "__address__": "4.3.2.1:9000", + "__meta_kubernetes_endpoint_address_target_kind": "Pod", + "__meta_kubernetes_endpoint_address_target_name": "testpod", + "__meta_kubernetes_endpoint_port_name": "testport", + "__meta_kubernetes_endpoint_port_protocol": "TCP", + "__meta_kubernetes_endpoint_ready": "true", + "__meta_kubernetes_pod_container_image": "c1:latest", + "__meta_kubernetes_pod_container_name": "c1", + "__meta_kubernetes_pod_container_port_name": "mainport", + "__meta_kubernetes_pod_container_port_number": "9000", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", + }, + { + "__address__": "4.3.2.1:9111", + "__meta_kubernetes_endpoint_address_target_kind": "Pod", + "__meta_kubernetes_endpoint_address_target_name": "testpod", + "__meta_kubernetes_endpoint_port_name": "initport", + "__meta_kubernetes_endpoint_port_protocol": "TCP", + "__meta_kubernetes_endpoint_ready": "true", + "__meta_kubernetes_pod_container_image": "ic2:latest", + "__meta_kubernetes_pod_container_name": "ic2", + "__meta_kubernetes_pod_container_port_name": "initport", + "__meta_kubernetes_pod_container_port_number": "9111", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "true", + }, + { + "__address__": "4.3.2.1:1111", + "__meta_kubernetes_pod_container_image": "ic1:latest", + "__meta_kubernetes_pod_container_name": "ic1", + "__meta_kubernetes_pod_container_port_name": "initport", + "__meta_kubernetes_pod_container_port_number": "1111", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "true", + }, + }, + Labels: model.LabelSet{ + "__meta_kubernetes_endpoints_name": "testsidecar", + "__meta_kubernetes_namespace": "default", + }, + Source: "endpoints/default/testsidecar", + }, + }, + }.Run(t) +} + +func BenchmarkResolvePodRef(b *testing.B) { + indexer := cache.NewIndexer(cache.DeletionHandlingMetaNamespaceKeyFunc, nil) + e := &Endpoints{ + podStore: indexer, + } + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + p := e.resolvePodRef(&v1.ObjectReference{ + Kind: "Pod", + Name: "testpod", + Namespace: "foo", + }) + require.Nil(b, p) + } +} diff --git a/discovery/kubernetes/endpointslice.go b/discovery/kubernetes/endpointslice.go index 7a70255c12..45bc43eff9 100644 --- a/discovery/kubernetes/endpointslice.go +++ b/discovery/kubernetes/endpointslice.go @@ -17,16 +17,15 @@ 
import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" v1 "k8s.io/api/discovery/v1" - "k8s.io/api/discovery/v1beta1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -36,7 +35,7 @@ import ( // EndpointSlice discovers new endpoint targets. type EndpointSlice struct { - logger log.Logger + logger *slog.Logger endpointSliceInf cache.SharedIndexInformer serviceInf cache.SharedInformer @@ -52,9 +51,9 @@ type EndpointSlice struct { } // NewEndpointSlice returns a new endpointslice discovery. -func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice { +func NewEndpointSlice(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } epslAddCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleAdd) @@ -93,23 +92,23 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod }, }) if err != nil { - level.Error(l).Log("msg", "Error adding endpoint slices event handler.", "err", err) + l.Error("Error adding endpoint slices event handler.", "err", err) } serviceUpdate := func(o interface{}) { svc, err := convertToService(o) if err != nil { - level.Error(e.logger).Log("msg", "converting to Service object failed", "err", err) + e.logger.Error("converting to Service object failed", "err", err) return } - // TODO(brancz): use cache.Indexer to index endpoints by - // disv1beta1.LabelServiceName so this operation doesn't have to - // iterate over all endpoint objects. + // TODO(brancz): use cache.Indexer to index endpointslices by + // LabelServiceName so this operation doesn't have to iterate over all + // endpoint objects. 
for _, obj := range e.endpointSliceStore.List() { esa, err := e.getEndpointSliceAdaptor(obj) if err != nil { - level.Error(e.logger).Log("msg", "converting to EndpointSlice object failed", "err", err) + e.logger.Error("converting to EndpointSlice object failed", "err", err) continue } if lv, exists := esa.labels()[esa.labelServiceName()]; exists && lv == svc.Name { @@ -132,7 +131,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod }, }) if err != nil { - level.Error(l).Log("msg", "Error adding services event handler.", "err", err) + l.Error("Error adding services event handler.", "err", err) } if e.withNodeMetadata { @@ -146,12 +145,15 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod e.enqueueNode(node.Name) }, DeleteFunc: func(o interface{}) { - node := o.(*apiv1.Node) - e.enqueueNode(node.Name) + nodeName, err := nodeName(o) + if err != nil { + l.Error("Error getting Node name", "err", err) + } + e.enqueueNode(nodeName) }, }) if err != nil { - level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err) + l.Error("Error adding nodes event handler.", "err", err) } } @@ -161,7 +163,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod func (e *EndpointSlice) enqueueNode(nodeName string) { endpoints, err := e.endpointSliceInf.GetIndexer().ByIndex(nodeIndex, nodeName) if err != nil { - level.Error(e.logger).Log("msg", "Error getting endpoints for node", "node", nodeName, "err", err) + e.logger.Error("Error getting endpoints for node", "node", nodeName, "err", err) return } @@ -189,7 +191,7 @@ func (e *EndpointSlice) Run(ctx context.Context, ch chan<- []*targetgroup.Group) } if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(e.logger).Log("msg", "endpointslice informer unable to sync cache") + e.logger.Error("endpointslice informer unable to sync cache") } return } @@ -213,13 +215,13 @@ func (e *EndpointSlice) process(ctx context.Context, ch chan<- []*targetgroup.Gr namespace, name, err := cache.SplitMetaNamespaceKey(key) if err != nil { - level.Error(e.logger).Log("msg", "splitting key failed", "key", key) + e.logger.Error("splitting key failed", "key", key) return true } o, exists, err := e.endpointSliceStore.GetByKey(key) if err != nil { - level.Error(e.logger).Log("msg", "getting object from store failed", "key", key) + e.logger.Error("getting object from store failed", "key", key) return true } if !exists { @@ -229,7 +231,7 @@ func (e *EndpointSlice) process(ctx context.Context, ch chan<- []*targetgroup.Gr esa, err := e.getEndpointSliceAdaptor(o) if err != nil { - level.Error(e.logger).Log("msg", "converting to EndpointSlice object failed", "err", err) + e.logger.Error("converting to EndpointSlice object failed", "err", err) return true } @@ -241,8 +243,6 @@ func (e *EndpointSlice) getEndpointSliceAdaptor(o interface{}) (endpointSliceAda switch endpointSlice := o.(type) { case *v1.EndpointSlice: return newEndpointSliceAdaptorFromV1(endpointSlice), nil - case *v1beta1.EndpointSlice: - return newEndpointSliceAdaptorFromV1beta1(endpointSlice), nil default: return nil, fmt.Errorf("received unexpected object: %v", o) } @@ -380,19 +380,23 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou target = target.Merge(podLabels(pod)) // Attach potential container port labels matching the endpoint port. 
- for _, c := range pod.Spec.Containers { + containers := append(pod.Spec.Containers, pod.Spec.InitContainers...) + for i, c := range containers { for _, cport := range c.Ports { if port.port() == nil { continue } + if *port.port() == cport.ContainerPort { ports := strconv.FormatUint(uint64(*port.port()), 10) + isInit := i >= len(pod.Spec.Containers) target[podContainerNameLabel] = lv(c.Name) target[podContainerImageLabel] = lv(c.Image) target[podContainerPortNameLabel] = lv(cport.Name) target[podContainerPortNumberLabel] = lv(ports) target[podContainerPortProtocolLabel] = lv(string(cport.Protocol)) + target[podContainerIsInit] = lv(strconv.FormatBool(isInit)) break } } @@ -420,7 +424,8 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou continue } - for _, c := range pe.pod.Spec.Containers { + containers := append(pe.pod.Spec.Containers, pe.pod.Spec.InitContainers...) + for i, c := range containers { for _, cport := range c.Ports { hasSeenPort := func() bool { for _, eport := range pe.servicePorts { @@ -440,6 +445,7 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10)) ports := strconv.FormatUint(uint64(cport.ContainerPort), 10) + isInit := i >= len(pe.pod.Spec.Containers) target := model.LabelSet{ model.AddressLabel: lv(a), podContainerNameLabel: lv(c.Name), @@ -447,6 +453,7 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou podContainerPortNameLabel: lv(cport.Name), podContainerPortNumberLabel: lv(ports), podContainerPortProtocolLabel: lv(string(cport.Protocol)), + podContainerIsInit: lv(strconv.FormatBool(isInit)), } tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod))) } @@ -460,13 +467,10 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { if ref == nil || ref.Kind != "Pod" { return nil } - p := &apiv1.Pod{} - p.Namespace = ref.Namespace - p.Name = ref.Name - obj, exists, err := e.podStore.Get(p) + obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name)) if err != nil { - level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err) + e.logger.Error("resolving pod ref failed", "err", err) return nil } if !exists { @@ -477,27 +481,27 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod { func (e *EndpointSlice) addServiceLabels(esa endpointSliceAdaptor, tg *targetgroup.Group) { var ( - svc = &apiv1.Service{} found bool + name string ) - svc.Namespace = esa.namespace() + ns := esa.namespace() // Every EndpointSlice object has the Service they belong to in the // kubernetes.io/service-name label. 
- svc.Name, found = esa.labels()[esa.labelServiceName()] + name, found = esa.labels()[esa.labelServiceName()] if !found { return } - obj, exists, err := e.serviceStore.Get(svc) + obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name)) if err != nil { - level.Error(e.logger).Log("msg", "retrieving service failed", "err", err) + e.logger.Error("retrieving service failed", "err", err) return } if !exists { return } - svc = obj.(*apiv1.Service) + svc := obj.(*apiv1.Service) tg.Labels = tg.Labels.Merge(serviceLabels(svc)) } diff --git a/discovery/kubernetes/endpointslice_adaptor.go b/discovery/kubernetes/endpointslice_adaptor.go index edd64fcb32..81243e2ce0 100644 --- a/discovery/kubernetes/endpointslice_adaptor.go +++ b/discovery/kubernetes/endpointslice_adaptor.go @@ -16,7 +16,6 @@ package kubernetes import ( corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/discovery/v1" - "k8s.io/api/discovery/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -109,59 +108,6 @@ func (e *endpointSliceAdaptorV1) labelServiceName() string { return v1.LabelServiceName } -// Adaptor for k8s.io/api/discovery/v1beta1. -type endpointSliceAdaptorV1Beta1 struct { - endpointSlice *v1beta1.EndpointSlice -} - -func newEndpointSliceAdaptorFromV1beta1(endpointSlice *v1beta1.EndpointSlice) endpointSliceAdaptor { - return &endpointSliceAdaptorV1Beta1{endpointSlice: endpointSlice} -} - -func (e *endpointSliceAdaptorV1Beta1) get() interface{} { - return e.endpointSlice -} - -func (e *endpointSliceAdaptorV1Beta1) getObjectMeta() metav1.ObjectMeta { - return e.endpointSlice.ObjectMeta -} - -func (e *endpointSliceAdaptorV1Beta1) name() string { - return e.endpointSlice.Name -} - -func (e *endpointSliceAdaptorV1Beta1) namespace() string { - return e.endpointSlice.Namespace -} - -func (e *endpointSliceAdaptorV1Beta1) addressType() string { - return string(e.endpointSlice.AddressType) -} - -func (e *endpointSliceAdaptorV1Beta1) endpoints() []endpointSliceEndpointAdaptor { - eps := make([]endpointSliceEndpointAdaptor, 0, len(e.endpointSlice.Endpoints)) - for i := 0; i < len(e.endpointSlice.Endpoints); i++ { - eps = append(eps, newEndpointSliceEndpointAdaptorFromV1beta1(e.endpointSlice.Endpoints[i])) - } - return eps -} - -func (e *endpointSliceAdaptorV1Beta1) ports() []endpointSlicePortAdaptor { - ports := make([]endpointSlicePortAdaptor, 0, len(e.endpointSlice.Ports)) - for i := 0; i < len(e.endpointSlice.Ports); i++ { - ports = append(ports, newEndpointSlicePortAdaptorFromV1beta1(e.endpointSlice.Ports[i])) - } - return ports -} - -func (e *endpointSliceAdaptorV1Beta1) labels() map[string]string { - return e.endpointSlice.Labels -} - -func (e *endpointSliceAdaptorV1Beta1) labelServiceName() string { - return v1beta1.LabelServiceName -} - type endpointSliceEndpointAdaptorV1 struct { endpoint v1.Endpoint } @@ -218,62 +164,6 @@ func (e *endpointSliceEndpointConditionsAdaptorV1) terminating() *bool { return e.endpointConditions.Terminating } -type endpointSliceEndpointAdaptorV1beta1 struct { - endpoint v1beta1.Endpoint -} - -func newEndpointSliceEndpointAdaptorFromV1beta1(endpoint v1beta1.Endpoint) endpointSliceEndpointAdaptor { - return &endpointSliceEndpointAdaptorV1beta1{endpoint: endpoint} -} - -func (e *endpointSliceEndpointAdaptorV1beta1) addresses() []string { - return e.endpoint.Addresses -} - -func (e *endpointSliceEndpointAdaptorV1beta1) hostname() *string { - return e.endpoint.Hostname -} - -func (e *endpointSliceEndpointAdaptorV1beta1) nodename() *string { - return e.endpoint.NodeName -} - -func (e 
*endpointSliceEndpointAdaptorV1beta1) zone() *string { - return nil -} - -func (e *endpointSliceEndpointAdaptorV1beta1) conditions() endpointSliceEndpointConditionsAdaptor { - return newEndpointSliceEndpointConditionsAdaptorFromV1beta1(e.endpoint.Conditions) -} - -func (e *endpointSliceEndpointAdaptorV1beta1) targetRef() *corev1.ObjectReference { - return e.endpoint.TargetRef -} - -func (e *endpointSliceEndpointAdaptorV1beta1) topology() map[string]string { - return e.endpoint.Topology -} - -type endpointSliceEndpointConditionsAdaptorV1beta1 struct { - endpointConditions v1beta1.EndpointConditions -} - -func newEndpointSliceEndpointConditionsAdaptorFromV1beta1(endpointConditions v1beta1.EndpointConditions) endpointSliceEndpointConditionsAdaptor { - return &endpointSliceEndpointConditionsAdaptorV1beta1{endpointConditions: endpointConditions} -} - -func (e *endpointSliceEndpointConditionsAdaptorV1beta1) ready() *bool { - return e.endpointConditions.Ready -} - -func (e *endpointSliceEndpointConditionsAdaptorV1beta1) serving() *bool { - return e.endpointConditions.Serving -} - -func (e *endpointSliceEndpointConditionsAdaptorV1beta1) terminating() *bool { - return e.endpointConditions.Terminating -} - type endpointSlicePortAdaptorV1 struct { endpointPort v1.EndpointPort } @@ -298,28 +188,3 @@ func (e *endpointSlicePortAdaptorV1) protocol() *string { func (e *endpointSlicePortAdaptorV1) appProtocol() *string { return e.endpointPort.AppProtocol } - -type endpointSlicePortAdaptorV1beta1 struct { - endpointPort v1beta1.EndpointPort -} - -func newEndpointSlicePortAdaptorFromV1beta1(port v1beta1.EndpointPort) endpointSlicePortAdaptor { - return &endpointSlicePortAdaptorV1beta1{endpointPort: port} -} - -func (e *endpointSlicePortAdaptorV1beta1) name() *string { - return e.endpointPort.Name -} - -func (e *endpointSlicePortAdaptorV1beta1) port() *int32 { - return e.endpointPort.Port -} - -func (e *endpointSlicePortAdaptorV1beta1) protocol() *string { - val := string(*e.endpointPort.Protocol) - return &val -} - -func (e *endpointSlicePortAdaptorV1beta1) appProtocol() *string { - return e.endpointPort.AppProtocol -} diff --git a/discovery/kubernetes/endpointslice_adaptor_test.go b/discovery/kubernetes/endpointslice_adaptor_test.go index 1ee3337193..de33c64b66 100644 --- a/discovery/kubernetes/endpointslice_adaptor_test.go +++ b/discovery/kubernetes/endpointslice_adaptor_test.go @@ -18,7 +18,6 @@ import ( "github.com/stretchr/testify/require" v1 "k8s.io/api/discovery/v1" - "k8s.io/api/discovery/v1beta1" ) func Test_EndpointSliceAdaptor_v1(t *testing.T) { @@ -48,31 +47,3 @@ func Test_EndpointSliceAdaptor_v1(t *testing.T) { require.Equal(t, endpointSlice.Ports[i].AppProtocol, portAdaptor.appProtocol()) } } - -func Test_EndpointSliceAdaptor_v1beta1(t *testing.T) { - endpointSlice := makeEndpointSliceV1beta1() - adaptor := newEndpointSliceAdaptorFromV1beta1(endpointSlice) - - require.Equal(t, endpointSlice.ObjectMeta.Name, adaptor.name()) - require.Equal(t, endpointSlice.ObjectMeta.Namespace, adaptor.namespace()) - require.Equal(t, endpointSlice.AddressType, v1beta1.AddressType(adaptor.addressType())) - require.Equal(t, endpointSlice.Labels, adaptor.labels()) - require.Equal(t, "testendpoints", endpointSlice.Labels[v1beta1.LabelServiceName]) - - for i, endpointAdaptor := range adaptor.endpoints() { - require.Equal(t, endpointSlice.Endpoints[i].Addresses, endpointAdaptor.addresses()) - require.Equal(t, endpointSlice.Endpoints[i].Hostname, endpointAdaptor.hostname()) - require.Equal(t, 
endpointSlice.Endpoints[i].Conditions.Ready, endpointAdaptor.conditions().ready()) - require.Equal(t, endpointSlice.Endpoints[i].Conditions.Serving, endpointAdaptor.conditions().serving()) - require.Equal(t, endpointSlice.Endpoints[i].Conditions.Terminating, endpointAdaptor.conditions().terminating()) - require.Equal(t, endpointSlice.Endpoints[i].TargetRef, endpointAdaptor.targetRef()) - require.Equal(t, endpointSlice.Endpoints[i].Topology, endpointAdaptor.topology()) - } - - for i, portAdaptor := range adaptor.ports() { - require.Equal(t, endpointSlice.Ports[i].Name, portAdaptor.name()) - require.Equal(t, endpointSlice.Ports[i].Port, portAdaptor.port()) - require.EqualValues(t, endpointSlice.Ports[i].Protocol, portAdaptor.protocol()) - require.Equal(t, endpointSlice.Ports[i].AppProtocol, portAdaptor.appProtocol()) - } -} diff --git a/discovery/kubernetes/endpointslice_test.go b/discovery/kubernetes/endpointslice_test.go index 6ef83081be..cc92c7ddaa 100644 --- a/discovery/kubernetes/endpointslice_test.go +++ b/discovery/kubernetes/endpointslice_test.go @@ -21,7 +21,6 @@ import ( "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/discovery/v1" - "k8s.io/api/discovery/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -114,62 +113,8 @@ func makeEndpointSliceV1() *v1.EndpointSlice { } } -func makeEndpointSliceV1beta1() *v1beta1.EndpointSlice { - return &v1beta1.EndpointSlice{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testendpoints", - Namespace: "default", - Labels: map[string]string{ - v1beta1.LabelServiceName: "testendpoints", - }, - Annotations: map[string]string{ - "test.annotation": "test", - }, - }, - AddressType: v1beta1.AddressTypeIPv4, - Ports: []v1beta1.EndpointPort{ - { - Name: strptr("testport"), - Port: int32ptr(9000), - Protocol: protocolptr(corev1.ProtocolTCP), - }, - }, - Endpoints: []v1beta1.Endpoint{ - { - Addresses: []string{"1.2.3.4"}, - Hostname: strptr("testendpoint1"), - }, { - Addresses: []string{"2.3.4.5"}, - Conditions: v1beta1.EndpointConditions{ - Ready: boolptr(true), - Serving: boolptr(true), - Terminating: boolptr(false), - }, - }, { - Addresses: []string{"3.4.5.6"}, - Conditions: v1beta1.EndpointConditions{ - Ready: boolptr(false), - Serving: boolptr(true), - Terminating: boolptr(true), - }, - }, { - Addresses: []string{"4.5.6.7"}, - Conditions: v1beta1.EndpointConditions{ - Ready: boolptr(true), - Serving: boolptr(true), - Terminating: boolptr(false), - }, - TargetRef: &corev1.ObjectReference{ - Kind: "Node", - Name: "barbaz", - }, - }, - }, - } -} - func TestEndpointSliceDiscoveryBeforeRun(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.25.0") + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}) k8sDiscoveryTest{ discovery: n, @@ -249,71 +194,6 @@ func TestEndpointSliceDiscoveryBeforeRun(t *testing.T) { }.Run(t) } -func TestEndpointSliceDiscoveryBeforeRunV1beta1(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "1.20.0") - - k8sDiscoveryTest{ - discovery: n, - beforeRun: func() { - obj := makeEndpointSliceV1beta1() - c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{}) - }, - expectedMaxItems: 1, - expectedRes: map[string]*targetgroup.Group{ - "endpointslice/default/testendpoints": { - Targets: []model.LabelSet{ - { - 
"__address__": "1.2.3.4:9000", - "__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "2.3.4.5:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "3.4.5.6:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "4.5.6.7:9000", - "__meta_kubernetes_endpointslice_address_target_kind": "Node", - "__meta_kubernetes_endpointslice_address_target_name": "barbaz", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - }, - Labels: model.LabelSet{ - "__meta_kubernetes_endpointslice_address_type": "IPv4", - "__meta_kubernetes_namespace": "default", - "__meta_kubernetes_endpointslice_name": "testendpoints", - "__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints", - "__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true", - "__meta_kubernetes_endpointslice_annotation_test_annotation": "test", - "__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true", - }, - Source: "endpointslice/default/testendpoints", - }, - }, - }.Run(t) -} - func TestEndpointSliceDiscoveryAdd(t *testing.T) { obj := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -353,25 +233,25 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) { PodIP: "1.2.3.4", }, } - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.20.0", obj) + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, obj) k8sDiscoveryTest{ discovery: n, afterStart: func() { - obj := &v1beta1.EndpointSlice{ + obj := &v1.EndpointSlice{ ObjectMeta: metav1.ObjectMeta{ Name: "testendpoints", Namespace: "default", }, - AddressType: v1beta1.AddressTypeIPv4, - Ports: []v1beta1.EndpointPort{ + AddressType: v1.AddressTypeIPv4, + Ports: []v1.EndpointPort{ { Name: strptr("testport"), Port: int32ptr(9000), Protocol: protocolptr(corev1.ProtocolTCP), }, }, - Endpoints: []v1beta1.Endpoint{ + Endpoints: []v1.Endpoint{ { Addresses: []string{"4.3.2.1"}, TargetRef: &corev1.ObjectReference{ @@ -379,13 +259,13 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) { Name: "testpod", Namespace: "default", }, - Conditions: v1beta1.EndpointConditions{ + Conditions: v1.EndpointConditions{ Ready: boolptr(false), }, }, }, } - 
c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{}) + c.DiscoveryV1().EndpointSlices(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{}) }, expectedMaxItems: 1, expectedRes: map[string]*targetgroup.Group{ @@ -411,6 +291,7 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) { "__meta_kubernetes_pod_phase": "", "__meta_kubernetes_pod_ready": "unknown", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, { "__address__": "1.2.3.4:9001", @@ -426,6 +307,7 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) { "__meta_kubernetes_pod_phase": "", "__meta_kubernetes_pod_ready": "unknown", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -440,118 +322,34 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) { } func TestEndpointSliceDiscoveryDelete(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1()) + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1()) k8sDiscoveryTest{ discovery: n, afterStart: func() { obj := makeEndpointSliceV1() - c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Delete(context.Background(), obj.Name, metav1.DeleteOptions{}) + c.DiscoveryV1().EndpointSlices(obj.Namespace).Delete(context.Background(), obj.Name, metav1.DeleteOptions{}) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "endpointslice/default/testendpoints": { Source: "endpointslice/default/testendpoints", - Targets: []model.LabelSet{ - { - "__address__": "1.2.3.4:9000", - "__meta_kubernetes_endpointslice_address_target_kind": "", - "__meta_kubernetes_endpointslice_address_target_name": "", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1", - "__meta_kubernetes_endpointslice_endpoint_node_name": "foobar", - "__meta_kubernetes_endpointslice_endpoint_topology_present_topology": "true", - "__meta_kubernetes_endpointslice_endpoint_topology_topology": "value", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "2.3.4.5:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1b", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "3.4.5.6:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true", - "__meta_kubernetes_endpointslice_endpoint_zone": 
"us-east-1c", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "4.5.6.7:9000", - "__meta_kubernetes_endpointslice_address_target_kind": "Node", - "__meta_kubernetes_endpointslice_address_target_name": "barbaz", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - }, - Labels: map[model.LabelName]model.LabelValue{ - "__meta_kubernetes_endpointslice_address_type": "IPv4", - "__meta_kubernetes_endpointslice_name": "testendpoints", - "__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints", - "__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true", - "__meta_kubernetes_endpointslice_annotation_test_annotation": "test", - "__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true", - "__meta_kubernetes_namespace": "default", - }, }, }, }.Run(t) } func TestEndpointSliceDiscoveryUpdate(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1()) + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1()) k8sDiscoveryTest{ discovery: n, afterStart: func() { - obj := &v1beta1.EndpointSlice{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testendpoints", - Namespace: "default", - }, - AddressType: v1beta1.AddressTypeIPv4, - Ports: []v1beta1.EndpointPort{ - { - Name: strptr("testport"), - Port: int32ptr(9000), - Protocol: protocolptr(corev1.ProtocolTCP), - }, - }, - Endpoints: []v1beta1.Endpoint{ - { - Addresses: []string{"1.2.3.4"}, - Hostname: strptr("testendpoint1"), - }, { - Addresses: []string{"2.3.4.5"}, - Conditions: v1beta1.EndpointConditions{ - Ready: boolptr(true), - }, - }, - }, - } - c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{}) + obj := makeEndpointSliceV1() + obj.ObjectMeta.Labels = nil + obj.ObjectMeta.Annotations = nil + obj.Endpoints = obj.Endpoints[0:2] + c.DiscoveryV1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{}) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ @@ -586,39 +384,11 @@ func TestEndpointSliceDiscoveryUpdate(t *testing.T) { "__meta_kubernetes_endpointslice_port_name": "testport", "__meta_kubernetes_endpointslice_port_protocol": "TCP", }, - { - "__address__": "3.4.5.6:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1c", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": 
"TCP", - }, - { - "__address__": "4.5.6.7:9000", - "__meta_kubernetes_endpointslice_address_target_kind": "Node", - "__meta_kubernetes_endpointslice_address_target_name": "barbaz", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, }, Labels: model.LabelSet{ - "__meta_kubernetes_endpointslice_address_type": "IPv4", - "__meta_kubernetes_endpointslice_name": "testendpoints", - "__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints", - "__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true", - "__meta_kubernetes_endpointslice_annotation_test_annotation": "test", - "__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true", - "__meta_kubernetes_namespace": "default", + "__meta_kubernetes_endpointslice_address_type": "IPv4", + "__meta_kubernetes_endpointslice_name": "testendpoints", + "__meta_kubernetes_namespace": "default", }, }, }, @@ -626,85 +396,18 @@ func TestEndpointSliceDiscoveryUpdate(t *testing.T) { } func TestEndpointSliceDiscoveryEmptyEndpoints(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1()) + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1()) k8sDiscoveryTest{ discovery: n, afterStart: func() { - obj := &v1beta1.EndpointSlice{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testendpoints", - Namespace: "default", - }, - AddressType: v1beta1.AddressTypeIPv4, - Ports: []v1beta1.EndpointPort{ - { - Name: strptr("testport"), - Port: int32ptr(9000), - Protocol: protocolptr(corev1.ProtocolTCP), - }, - }, - Endpoints: []v1beta1.Endpoint{}, - } - c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{}) + obj := makeEndpointSliceV1() + obj.Endpoints = []v1.Endpoint{} + c.DiscoveryV1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{}) }, expectedMaxItems: 2, expectedRes: map[string]*targetgroup.Group{ "endpointslice/default/testendpoints": { - Targets: []model.LabelSet{ - { - "__address__": "1.2.3.4:9000", - "__meta_kubernetes_endpointslice_address_target_kind": "", - "__meta_kubernetes_endpointslice_address_target_name": "", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1", - "__meta_kubernetes_endpointslice_endpoint_node_name": "foobar", - "__meta_kubernetes_endpointslice_endpoint_topology_present_topology": "true", - "__meta_kubernetes_endpointslice_endpoint_topology_topology": "value", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": 
"TCP", - }, - { - "__address__": "2.3.4.5:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1b", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "3.4.5.6:9000", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1c", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - { - "__address__": "4.5.6.7:9000", - "__meta_kubernetes_endpointslice_address_target_kind": "Node", - "__meta_kubernetes_endpointslice_address_target_name": "barbaz", - "__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true", - "__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false", - "__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a", - "__meta_kubernetes_endpointslice_port": "9000", - "__meta_kubernetes_endpointslice_port_app_protocol": "http", - "__meta_kubernetes_endpointslice_port_name": "testport", - "__meta_kubernetes_endpointslice_port_protocol": "TCP", - }, - }, Labels: model.LabelSet{ "__meta_kubernetes_endpointslice_address_type": "IPv4", "__meta_kubernetes_endpointslice_name": "testendpoints", @@ -721,7 +424,7 @@ func TestEndpointSliceDiscoveryEmptyEndpoints(t *testing.T) { } func TestEndpointSliceDiscoveryWithService(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1()) + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1()) k8sDiscoveryTest{ discovery: n, @@ -813,7 +516,7 @@ func TestEndpointSliceDiscoveryWithService(t *testing.T) { } func TestEndpointSliceDiscoveryWithServiceUpdate(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1()) + n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1()) k8sDiscoveryTest{ discovery: n, @@ -1285,6 +988,7 @@ func TestEndpointSliceDiscoveryNamespaces(t *testing.T) { "__meta_kubernetes_pod_phase": "", "__meta_kubernetes_pod_ready": "unknown", "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", }, }, Labels: model.LabelSet{ @@ -1498,3 +1202,165 @@ func TestEndpointSliceInfIndexersCount(t *testing.T) { }) } } + +func TestEndpointSliceDiscoverySidecarContainer(t *testing.T) { + objs := []runtime.Object{ + &v1.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testsidecar", + Namespace: "default", + }, + AddressType: v1.AddressTypeIPv4, + Ports: []v1.EndpointPort{ + { + Name: strptr("testport"), + Port: int32ptr(9000), + Protocol: protocolptr(corev1.ProtocolTCP), + }, + { 
+ Name: strptr("initport"), + Port: int32ptr(9111), + Protocol: protocolptr(corev1.ProtocolTCP), + }, + }, + Endpoints: []v1.Endpoint{ + { + Addresses: []string{"4.3.2.1"}, + TargetRef: &corev1.ObjectReference{ + Kind: "Pod", + Name: "testpod", + Namespace: "default", + }, + }, + }, + }, + &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "testpod", + Namespace: "default", + UID: types.UID("deadbeef"), + }, + Spec: corev1.PodSpec{ + NodeName: "testnode", + InitContainers: []corev1.Container{ + { + Name: "ic1", + Image: "ic1:latest", + Ports: []corev1.ContainerPort{ + { + Name: "initport", + ContainerPort: 1111, + Protocol: corev1.ProtocolTCP, + }, + }, + }, + { + Name: "ic2", + Image: "ic2:latest", + Ports: []corev1.ContainerPort{ + { + Name: "initport", + ContainerPort: 9111, + Protocol: corev1.ProtocolTCP, + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "c1", + Image: "c1:latest", + Ports: []corev1.ContainerPort{ + { + Name: "mainport", + ContainerPort: 9000, + Protocol: corev1.ProtocolTCP, + }, + }, + }, + }, + }, + Status: corev1.PodStatus{ + HostIP: "2.3.4.5", + PodIP: "4.3.2.1", + }, + }, + } + + n, _ := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{}, objs...) + + k8sDiscoveryTest{ + discovery: n, + expectedMaxItems: 1, + expectedRes: map[string]*targetgroup.Group{ + "endpointslice/default/testsidecar": { + Targets: []model.LabelSet{ + { + "__address__": "4.3.2.1:9000", + "__meta_kubernetes_endpointslice_address_target_kind": "Pod", + "__meta_kubernetes_endpointslice_address_target_name": "testpod", + "__meta_kubernetes_endpointslice_port": "9000", + "__meta_kubernetes_endpointslice_port_name": "testport", + "__meta_kubernetes_endpointslice_port_protocol": "TCP", + "__meta_kubernetes_pod_container_image": "c1:latest", + "__meta_kubernetes_pod_container_name": "c1", + "__meta_kubernetes_pod_container_port_name": "mainport", + "__meta_kubernetes_pod_container_port_number": "9000", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "false", + }, + { + "__address__": "4.3.2.1:9111", + "__meta_kubernetes_endpointslice_address_target_kind": "Pod", + "__meta_kubernetes_endpointslice_address_target_name": "testpod", + "__meta_kubernetes_endpointslice_port": "9111", + "__meta_kubernetes_endpointslice_port_name": "initport", + "__meta_kubernetes_endpointslice_port_protocol": "TCP", + "__meta_kubernetes_pod_container_image": "ic2:latest", + "__meta_kubernetes_pod_container_name": "ic2", + "__meta_kubernetes_pod_container_port_name": "initport", + "__meta_kubernetes_pod_container_port_number": "9111", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "true", + }, + { + "__address__": "4.3.2.1:1111", + "__meta_kubernetes_pod_container_image": "ic1:latest", + "__meta_kubernetes_pod_container_name": "ic1", + "__meta_kubernetes_pod_container_port_name": "initport", + 
"__meta_kubernetes_pod_container_port_number": "1111", + "__meta_kubernetes_pod_container_port_protocol": "TCP", + "__meta_kubernetes_pod_host_ip": "2.3.4.5", + "__meta_kubernetes_pod_ip": "4.3.2.1", + "__meta_kubernetes_pod_name": "testpod", + "__meta_kubernetes_pod_node_name": "testnode", + "__meta_kubernetes_pod_phase": "", + "__meta_kubernetes_pod_ready": "unknown", + "__meta_kubernetes_pod_uid": "deadbeef", + "__meta_kubernetes_pod_container_init": "true", + }, + }, + Labels: model.LabelSet{ + "__meta_kubernetes_endpointslice_address_type": "IPv4", + "__meta_kubernetes_endpointslice_name": "testsidecar", + "__meta_kubernetes_namespace": "default", + }, + Source: "endpointslice/default/testsidecar", + }, + }, + }.Run(t) +} diff --git a/discovery/kubernetes/ingress.go b/discovery/kubernetes/ingress.go index 7b6366b257..1b7847c5c4 100644 --- a/discovery/kubernetes/ingress.go +++ b/discovery/kubernetes/ingress.go @@ -17,14 +17,12 @@ import ( "context" "errors" "fmt" + "log/slog" "strings" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" v1 "k8s.io/api/networking/v1" - "k8s.io/api/networking/v1beta1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -33,14 +31,14 @@ import ( // Ingress implements discovery of Kubernetes ingress. type Ingress struct { - logger log.Logger + logger *slog.Logger informer cache.SharedInformer store cache.Store queue *workqueue.Type } // NewIngress returns a new ingress discovery. -func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress { +func NewIngress(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress { ingressAddCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleAdd) ingressUpdateCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleUpdate) ingressDeleteCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleDelete) @@ -67,7 +65,7 @@ func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.C }, }) if err != nil { - level.Error(l).Log("msg", "Error adding ingresses event handler.", "err", err) + l.Error("Error adding ingresses event handler.", "err", err) } return s } @@ -87,7 +85,7 @@ func (i *Ingress) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), i.informer.HasSynced) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(i.logger).Log("msg", "ingress informer unable to sync cache") + i.logger.Error("ingress informer unable to sync cache") } return } @@ -127,10 +125,8 @@ func (i *Ingress) process(ctx context.Context, ch chan<- []*targetgroup.Group) b switch ingress := o.(type) { case *v1.Ingress: ia = newIngressAdaptorFromV1(ingress) - case *v1beta1.Ingress: - ia = newIngressAdaptorFromV1beta1(ingress) default: - level.Error(i.logger).Log("msg", "converting to Ingress object failed", "err", + i.logger.Error("converting to Ingress object failed", "err", fmt.Errorf("received unexpected object: %v", o)) return true } diff --git a/discovery/kubernetes/ingress_adaptor.go b/discovery/kubernetes/ingress_adaptor.go index d1a7b7f2a2..84281196b4 100644 --- a/discovery/kubernetes/ingress_adaptor.go +++ b/discovery/kubernetes/ingress_adaptor.go @@ -15,7 +15,6 @@ package kubernetes import ( v1 "k8s.io/api/networking/v1" - "k8s.io/api/networking/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -89,56 +88,3 @@ func (i 
*ingressRuleAdaptorV1) paths() []string { } func (i *ingressRuleAdaptorV1) host() string { return i.rule.Host } - -// Adaptor for networking.k8s.io/v1beta1. -type ingressAdaptorV1Beta1 struct { - ingress *v1beta1.Ingress -} - -func newIngressAdaptorFromV1beta1(ingress *v1beta1.Ingress) ingressAdaptor { - return &ingressAdaptorV1Beta1{ingress: ingress} -} -func (i *ingressAdaptorV1Beta1) getObjectMeta() metav1.ObjectMeta { return i.ingress.ObjectMeta } -func (i *ingressAdaptorV1Beta1) name() string { return i.ingress.Name } -func (i *ingressAdaptorV1Beta1) namespace() string { return i.ingress.Namespace } -func (i *ingressAdaptorV1Beta1) labels() map[string]string { return i.ingress.Labels } -func (i *ingressAdaptorV1Beta1) annotations() map[string]string { return i.ingress.Annotations } -func (i *ingressAdaptorV1Beta1) ingressClassName() *string { return i.ingress.Spec.IngressClassName } - -func (i *ingressAdaptorV1Beta1) tlsHosts() []string { - var hosts []string - for _, tls := range i.ingress.Spec.TLS { - hosts = append(hosts, tls.Hosts...) - } - return hosts -} - -func (i *ingressAdaptorV1Beta1) rules() []ingressRuleAdaptor { - var rules []ingressRuleAdaptor - for _, rule := range i.ingress.Spec.Rules { - rules = append(rules, newIngressRuleAdaptorFromV1Beta1(rule)) - } - return rules -} - -type ingressRuleAdaptorV1Beta1 struct { - rule v1beta1.IngressRule -} - -func newIngressRuleAdaptorFromV1Beta1(rule v1beta1.IngressRule) ingressRuleAdaptor { - return &ingressRuleAdaptorV1Beta1{rule: rule} -} - -func (i *ingressRuleAdaptorV1Beta1) paths() []string { - rv := i.rule.IngressRuleValue - if rv.HTTP == nil { - return nil - } - paths := make([]string, len(rv.HTTP.Paths)) - for n, p := range rv.HTTP.Paths { - paths[n] = p.Path - } - return paths -} - -func (i *ingressRuleAdaptorV1Beta1) host() string { return i.rule.Host } diff --git a/discovery/kubernetes/ingress_test.go b/discovery/kubernetes/ingress_test.go index 8e6654c2cc..9bddfb1e14 100644 --- a/discovery/kubernetes/ingress_test.go +++ b/discovery/kubernetes/ingress_test.go @@ -20,7 +20,6 @@ import ( "github.com/prometheus/common/model" v1 "k8s.io/api/networking/v1" - "k8s.io/api/networking/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -89,60 +88,6 @@ func makeIngress(tls TLSMode) *v1.Ingress { return ret } -func makeIngressV1beta1(tls TLSMode) *v1beta1.Ingress { - ret := &v1beta1.Ingress{ - ObjectMeta: metav1.ObjectMeta{ - Name: "testingress", - Namespace: "default", - Labels: map[string]string{"test/label": "testvalue"}, - Annotations: map[string]string{"test/annotation": "testannotationvalue"}, - }, - Spec: v1beta1.IngressSpec{ - IngressClassName: classString("testclass"), - TLS: nil, - Rules: []v1beta1.IngressRule{ - { - Host: "example.com", - IngressRuleValue: v1beta1.IngressRuleValue{ - HTTP: &v1beta1.HTTPIngressRuleValue{ - Paths: []v1beta1.HTTPIngressPath{ - {Path: "/"}, - {Path: "/foo"}, - }, - }, - }, - }, - { - // No backend config, ignored - Host: "nobackend.example.com", - IngressRuleValue: v1beta1.IngressRuleValue{ - HTTP: &v1beta1.HTTPIngressRuleValue{}, - }, - }, - { - Host: "test.example.com", - IngressRuleValue: v1beta1.IngressRuleValue{ - HTTP: &v1beta1.HTTPIngressRuleValue{ - Paths: []v1beta1.HTTPIngressPath{{}}, - }, - }, - }, - }, - }, - } - - switch tls { - case TLSYes: - ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: []string{"example.com", "test.example.com"}}} - case TLSMixed: - ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: 
[]string{"example.com"}}} - case TLSWildcard: - ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: []string{"*.example.com"}}} - } - - return ret -} - func classString(v string) *string { return &v } @@ -212,20 +157,6 @@ func TestIngressDiscoveryAdd(t *testing.T) { }.Run(t) } -func TestIngressDiscoveryAddV1beta1(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}, "v1.18.0") - - k8sDiscoveryTest{ - discovery: n, - afterStart: func() { - obj := makeIngressV1beta1(TLSNo) - c.NetworkingV1beta1().Ingresses("default").Create(context.Background(), obj, metav1.CreateOptions{}) - }, - expectedMaxItems: 1, - expectedRes: expectedTargetGroups("default", TLSNo), - }.Run(t) -} - func TestIngressDiscoveryAddTLS(t *testing.T) { n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}) @@ -240,20 +171,6 @@ func TestIngressDiscoveryAddTLS(t *testing.T) { }.Run(t) } -func TestIngressDiscoveryAddTLSV1beta1(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}, "v1.18.0") - - k8sDiscoveryTest{ - discovery: n, - afterStart: func() { - obj := makeIngressV1beta1(TLSYes) - c.NetworkingV1beta1().Ingresses("default").Create(context.Background(), obj, metav1.CreateOptions{}) - }, - expectedMaxItems: 1, - expectedRes: expectedTargetGroups("default", TLSYes), - }.Run(t) -} - func TestIngressDiscoveryAddMixed(t *testing.T) { n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}) @@ -268,20 +185,6 @@ func TestIngressDiscoveryAddMixed(t *testing.T) { }.Run(t) } -func TestIngressDiscoveryAddMixedV1beta1(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}, "v1.18.0") - - k8sDiscoveryTest{ - discovery: n, - afterStart: func() { - obj := makeIngressV1beta1(TLSMixed) - c.NetworkingV1beta1().Ingresses("default").Create(context.Background(), obj, metav1.CreateOptions{}) - }, - expectedMaxItems: 1, - expectedRes: expectedTargetGroups("default", TLSMixed), - }.Run(t) -} - func TestIngressDiscoveryNamespaces(t *testing.T) { n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}) @@ -303,27 +206,6 @@ func TestIngressDiscoveryNamespaces(t *testing.T) { }.Run(t) } -func TestIngressDiscoveryNamespacesV1beta1(t *testing.T) { - n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}, "v1.18.0") - - expected := expectedTargetGroups("ns1", TLSNo) - for k, v := range expectedTargetGroups("ns2", TLSNo) { - expected[k] = v - } - k8sDiscoveryTest{ - discovery: n, - afterStart: func() { - for _, ns := range []string{"ns1", "ns2"} { - obj := makeIngressV1beta1(TLSNo) - obj.Namespace = ns - c.NetworkingV1beta1().Ingresses(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{}) - } - }, - expectedMaxItems: 2, - expectedRes: expected, - }.Run(t) -} - func TestIngressDiscoveryOwnNamespace(t *testing.T) { n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{IncludeOwnNamespace: true}) diff --git a/discovery/kubernetes/kubernetes.go b/discovery/kubernetes/kubernetes.go index a8b6f85899..9aff89e4fa 100644 --- a/discovery/kubernetes/kubernetes.go +++ b/discovery/kubernetes/kubernetes.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "os" "reflect" "strings" @@ -25,23 +26,18 @@ import ( "github.com/prometheus/prometheus/util/strutil" - disv1beta1 "k8s.io/api/discovery/v1beta1" - - "github.com/go-kit/log" - 
"github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" apiv1 "k8s.io/api/core/v1" disv1 "k8s.io/api/discovery/v1" networkv1 "k8s.io/api/networking/v1" - "k8s.io/api/networking/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" - utilversion "k8s.io/apimachinery/pkg/util/version" "k8s.io/apimachinery/pkg/watch" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" @@ -177,7 +173,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { return err } if c.Role == "" { - return fmt.Errorf("role missing (one of: pod, service, endpoints, endpointslice, node, ingress)") + return errors.New("role missing (one of: pod, service, endpoints, endpointslice, node, ingress)") } err = c.HTTPClientConfig.Validate() if err != nil { @@ -185,20 +181,20 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { } if c.APIServer.URL != nil && c.KubeConfig != "" { // Api-server and kubeconfig_file are mutually exclusive - return fmt.Errorf("cannot use 'kubeconfig_file' and 'api_server' simultaneously") + return errors.New("cannot use 'kubeconfig_file' and 'api_server' simultaneously") } if c.KubeConfig != "" && !reflect.DeepEqual(c.HTTPClientConfig, config.DefaultHTTPClientConfig) { // Kubeconfig_file and custom http config are mutually exclusive - return fmt.Errorf("cannot use a custom HTTP client configuration together with 'kubeconfig_file'") + return errors.New("cannot use a custom HTTP client configuration together with 'kubeconfig_file'") } if c.APIServer.URL == nil && !reflect.DeepEqual(c.HTTPClientConfig, config.DefaultHTTPClientConfig) { - return fmt.Errorf("to use custom HTTP client configuration please provide the 'api_server' URL explicitly") + return errors.New("to use custom HTTP client configuration please provide the 'api_server' URL explicitly") } if c.APIServer.URL != nil && c.NamespaceDiscovery.IncludeOwnNamespace { - return fmt.Errorf("cannot use 'api_server' and 'namespaces.own_namespace' simultaneously") + return errors.New("cannot use 'api_server' and 'namespaces.own_namespace' simultaneously") } if c.KubeConfig != "" && c.NamespaceDiscovery.IncludeOwnNamespace { - return fmt.Errorf("cannot use 'kubeconfig_file' and 'namespaces.own_namespace' simultaneously") + return errors.New("cannot use 'kubeconfig_file' and 'namespaces.own_namespace' simultaneously") } foundSelectorRoles := make(map[Role]struct{}) @@ -264,7 +260,7 @@ type Discovery struct { sync.RWMutex client kubernetes.Interface role Role - logger log.Logger + logger *slog.Logger namespaceDiscovery *NamespaceDiscovery discoverers []discovery.Discoverer selectors roleSelector @@ -289,14 +285,14 @@ func (d *Discovery) getNamespaces() []string { } // New creates a new Kubernetes discovery for the given role. 
-func New(l log.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Discovery, error) { +func New(l *slog.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Discovery, error) { m, ok := metrics.(*kubernetesMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } var ( kcfg *rest.Config @@ -328,7 +324,7 @@ func New(l log.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Di ownNamespace = string(ownNamespaceContents) } - level.Info(l).Log("msg", "Using pod service account via in-cluster config") + l.Info("Using pod service account via in-cluster config") default: rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "kubernetes_sd") if err != nil { @@ -401,55 +397,22 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { switch d.role { case RoleEndpointSlice: - // Check "networking.k8s.io/v1" availability with retries. - // If "v1" is not available, use "networking.k8s.io/v1beta1" for backward compatibility - var v1Supported bool - if retryOnError(ctx, 10*time.Second, - func() (err error) { - v1Supported, err = checkDiscoveryV1Supported(d.client) - if err != nil { - level.Error(d.logger).Log("msg", "Failed to check networking.k8s.io/v1 availability", "err", err) - } - return err - }, - ) { - d.Unlock() - return - } - for _, namespace := range namespaces { var informer cache.SharedIndexInformer - if v1Supported { - e := d.client.DiscoveryV1().EndpointSlices(namespace) - elw := &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - options.FieldSelector = d.selectors.endpointslice.field - options.LabelSelector = d.selectors.endpointslice.label - return e.List(ctx, options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - options.FieldSelector = d.selectors.endpointslice.field - options.LabelSelector = d.selectors.endpointslice.label - return e.Watch(ctx, options) - }, - } - informer = d.newEndpointSlicesByNodeInformer(elw, &disv1.EndpointSlice{}) - } else { - e := d.client.DiscoveryV1beta1().EndpointSlices(namespace) - elw := &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - options.FieldSelector = d.selectors.endpointslice.field - options.LabelSelector = d.selectors.endpointslice.label - return e.List(ctx, options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - options.FieldSelector = d.selectors.endpointslice.field - options.LabelSelector = d.selectors.endpointslice.label - return e.Watch(ctx, options) - }, - } - informer = d.newEndpointSlicesByNodeInformer(elw, &disv1beta1.EndpointSlice{}) + e := d.client.DiscoveryV1().EndpointSlices(namespace) + elw := &cache.ListWatch{ + ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { + options.FieldSelector = d.selectors.endpointslice.field + options.LabelSelector = d.selectors.endpointslice.label + return e.List(ctx, options) + }, + WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { + options.FieldSelector = d.selectors.endpointslice.field + options.LabelSelector = d.selectors.endpointslice.label + return e.Watch(ctx, options) + }, } + informer = d.newEndpointSlicesByNodeInformer(elw, &disv1.EndpointSlice{}) s := d.client.CoreV1().Services(namespace) slw := &cache.ListWatch{ @@ -483,7 +446,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- 
[]*targetgroup.Group) { go nodeInf.Run(ctx.Done()) } eps := NewEndpointSlice( - log.With(d.logger, "role", "endpointslice"), + d.logger.With("role", "endpointslice"), informer, d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled), d.mustNewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled), @@ -543,7 +506,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { } eps := NewEndpoints( - log.With(d.logger, "role", "endpoint"), + d.logger.With("role", "endpoint"), d.newEndpointsByNodeInformer(elw), d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled), d.mustNewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled), @@ -577,7 +540,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { }, } pod := NewPod( - log.With(d.logger, "role", "pod"), + d.logger.With("role", "pod"), d.newPodsByNodeInformer(plw), nodeInformer, d.metrics.eventCount, @@ -601,7 +564,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { }, } svc := NewService( - log.With(d.logger, "role", "service"), + d.logger.With("role", "service"), d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled), d.metrics.eventCount, ) @@ -609,57 +572,24 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { go svc.informer.Run(ctx.Done()) } case RoleIngress: - // Check "networking.k8s.io/v1" availability with retries. - // If "v1" is not available, use "networking.k8s.io/v1beta1" for backward compatibility - var v1Supported bool - if retryOnError(ctx, 10*time.Second, - func() (err error) { - v1Supported, err = checkNetworkingV1Supported(d.client) - if err != nil { - level.Error(d.logger).Log("msg", "Failed to check networking.k8s.io/v1 availability", "err", err) - } - return err - }, - ) { - d.Unlock() - return - } - for _, namespace := range namespaces { var informer cache.SharedInformer - if v1Supported { - i := d.client.NetworkingV1().Ingresses(namespace) - ilw := &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - options.FieldSelector = d.selectors.ingress.field - options.LabelSelector = d.selectors.ingress.label - return i.List(ctx, options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - options.FieldSelector = d.selectors.ingress.field - options.LabelSelector = d.selectors.ingress.label - return i.Watch(ctx, options) - }, - } - informer = d.mustNewSharedInformer(ilw, &networkv1.Ingress{}, resyncDisabled) - } else { - i := d.client.NetworkingV1beta1().Ingresses(namespace) - ilw := &cache.ListWatch{ - ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - options.FieldSelector = d.selectors.ingress.field - options.LabelSelector = d.selectors.ingress.label - return i.List(ctx, options) - }, - WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - options.FieldSelector = d.selectors.ingress.field - options.LabelSelector = d.selectors.ingress.label - return i.Watch(ctx, options) - }, - } - informer = d.mustNewSharedInformer(ilw, &v1beta1.Ingress{}, resyncDisabled) + i := d.client.NetworkingV1().Ingresses(namespace) + ilw := &cache.ListWatch{ + ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { + options.FieldSelector = d.selectors.ingress.field + options.LabelSelector = d.selectors.ingress.label + return i.List(ctx, options) + }, + WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { + options.FieldSelector = d.selectors.ingress.field + options.LabelSelector = 
d.selectors.ingress.label + return i.Watch(ctx, options) + }, } + informer = d.mustNewSharedInformer(ilw, &networkv1.Ingress{}, resyncDisabled) ingress := NewIngress( - log.With(d.logger, "role", "ingress"), + d.logger.With("role", "ingress"), informer, d.metrics.eventCount, ) @@ -668,11 +598,11 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { } case RoleNode: nodeInformer := d.newNodeInformer(ctx) - node := NewNode(log.With(d.logger, "role", "node"), nodeInformer, d.metrics.eventCount) + node := NewNode(d.logger.With("role", "node"), nodeInformer, d.metrics.eventCount) d.discoverers = append(d.discoverers, node) go node.informer.Run(ctx.Done()) default: - level.Error(d.logger).Log("msg", "unknown Kubernetes discovery kind", "role", d.role) + d.logger.Error("unknown Kubernetes discovery kind", "role", d.role) } var wg sync.WaitGroup @@ -720,20 +650,6 @@ func retryOnError(ctx context.Context, interval time.Duration, f func() error) ( } } -func checkNetworkingV1Supported(client kubernetes.Interface) (bool, error) { - k8sVer, err := client.Discovery().ServerVersion() - if err != nil { - return false, err - } - semVer, err := utilversion.ParseSemantic(k8sVer.String()) - if err != nil { - return false, err - } - // networking.k8s.io/v1 is available since Kubernetes v1.19 - // https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.19.md - return semVer.Major() >= 1 && semVer.Minor() >= 19, nil -} - func (d *Discovery) newNodeInformer(ctx context.Context) cache.SharedInformer { nlw := &cache.ListWatch{ ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { @@ -756,7 +672,7 @@ func (d *Discovery) newPodsByNodeInformer(plw *cache.ListWatch) cache.SharedInde indexers[nodeIndex] = func(obj interface{}) ([]string, error) { pod, ok := obj.(*apiv1.Pod) if !ok { - return nil, fmt.Errorf("object is not a pod") + return nil, errors.New("object is not a pod") } return []string{pod.Spec.NodeName}, nil } @@ -770,7 +686,7 @@ func (d *Discovery) newEndpointsByNodeInformer(plw *cache.ListWatch) cache.Share indexers[podIndex] = func(obj interface{}) ([]string, error) { e, ok := obj.(*apiv1.Endpoints) if !ok { - return nil, fmt.Errorf("object is not endpoints") + return nil, errors.New("object is not endpoints") } var pods []string for _, target := range e.Subsets { @@ -789,7 +705,7 @@ func (d *Discovery) newEndpointsByNodeInformer(plw *cache.ListWatch) cache.Share indexers[nodeIndex] = func(obj interface{}) ([]string, error) { e, ok := obj.(*apiv1.Endpoints) if !ok { - return nil, fmt.Errorf("object is not endpoints") + return nil, errors.New("object is not endpoints") } var nodes []string for _, target := range e.Subsets { @@ -834,21 +750,8 @@ func (d *Discovery) newEndpointSlicesByNodeInformer(plw *cache.ListWatch, object } } } - case *disv1beta1.EndpointSlice: - for _, target := range e.Endpoints { - if target.TargetRef != nil { - switch target.TargetRef.Kind { - case "Pod": - if target.NodeName != nil { - nodes = append(nodes, *target.NodeName) - } - case "Node": - nodes = append(nodes, target.TargetRef.Name) - } - } - } default: - return nil, fmt.Errorf("object is not an endpointslice") + return nil, errors.New("object is not an endpointslice") } return nodes, nil @@ -882,21 +785,6 @@ func (d *Discovery) mustNewSharedIndexInformer(lw cache.ListerWatcher, exampleOb return informer } -func checkDiscoveryV1Supported(client kubernetes.Interface) (bool, error) { - k8sVer, err := client.Discovery().ServerVersion() - if err != nil { - return false, 
err - } - semVer, err := utilversion.ParseSemantic(k8sVer.String()) - if err != nil { - return false, err - } - // The discovery.k8s.io/v1beta1 API version of EndpointSlice will no longer be served in v1.25. - // discovery.k8s.io/v1 is available since Kubernetes v1.21 - // https://kubernetes.io/docs/reference/using-api/deprecation-guide/#v1-25 - return semVer.Major() >= 1 && semVer.Minor() >= 21, nil -} - func addObjectMetaLabels(labelSet model.LabelSet, objectMeta metav1.ObjectMeta, role Role) { labelSet[model.LabelName(metaLabelPrefix+string(role)+"_name")] = lv(objectMeta.Name) @@ -916,3 +804,13 @@ func addObjectMetaLabels(labelSet model.LabelSet, objectMeta metav1.ObjectMeta, func namespacedName(namespace, name string) string { return namespace + "/" + name } + +// nodeName knows how to handle the cache.DeletedFinalStateUnknown tombstone. +// It assumes the MetaNamespaceKeyFunc keyFunc is used, which uses the node name as the tombstone key. +func nodeName(o interface{}) (string, error) { + key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(o) + if err != nil { + return "", err + } + return key, nil +} diff --git a/discovery/kubernetes/kubernetes_test.go b/discovery/kubernetes/kubernetes_test.go index 50f25a20ab..a14f2b3d1b 100644 --- a/discovery/kubernetes/kubernetes_test.go +++ b/discovery/kubernetes/kubernetes_test.go @@ -20,10 +20,12 @@ import ( "testing" "time" - "github.com/go-kit/log" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" + apiv1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/version" "k8s.io/apimachinery/pkg/watch" @@ -46,7 +48,7 @@ func TestMain(m *testing.M) { // makeDiscovery creates a kubernetes.Discovery instance for testing. func makeDiscovery(role Role, nsDiscovery NamespaceDiscovery, objects ...runtime.Object) (*Discovery, kubernetes.Interface) { - return makeDiscoveryWithVersion(role, nsDiscovery, "v1.22.0", objects...) + return makeDiscoveryWithVersion(role, nsDiscovery, "v1.25.0", objects...) } // makeDiscoveryWithVersion creates a kubernetes.Discovery instance with the specified kubernetes version for testing. 
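// Sketch (assumed values) of the tombstone behavior the new nodeName helper
// and TestNodeName rely on: DeletionHandlingMetaNamespaceKeyFunc unwraps a
// cache.DeletedFinalStateUnknown and returns its stored key, and for live
// objects falls back to MetaNamespaceKeyFunc, which for a cluster-scoped
// Node is just the node name.
//
//	func tombstoneKey() (string, error) {
//		return cache.DeletionHandlingMetaNamespaceKeyFunc(
//			cache.DeletedFinalStateUnknown{Key: "node-1"}) // -> "node-1", nil
//	}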
@@ -71,7 +73,7 @@ func makeDiscoveryWithVersion(role Role, nsDiscovery NamespaceDiscovery, k8sVer d := &Discovery{ client: clientset, - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), role: role, namespaceDiscovery: &nsDiscovery, ownNamespace: "own-ns", @@ -285,40 +287,6 @@ func TestRetryOnError(t *testing.T) { } } -func TestCheckNetworkingV1Supported(t *testing.T) { - tests := []struct { - version string - wantSupported bool - wantErr bool - }{ - {version: "v1.18.0", wantSupported: false, wantErr: false}, - {version: "v1.18.1", wantSupported: false, wantErr: false}, - // networking v1 is supported since Kubernetes v1.19 - {version: "v1.19.0", wantSupported: true, wantErr: false}, - {version: "v1.20.0-beta.2", wantSupported: true, wantErr: false}, - // error patterns - {version: "", wantSupported: false, wantErr: true}, - {version: "<>", wantSupported: false, wantErr: true}, - } - - for _, tc := range tests { - tc := tc - t.Run(tc.version, func(t *testing.T) { - clientset := fake.NewSimpleClientset() - fakeDiscovery, _ := clientset.Discovery().(*fakediscovery.FakeDiscovery) - fakeDiscovery.FakedServerVersion = &version.Info{GitVersion: tc.version} - supported, err := checkNetworkingV1Supported(clientset) - - if tc.wantErr { - require.Error(t, err) - } else { - require.NoError(t, err) - } - require.Equal(t, tc.wantSupported, supported) - }) - } -} - func TestFailuresCountMetric(t *testing.T) { tests := []struct { role Role @@ -354,3 +322,18 @@ func TestFailuresCountMetric(t *testing.T) { }) } } + +func TestNodeName(t *testing.T) { + node := &apiv1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo", + }, + } + name, err := nodeName(node) + require.NoError(t, err) + require.Equal(t, "foo", name) + + name, err = nodeName(cache.DeletedFinalStateUnknown{Key: "bar"}) + require.NoError(t, err) + require.Equal(t, "bar", name) +} diff --git a/discovery/kubernetes/node.go b/discovery/kubernetes/node.go index 74d87e22c4..0e0c5745f2 100644 --- a/discovery/kubernetes/node.go +++ b/discovery/kubernetes/node.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -38,16 +38,16 @@ const ( // Node discovers Kubernetes nodes. type Node struct { - logger log.Logger + logger *slog.Logger informer cache.SharedInformer store cache.Store queue *workqueue.Type } // NewNode returns a new node discovery. 
-func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node { +func NewNode(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } nodeAddCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleAdd) @@ -76,13 +76,13 @@ func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.Coun }, }) if err != nil { - level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err) + l.Error("Error adding nodes event handler.", "err", err) } return n } func (n *Node) enqueue(obj interface{}) { - key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) + key, err := nodeName(obj) if err != nil { return } @@ -96,7 +96,7 @@ func (n *Node) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), n.informer.HasSynced) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(n.logger).Log("msg", "node informer unable to sync cache") + n.logger.Error("node informer unable to sync cache") } return } @@ -133,7 +133,7 @@ func (n *Node) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool } node, err := convertToNode(o) if err != nil { - level.Error(n.logger).Log("msg", "converting to Node object failed", "err", err) + n.logger.Error("converting to Node object failed", "err", err) return true } send(ctx, ch, n.buildNode(node)) @@ -181,7 +181,7 @@ func (n *Node) buildNode(node *apiv1.Node) *targetgroup.Group { addr, addrMap, err := nodeAddress(node) if err != nil { - level.Warn(n.logger).Log("msg", "No node address found", "err", err) + n.logger.Warn("No node address found", "err", err) return nil } addr = net.JoinHostPort(addr, strconv.FormatInt(int64(node.Status.DaemonEndpoints.KubeletEndpoint.Port), 10)) diff --git a/discovery/kubernetes/pod.go b/discovery/kubernetes/pod.go index 02990e415f..8704a66239 100644 --- a/discovery/kubernetes/pod.go +++ b/discovery/kubernetes/pod.go @@ -17,14 +17,14 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/cache" @@ -44,14 +44,14 @@ type Pod struct { nodeInf cache.SharedInformer withNodeMetadata bool store cache.Store - logger log.Logger + logger *slog.Logger queue *workqueue.Type } // NewPod creates a new pod discovery. 
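[Editorial note, not part of the diff: node.go above shows the mechanical shape of the go-kit-to-slog migration repeated in every file of this patch: `level.Error(l).Log("msg", ...)` becomes a direct leveled call, `log.With(l, ...)` becomes the `With` method, and `promslog.NewNopLogger()` replaces go-kit's `log.NewNopLogger()` as the nil-logger default. A minimal sketch of the new call sites; the stdlib `slog.NewTextHandler` construction is illustrative, not upstream code:]

```go
package main

import (
	"log/slog"
	"os"

	"github.com/prometheus/common/promslog"
)

// newComponent mirrors the constructor shape used across this diff:
// a nil logger is replaced by promslog's no-op slog logger.
func newComponent(l *slog.Logger) *slog.Logger {
	if l == nil {
		l = promslog.NewNopLogger() // was: log.NewNopLogger()
	}
	return l.With("role", "node") // was: log.With(l, "role", "node")
}

func main() {
	l := newComponent(slog.New(slog.NewTextHandler(os.Stderr, nil)))
	// was: level.Error(l).Log("msg", "converting to Node object failed", "err", err)
	l.Error("converting to Node object failed", "err", "example")
}
```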
-func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod { +func NewPod(l *slog.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } podAddCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleAdd) @@ -81,7 +81,7 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo }, }) if err != nil { - level.Error(l).Log("msg", "Error adding pods event handler.", "err", err) + l.Error("Error adding pods event handler.", "err", err) } if p.withNodeMetadata { @@ -95,12 +95,15 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo p.enqueuePodsForNode(node.Name) }, DeleteFunc: func(o interface{}) { - node := o.(*apiv1.Node) - p.enqueuePodsForNode(node.Name) + nodeName, err := nodeName(o) + if err != nil { + l.Error("Error getting Node name", "err", err) + } + p.enqueuePodsForNode(nodeName) }, }) if err != nil { - level.Error(l).Log("msg", "Error adding pods event handler.", "err", err) + l.Error("Error adding pods event handler.", "err", err) } } @@ -127,7 +130,7 @@ func (p *Pod) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(p.logger).Log("msg", "pod informer unable to sync cache") + p.logger.Error("pod informer unable to sync cache") } return } @@ -164,7 +167,7 @@ func (p *Pod) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool } pod, err := convertToPod(o) if err != nil { - level.Error(p.logger).Log("msg", "converting to Pod object failed", "err", err) + p.logger.Error("converting to Pod object failed", "err", err) return true } send(ctx, ch, p.buildPod(pod)) @@ -246,7 +249,7 @@ func (p *Pod) findPodContainerStatus(statuses *[]apiv1.ContainerStatus, containe func (p *Pod) findPodContainerID(statuses *[]apiv1.ContainerStatus, containerName string) string { cStatus, err := p.findPodContainerStatus(statuses, containerName) if err != nil { - level.Debug(p.logger).Log("msg", "cannot find container ID", "err", err) + p.logger.Debug("cannot find container ID", "err", err) return "" } return cStatus.ContainerID @@ -315,7 +318,7 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group { func (p *Pod) enqueuePodsForNode(nodeName string) { pods, err := p.podInf.GetIndexer().ByIndex(nodeIndex, nodeName) if err != nil { - level.Error(p.logger).Log("msg", "Error getting pods for node", "node", nodeName, "err", err) + p.logger.Error("Error getting pods for node", "node", nodeName, "err", err) return } diff --git a/discovery/kubernetes/service.go b/discovery/kubernetes/service.go index 51204a5a1a..e666497c86 100644 --- a/discovery/kubernetes/service.go +++ b/discovery/kubernetes/service.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" apiv1 "k8s.io/api/core/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -33,16 +33,16 @@ import ( // Service implements discovery of Kubernetes services. type Service struct { - logger log.Logger + logger *slog.Logger informer cache.SharedInformer store cache.Store queue *workqueue.Type } // NewService returns a new service discovery. 
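[Editorial note, not part of the diff: pod.go keys its work queue by node and resolves pods back through a `SharedIndexInformer` index; `enqueuePodsForNode` above is the read side of the indexer that `newPodsByNodeInformer` registers in kubernetes.go. A self-contained sketch of the same index shape using a plain client-go indexer — the literal index name "node" stands in for the package's `nodeIndex` constant:]

```go
package main

import (
	"errors"
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/cache"
)

func main() {
	idx := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{
		"node": func(obj interface{}) ([]string, error) {
			pod, ok := obj.(*apiv1.Pod)
			if !ok {
				return nil, errors.New("object is not a pod")
			}
			// Index each pod under the node it is scheduled on.
			return []string{pod.Spec.NodeName}, nil
		},
	})
	pod := &apiv1.Pod{
		ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "p1"},
		Spec:       apiv1.PodSpec{NodeName: "node-1"},
	}
	_ = idx.Add(pod)
	// The lookup enqueuePodsForNode performs via GetIndexer().ByIndex(...).
	pods, _ := idx.ByIndex("node", "node-1")
	fmt.Println(len(pods)) // 1
}
```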
-func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service { +func NewService(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd) @@ -71,7 +71,7 @@ func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.C }, }) if err != nil { - level.Error(l).Log("msg", "Error adding services event handler.", "err", err) + l.Error("Error adding services event handler.", "err", err) } return s } @@ -91,7 +91,7 @@ func (s *Service) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { if !cache.WaitForCacheSync(ctx.Done(), s.informer.HasSynced) { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(s.logger).Log("msg", "service informer unable to sync cache") + s.logger.Error("service informer unable to sync cache") } return } @@ -128,7 +128,7 @@ func (s *Service) process(ctx context.Context, ch chan<- []*targetgroup.Group) b } eps, err := convertToService(o) if err != nil { - level.Error(s.logger).Log("msg", "converting to Service object failed", "err", err) + s.logger.Error("converting to Service object failed", "err", err) return true } send(ctx, ch, s.buildService(eps)) diff --git a/discovery/legacymanager/manager.go b/discovery/legacymanager/manager.go deleted file mode 100644 index 6fc61485d1..0000000000 --- a/discovery/legacymanager/manager.go +++ /dev/null @@ -1,332 +0,0 @@ -// Copyright 2016 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package legacymanager - -import ( - "context" - "fmt" - "reflect" - "sync" - "time" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/client_golang/prometheus" - - "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/discovery/targetgroup" -) - -type poolKey struct { - setName string - provider string -} - -// provider holds a Discoverer instance, its configuration and its subscribers. -type provider struct { - name string - d discovery.Discoverer - subs []string - config interface{} -} - -// NewManager is the Discovery Manager constructor. -func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, sdMetrics map[string]discovery.DiscovererMetrics, options ...func(*Manager)) *Manager { - if logger == nil { - logger = log.NewNopLogger() - } - mgr := &Manager{ - logger: logger, - syncCh: make(chan map[string][]*targetgroup.Group), - targets: make(map[poolKey]map[string]*targetgroup.Group), - discoverCancel: []context.CancelFunc{}, - ctx: ctx, - updatert: 5 * time.Second, - triggerSend: make(chan struct{}, 1), - registerer: registerer, - sdMetrics: sdMetrics, - } - for _, option := range options { - option(mgr) - } - - // Register the metrics. - // We have to do this after setting all options, so that the name of the Manager is set. 
- if metrics, err := discovery.NewManagerMetrics(registerer, mgr.name); err == nil { - mgr.metrics = metrics - } else { - level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err) - return nil - } - - return mgr -} - -// Name sets the name of the manager. -func Name(n string) func(*Manager) { - return func(m *Manager) { - m.mtx.Lock() - defer m.mtx.Unlock() - m.name = n - } -} - -// Manager maintains a set of discovery providers and sends each update to a map channel. -// Targets are grouped by the target set name. -type Manager struct { - logger log.Logger - name string - mtx sync.RWMutex - ctx context.Context - discoverCancel []context.CancelFunc - - // Some Discoverers(eg. k8s) send only the updates for a given target group - // so we use map[tg.Source]*targetgroup.Group to know which group to update. - targets map[poolKey]map[string]*targetgroup.Group - // providers keeps track of SD providers. - providers []*provider - // The sync channel sends the updates as a map where the key is the job value from the scrape config. - syncCh chan map[string][]*targetgroup.Group - - // How long to wait before sending updates to the channel. The variable - // should only be modified in unit tests. - updatert time.Duration - - // The triggerSend channel signals to the manager that new updates have been received from providers. - triggerSend chan struct{} - - // A registerer for all service discovery metrics. - registerer prometheus.Registerer - - metrics *discovery.Metrics - sdMetrics map[string]discovery.DiscovererMetrics -} - -// Run starts the background processing. -func (m *Manager) Run() error { - go m.sender() - <-m.ctx.Done() - m.cancelDiscoverers() - return m.ctx.Err() -} - -// SyncCh returns a read only channel used by all the clients to receive target updates. -func (m *Manager) SyncCh() <-chan map[string][]*targetgroup.Group { - return m.syncCh -} - -// ApplyConfig removes all running discovery providers and starts new ones using the provided config. -func (m *Manager) ApplyConfig(cfg map[string]discovery.Configs) error { - m.mtx.Lock() - defer m.mtx.Unlock() - - for pk := range m.targets { - if _, ok := cfg[pk.setName]; !ok { - m.metrics.DiscoveredTargets.DeleteLabelValues(m.name, pk.setName) - } - } - m.cancelDiscoverers() - m.targets = make(map[poolKey]map[string]*targetgroup.Group) - m.providers = nil - m.discoverCancel = nil - - failedCount := 0 - for name, scfg := range cfg { - failedCount += m.registerProviders(scfg, name) - m.metrics.DiscoveredTargets.WithLabelValues(name).Set(0) - } - m.metrics.FailedConfigs.Set(float64(failedCount)) - - for _, prov := range m.providers { - m.startProvider(m.ctx, prov) - } - - return nil -} - -// StartCustomProvider is used for sdtool. Only use this if you know what you're doing. 
-func (m *Manager) StartCustomProvider(ctx context.Context, name string, worker discovery.Discoverer) { - p := &provider{ - name: name, - d: worker, - subs: []string{name}, - } - m.providers = append(m.providers, p) - m.startProvider(ctx, p) -} - -func (m *Manager) startProvider(ctx context.Context, p *provider) { - level.Debug(m.logger).Log("msg", "Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs)) - ctx, cancel := context.WithCancel(ctx) - updates := make(chan []*targetgroup.Group) - - m.discoverCancel = append(m.discoverCancel, cancel) - - go p.d.Run(ctx, updates) - go m.updater(ctx, p, updates) -} - -func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targetgroup.Group) { - for { - select { - case <-ctx.Done(): - return - case tgs, ok := <-updates: - m.metrics.ReceivedUpdates.Inc() - if !ok { - level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name) - return - } - - for _, s := range p.subs { - m.updateGroup(poolKey{setName: s, provider: p.name}, tgs) - } - - select { - case m.triggerSend <- struct{}{}: - default: - } - } - } -} - -func (m *Manager) sender() { - ticker := time.NewTicker(m.updatert) - defer ticker.Stop() - - for { - select { - case <-m.ctx.Done(): - return - case <-ticker.C: // Some discoverers send updates too often so we throttle these with the ticker. - select { - case <-m.triggerSend: - m.metrics.SentUpdates.Inc() - select { - case m.syncCh <- m.allGroups(): - default: - m.metrics.DelayedUpdates.Inc() - level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle") - select { - case m.triggerSend <- struct{}{}: - default: - } - } - default: - } - } - } -} - -func (m *Manager) cancelDiscoverers() { - for _, c := range m.discoverCancel { - c() - } -} - -func (m *Manager) updateGroup(poolKey poolKey, tgs []*targetgroup.Group) { - m.mtx.Lock() - defer m.mtx.Unlock() - - if _, ok := m.targets[poolKey]; !ok { - m.targets[poolKey] = make(map[string]*targetgroup.Group) - } - for _, tg := range tgs { - if tg != nil { // Some Discoverers send nil target group so need to check for it to avoid panics. - m.targets[poolKey][tg.Source] = tg - } - } -} - -func (m *Manager) allGroups() map[string][]*targetgroup.Group { - m.mtx.RLock() - defer m.mtx.RUnlock() - - tSets := map[string][]*targetgroup.Group{} - n := map[string]int{} - for pkey, tsets := range m.targets { - for _, tg := range tsets { - // Even if the target group 'tg' is empty we still need to send it to the 'Scrape manager' - // to signal that it needs to stop all scrape loops for this target set. - tSets[pkey.setName] = append(tSets[pkey.setName], tg) - n[pkey.setName] += len(tg.Targets) - } - } - for setName, v := range n { - m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v)) - } - return tSets -} - -// registerProviders returns a number of failed SD config. 
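[Editorial note, not part of the diff: the `sender` loop above throttles updates — providers arm a one-slot `triggerSend` channel, and a ticker drains it at most once per `updatert` interval, so a burst of provider updates collapses into a single sync. The surviving discovery/manager.go keeps the identical loop. A stripped-down, runnable sketch of the pattern; the names mirror the manager's fields but nothing here is upstream code:]

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	triggerSend := make(chan struct{}, 1)

	// Update side: each provider update arms the trigger; extra updates
	// within one tick coalesce because the channel has capacity 1.
	notify := func() {
		select {
		case triggerSend <- struct{}{}:
		default: // a send is already pending
		}
	}
	for i := 0; i < 10; i++ {
		notify()
	}

	// Sender side: at most one send per tick, as in Manager.sender.
	ticker := time.NewTicker(10 * time.Millisecond)
	defer ticker.Stop()
	<-ticker.C
	select {
	case <-triggerSend:
		fmt.Println("one coalesced send for ten updates")
	default:
		fmt.Println("nothing pending")
	}
}
```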
-func (m *Manager) registerProviders(cfgs discovery.Configs, setName string) int { - var ( - failed int - added bool - ) - add := func(cfg discovery.Config) { - for _, p := range m.providers { - if reflect.DeepEqual(cfg, p.config) { - p.subs = append(p.subs, setName) - added = true - return - } - } - typ := cfg.Name() - d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{ - Logger: log.With(m.logger, "discovery", typ, "config", setName), - Metrics: m.sdMetrics[typ], - }) - if err != nil { - level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName) - failed++ - return - } - m.providers = append(m.providers, &provider{ - name: fmt.Sprintf("%s/%d", typ, len(m.providers)), - d: d, - config: cfg, - subs: []string{setName}, - }) - added = true - } - for _, cfg := range cfgs { - add(cfg) - } - if !added { - // Add an empty target group to force the refresh of the corresponding - // scrape pool and to notify the receiver that this target set has no - // current targets. - // It can happen because the combined set of SD configurations is empty - // or because we fail to instantiate all the SD configurations. - add(discovery.StaticConfig{{}}) - } - return failed -} - -// StaticProvider holds a list of target groups that never change. -type StaticProvider struct { - TargetGroups []*targetgroup.Group -} - -// Run implements the Worker interface. -func (sd *StaticProvider) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { - // We still have to consider that the consumer exits right away in which case - // the context will be canceled. - select { - case ch <- sd.TargetGroups: - case <-ctx.Done(): - } - close(ch) -} diff --git a/discovery/legacymanager/manager_test.go b/discovery/legacymanager/manager_test.go deleted file mode 100644 index f1be963113..0000000000 --- a/discovery/legacymanager/manager_test.go +++ /dev/null @@ -1,1185 +0,0 @@ -// Copyright 2016 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package legacymanager - -import ( - "context" - "fmt" - "sort" - "strconv" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/prometheus/client_golang/prometheus" - client_testutil "github.com/prometheus/client_golang/prometheus/testutil" - "github.com/prometheus/common/model" - "github.com/stretchr/testify/require" - - "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/discovery/targetgroup" - "github.com/prometheus/prometheus/util/testutil" -) - -func TestMain(m *testing.M) { - testutil.TolerantVerifyLeak(m) -} - -func newTestMetrics(t *testing.T, reg prometheus.Registerer) (*discovery.RefreshMetricsManager, map[string]discovery.DiscovererMetrics) { - refreshMetrics := discovery.NewRefreshMetrics(reg) - sdMetrics, err := discovery.RegisterSDMetrics(reg, refreshMetrics) - require.NoError(t, err) - return &refreshMetrics, sdMetrics -} - -// TestTargetUpdatesOrder checks that the target updates are received in the expected order. 
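[Editorial note, not part of the diff: `registerProviders` above — the logic is retained in discovery/manager.go — dedupes identical SD configurations with `reflect.DeepEqual`, so two scrape configs that share a config run one provider with two subscribers (as `TestIdenticalConfigurationsAreCoalesced` checks further down). A minimal sketch of that check; the `provider` struct here is a pared-down stand-in, not the real type:]

```go
package main

import (
	"fmt"
	"reflect"
)

type provider struct {
	config interface{}
	subs   []string
}

func main() {
	providers := []*provider{
		{config: []string{"foo:9090"}, subs: []string{"prometheus"}},
	}
	cfg := []string{"foo:9090"} // structurally identical to the registered config
	for _, p := range providers {
		if reflect.DeepEqual(cfg, p.config) {
			// Reuse the existing provider; just record the new subscriber.
			p.subs = append(p.subs, "prometheus2")
		}
	}
	fmt.Println(providers[0].subs) // [prometheus prometheus2]
}
```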
-func TestTargetUpdatesOrder(t *testing.T) { - // The order by which the updates are send is determined by the interval passed to the mock discovery adapter - // Final targets array is ordered alphabetically by the name of the discoverer. - // For example discoverer "A" with targets "t2,t3" and discoverer "B" with targets "t1,t2" will result in "t2,t3,t1,t2" after the merge. - testCases := []struct { - title string - updates map[string][]update - expectedTargets [][]*targetgroup.Group - }{ - { - title: "Single TP no updates", - updates: map[string][]update{ - "tp1": {}, - }, - expectedTargets: nil, - }, - { - title: "Multiple TPs no updates", - updates: map[string][]update{ - "tp1": {}, - "tp2": {}, - "tp3": {}, - }, - expectedTargets: nil, - }, - { - title: "Single TP empty initials", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{}, - interval: 5 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - {}, - }, - }, - { - title: "Multiple TPs empty initials", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{}, - interval: 5 * time.Millisecond, - }, - }, - "tp2": { - { - targetGroups: []targetgroup.Group{}, - interval: 200 * time.Millisecond, - }, - }, - "tp3": { - { - targetGroups: []targetgroup.Group{}, - interval: 100 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - {}, - {}, - {}, - }, - }, - { - title: "Single TP initials only", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - }, - { - title: "Multiple TPs initials only", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - }, - "tp2": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - }, - interval: 10 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - }, - }, - }, - { - title: "Single TP initials followed by empty updates", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - interval: 0, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{}, - }, - }, - interval: 10 * time.Millisecond, - }, - }, - }, - 
expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{}, - }, - }, - }, - }, - { - title: "Single TP initials and new groups", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - interval: 0, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - { - Source: "tp1_group3", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - interval: 10 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - { - Source: "tp1_group3", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - }, - }, - { - title: "Multiple TPs initials and new groups", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - interval: 10 * time.Millisecond, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group3", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group4", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - }, - interval: 500 * time.Millisecond, - }, - }, - "tp2": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "5"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "6"}}, - }, - }, - interval: 100 * time.Millisecond, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp2_group3", - Targets: []model.LabelSet{{"__instance__": "7"}}, - }, - { - Source: "tp2_group4", - Targets: []model.LabelSet{{"__instance__": "8"}}, - }, - }, - interval: 10 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "5"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "6"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "5"}}, - }, - { - Source: "tp2_group2", - 
Targets: []model.LabelSet{{"__instance__": "6"}}, - }, - { - Source: "tp2_group3", - Targets: []model.LabelSet{{"__instance__": "7"}}, - }, - { - Source: "tp2_group4", - Targets: []model.LabelSet{{"__instance__": "8"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - { - Source: "tp1_group3", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group4", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "5"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "6"}}, - }, - { - Source: "tp2_group3", - Targets: []model.LabelSet{{"__instance__": "7"}}, - }, - { - Source: "tp2_group4", - Targets: []model.LabelSet{{"__instance__": "8"}}, - }, - }, - }, - }, - { - title: "One TP initials arrive after other TP updates.", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - interval: 10 * time.Millisecond, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - }, - interval: 150 * time.Millisecond, - }, - }, - "tp2": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "5"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "6"}}, - }, - }, - interval: 200 * time.Millisecond, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "7"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "8"}}, - }, - }, - interval: 100 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "5"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "6"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - { - Source: "tp2_group1", - Targets: []model.LabelSet{{"__instance__": "7"}}, - }, - { - Source: "tp2_group2", - Targets: []model.LabelSet{{"__instance__": "8"}}, - }, - }, - }, - }, - - { - title: "Single TP empty update in between", - updates: map[string][]update{ - "tp1": { - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - interval: 30 * time.Millisecond, - }, - { - 
targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{}, - }, - }, - interval: 10 * time.Millisecond, - }, - { - targetGroups: []targetgroup.Group{ - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - }, - interval: 300 * time.Millisecond, - }, - }, - }, - expectedTargets: [][]*targetgroup.Group{ - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{}, - }, - }, - { - { - Source: "tp1_group1", - Targets: []model.LabelSet{{"__instance__": "3"}}, - }, - { - Source: "tp1_group2", - Targets: []model.LabelSet{{"__instance__": "4"}}, - }, - }, - }, - }, - } - - for i, tc := range testCases { - tc := tc - t.Run(tc.title, func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - - var totalUpdatesCount int - for _, up := range tc.updates { - if len(up) > 0 { - totalUpdatesCount += len(up) - } - } - provUpdates := make(chan []*targetgroup.Group, totalUpdatesCount) - - for _, up := range tc.updates { - go newMockDiscoveryProvider(up...).Run(ctx, provUpdates) - } - - for x := 0; x < totalUpdatesCount; x++ { - select { - case <-ctx.Done(): - t.Fatalf("%d: no update arrived within the timeout limit", x) - case tgs := <-provUpdates: - discoveryManager.updateGroup(poolKey{setName: strconv.Itoa(i), provider: tc.title}, tgs) - for _, got := range discoveryManager.allGroups() { - assertEqualGroups(t, got, tc.expectedTargets[x]) - } - } - } - }) - } -} - -func assertEqualGroups(t *testing.T, got, expected []*targetgroup.Group) { - t.Helper() - - // Need to sort by the groups's source as the received order is not guaranteed. 
- sort.Sort(byGroupSource(got)) - sort.Sort(byGroupSource(expected)) - - require.Equal(t, expected, got) -} - -func staticConfig(addrs ...string) discovery.StaticConfig { - var cfg discovery.StaticConfig - for i, addr := range addrs { - cfg = append(cfg, &targetgroup.Group{ - Source: strconv.Itoa(i), - Targets: []model.LabelSet{ - {model.AddressLabel: model.LabelValue(addr)}, - }, - }) - } - return cfg -} - -func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Group, poolKey poolKey, label string, present bool) { - t.Helper() - if _, ok := tSets[poolKey]; !ok { - t.Fatalf("'%s' should be present in Pool keys: %v", poolKey, tSets) - } - - match := false - var mergedTargets string - for _, targetGroup := range tSets[poolKey] { - for _, l := range targetGroup.Targets { - mergedTargets = mergedTargets + " " + l.String() - if l.String() == label { - match = true - } - } - } - if match != present { - msg := "" - if !present { - msg = "not" - } - t.Fatalf("%q should %s be present in Targets labels: %q", label, msg, mergedTargets) - } -} - -func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - go discoveryManager.Run() - - c := map[string]discovery.Configs{ - "prometheus": { - staticConfig("foo:9090", "bar:9090"), - }, - } - discoveryManager.ApplyConfig(c) - - <-discoveryManager.SyncCh() - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true) - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"bar:9090\"}", true) - - c["prometheus"] = discovery.Configs{ - staticConfig("foo:9090"), - } - discoveryManager.ApplyConfig(c) - - <-discoveryManager.SyncCh() - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true) - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"bar:9090\"}", false) -} - -func TestDiscovererConfigs(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - go discoveryManager.Run() - - c := map[string]discovery.Configs{ - "prometheus": { - staticConfig("foo:9090", "bar:9090"), - staticConfig("baz:9090"), - }, - } - discoveryManager.ApplyConfig(c) - - <-discoveryManager.SyncCh() - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true) - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"bar:9090\"}", true) - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/1"}, "{__address__=\"baz:9090\"}", true) -} - -// TestTargetSetRecreatesEmptyStaticConfigs ensures that reloading a config file after -// removing all targets from the static_configs sends an update with empty targetGroups. 
-// This is required to signal the receiver that this target set has no current targets. -func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - go discoveryManager.Run() - - c := map[string]discovery.Configs{ - "prometheus": { - staticConfig("foo:9090"), - }, - } - discoveryManager.ApplyConfig(c) - - <-discoveryManager.SyncCh() - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true) - - c["prometheus"] = discovery.Configs{ - discovery.StaticConfig{{}}, - } - discoveryManager.ApplyConfig(c) - - <-discoveryManager.SyncCh() - - pkey := poolKey{setName: "prometheus", provider: "static/0"} - targetGroups, ok := discoveryManager.targets[pkey] - if !ok { - t.Fatalf("'%v' should be present in target groups", pkey) - } - group, ok := targetGroups[""] - if !ok { - t.Fatalf("missing '' key in target groups %v", targetGroups) - } - - if len(group.Targets) != 0 { - t.Fatalf("Invalid number of targets: expected 0, got %d", len(group.Targets)) - } -} - -func TestIdenticalConfigurationsAreCoalesced(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, nil, reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - go discoveryManager.Run() - - c := map[string]discovery.Configs{ - "prometheus": { - staticConfig("foo:9090"), - }, - "prometheus2": { - staticConfig("foo:9090"), - }, - } - discoveryManager.ApplyConfig(c) - - <-discoveryManager.SyncCh() - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "static/0"}, "{__address__=\"foo:9090\"}", true) - verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus2", provider: "static/0"}, "{__address__=\"foo:9090\"}", true) - if len(discoveryManager.providers) != 1 { - t.Fatalf("Invalid number of providers: expected 1, got %d", len(discoveryManager.providers)) - } -} - -func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) { - originalConfig := discovery.Configs{ - staticConfig("foo:9090", "bar:9090", "baz:9090"), - } - processedConfig := discovery.Configs{ - staticConfig("foo:9090", "bar:9090", "baz:9090"), - } - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - go discoveryManager.Run() - - cfgs := map[string]discovery.Configs{ - "prometheus": processedConfig, - } - discoveryManager.ApplyConfig(cfgs) - <-discoveryManager.SyncCh() - - for _, cfg := range cfgs { - require.Equal(t, originalConfig, cfg) - } -} - -type errorConfig struct{ err error } - -func (e errorConfig) Name() string { return "error" } -func (e errorConfig) NewDiscoverer(discovery.DiscovererOptions) (discovery.Discoverer, error) { - return nil, e.err -} - -// NewDiscovererMetrics implements discovery.Config. 
-func (errorConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics { - return &discovery.NoopDiscovererMetrics{} -} - -func TestGaugeFailedConfigs(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) - require.NotNil(t, discoveryManager) - discoveryManager.updatert = 100 * time.Millisecond - go discoveryManager.Run() - - c := map[string]discovery.Configs{ - "prometheus": { - errorConfig{fmt.Errorf("tests error 0")}, - errorConfig{fmt.Errorf("tests error 1")}, - errorConfig{fmt.Errorf("tests error 2")}, - }, - } - discoveryManager.ApplyConfig(c) - <-discoveryManager.SyncCh() - - failedCount := client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs) - if failedCount != 3 { - t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount) - } - - c["prometheus"] = discovery.Configs{ - staticConfig("foo:9090"), - } - discoveryManager.ApplyConfig(c) - <-discoveryManager.SyncCh() - - failedCount = client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs) - if failedCount != 0 { - t.Fatalf("Expected to get no failed config, got: %v", failedCount) - } -} - -func TestCoordinationWithReceiver(t *testing.T) { - updateDelay := 100 * time.Millisecond - - type expect struct { - delay time.Duration - tgs map[string][]*targetgroup.Group - } - - testCases := []struct { - title string - providers map[string]discovery.Discoverer - expected []expect - }{ - { - title: "Receiver should get all updates even when one provider closes its channel", - providers: map[string]discovery.Discoverer{ - "once1": &onceProvider{ - tgs: []*targetgroup.Group{ - { - Source: "tg1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - }, - "mock1": newMockDiscoveryProvider( - update{ - interval: 2 * updateDelay, - targetGroups: []targetgroup.Group{ - { - Source: "tg2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - ), - }, - expected: []expect{ - { - tgs: map[string][]*targetgroup.Group{ - "once1": { - { - Source: "tg1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - }, - }, - { - tgs: map[string][]*targetgroup.Group{ - "once1": { - { - Source: "tg1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - "mock1": { - { - Source: "tg2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - }, - }, - }, - { - title: "Receiver should get all updates even when the channel is blocked", - providers: map[string]discovery.Discoverer{ - "mock1": newMockDiscoveryProvider( - update{ - targetGroups: []targetgroup.Group{ - { - Source: "tg1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - }, - update{ - interval: 4 * updateDelay, - targetGroups: []targetgroup.Group{ - { - Source: "tg2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - ), - }, - expected: []expect{ - { - delay: 2 * updateDelay, - tgs: map[string][]*targetgroup.Group{ - "mock1": { - { - Source: "tg1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - }, - }, - }, - { - delay: 4 * updateDelay, - tgs: map[string][]*targetgroup.Group{ - "mock1": { - { - Source: "tg1", - Targets: []model.LabelSet{{"__instance__": "1"}}, - }, - { - Source: "tg2", - Targets: []model.LabelSet{{"__instance__": "2"}}, - }, - }, - }, - }, - }, - }, - } - - for _, tc := range testCases { - t.Run(tc.title, 
func(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - reg := prometheus.NewRegistry() - _, sdMetrics := newTestMetrics(t, reg) - - mgr := NewManager(ctx, nil, reg, sdMetrics) - require.NotNil(t, mgr) - mgr.updatert = updateDelay - go mgr.Run() - - for name, p := range tc.providers { - mgr.StartCustomProvider(ctx, name, p) - } - - for i, expected := range tc.expected { - time.Sleep(expected.delay) - select { - case <-ctx.Done(): - t.Fatalf("step %d: no update received in the expected timeframe", i) - case tgs, ok := <-mgr.SyncCh(): - if !ok { - t.Fatalf("step %d: discovery manager channel is closed", i) - } - if len(tgs) != len(expected.tgs) { - t.Fatalf("step %d: target groups mismatch, got: %d, expected: %d\ngot: %#v\nexpected: %#v", - i, len(tgs), len(expected.tgs), tgs, expected.tgs) - } - for k := range expected.tgs { - if _, ok := tgs[k]; !ok { - t.Fatalf("step %d: target group not found: %s\ngot: %#v", i, k, tgs) - } - assertEqualGroups(t, tgs[k], expected.tgs[k]) - } - } - } - }) - } -} - -type update struct { - targetGroups []targetgroup.Group - interval time.Duration -} - -type mockdiscoveryProvider struct { - updates []update -} - -func newMockDiscoveryProvider(updates ...update) mockdiscoveryProvider { - tp := mockdiscoveryProvider{ - updates: updates, - } - return tp -} - -func (tp mockdiscoveryProvider) Run(ctx context.Context, upCh chan<- []*targetgroup.Group) { - for _, u := range tp.updates { - if u.interval > 0 { - select { - case <-ctx.Done(): - return - case <-time.After(u.interval): - } - } - tgs := make([]*targetgroup.Group, len(u.targetGroups)) - for i := range u.targetGroups { - tgs[i] = &u.targetGroups[i] - } - upCh <- tgs - } - <-ctx.Done() -} - -// byGroupSource implements sort.Interface so we can sort by the Source field. -type byGroupSource []*targetgroup.Group - -func (a byGroupSource) Len() int { return len(a) } -func (a byGroupSource) Swap(i, j int) { a[i], a[j] = a[j], a[i] } -func (a byGroupSource) Less(i, j int) bool { return a[i].Source < a[j].Source } - -// onceProvider sends updates once (if any) and closes the update channel. -type onceProvider struct { - tgs []*targetgroup.Group -} - -func (o onceProvider) Run(_ context.Context, ch chan<- []*targetgroup.Group) { - if len(o.tgs) > 0 { - ch <- o.tgs - } - close(ch) -} diff --git a/discovery/legacymanager/registry.go b/discovery/legacymanager/registry.go deleted file mode 100644 index 955705394d..0000000000 --- a/discovery/legacymanager/registry.go +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright 2020 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package legacymanager - -import ( - "errors" - "fmt" - "reflect" - "sort" - "strconv" - "strings" - "sync" - - "gopkg.in/yaml.v2" - - "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/discovery/targetgroup" -) - -const ( - configFieldPrefix = "AUTO_DISCOVERY_" - staticConfigsKey = "static_configs" - staticConfigsFieldName = configFieldPrefix + staticConfigsKey -) - -var ( - configNames = make(map[string]discovery.Config) - configFieldNames = make(map[reflect.Type]string) - configFields []reflect.StructField - - configTypesMu sync.Mutex - configTypes = make(map[reflect.Type]reflect.Type) - - emptyStructType = reflect.TypeOf(struct{}{}) - configsType = reflect.TypeOf(discovery.Configs{}) -) - -// RegisterConfig registers the given Config type for YAML marshaling and unmarshaling. -func RegisterConfig(config discovery.Config) { - registerConfig(config.Name()+"_sd_configs", reflect.TypeOf(config), config) -} - -func init() { - // N.B.: static_configs is the only Config type implemented by default. - // All other types are registered at init by their implementing packages. - elemTyp := reflect.TypeOf(&targetgroup.Group{}) - registerConfig(staticConfigsKey, elemTyp, discovery.StaticConfig{}) -} - -func registerConfig(yamlKey string, elemType reflect.Type, config discovery.Config) { - name := config.Name() - if _, ok := configNames[name]; ok { - panic(fmt.Sprintf("discovery: Config named %q is already registered", name)) - } - configNames[name] = config - - fieldName := configFieldPrefix + yamlKey // Field must be exported. - configFieldNames[elemType] = fieldName - - // Insert fields in sorted order. - i := sort.Search(len(configFields), func(k int) bool { - return fieldName < configFields[k].Name - }) - configFields = append(configFields, reflect.StructField{}) // Add empty field at end. - copy(configFields[i+1:], configFields[i:]) // Shift fields to the right. - configFields[i] = reflect.StructField{ // Write new field in place. - Name: fieldName, - Type: reflect.SliceOf(elemType), - Tag: reflect.StructTag(`yaml:"` + yamlKey + `,omitempty"`), - } -} - -func getConfigType(out reflect.Type) reflect.Type { - configTypesMu.Lock() - defer configTypesMu.Unlock() - if typ, ok := configTypes[out]; ok { - return typ - } - // Initial exported fields map one-to-one. - var fields []reflect.StructField - for i, n := 0, out.NumField(); i < n; i++ { - switch field := out.Field(i); { - case field.PkgPath == "" && field.Type != configsType: - fields = append(fields, field) - default: - fields = append(fields, reflect.StructField{ - Name: "_" + field.Name, // Field must be unexported. - PkgPath: out.PkgPath(), - Type: emptyStructType, - }) - } - } - // Append extra config fields on the end. - fields = append(fields, configFields...) - typ := reflect.StructOf(fields) - configTypes[out] = typ - return typ -} - -// UnmarshalYAMLWithInlineConfigs helps implement yaml.Unmarshal for structs -// that have a Configs field that should be inlined. 
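[Editorial note, not part of the diff: the `registerConfig`/`getConfigType` machinery above builds a struct type at runtime with `reflect.StructOf`, injecting one exported, yaml-tagged field per registered `*_sd_configs` key; the same technique lives on in discovery/registry.go. A toy, runnable sketch of the idea — the `AUTO_DISCOVERY_` prefix and `file_sd_configs` key mirror the deleted code, while the `[]map[string]interface{}` payload is a simplification of the real `discovery.Config` slices:]

```go
package main

import (
	"fmt"
	"reflect"

	"gopkg.in/yaml.v2"
)

func main() {
	// Each registered SD mechanism contributes one exported, yaml-tagged field.
	fields := []reflect.StructField{
		{Name: "JobName", Type: reflect.TypeOf(""), Tag: `yaml:"job_name"`},
		{
			Name: "AUTO_DISCOVERY_file_sd_configs", // prefixed so the field is exported
			Type: reflect.TypeOf([]map[string]interface{}{}),
			Tag:  `yaml:"file_sd_configs,omitempty"`,
		},
	}
	typ := reflect.StructOf(fields)
	ptr := reflect.New(typ)

	doc := "job_name: demo\nfile_sd_configs:\n- files: ['targets.json']\n"
	if err := yaml.Unmarshal([]byte(doc), ptr.Interface()); err != nil {
		panic(err)
	}
	fmt.Println(ptr.Elem().Field(0)) // demo
	fmt.Println(ptr.Elem().Field(1)) // [map[files:[targets.json]]]
}
```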
-func UnmarshalYAMLWithInlineConfigs(out interface{}, unmarshal func(interface{}) error) error { - outVal := reflect.ValueOf(out) - if outVal.Kind() != reflect.Ptr { - return fmt.Errorf("discovery: can only unmarshal into a struct pointer: %T", out) - } - outVal = outVal.Elem() - if outVal.Kind() != reflect.Struct { - return fmt.Errorf("discovery: can only unmarshal into a struct pointer: %T", out) - } - outTyp := outVal.Type() - - cfgTyp := getConfigType(outTyp) - cfgPtr := reflect.New(cfgTyp) - cfgVal := cfgPtr.Elem() - - // Copy shared fields (defaults) to dynamic value. - var configs *discovery.Configs - for i, n := 0, outVal.NumField(); i < n; i++ { - if outTyp.Field(i).Type == configsType { - configs = outVal.Field(i).Addr().Interface().(*discovery.Configs) - continue - } - if cfgTyp.Field(i).PkgPath != "" { - continue // Field is unexported: ignore. - } - cfgVal.Field(i).Set(outVal.Field(i)) - } - if configs == nil { - return fmt.Errorf("discovery: Configs field not found in type: %T", out) - } - - // Unmarshal into dynamic value. - if err := unmarshal(cfgPtr.Interface()); err != nil { - return replaceYAMLTypeError(err, cfgTyp, outTyp) - } - - // Copy shared fields from dynamic value. - for i, n := 0, outVal.NumField(); i < n; i++ { - if cfgTyp.Field(i).PkgPath != "" { - continue // Field is unexported: ignore. - } - outVal.Field(i).Set(cfgVal.Field(i)) - } - - var err error - *configs, err = readConfigs(cfgVal, outVal.NumField()) - return err -} - -func readConfigs(structVal reflect.Value, startField int) (discovery.Configs, error) { - var ( - configs discovery.Configs - targets []*targetgroup.Group - ) - for i, n := startField, structVal.NumField(); i < n; i++ { - field := structVal.Field(i) - if field.Kind() != reflect.Slice { - panic("discovery: internal error: field is not a slice") - } - for k := 0; k < field.Len(); k++ { - val := field.Index(k) - if val.IsZero() || (val.Kind() == reflect.Ptr && val.Elem().IsZero()) { - key := configFieldNames[field.Type().Elem()] - key = strings.TrimPrefix(key, configFieldPrefix) - return nil, fmt.Errorf("empty or null section in %s", key) - } - switch c := val.Interface().(type) { - case *targetgroup.Group: - // Add index to the static config target groups for unique identification - // within scrape pool. - c.Source = strconv.Itoa(len(targets)) - // Coalesce multiple static configs into a single static config. - targets = append(targets, c) - case discovery.Config: - configs = append(configs, c) - default: - panic("discovery: internal error: slice element is not a Config") - } - } - } - if len(targets) > 0 { - configs = append(configs, discovery.StaticConfig(targets)) - } - return configs, nil -} - -// MarshalYAMLWithInlineConfigs helps implement yaml.Marshal for structs -// that have a Configs field that should be inlined. -func MarshalYAMLWithInlineConfigs(in interface{}) (interface{}, error) { - inVal := reflect.ValueOf(in) - for inVal.Kind() == reflect.Ptr { - inVal = inVal.Elem() - } - inTyp := inVal.Type() - - cfgTyp := getConfigType(inTyp) - cfgPtr := reflect.New(cfgTyp) - cfgVal := cfgPtr.Elem() - - // Copy shared fields to dynamic value. - var configs *discovery.Configs - for i, n := 0, inTyp.NumField(); i < n; i++ { - if inTyp.Field(i).Type == configsType { - configs = inVal.Field(i).Addr().Interface().(*discovery.Configs) - } - if cfgTyp.Field(i).PkgPath != "" { - continue // Field is unexported: ignore. 
- } - cfgVal.Field(i).Set(inVal.Field(i)) - } - if configs == nil { - return nil, fmt.Errorf("discovery: Configs field not found in type: %T", in) - } - - if err := writeConfigs(cfgVal, *configs); err != nil { - return nil, err - } - - return cfgPtr.Interface(), nil -} - -func writeConfigs(structVal reflect.Value, configs discovery.Configs) error { - targets := structVal.FieldByName(staticConfigsFieldName).Addr().Interface().(*[]*targetgroup.Group) - for _, c := range configs { - if sc, ok := c.(discovery.StaticConfig); ok { - *targets = append(*targets, sc...) - continue - } - fieldName, ok := configFieldNames[reflect.TypeOf(c)] - if !ok { - return fmt.Errorf("discovery: cannot marshal unregistered Config type: %T", c) - } - field := structVal.FieldByName(fieldName) - field.Set(reflect.Append(field, reflect.ValueOf(c))) - } - return nil -} - -func replaceYAMLTypeError(err error, oldTyp, newTyp reflect.Type) error { - var e *yaml.TypeError - if errors.As(err, &e) { - oldStr := oldTyp.String() - newStr := newTyp.String() - for i, s := range e.Errors { - e.Errors[i] = strings.ReplaceAll(s, oldStr, newStr) - } - } - return err -} diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go index 634a6b1d4b..90a91fc920 100644 --- a/discovery/linode/linode.go +++ b/discovery/linode/linode.go @@ -17,13 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/linode/linodego" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -138,10 +138,10 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*linodeMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &Discovery{ diff --git a/discovery/linode/linode_test.go b/discovery/linode/linode_test.go index 3c10650653..7bcaa05ba4 100644 --- a/discovery/linode/linode_test.go +++ b/discovery/linode/linode_test.go @@ -19,10 +19,10 @@ import ( "net/url" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -238,7 +238,7 @@ func TestLinodeSDRefresh(t *testing.T) { defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) endpoint, err := url.Parse(sdmock.Endpoint()) require.NoError(t, err) diff --git a/discovery/manager.go b/discovery/manager.go index cefa90a866..87e0ecc44b 100644 --- a/discovery/manager.go +++ b/discovery/manager.go @@ -16,14 +16,14 @@ package discovery import ( "context" "fmt" + "log/slog" "reflect" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -81,9 +81,9 @@ func CreateAndRegisterSDMetrics(reg prometheus.Registerer) (map[string]Discovere 
} // NewManager is the Discovery Manager constructor. -func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, sdMetrics map[string]DiscovererMetrics, options ...func(*Manager)) *Manager { +func NewManager(ctx context.Context, logger *slog.Logger, registerer prometheus.Registerer, sdMetrics map[string]DiscovererMetrics, options ...func(*Manager)) *Manager { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } mgr := &Manager{ logger: logger, @@ -104,7 +104,7 @@ func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Re if metrics, err := NewManagerMetrics(registerer, mgr.name); err == nil { mgr.metrics = metrics } else { - level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err) + logger.Error("Failed to create discovery manager metrics", "manager", mgr.name, "err", err) return nil } @@ -141,7 +141,7 @@ func HTTPClientOptions(opts ...config.HTTPClientOption) func(*Manager) { // Manager maintains a set of discovery providers and sends each update to a map channel. // Targets are grouped by the target set name. type Manager struct { - logger log.Logger + logger *slog.Logger name string httpOpts []config.HTTPClientOption mtx sync.RWMutex @@ -294,7 +294,7 @@ func (m *Manager) StartCustomProvider(ctx context.Context, name string, worker D } func (m *Manager) startProvider(ctx context.Context, p *Provider) { - level.Debug(m.logger).Log("msg", "Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs)) + m.logger.Debug("Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs)) ctx, cancel := context.WithCancel(ctx) updates := make(chan []*targetgroup.Group) @@ -328,7 +328,7 @@ func (m *Manager) updater(ctx context.Context, p *Provider, updates chan []*targ case tgs, ok := <-updates: m.metrics.ReceivedUpdates.Inc() if !ok { - level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name) + m.logger.Debug("Discoverer channel closed", "provider", p.name) // Wait for provider cancellation to ensure targets are cleaned up when expected. 
<-ctx.Done() return @@ -364,7 +364,7 @@ func (m *Manager) sender() { case m.syncCh <- m.allGroups(): default: m.metrics.DelayedUpdates.Inc() - level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle") + m.logger.Debug("Discovery receiver's channel was full so will retry the next cycle") select { case m.triggerSend <- struct{}{}: default: @@ -458,12 +458,12 @@ func (m *Manager) registerProviders(cfgs Configs, setName string) int { } typ := cfg.Name() d, err := cfg.NewDiscoverer(DiscovererOptions{ - Logger: log.With(m.logger, "discovery", typ, "config", setName), + Logger: m.logger.With("discovery", typ, "config", setName), HTTPClientOptions: m.httpOpts, Metrics: m.sdMetrics[typ], }) if err != nil { - level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName) + m.logger.Error("Cannot create service discovery", "err", err, "type", typ, "config", setName) failed++ return } diff --git a/discovery/manager_test.go b/discovery/manager_test.go index 831cefe514..0ff82d5415 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -15,6 +15,7 @@ package discovery import ( "context" + "errors" "fmt" "sort" "strconv" @@ -22,10 +23,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" client_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -675,7 +676,7 @@ func TestTargetUpdatesOrder(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond @@ -791,7 +792,7 @@ func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -828,7 +829,7 @@ func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -868,7 +869,7 @@ func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testi reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -911,7 +912,7 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, 
promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -979,7 +980,7 @@ func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1023,7 +1024,7 @@ func TestDiscovererConfigs(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1060,7 +1061,7 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1141,7 +1142,7 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1202,16 +1203,16 @@ func TestGaugeFailedConfigs(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() c := map[string]Configs{ "prometheus": { - errorConfig{fmt.Errorf("tests error 0")}, - errorConfig{fmt.Errorf("tests error 1")}, - errorConfig{fmt.Errorf("tests error 2")}, + errorConfig{errors.New("tests error 0")}, + errorConfig{errors.New("tests error 1")}, + errorConfig{errors.New("tests error 2")}, }, } discoveryManager.ApplyConfig(c) @@ -1454,7 +1455,7 @@ func TestTargetSetTargetGroupsUpdateDuringApplyConfig(t *testing.T) { reg := prometheus.NewRegistry() _, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) require.NotNil(t, discoveryManager) discoveryManager.updatert = 100 * time.Millisecond go discoveryManager.Run() @@ -1551,7 +1552,7 @@ func TestUnregisterMetrics(t *testing.T) { refreshMetrics, sdMetrics := NewTestMetrics(t, reg) - discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics) + discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics) // discoveryManager will be nil if there was an error configuring metrics. require.NotNil(t, discoveryManager) // Unregister all metrics. 
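Note on the pattern: the manager and test hunks above, and the per-SD files that follow, all apply the same go-kit/log to log/slog conversion. A minimal, self-contained sketch of the calling conventions being swapped; the handler setup and the provider/config names here are illustrative, not taken from the patch:

```go
package main

import (
	"log/slog"
	"os"
)

func main() {
	// Any slog handler works for demonstration; in the patch the logger
	// arrives pre-built as a *slog.Logger (e.g. from promslog).
	logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))

	// Before: level.Debug(logger).Log("msg", "Starting provider", "provider", name)
	// After:  the message is a positional argument and the level is a method.
	logger.Debug("Starting provider", "provider", "static/0")

	// Before: log.With(logger, "discovery", typ, "config", setName)
	// After:  *slog.Logger carries With directly.
	scoped := logger.With("discovery", "linode", "config", "prometheus")
	scoped.Error("Cannot create service discovery", "err", "example error")
}
```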
diff --git a/discovery/marathon/marathon.go b/discovery/marathon/marathon.go index 38b47accff..9c93e43f51 100644 --- a/discovery/marathon/marathon.go +++ b/discovery/marathon/marathon.go @@ -19,6 +19,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math/rand" "net" "net/http" @@ -27,7 +28,6 @@ import ( "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -140,10 +140,10 @@ type Discovery struct { } // NewDiscovery returns a new Marathon Discovery. -func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*marathonMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "marathon_sd") diff --git a/discovery/moby/docker.go b/discovery/moby/docker.go index 68f6fe3ccc..13cf20d6dd 100644 --- a/discovery/moby/docker.go +++ b/discovery/moby/docker.go @@ -15,7 +15,9 @@ package moby import ( "context" + "errors" "fmt" + "log/slog" "net" "net/http" "net/url" @@ -28,7 +30,6 @@ import ( "github.com/docker/docker/api/types/filters" "github.com/docker/docker/api/types/network" "github.com/docker/docker/client" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -110,7 +111,7 @@ func (c *DockerSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error return err } if c.Host == "" { - return fmt.Errorf("host missing") + return errors.New("host missing") } if _, err = url.Parse(c.Host); err != nil { return err @@ -128,10 +129,10 @@ type DockerDiscovery struct { } // NewDockerDiscovery returns a new DockerDiscovery which periodically refreshes its targets. 
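NewDockerDiscovery below receives the same constant-message error cleanup as the other constructors. The rule of thumb, sketched with two hypothetical helpers, is that fmt.Errorf is only warranted when there is something to format:

```go
package main

import (
	"errors"
	"fmt"
)

// newStatic mirrors the rewritten call sites: the message is constant,
// so errors.New avoids a needless pass through fmt's formatter.
func newStatic() error {
	return errors.New("invalid discovery metrics type")
}

// newWrapped shows the case that still needs fmt.Errorf: a format verb.
func newWrapped(role string) error {
	return fmt.Errorf("invalid role %s, expected tasks, services, or nodes", role)
}

func main() {
	fmt.Println(newStatic())
	fmt.Println(newWrapped("managers"))
}
```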
-func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*DockerDiscovery, error) { +func NewDockerDiscovery(conf *DockerSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*DockerDiscovery, error) { m, ok := metrics.(*dockerMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &DockerDiscovery{ diff --git a/discovery/moby/docker_test.go b/discovery/moby/docker_test.go index 398393a15a..00e6a3e4f3 100644 --- a/discovery/moby/docker_test.go +++ b/discovery/moby/docker_test.go @@ -19,9 +19,9 @@ import ( "sort" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -48,7 +48,7 @@ host: %s defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDockerDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDockerDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -226,7 +226,7 @@ host: %s require.NoError(t, metrics.Register()) defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDockerDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDockerDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/dockerswarm.go b/discovery/moby/dockerswarm.go index b0147467d2..ba42253412 100644 --- a/discovery/moby/dockerswarm.go +++ b/discovery/moby/dockerswarm.go @@ -15,14 +15,15 @@ package moby import ( "context" + "errors" "fmt" + "log/slog" "net/http" "net/url" "time" "github.com/docker/docker/api/types/filters" "github.com/docker/docker/client" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -99,7 +100,7 @@ func (c *DockerSwarmSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) e return err } if c.Host == "" { - return fmt.Errorf("host missing") + return errors.New("host missing") } if _, err = url.Parse(c.Host); err != nil { return err @@ -107,7 +108,7 @@ func (c *DockerSwarmSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) e switch c.Role { case "services", "nodes", "tasks": case "": - return fmt.Errorf("role missing (one of: tasks, services, nodes)") + return errors.New("role missing (one of: tasks, services, nodes)") default: return fmt.Errorf("invalid role %s, expected tasks, services, or nodes", c.Role) } @@ -125,10 +126,10 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. -func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *DockerSwarmSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*dockerswarmMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &Discovery{ diff --git a/discovery/moby/mock_test.go b/discovery/moby/mock_test.go index 3f35258c8f..7ef5cb07c3 100644 --- a/discovery/moby/mock_test.go +++ b/discovery/moby/mock_test.go @@ -98,7 +98,7 @@ func (m *SDMock) SetupHandlers() { if len(query) == 2 { h := sha1.New() h.Write([]byte(query[1])) - // Avoing long filenames for Windows. 
+ // Avoiding long filenames for Windows. f += "__" + base64.URLEncoding.EncodeToString(h.Sum(nil))[:10] } } diff --git a/discovery/moby/nodes_test.go b/discovery/moby/nodes_test.go index 4ad1088d1a..973b83c4b6 100644 --- a/discovery/moby/nodes_test.go +++ b/discovery/moby/nodes_test.go @@ -18,9 +18,9 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -48,7 +48,7 @@ host: %s defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/services_test.go b/discovery/moby/services_test.go index 47ca69e33a..7a966cfeee 100644 --- a/discovery/moby/services_test.go +++ b/discovery/moby/services_test.go @@ -18,9 +18,9 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -48,7 +48,7 @@ host: %s defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -349,7 +349,7 @@ filters: defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/tasks_test.go b/discovery/moby/tasks_test.go index ef71bc02f5..59d8831c3b 100644 --- a/discovery/moby/tasks_test.go +++ b/discovery/moby/tasks_test.go @@ -18,9 +18,9 @@ import ( "fmt" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" @@ -48,7 +48,7 @@ host: %s defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/moby/testdata/swarmprom/services.json b/discovery/moby/testdata/swarmprom/services.json index 72caa7a7f8..8f6c0793dd 100644 --- a/discovery/moby/testdata/swarmprom/services.json +++ b/discovery/moby/testdata/swarmprom/services.json @@ -224,7 +224,7 @@ "Args": [ "--config.file=/etc/prometheus/prometheus.yml", "--storage.tsdb.path=/prometheus", - "--storage.tsdb.retention=24h" + "--storage.tsdb.retention.time=24h" ], "Privileges": { "CredentialSpec": null, diff --git a/discovery/moby/testdata/swarmprom/tasks.json b/discovery/moby/testdata/swarmprom/tasks.json index 33d81f25ce..af5ff9fe28 100644 --- a/discovery/moby/testdata/swarmprom/tasks.json +++ b/discovery/moby/testdata/swarmprom/tasks.json @@ -973,7 +973,7 @@ "Args": [ "--config.file=/etc/prometheus/prometheus.yml", "--storage.tsdb.path=/prometheus", - "--storage.tsdb.retention=24h" + "--storage.tsdb.retention.time=24h" ], "Privileges": { "CredentialSpec": null, diff --git a/discovery/nomad/nomad.go b/discovery/nomad/nomad.go index d9c48120ae..7516308026 100644 --- 
a/discovery/nomad/nomad.go +++ b/discovery/nomad/nomad.go @@ -17,12 +17,12 @@ import ( "context" "errors" "fmt" + "log/slog" "net" "strconv" "strings" "time" - "github.com/go-kit/log" nomad "github.com/hashicorp/nomad/api" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -121,10 +121,10 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*nomadMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &Discovery{ diff --git a/discovery/nomad/nomad_test.go b/discovery/nomad/nomad_test.go index 357d4a8e9b..32b087524c 100644 --- a/discovery/nomad/nomad_test.go +++ b/discovery/nomad/nomad_test.go @@ -21,9 +21,9 @@ import ( "net/url" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -160,7 +160,7 @@ func TestNomadSDRefresh(t *testing.T) { defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) tgs, err := d.refresh(context.Background()) diff --git a/discovery/openstack/hypervisor.go b/discovery/openstack/hypervisor.go index 8964da9294..ec127b1861 100644 --- a/discovery/openstack/hypervisor.go +++ b/discovery/openstack/hypervisor.go @@ -16,10 +16,10 @@ package openstack import ( "context" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack" "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/hypervisors" @@ -43,14 +43,14 @@ type HypervisorDiscovery struct { provider *gophercloud.ProviderClient authOpts *gophercloud.AuthOptions region string - logger log.Logger + logger *slog.Logger port int availability gophercloud.Availability } // newHypervisorDiscovery returns a new hypervisor discovery. 
func newHypervisorDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions, - port int, region string, availability gophercloud.Availability, l log.Logger, + port int, region string, availability gophercloud.Availability, l *slog.Logger, ) *HypervisorDiscovery { return &HypervisorDiscovery{ provider: provider, authOpts: opts, diff --git a/discovery/openstack/hypervisor_test.go b/discovery/openstack/hypervisor_test.go index 45684b4a2e..e4a97f32cf 100644 --- a/discovery/openstack/hypervisor_test.go +++ b/discovery/openstack/hypervisor_test.go @@ -93,6 +93,5 @@ func TestOpenstackSDHypervisorRefreshWithDoneContext(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() _, err := hypervisor.refresh(ctx) - require.Error(t, err) - require.Contains(t, err.Error(), context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled) + require.ErrorContains(t, err, context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled) } diff --git a/discovery/openstack/instance.go b/discovery/openstack/instance.go index 78c669e6f7..2a9e29f2ef 100644 --- a/discovery/openstack/instance.go +++ b/discovery/openstack/instance.go @@ -16,17 +16,17 @@ package openstack import ( "context" "fmt" + "log/slog" "net" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack" "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/floatingips" "github.com/gophercloud/gophercloud/openstack/compute/v2/servers" "github.com/gophercloud/gophercloud/pagination" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/util/strutil" @@ -52,7 +52,7 @@ type InstanceDiscovery struct { provider *gophercloud.ProviderClient authOpts *gophercloud.AuthOptions region string - logger log.Logger + logger *slog.Logger port int allTenants bool availability gophercloud.Availability @@ -60,10 +60,10 @@ type InstanceDiscovery struct { // NewInstanceDiscovery returns a new instance discovery. 
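The openstack test hunks above collapse a require.Error plus require.Contains pair into a single assertion. A self-contained illustration; doRefresh is a stand-in for the real refresh calls:

```go
package example

import (
	"context"
	"testing"

	"github.com/stretchr/testify/require"
)

// TestCancelled shows the consolidated form: require.ErrorContains fails
// when err is nil, so the separate require.Error call is redundant.
func TestCancelled(t *testing.T) {
	doRefresh := func(ctx context.Context) error { return ctx.Err() }

	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	err := doRefresh(ctx)
	// Before:
	//   require.Error(t, err)
	//   require.Contains(t, err.Error(), context.Canceled.Error())
	// After:
	require.ErrorContains(t, err, context.Canceled.Error())
}
```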
func newInstanceDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions, - port int, region string, allTenants bool, availability gophercloud.Availability, l log.Logger, + port int, region string, allTenants bool, availability gophercloud.Availability, l *slog.Logger, ) *InstanceDiscovery { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } return &InstanceDiscovery{ provider: provider, authOpts: opts, @@ -134,7 +134,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, for _, s := range instanceList { if len(s.Addresses) == 0 { - level.Info(i.logger).Log("msg", "Got no IP address", "instance", s.ID) + i.logger.Info("Got no IP address", "instance", s.ID) continue } @@ -151,7 +151,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, if !nameOk { flavorID, idOk := s.Flavor["id"].(string) if !idOk { - level.Warn(i.logger).Log("msg", "Invalid type for both flavor original_name and flavor id, expected string") + i.logger.Warn("Invalid type for both flavor original_name and flavor id, expected string") continue } labels[openstackLabelInstanceFlavor] = model.LabelValue(flavorID) @@ -171,22 +171,22 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, for pool, address := range s.Addresses { md, ok := address.([]interface{}) if !ok { - level.Warn(i.logger).Log("msg", "Invalid type for address, expected array") + i.logger.Warn("Invalid type for address, expected array") continue } if len(md) == 0 { - level.Debug(i.logger).Log("msg", "Got no IP address", "instance", s.ID) + i.logger.Debug("Got no IP address", "instance", s.ID) continue } for _, address := range md { md1, ok := address.(map[string]interface{}) if !ok { - level.Warn(i.logger).Log("msg", "Invalid type for address, expected dict") + i.logger.Warn("Invalid type for address, expected dict") continue } addr, ok := md1["addr"].(string) if !ok { - level.Warn(i.logger).Log("msg", "Invalid type for address, expected string") + i.logger.Warn("Invalid type for address, expected string") continue } if _, ok := floatingIPPresent[addr]; ok { diff --git a/discovery/openstack/instance_test.go b/discovery/openstack/instance_test.go index 2b5ac1b89e..2617baa4e3 100644 --- a/discovery/openstack/instance_test.go +++ b/discovery/openstack/instance_test.go @@ -134,6 +134,5 @@ func TestOpenstackSDInstanceRefreshWithDoneContext(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() _, err := hypervisor.refresh(ctx) - require.Error(t, err) - require.Contains(t, err.Error(), context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled) + require.ErrorContains(t, err, context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled) } diff --git a/discovery/openstack/openstack.go b/discovery/openstack/openstack.go index c98f78788d..cd0bcc1267 100644 --- a/discovery/openstack/openstack.go +++ b/discovery/openstack/openstack.go @@ -17,10 +17,10 @@ import ( "context" "errors" "fmt" + "log/slog" "net/http" "time" - "github.com/go-kit/log" "github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud/openstack" "github.com/mwitkow/go-conntrack" @@ -142,10 +142,10 @@ type refresher interface { } // NewDiscovery returns a new OpenStack Discoverer which periodically refreshes its targets. 
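Callers of these reworked constructors are expected to hand in a *slog.Logger. A small sketch of building one with promslog, assuming its Config defaults (an empty Config is taken here to mean info-level logfmt output on stderr):

```go
package main

import (
	"github.com/prometheus/common/promslog"
)

func main() {
	// promslog.New returns the *slog.Logger these constructors now take.
	logger := promslog.New(&promslog.Config{})
	logger.Info("a discoverer would receive this logger")
}
```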
-func NewDiscovery(conf *SDConfig, l log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, l *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*openstackMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } r, err := newRefresher(conf, l) @@ -163,7 +163,7 @@ func NewDiscovery(conf *SDConfig, l log.Logger, metrics discovery.DiscovererMetr ), nil } -func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) { +func newRefresher(conf *SDConfig, l *slog.Logger) (refresher, error) { var opts gophercloud.AuthOptions if conf.IdentityEndpoint == "" { var err error diff --git a/discovery/ovhcloud/dedicated_server.go b/discovery/ovhcloud/dedicated_server.go index a70857a08b..15bb9809c9 100644 --- a/discovery/ovhcloud/dedicated_server.go +++ b/discovery/ovhcloud/dedicated_server.go @@ -16,13 +16,12 @@ package ovhcloud import ( "context" "fmt" + "log/slog" "net/netip" "net/url" "path" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/ovh/go-ovh/ovh" "github.com/prometheus/common/model" @@ -55,10 +54,10 @@ type dedicatedServer struct { type dedicatedServerDiscovery struct { *refresh.Discovery config *SDConfig - logger log.Logger + logger *slog.Logger } -func newDedicatedServerDiscovery(conf *SDConfig, logger log.Logger) *dedicatedServerDiscovery { +func newDedicatedServerDiscovery(conf *SDConfig, logger *slog.Logger) *dedicatedServerDiscovery { return &dedicatedServerDiscovery{config: conf, logger: logger} } @@ -115,10 +114,7 @@ func (d *dedicatedServerDiscovery) refresh(context.Context) ([]*targetgroup.Grou for _, dedicatedServerName := range dedicatedServerList { dedicatedServer, err := getDedicatedServerDetails(client, dedicatedServerName) if err != nil { - err := level.Warn(d.logger).Log("msg", fmt.Sprintf("%s: Could not get details of %s", d.getSource(), dedicatedServerName), "err", err.Error()) - if err != nil { - return nil, err - } + d.logger.Warn(fmt.Sprintf("%s: Could not get details of %s", d.getSource(), dedicatedServerName), "err", err.Error()) continue } dedicatedServerDetailedList = append(dedicatedServerDetailedList, *dedicatedServer) diff --git a/discovery/ovhcloud/dedicated_server_test.go b/discovery/ovhcloud/dedicated_server_test.go index 52311bcc87..f9dbd6af9c 100644 --- a/discovery/ovhcloud/dedicated_server_test.go +++ b/discovery/ovhcloud/dedicated_server_test.go @@ -21,8 +21,8 @@ import ( "os" "testing" - "github.com/go-kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" ) @@ -41,7 +41,7 @@ application_secret: %s consumer_key: %s`, mock.URL, ovhcloudApplicationKeyTest, ovhcloudApplicationSecretTest, ovhcloudConsumerKeyTest) require.NoError(t, yaml.UnmarshalStrict([]byte(cfgString), &cfg)) - d, err := newRefresher(&cfg, log.NewNopLogger()) + d, err := newRefresher(&cfg, promslog.NewNopLogger()) require.NoError(t, err) ctx := context.Background() targetGroups, err := d.refresh(ctx) diff --git a/discovery/ovhcloud/ovhcloud.go b/discovery/ovhcloud/ovhcloud.go index 988b4482f2..a75e9694fe 100644 --- a/discovery/ovhcloud/ovhcloud.go +++ b/discovery/ovhcloud/ovhcloud.go @@ -17,10 +17,10 @@ import ( "context" "errors" "fmt" + "log/slog" "net/netip" "time" - "github.com/go-kit/log" "github.com/ovh/go-ovh/ovh" "github.com/prometheus/client_golang/prometheus" 
"github.com/prometheus/common/config" @@ -137,7 +137,7 @@ func parseIPList(ipList []string) ([]netip.Addr, error) { return ipAddresses, nil } -func newRefresher(conf *SDConfig, logger log.Logger) (refresher, error) { +func newRefresher(conf *SDConfig, logger *slog.Logger) (refresher, error) { switch conf.Service { case "vps": return newVpsDiscovery(conf, logger), nil @@ -148,10 +148,10 @@ func newRefresher(conf *SDConfig, logger log.Logger) (refresher, error) { } // NewDiscovery returns a new OVHcloud Discoverer which periodically refreshes its targets. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*ovhcloudMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } r, err := newRefresher(conf, logger) diff --git a/discovery/ovhcloud/ovhcloud_test.go b/discovery/ovhcloud/ovhcloud_test.go index 9c95bf90e6..84a35af3ad 100644 --- a/discovery/ovhcloud/ovhcloud_test.go +++ b/discovery/ovhcloud/ovhcloud_test.go @@ -20,11 +20,11 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/discovery" - "github.com/prometheus/prometheus/util/testutil" ) var ( @@ -121,7 +121,7 @@ func TestParseIPs(t *testing.T) { func TestDiscoverer(t *testing.T) { conf, _ := getMockConf("vps") - logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() reg := prometheus.NewRegistry() refreshMetrics := discovery.NewRefreshMetrics(reg) diff --git a/discovery/ovhcloud/vps.go b/discovery/ovhcloud/vps.go index 58ceeabd87..7050f826a5 100644 --- a/discovery/ovhcloud/vps.go +++ b/discovery/ovhcloud/vps.go @@ -16,13 +16,12 @@ package ovhcloud import ( "context" "fmt" + "log/slog" "net/netip" "net/url" "path" "strconv" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/ovh/go-ovh/ovh" "github.com/prometheus/common/model" @@ -68,10 +67,10 @@ type virtualPrivateServer struct { type vpsDiscovery struct { *refresh.Discovery config *SDConfig - logger log.Logger + logger *slog.Logger } -func newVpsDiscovery(conf *SDConfig, logger log.Logger) *vpsDiscovery { +func newVpsDiscovery(conf *SDConfig, logger *slog.Logger) *vpsDiscovery { return &vpsDiscovery{config: conf, logger: logger} } @@ -133,10 +132,7 @@ func (d *vpsDiscovery) refresh(context.Context) ([]*targetgroup.Group, error) { for _, vpsName := range vpsList { vpsDetailed, err := getVpsDetails(client, vpsName) if err != nil { - err := level.Warn(d.logger).Log("msg", fmt.Sprintf("%s: Could not get details of %s", d.getSource(), vpsName), "err", err.Error()) - if err != nil { - return nil, err - } + d.logger.Warn(fmt.Sprintf("%s: Could not get details of %s", d.getSource(), vpsName), "err", err.Error()) continue } vpsDetailedList = append(vpsDetailedList, *vpsDetailed) diff --git a/discovery/ovhcloud/vps_test.go b/discovery/ovhcloud/vps_test.go index 2d2d6dcd21..00d59da7f0 100644 --- a/discovery/ovhcloud/vps_test.go +++ b/discovery/ovhcloud/vps_test.go @@ -23,8 +23,8 @@ import ( yaml "gopkg.in/yaml.v2" - "github.com/go-kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" ) @@ -43,7 +43,7 @@ consumer_key: %s`, mock.URL, 
ovhcloudApplicationKeyTest, ovhcloudApplicationSecr require.NoError(t, yaml.UnmarshalStrict([]byte(cfgString), &cfg)) - d, err := newRefresher(&cfg, log.NewNopLogger()) + d, err := newRefresher(&cfg, promslog.NewNopLogger()) require.NoError(t, err) ctx := context.Background() targetGroups, err := d.refresh(ctx) diff --git a/discovery/puppetdb/puppetdb.go b/discovery/puppetdb/puppetdb.go index 8f89acbf93..561bf78ba7 100644 --- a/discovery/puppetdb/puppetdb.go +++ b/discovery/puppetdb/puppetdb.go @@ -17,8 +17,10 @@ import ( "bytes" "context" "encoding/json" + "errors" "fmt" "io" + "log/slog" "net" "net/http" "net/url" @@ -27,11 +29,11 @@ import ( "strings" "time" - "github.com/go-kit/log" "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" "github.com/prometheus/prometheus/discovery" @@ -108,20 +110,20 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { return err } if c.URL == "" { - return fmt.Errorf("URL is missing") + return errors.New("URL is missing") } parsedURL, err := url.Parse(c.URL) if err != nil { return err } if parsedURL.Scheme != "http" && parsedURL.Scheme != "https" { - return fmt.Errorf("URL scheme must be 'http' or 'https'") + return errors.New("URL scheme must be 'http' or 'https'") } if parsedURL.Host == "" { - return fmt.Errorf("host is missing in URL") + return errors.New("host is missing in URL") } if c.Query == "" { - return fmt.Errorf("query missing") + return errors.New("query missing") } return c.HTTPClientConfig.Validate() } @@ -138,14 +140,14 @@ type Discovery struct { } // NewDiscovery returns a new PuppetDB discovery for the given config. 
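The puppetdb constructor below, like zookeeper and refresh elsewhere in the patch, keeps its defensive nil-logger default and merely swaps the no-op implementation. Reduced to a standalone sketch with a hypothetical discovery type:

```go
package example

import (
	"log/slog"

	"github.com/prometheus/common/promslog"
)

// discovery is a stand-in for the SD types whose constructors appear in
// these hunks; the nil check keeps callers that pass no logger safe.
type discovery struct {
	logger *slog.Logger
}

func newDiscovery(logger *slog.Logger) *discovery {
	if logger == nil {
		// promslog.NewNopLogger returns a *slog.Logger that discards
		// everything, replacing go-kit's log.NewNopLogger.
		logger = promslog.NewNopLogger()
	}
	return &discovery{logger: logger}
}
```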
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*puppetdbMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } client, err := config.NewClientFromConfig(conf.HTTPClientConfig, "http") diff --git a/discovery/puppetdb/puppetdb_test.go b/discovery/puppetdb/puppetdb_test.go index bf9c7b215e..4585b78223 100644 --- a/discovery/puppetdb/puppetdb_test.go +++ b/discovery/puppetdb/puppetdb_test.go @@ -22,10 +22,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -70,7 +70,7 @@ func TestPuppetSlashInURL(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) require.Equal(t, apiURL, d.url) @@ -94,7 +94,7 @@ func TestPuppetDBRefresh(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -142,7 +142,7 @@ func TestPuppetDBRefreshWithParameters(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -201,7 +201,7 @@ func TestPuppetDBInvalidCode(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() @@ -229,7 +229,7 @@ func TestPuppetDBInvalidFormat(t *testing.T) { metrics := cfg.NewDiscovererMetrics(reg, refreshMetrics) require.NoError(t, metrics.Register()) - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) ctx := context.Background() diff --git a/discovery/refresh/refresh.go b/discovery/refresh/refresh.go index f037a90cff..31646c0e4c 100644 --- a/discovery/refresh/refresh.go +++ b/discovery/refresh/refresh.go @@ -16,17 +16,17 @@ package refresh import ( "context" "errors" + "log/slog" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" ) type Options struct { - Logger log.Logger + Logger *slog.Logger Mech string Interval time.Duration RefreshF func(ctx context.Context) ([]*targetgroup.Group, error) @@ -35,7 +35,7 @@ type Options struct { // Discovery implements the Discoverer interface. 
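For orientation, the refresh loop that the logging changes in the hunks that follow sit inside can be condensed as below. The target-group type and refresh function are reduced to stand-ins, so this is a sketch of the control flow, not the package's actual code:

```go
package example

import (
	"context"
	"errors"
	"log/slog"
	"time"
)

// run condenses the loop: one refresh up front, then one per tick,
// logging failures only while the context is still live.
func run(ctx context.Context, logger *slog.Logger, interval time.Duration,
	refreshf func(context.Context) ([]string, error), ch chan<- []string,
) {
	refresh := func() {
		tgs, err := refreshf(ctx)
		if err != nil {
			if !errors.Is(ctx.Err(), context.Canceled) {
				logger.Error("Unable to refresh target groups", "err", err.Error())
			}
			return
		}
		select {
		case ch <- tgs:
		case <-ctx.Done():
		}
	}

	refresh()
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			refresh()
		case <-ctx.Done():
			return
		}
	}
}
```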
type Discovery struct { - logger log.Logger + logger *slog.Logger interval time.Duration refreshf func(ctx context.Context) ([]*targetgroup.Group, error) metrics *discovery.RefreshMetrics @@ -45,9 +45,9 @@ type Discovery struct { func NewDiscovery(opts Options) *Discovery { m := opts.MetricsInstantiator.Instantiate(opts.Mech) - var logger log.Logger + var logger *slog.Logger if opts.Logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } else { logger = opts.Logger } @@ -68,7 +68,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { tgs, err := d.refresh(ctx) if err != nil { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error()) + d.logger.Error("Unable to refresh target groups", "err", err.Error()) } } else { select { @@ -87,7 +87,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { tgs, err := d.refresh(ctx) if err != nil { if !errors.Is(ctx.Err(), context.Canceled) { - level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error()) + d.logger.Error("Unable to refresh target groups", "err", err.Error()) } continue } diff --git a/discovery/refresh/refresh_test.go b/discovery/refresh/refresh_test.go index b70a326355..0d4460ffa6 100644 --- a/discovery/refresh/refresh_test.go +++ b/discovery/refresh/refresh_test.go @@ -15,7 +15,7 @@ package refresh import ( "context" - "fmt" + "errors" "testing" "time" @@ -64,7 +64,7 @@ func TestRefresh(t *testing.T) { case 2: return tg2, nil } - return nil, fmt.Errorf("some error") + return nil, errors.New("some error") } interval := time.Millisecond diff --git a/discovery/registry.go b/discovery/registry.go index 1f491d4ca9..2401d78fba 100644 --- a/discovery/registry.go +++ b/discovery/registry.go @@ -267,7 +267,7 @@ func replaceYAMLTypeError(err error, oldTyp, newTyp reflect.Type) error { func RegisterSDMetrics(registerer prometheus.Registerer, rmm RefreshMetricsManager) (map[string]DiscovererMetrics, error) { err := rmm.Register() if err != nil { - return nil, fmt.Errorf("failed to create service discovery refresh metrics") + return nil, errors.New("failed to create service discovery refresh metrics") } metrics := make(map[string]DiscovererMetrics) @@ -275,7 +275,7 @@ func RegisterSDMetrics(registerer prometheus.Registerer, rmm RefreshMetricsManag currentSdMetrics := conf.NewDiscovererMetrics(registerer, rmm) err = currentSdMetrics.Register() if err != nil { - return nil, fmt.Errorf("failed to create service discovery metrics") + return nil, errors.New("failed to create service discovery metrics") } metrics[conf.Name()] = currentSdMetrics } diff --git a/discovery/scaleway/scaleway.go b/discovery/scaleway/scaleway.go index f8e1a83f5e..bc9282feaa 100644 --- a/discovery/scaleway/scaleway.go +++ b/discovery/scaleway/scaleway.go @@ -17,12 +17,12 @@ import ( "context" "errors" "fmt" + "log/slog" "net/http" "os" "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -185,10 +185,10 @@ func init() { // the Discoverer interface. 
type Discovery struct{} -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) { m, ok := metrics.(*scalewayMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } r, err := newRefresher(conf) diff --git a/discovery/triton/triton.go b/discovery/triton/triton.go index 675149f2a3..5ec7b65215 100644 --- a/discovery/triton/triton.go +++ b/discovery/triton/triton.go @@ -19,12 +19,12 @@ import ( "errors" "fmt" "io" + "log/slog" "net/http" "net/url" "strings" "time" - "github.com/go-kit/log" "github.com/mwitkow/go-conntrack" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -146,10 +146,10 @@ type Discovery struct { } // New returns a new Discovery which periodically refreshes its targets. -func New(logger log.Logger, conf *SDConfig, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func New(logger *slog.Logger, conf *SDConfig, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*tritonMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } tls, err := config.NewTLSConfig(&conf.TLSConfig) diff --git a/discovery/triton/triton_test.go b/discovery/triton/triton_test.go index e37693e6bf..b2d06afaf6 100644 --- a/discovery/triton/triton_test.go +++ b/discovery/triton/triton_test.go @@ -21,7 +21,6 @@ import ( "net/http/httptest" "net/url" "strconv" - "strings" "testing" "github.com/prometheus/client_golang/prometheus" @@ -182,8 +181,7 @@ func TestTritonSDRefreshNoServer(t *testing.T) { td, m, _ := newTritonDiscovery(conf) _, err := td.refresh(context.Background()) - require.Error(t, err) - require.True(t, strings.Contains(err.Error(), "an error occurred when requesting targets from the discovery endpoint")) + require.ErrorContains(t, err, "an error occurred when requesting targets from the discovery endpoint") m.Unregister() } @@ -193,8 +191,7 @@ func TestTritonSDRefreshCancelled(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cancel() _, err := td.refresh(ctx) - require.Error(t, err) - require.True(t, strings.Contains(err.Error(), context.Canceled.Error())) + require.ErrorContains(t, err, context.Canceled.Error()) m.Unregister() } diff --git a/discovery/uyuni/uyuni.go b/discovery/uyuni/uyuni.go index c8af2f1587..1bd0cd2d49 100644 --- a/discovery/uyuni/uyuni.go +++ b/discovery/uyuni/uyuni.go @@ -17,6 +17,7 @@ import ( "context" "errors" "fmt" + "log/slog" "net/http" "net/url" "path" @@ -24,7 +25,6 @@ import ( "strings" "time" - "github.com/go-kit/log" "github.com/kolo/xmlrpc" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -41,10 +41,10 @@ const ( uyuniMetaLabelPrefix = model.MetaLabelPrefix + "uyuni_" uyuniLabelMinionHostname = uyuniMetaLabelPrefix + "minion_hostname" uyuniLabelPrimaryFQDN = uyuniMetaLabelPrefix + "primary_fqdn" - uyuniLablelSystemID = uyuniMetaLabelPrefix + "system_id" - uyuniLablelGroups = uyuniMetaLabelPrefix + "groups" - uyuniLablelEndpointName = uyuniMetaLabelPrefix + "endpoint_name" - uyuniLablelExporter = uyuniMetaLabelPrefix + "exporter" + uyuniLabelSystemID = uyuniMetaLabelPrefix + "system_id" + uyuniLabelGroups = uyuniMetaLabelPrefix + "groups" + uyuniLabelEndpointName = uyuniMetaLabelPrefix + 
"endpoint_name" + uyuniLabelExporter = uyuniMetaLabelPrefix + "exporter" uyuniLabelProxyModule = uyuniMetaLabelPrefix + "proxy_module" uyuniLabelMetricsPath = uyuniMetaLabelPrefix + "metrics_path" uyuniLabelScheme = uyuniMetaLabelPrefix + "scheme" @@ -109,7 +109,7 @@ type Discovery struct { entitlement string separator string interval time.Duration - logger log.Logger + logger *slog.Logger } // NewDiscovererMetrics implements discovery.Config. @@ -212,10 +212,10 @@ func getEndpointInfoForSystems( } // NewDiscovery returns a uyuni discovery for the given configuration. -func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*uyuniMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } apiURL, err := url.Parse(conf.Server) @@ -270,10 +270,10 @@ func (d *Discovery) getEndpointLabels( model.AddressLabel: model.LabelValue(addr), uyuniLabelMinionHostname: model.LabelValue(networkInfo.Hostname), uyuniLabelPrimaryFQDN: model.LabelValue(networkInfo.PrimaryFQDN), - uyuniLablelSystemID: model.LabelValue(strconv.Itoa(endpoint.SystemID)), - uyuniLablelGroups: model.LabelValue(strings.Join(managedGroupNames, d.separator)), - uyuniLablelEndpointName: model.LabelValue(endpoint.EndpointName), - uyuniLablelExporter: model.LabelValue(endpoint.ExporterName), + uyuniLabelSystemID: model.LabelValue(strconv.Itoa(endpoint.SystemID)), + uyuniLabelGroups: model.LabelValue(strings.Join(managedGroupNames, d.separator)), + uyuniLabelEndpointName: model.LabelValue(endpoint.EndpointName), + uyuniLabelExporter: model.LabelValue(endpoint.ExporterName), uyuniLabelProxyModule: model.LabelValue(endpoint.Module), uyuniLabelMetricsPath: model.LabelValue(endpoint.Path), uyuniLabelScheme: model.LabelValue(scheme), diff --git a/discovery/vultr/vultr.go b/discovery/vultr/vultr.go index aaa9c64e47..ee92f01699 100644 --- a/discovery/vultr/vultr.go +++ b/discovery/vultr/vultr.go @@ -15,14 +15,15 @@ package vultr import ( "context" + "errors" "fmt" + "log/slog" "net" "net/http" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" @@ -114,10 +115,10 @@ type Discovery struct { } // NewDiscovery returns a new Discovery which periodically refreshes its targets. 
-func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { m, ok := metrics.(*vultrMetrics) if !ok { - return nil, fmt.Errorf("invalid discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } d := &Discovery{ diff --git a/discovery/vultr/vultr_test.go b/discovery/vultr/vultr_test.go index 2f12a35529..00ef21e38c 100644 --- a/discovery/vultr/vultr_test.go +++ b/discovery/vultr/vultr_test.go @@ -19,9 +19,9 @@ import ( "net/url" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/discovery" @@ -57,7 +57,7 @@ func TestVultrSDRefresh(t *testing.T) { defer metrics.Unregister() defer refreshMetrics.Unregister() - d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics) + d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics) require.NoError(t, err) endpoint, err := url.Parse(sdMock.Mock.Endpoint()) require.NoError(t, err) diff --git a/discovery/xds/client_test.go b/discovery/xds/client_test.go index b699995fb7..2cf5b2f9cb 100644 --- a/discovery/xds/client_test.go +++ b/discovery/xds/client_test.go @@ -52,16 +52,14 @@ func TestMakeXDSResourceHttpEndpointEmptyServerURLScheme(t *testing.T) { endpointURL, err := makeXDSResourceHTTPEndpointURL(ProtocolV3, urlMustParse("127.0.0.1"), "monitoring") require.Empty(t, endpointURL) - require.Error(t, err) - require.Equal(t, "invalid xDS server URL", err.Error()) + require.EqualError(t, err, "invalid xDS server URL") } func TestMakeXDSResourceHttpEndpointEmptyServerURLHost(t *testing.T) { endpointURL, err := makeXDSResourceHTTPEndpointURL(ProtocolV3, urlMustParse("grpc://127.0.0.1"), "monitoring") require.Empty(t, endpointURL) - require.Error(t, err) - require.Contains(t, err.Error(), "must be either 'http' or 'https'") + require.ErrorContains(t, err, "must be either 'http' or 'https'") } func TestMakeXDSResourceHttpEndpoint(t *testing.T) { diff --git a/discovery/xds/kuma.go b/discovery/xds/kuma.go index d1d540aaf4..6208e6182a 100644 --- a/discovery/xds/kuma.go +++ b/discovery/xds/kuma.go @@ -14,15 +14,16 @@ package xds import ( + "errors" "fmt" + "log/slog" "net/url" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "google.golang.org/protobuf/types/known/anypb" "github.com/prometheus/prometheus/discovery" @@ -99,7 +100,7 @@ func (c *KumaSDConfig) SetDirectory(dir string) { func (c *KumaSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { logger := opts.Logger if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } return NewKumaHTTPDiscovery(c, logger, opts.Metrics) @@ -158,10 +159,10 @@ func kumaMadsV1ResourceParser(resources []*anypb.Any, typeURL string) ([]model.L return targets, nil } -func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (discovery.Discoverer, error) { +func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (discovery.Discoverer, error) { m, ok := metrics.(*xdsMetrics) if !ok { - return nil, fmt.Errorf("invalid 
discovery metrics type") + return nil, errors.New("invalid discovery metrics type") } // Default to "prometheus" if hostname is unavailable. @@ -170,7 +171,7 @@ func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger, metrics discove var err error clientID, err = osutil.GetFQDN() if err != nil { - level.Debug(logger).Log("msg", "error getting FQDN", "err", err) + logger.Debug("error getting FQDN", "err", err) clientID = "prometheus" } } diff --git a/discovery/xds/kuma_mads.pb.go b/discovery/xds/kuma_mads.pb.go index b1079bf23f..210a5343a4 100644 --- a/discovery/xds/kuma_mads.pb.go +++ b/discovery/xds/kuma_mads.pb.go @@ -23,13 +23,14 @@ package xds import ( context "context" + reflect "reflect" + sync "sync" + v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" _ "github.com/envoyproxy/protoc-gen-validate/validate" _ "google.golang.org/genproto/googleapis/api/annotations" protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" - reflect "reflect" - sync "sync" ) const ( diff --git a/discovery/xds/kuma_test.go b/discovery/xds/kuma_test.go index cfb9cbac50..23d754c4b7 100644 --- a/discovery/xds/kuma_test.go +++ b/discovery/xds/kuma_test.go @@ -201,9 +201,8 @@ func TestKumaMadsV1ResourceParserInvalidResources(t *testing.T) { }} groups, err := kumaMadsV1ResourceParser(resources, KumaMadsV1ResourceTypeURL) require.Nil(t, groups) - require.Error(t, err) - require.Contains(t, err.Error(), "cannot parse") + require.ErrorContains(t, err, "cannot parse") } func TestNewKumaHTTPDiscovery(t *testing.T) { diff --git a/discovery/xds/xds.go b/discovery/xds/xds.go index 8191d6be1a..db55a2b6f7 100644 --- a/discovery/xds/xds.go +++ b/discovery/xds/xds.go @@ -15,11 +15,10 @@ package xds import ( "context" + "log/slog" "time" v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/config" "github.com/prometheus/common/model" "google.golang.org/protobuf/encoding/protojson" @@ -104,7 +103,7 @@ type fetchDiscovery struct { refreshInterval time.Duration parseResources resourceParser - logger log.Logger + logger *slog.Logger metrics *xdsMetrics } @@ -140,7 +139,7 @@ func (d *fetchDiscovery) poll(ctx context.Context, ch chan<- []*targetgroup.Grou } if err != nil { - level.Error(d.logger).Log("msg", "error parsing resources", "err", err) + d.logger.Error("error parsing resources", "err", err) d.metrics.fetchFailuresCount.Inc() return } @@ -153,12 +152,12 @@ func (d *fetchDiscovery) poll(ctx context.Context, ch chan<- []*targetgroup.Grou parsedTargets, err := d.parseResources(response.Resources, response.TypeUrl) if err != nil { - level.Error(d.logger).Log("msg", "error parsing resources", "err", err) + d.logger.Error("error parsing resources", "err", err) d.metrics.fetchFailuresCount.Inc() return } - level.Debug(d.logger).Log("msg", "Updated to version", "version", response.VersionInfo, "targets", len(parsedTargets)) + d.logger.Debug("Updated to version", "version", response.VersionInfo, "targets", len(parsedTargets)) select { case <-ctx.Done(): diff --git a/discovery/xds/xds_test.go b/discovery/xds/xds_test.go index 7cce021c5f..db10adc1a2 100644 --- a/discovery/xds/xds_test.go +++ b/discovery/xds/xds_test.go @@ -22,9 +22,9 @@ import ( "time" v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + 
"github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/goleak" "google.golang.org/protobuf/types/known/anypb" @@ -90,7 +90,7 @@ func constantResourceParser(targets []model.LabelSet, err error) resourceParser } } -var nopLogger = log.NewNopLogger() +var nopLogger = promslog.NewNopLogger() type testResourceClient struct { resourceTypeURL string diff --git a/discovery/zookeeper/zookeeper.go b/discovery/zookeeper/zookeeper.go index 92904dd71c..a1cfe3d055 100644 --- a/discovery/zookeeper/zookeeper.go +++ b/discovery/zookeeper/zookeeper.go @@ -18,15 +18,16 @@ import ( "encoding/json" "errors" "fmt" + "log/slog" "net" "strconv" "strings" "time" - "github.com/go-kit/log" "github.com/go-zookeeper/zk" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -146,16 +147,16 @@ type Discovery struct { treeCaches []*treecache.ZookeeperTreeCache parse func(data []byte, path string) (model.LabelSet, error) - logger log.Logger + logger *slog.Logger } // NewNerveDiscovery returns a new Discovery for the given Nerve config. -func NewNerveDiscovery(conf *NerveSDConfig, logger log.Logger) (*Discovery, error) { +func NewNerveDiscovery(conf *NerveSDConfig, logger *slog.Logger) (*Discovery, error) { return NewDiscovery(conf.Servers, time.Duration(conf.Timeout), conf.Paths, logger, parseNerveMember) } // NewServersetDiscovery returns a new Discovery for the given serverset config. -func NewServersetDiscovery(conf *ServersetSDConfig, logger log.Logger) (*Discovery, error) { +func NewServersetDiscovery(conf *ServersetSDConfig, logger *slog.Logger) (*Discovery, error) { return NewDiscovery(conf.Servers, time.Duration(conf.Timeout), conf.Paths, logger, parseServersetMember) } @@ -165,11 +166,11 @@ func NewDiscovery( srvs []string, timeout time.Duration, paths []string, - logger log.Logger, + logger *slog.Logger, pf func(data []byte, path string) (model.LabelSet, error), ) (*Discovery, error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } conn, _, err := zk.Connect( diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index 7d9e5a3c80..dd207dc382 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -15,11 +15,15 @@ The Prometheus monitoring server | -h, --help | Show context-sensitive help (also try --help-long and --help-man). | | | --version | Show application version. | | | --config.file | Prometheus configuration file path. | `prometheus.yml` | +| --config.auto-reload-interval | Specifies the interval for checking and automatically reloading the Prometheus configuration file upon detecting changes. | `30s` | | --web.listen-address ... | Address to listen on for UI, API, and telemetry. Can be repeated. | `0.0.0.0:9090` | +| --auto-gomaxprocs | Automatically set GOMAXPROCS to match Linux container CPU quota | `true` | +| --auto-gomemlimit | Automatically set GOMEMLIMIT to match Linux container or system memory limit | `true` | | --auto-gomemlimit.ratio | The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory | `0.9` | | --web.config.file | [EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. | | | --web.read-timeout | Maximum duration before timing out read of the request, and closing idle connections. 
| `5m` | | --web.max-connections | Maximum number of simultaneous connections across all listeners. | `512` | +| --web.max-notifications-subscribers | Limits the maximum number of subscribers that can concurrently receive live notifications. If the limit is reached, new subscription requests will be denied until existing connections close. | `16` | | --web.external-url | The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically. | | | --web.route-prefix | Prefix for the internal routes of web endpoints. Defaults to path of --web.external-url. | | | --web.user-assets | Path to static asset directory, available at /user. | | @@ -27,13 +31,13 @@ The Prometheus monitoring server | --web.enable-admin-api | Enable API endpoints for admin control actions. | `false` | | --web.enable-remote-write-receiver | Enable API endpoint accepting remote write requests. | `false` | | --web.remote-write-receiver.accepted-protobuf-messages | List of the remote write protobuf messages to accept when receiving the remote writes. Supported values: prometheus.WriteRequest, io.prometheus.write.v2.Request | `prometheus.WriteRequest` | +| --web.enable-otlp-receiver | Enable API endpoint accepting OTLP write requests. | `false` | | --web.console.templates | Path to the console template directory, available at /consoles. | `consoles` | | --web.console.libraries | Path to the console library directory. | `console_libraries` | | --web.page-title | Document title of Prometheus instance. | `Prometheus Time Series Collection and Processing Server` | | --web.cors.origin | Regex for CORS origin. It is fully anchored. Example: 'https?://(domain1\|domain2)\.com' | `.*` | | --storage.tsdb.path | Base path for metrics storage. Use with server mode only. | `data/` | -| --storage.tsdb.retention | [DEPRECATED] How long to retain samples in storage. This flag has been deprecated, use "storage.tsdb.retention.time" instead. Use with server mode only. | | -| --storage.tsdb.retention.time | How long to retain samples in storage. When this flag is set it overrides "storage.tsdb.retention". If neither this flag nor "storage.tsdb.retention" nor "storage.tsdb.retention.size" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. Use with server mode only. | | +| --storage.tsdb.retention.time | How long to retain samples in storage. If neither this flag nor "storage.tsdb.retention.size" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. Use with server mode only. | | | --storage.tsdb.retention.size | Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Use with server mode only. | | | --storage.tsdb.no-lockfile | Do not create lockfile in data directory. Use with server mode only. | `false` | | --storage.tsdb.head-chunks-write-queue-size | Size of the queue through which head chunks are written to the disk to be m-mapped, 0 disables the queue completely. Experimental. Use with server mode only. | `0` | @@ -56,8 +60,8 @@ The Prometheus monitoring server | --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. 
| `2m` | | --query.max-concurrency | Maximum number of queries executed concurrently. Use with server mode only. | `20` | | --query.max-samples | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` | -| --scrape.name-escaping-scheme | Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots". | `values` | -| --enable-feature ... | Comma separated feature names to enable. Valid options: agent, auto-gomaxprocs, auto-gomemlimit, concurrent-rule-eval, created-timestamp-zero-ingestion, delayed-compaction, exemplar-storage, expand-external-labels, extra-scrape-metrics, memory-snapshot-on-shutdown, native-histograms, new-service-discovery-manager, no-default-scrape-port, otlp-write-receiver, promql-experimental-functions, promql-delayed-name-removal, promql-per-step-stats, remote-write-receiver (DEPRECATED), utf8-names. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +| --enable-feature ... | Comma separated feature names to enable. Valid options: exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | | +| --agent | Run Prometheus in 'Agent mode'. | | | --log.level | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` | | --log.format | Output format of log messages. One of: [logfmt, json] | `logfmt` | diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md index 6d74200e65..5e2a8f6bb1 100644 --- a/docs/command-line/promtool.md +++ b/docs/command-line/promtool.md @@ -15,7 +15,7 @@ Tooling for the Prometheus monitoring system. | -h, --help | Show context-sensitive help (also try --help-long and --help-man). | | --version | Show application version. | | --experimental | Enable experimental commands. | -| --enable-feature ... | Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details. | +| --enable-feature ... | Comma separated feature names to enable. Currently unused. | @@ -462,6 +462,7 @@ Unit tests for rules. | Flag | Description | Default | | --- | --- | --- | | --run ... | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | | +| --debug | Enable unit test debugging. | `false` | | --diff | [Experimental] Print colored differential output between expected & received output. | `false` | @@ -575,7 +576,7 @@ Dump samples from a TSDB. | Flag | Description | Default | | --- | --- | --- | -| --sandbox-dir-root | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` | +| --sandbox-dir-root | Root directory where a sandbox directory will be created, this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end. | | | --min-time | Minimum timestamp to dump. 
| `-9223372036854775808` | | --max-time | Maximum timestamp to dump. | `9223372036854775807` | | --match ... | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` | @@ -602,7 +603,7 @@ Dump samples from a TSDB. | Flag | Description | Default | | --- | --- | --- | -| --sandbox-dir-root | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` | +| --sandbox-dir-root | Root directory where a sandbox directory will be created, this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end. | | | --min-time | Minimum timestamp to dump. | `-9223372036854775808` | | --max-time | Maximum timestamp to dump. | `9223372036854775807` | | --match ... | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` | diff --git a/docs/configuration/alerting_rules.md b/docs/configuration/alerting_rules.md index 3c1ec84f0f..cd33dba8e3 100644 --- a/docs/configuration/alerting_rules.md +++ b/docs/configuration/alerting_rules.md @@ -21,10 +21,13 @@ An example rules file with an alert would be: ```yaml groups: - name: example + labels: + team: myteam rules: - alert: HighRequestLatency expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5 for: 10m + keep_firing_for: 5m labels: severity: page annotations: @@ -38,6 +41,13 @@ the alert continues to be active during each evaluation for 10 minutes before firing the alert. Elements that are active, but not firing yet, are in the pending state. Alerting rules without the `for` clause will become active on the first evaluation. +There is also an optional `keep_firing_for` clause that tells Prometheus to keep +this alert firing for the specified duration after the firing condition was last met. +This can be used to prevent situations such as flapping alerts, false resolutions +due to lack of data loss, etc. Alerting rules without the `keep_firing_for` clause +will deactivate on the first evaluation where the condition is not met (assuming +any optional `for` duration desribed above has been satisfied). + The `labels` clause allows specifying a set of additional labels to be attached to the alert. Any existing conflicting labels will be overwritten. The label values can be templated. diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index a42126cf22..32da2c61c6 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -71,12 +71,19 @@ global: # How frequently to evaluate rules. [ evaluation_interval: | default = 1m ] - # Offset the rule evaluation timestamp of this particular group by the specified duration into the past to ensure the underlying metrics have been received. - # Metric availability delays are more likely to occur when Prometheus is running as a remote write target, but can also occur when there's anomalies with scraping. + # Offset the rule evaluation timestamp of this particular group by the + # specified duration into the past to ensure the underlying metrics have + # been received. Metric availability delays are more likely to occur when + # Prometheus is running as a remote write target, but can also occur when + # there's anomalies with scraping. [ rule_query_offset: | default = 0s ] # The labels to add to any time series or alerts when communicating with - # external systems (federation, remote storage, Alertmanager). 
+  # Offset the rule evaluation timestamp of this particular group by the
+  # specified duration into the past to ensure the underlying metrics have
+  # been received. Metric availability delays are more likely to occur when
+  # Prometheus is running as a remote write target, but can also occur when
+  # there are anomalies with scraping.
   [ rule_query_offset: <duration> | default = 0s ]
 
   # The labels to add to any time series or alerts when communicating with
-  # external systems (federation, remote storage, Alertmanager).
+  # external systems (federation, remote storage, Alertmanager).
+  # Environment variable references `${var}` or `$var` are replaced according
+  # to the values of the current environment variables.
+  # References to undefined variables are replaced by the empty string.
+  # The `$` character can be escaped by using `$$`.
   external_labels:
     [ <labelname>: <labelvalue> ... ]
@@ -84,33 +91,39 @@ global:
   # Reloading the configuration will reopen the file.
   [ query_log_file: <string> ]
 
+  # File to which scrape failures are logged.
+  # Reloading the configuration will reopen the file.
+  [ scrape_failure_log_file: <string> ]
+
   # An uncompressed response body larger than this many bytes will cause the
   # scrape to fail. 0 means no limit. Example: 100MB.
   # This is an experimental feature, this behaviour could
   # change or be removed in the future.
   [ body_size_limit: <size> | default = 0 ]
 
-  # Per-scrape limit on number of scraped samples that will be accepted.
+  # Per-scrape limit on the number of scraped samples that will be accepted.
   # If more than this number of samples are present after metric relabeling
   # the entire scrape will be treated as failed. 0 means no limit.
   [ sample_limit: <int> | default = 0 ]
 
-  # Per-scrape limit on number of labels that will be accepted for a sample. If
-  # more than this number of labels are present post metric-relabeling, the
-  # entire scrape will be treated as failed. 0 means no limit.
+  # Limit on the number of labels that will be accepted per sample. If more
+  # than this number of labels are present on any sample post metric-relabeling,
+  # the entire scrape will be treated as failed. 0 means no limit.
   [ label_limit: <int> | default = 0 ]
 
-  # Per-scrape limit on length of labels name that will be accepted for a sample.
-  # If a label name is longer than this number post metric-relabeling, the entire
-  # scrape will be treated as failed. 0 means no limit.
+  # Limit on the length (in bytes) of each individual label name. If any label
+  # name in a scrape is longer than this number post metric-relabeling, the
+  # entire scrape will be treated as failed. Note that label names are UTF-8
+  # encoded, and characters can take up to 4 bytes. 0 means no limit.
   [ label_name_length_limit: <int> | default = 0 ]
 
-  # Per-scrape limit on length of labels value that will be accepted for a sample.
-  # If a label value is longer than this number post metric-relabeling, the
-  # entire scrape will be treated as failed. 0 means no limit.
+  # Limit on the length (in bytes) of each individual label value. If any label
+  # value in a scrape is longer than this number post metric-relabeling, the
+  # entire scrape will be treated as failed. Note that label values are UTF-8
+  # encoded, and characters can take up to 4 bytes. 0 means no limit.
   [ label_value_length_limit: <int> | default = 0 ]
 
-  # Per-scrape config limit on number of unique targets that will be
+  # Limit per scrape config on number of unique targets that will be
   # accepted. If more than this number of targets are present after target
   # relabeling, Prometheus will mark the targets as failed without scraping them.
   # 0 means no limit. This is an experimental feature, this behaviour could
@@ -122,9 +135,9 @@ global:
   [ keep_dropped_targets: <int> | default = 0 ]
 
   # Specifies the validation scheme for metric and label names. Either blank or
-  # "legacy" for letters, numbers, colons, and underscores; or "utf8" for full
-  # UTF-8 support.
-  [ metric_name_validation_scheme <string> | default "legacy" ]
+  # "utf8" for full UTF-8 support, or "legacy" for letters, numbers, colons,
+  # and underscores.
+  [ metric_name_validation_scheme <string> | default "utf8" ]
 
 runtime:
   # Configure the Go garbage collector GOGC parameter
@@ -158,8 +171,21 @@ remote_write:
   [ - <remote_write> ... ]
 
 # Settings related to the OTLP receiver feature.
+# See https://prometheus.io/docs/guides/opentelemetry/ for best practices.
 otlp:
   [ promote_resource_attributes: [<string>, ...] | default = [ ] ]
+  # Configures translation of OTLP metrics when received through the OTLP metrics
+  # endpoint. Available values:
+  # - "UnderscoreEscapingWithSuffixes" refers to commonly agreed normalization used
+  #   by OpenTelemetry in https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/translator/prometheus
+  # - "NoUTF8EscapingWithSuffixes" is a mode that relies on UTF-8 support in Prometheus.
+  #   It preserves all special characters like dots, but it still adds required suffixes
+  #   for units and _total like in UnderscoreEscapingWithSuffixes.
+  [ translation_strategy: <string> | default = "UnderscoreEscapingWithSuffixes" ]
+  # Enables adding "service.name", "service.namespace" and "service.instance.id"
+  # resource attributes to the "target_info" metric, on top of converting
+  # them into the "instance" and "job" labels.
+  [ keep_identifying_resource_attributes: <boolean> | default = false ]
 
 # Settings related to the remote read feature.
 remote_read:
@@ -199,12 +225,18 @@ job_name: <job_name>
 
 # The protocols to negotiate during a scrape with the client.
 # Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1,
-# OpenMetricsText1.0.0, PrometheusText0.0.4.
+# OpenMetricsText1.0.0, PrometheusText0.0.4, PrometheusText1.0.0.
 [ scrape_protocols: [<string>, ...] | default = <global_config.scrape_protocols> ]
 
-# Whether to scrape a classic histogram that is also exposed as a native
+# Fallback protocol to use if a scrape returns blank, unparseable, or otherwise
+# invalid Content-Type.
+# Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1,
+# OpenMetricsText1.0.0, PrometheusText0.0.4, PrometheusText1.0.0.
+[ fallback_scrape_protocol: <string> ]
+
+# Whether to scrape a classic histogram, even if it is also exposed as a native
 # histogram (has no effect without --enable-feature=native-histograms).
-[ scrape_classic_histograms: <boolean> | default = false ]
+[ always_scrape_classic_histograms: <boolean> | default = false ]
 
 # The HTTP resource path on which to fetch metrics from targets.
 [ metrics_path: <path> | default = /metrics ]
@@ -260,64 +292,13 @@ params:
 # response from the scraped target.
 [ enable_compression: <boolean> | default = true ]
 
-# Sets the `Authorization` header on every scrape request with the
-# configured username and password.
-# password and password_file are mutually exclusive.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
+# File to which scrape failures are logged.
+# Reloading the configuration will reopen the file.
+[ scrape_failure_log_file: <string> ]
 
-# Sets the `Authorization` header on every scrape request with
-# the configured credentials.
-authorization:
-  # Sets the authentication type of the request.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials of the request. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials of the request with the credentials read from the
-  # configured file. It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
-  [ <oauth2> ]
-
-# Configure whether scrape requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# Configures the scrape request's TLS settings.
-tls_config:
-  [ <tls_config> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# Custom HTTP headers to be sent along with each request.
-# Headers that are set by Prometheus itself can't be overwritten.
-http_headers:
-  # Header name.
-  [ <string>:
-    # Header values.
-    [ values: [<string>, ...] ]
-    # Headers values. Hidden in configuration page.
-    [ secrets: [<secret>, ...] ]
-    # Files to read header values from.
-    [ files: [<string>, ...] ] ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 
 # List of Azure service discovery configurations.
 azure_sd_configs:
   [ - <azure_sd_config> ... ]
@@ -446,41 +427,43 @@ metric_relabel_configs:
 # change or be removed in the future.
 [ body_size_limit: <size> | default = 0 ]
 
-# Per-scrape limit on number of scraped samples that will be accepted.
+# Per-scrape limit on the number of scraped samples that will be accepted.
 # If more than this number of samples are present after metric relabeling
 # the entire scrape will be treated as failed. 0 means no limit.
 [ sample_limit: <int> | default = 0 ]
 
-# Per-scrape limit on number of labels that will be accepted for a sample. If
-# more than this number of labels are present post metric-relabeling, the
-# entire scrape will be treated as failed. 0 means no limit.
+# Limit on the number of labels that will be accepted per sample. If more
+# than this number of labels are present on any sample post metric-relabeling,
+# the entire scrape will be treated as failed. 0 means no limit.
[ label_limit: <int> | default = 0 ]
 
-# Per-scrape limit on length of labels name that will be accepted for a sample.
-# If a label name is longer than this number post metric-relabeling, the entire
-# scrape will be treated as failed. 0 means no limit.
+# Limit on the length (in bytes) of each individual label name. If any label
+# name in a scrape is longer than this number post metric-relabeling, the
+# entire scrape will be treated as failed. Note that label names are UTF-8
+# encoded, and characters can take up to 4 bytes. 0 means no limit.
 [ label_name_length_limit: <int> | default = 0 ]
 
-# Per-scrape limit on length of labels value that will be accepted for a sample.
-# If a label value is longer than this number post metric-relabeling, the
-# entire scrape will be treated as failed. 0 means no limit.
+# Limit on the length (in bytes) of each individual label value. If any label
+# value in a scrape is longer than this number post metric-relabeling, the
+# entire scrape will be treated as failed. Note that label values are UTF-8
+# encoded, and characters can take up to 4 bytes. 0 means no limit.
 [ label_value_length_limit: <int> | default = 0 ]
 
-# Per-scrape config limit on number of unique targets that will be
+# Limit per scrape config on number of unique targets that will be
 # accepted. If more than this number of targets are present after target
 # relabeling, Prometheus will mark the targets as failed without scraping them.
 # 0 means no limit. This is an experimental feature, this behaviour could
 # change in the future.
 [ target_limit: <int> | default = 0 ]
 
-# Per-job limit on the number of targets dropped by relabeling
+# Limit per scrape config on the number of targets dropped by relabeling
 # that will be kept in memory. 0 means no limit.
 [ keep_dropped_targets: <int> | default = 0 ]
 
-# Specifies the validation scheme for metric and label names. Either blank or
-# "legacy" for letters, numbers, colons, and underscores; or "utf8" for full
-# UTF-8 support.
-[ metric_name_validation_scheme <string> | default "legacy" ]
+# Specifies the validation scheme for metric and label names. Either blank or
+# "utf8" for full UTF-8 support, or "legacy" for letters, numbers, colons, and
+# underscores.
+[ metric_name_validation_scheme <string> | default "utf8" ]
 
 # Limit on total number of positive and negative buckets allowed in a single
 # native histogram. The resolution of a histogram with more buckets will be
@@ -532,6 +515,73 @@ metric_relabel_configs:
 
 Where `<job_name>` must be unique across all scrape configurations.
 
+### `<http_config>`
+
+An `http_config` allows configuring HTTP requests.
+
+```
+# Sets the `Authorization` header on every request with the
+# configured username and password.
+# username and username_file are mutually exclusive.
+# password and password_file are mutually exclusive.
+basic_auth:
+  [ username: <string> ]
+  [ username_file: <string> ]
+  [ password: <secret> ]
+  [ password_file: <string> ]
+
+# Sets the `Authorization` header on every request with
+# the configured credentials.
+authorization:
+  # Sets the authentication type of the request.
+  [ type: <string> | default: Bearer ]
+  # Sets the credentials of the request. It is mutually exclusive with
+  # `credentials_file`.
+  [ credentials: <secret> ]
+  # Sets the credentials of the request with the credentials read from the
+  # configured file. It is mutually exclusive with `credentials`.
+  [ credentials_file: <filename> ]
+
+# Optional OAuth 2.0 configuration.
+# Cannot be used at the same time as basic_auth or authorization.
+oauth2:
+  [ <oauth2> ]
+
+# Configure whether requests follow HTTP 3xx redirects.
+[ follow_redirects: <boolean> | default = true ]
+
+# Whether to enable HTTP2.
+[ enable_http2: <boolean> | default: true ]
+
+# Configures the request's TLS settings.
+tls_config:
+  [ <tls_config> ]
+
+# Optional proxy URL.
+[ proxy_url: <string> ]
+# Comma-separated string that can contain IPs, CIDR notation, domain names
+# that should be excluded from proxying. IP and domain names can
+# contain port numbers.
+[ no_proxy: <string> ]
+# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
+[ proxy_from_environment: <boolean> | default: false ]
+# Specifies headers to send to proxies during CONNECT requests.
+[ proxy_connect_header:
+  [ <string>: [<secret>, ...] ] ]
+
+# Custom HTTP headers to be sent along with each request.
+# Headers that are set by Prometheus itself can't be overwritten.
+http_headers:
+  # Header name.
+  [ <string>:
+    # Header values.
+    [ values: [<string>, ...] ]
+    # Header values. Hidden in configuration page.
+    [ secrets: [<secret>, ...] ]
+    # Files to read header values from.
+    [ files: [<string>, ...] ] ]
+```
 
 ### `<tls_config>`
 
 A `tls_config` allows configuring TLS connections.
@@ -608,6 +658,18 @@ tls_config:
 # Specifies headers to send to proxies during CONNECT requests.
 [ proxy_connect_header:
   [ <string>: [<secret>, ...] ] ]
+
+# Custom HTTP headers to be sent along with each request.
+# Headers that are set by Prometheus itself can't be overwritten.
+http_headers:
+  # Header name.
+  [ <string>:
+    # Header values.
+    [ values: [<string>, ...] ]
+    # Header values. Hidden in configuration page.
+    [ secrets: [<secret>, ...] ]
+    # Files to read header values from.
+    [ files: [<string>, ...] ] ]
 ```
 
 ### `<azure_sd_config>`
@@ -661,53 +723,9 @@ subscription_id:
 # instead be specified in the relabeling rule.
 [ port: <int> | default = 80 ]
 
-# Authentication information used to authenticate to the Azure API.
-# Note that `basic_auth`, `authorization` and `oauth2` options are
-# mutually exclusive.
-# `password` and `password_file` are mutually exclusive.
-
-# Optional HTTP basic authentication information, currently not support by Azure.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration, currently not supported by Azure.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration, currently not supported by Azure.
-oauth2:
-  [ <oauth2> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 ### `<consul_sd_config>`
@@ -753,14 +771,17 @@ The following meta labels are available on targets during [relabeling](#relabel_
 services:
   [ - <string> ]
 
-# See https://www.consul.io/api/catalog.html#list-nodes-for-service to know more
-# about the possible filters that can be used.
+# A Consul Filter expression used to filter the catalog results.
+# See https://www.consul.io/api-docs/catalog#list-services to know more
+# about the filter expressions that can be used.
+[ filter: <string> ]
 
+# The `tags` and `node_meta` fields are deprecated in Consul in favor of `filter`.
 # An optional list of tags used to filter nodes for a given service. Services must contain all tags in the list.
 tags:
   [ - <string> ]
 
-# Node metadata key/value pairs to filter nodes for a given service.
+# Node metadata key/value pairs to filter nodes for a given service. As of Consul 1.14, consider `filter` instead.
 [ node_meta:
   [ <string>: <string> ... ] ]
 
@@ -774,53 +795,9 @@ tags:
 # On large setup it might be a good idea to increase this value because the catalog will change all the time.
 [ refresh_interval: <duration> | default = 30s ]
 
-# Authentication information used to authenticate to the consul server.
-# Note that `basic_auth`, `authorization` and `oauth2` options are
-# mutually exclusive.
-# `password` and `password_file` are mutually exclusive.
-
-# Optional HTTP basic authentication information.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-oauth2:
-  [ <oauth2> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 Note that the IP number and port used to scrape the targets is assembled as
@@ -860,60 +837,15 @@ The following meta labels are available on targets during [relabeling](#relabel_
 * `__meta_digitalocean_vpc`: the id of the droplet's VPC
 
 ```yaml
-# Authentication information used to authenticate to the API server.
-# Note that `basic_auth` and `authorization` options are
-# mutually exclusive.
-# password and password_file are mutually exclusive.
-
-# Optional HTTP basic authentication information, not currently supported by DigitalOcean.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
-  [ <oauth2> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
-
 # The port to scrape metrics from.
 [ port: <int> | default = 80 ]
 
 # The time after which the droplets are refreshed.
 [ refresh_interval: <duration> | default = 60s ]
+
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 ### `<docker_sd_config>`
@@ -945,22 +877,6 @@ See below for the configuration options for Docker discovery:
 # Address of the Docker daemon.
 host: <string>
 
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
-
 # The port to scrape metrics from, when `role` is nodes, and for discovered
 # tasks and services that don't have published ports.
 [ port: <int> | default = 80 ]
@@ -984,39 +900,9 @@ tls_config:
 # The time after which the containers are refreshed.
 [ refresh_interval: <duration> | default = 60s ]
 
-# Authentication information used to authenticate to the Docker daemon.
-# Note that `basic_auth` and `authorization` options are
-# mutually exclusive.
-# password and password_file are mutually exclusive.
-
-# Optional HTTP basic authentication information.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
-  [ <oauth2> ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 The [relabeling phase](#relabel_config) is the preferred and more powerful
@@ -1125,22 +1011,6 @@ See below for the configuration options for Docker Swarm discovery:
 # Address of the Docker daemon.
 host: <string>
 
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
-
 # Role of the targets to retrieve. Must be `services`, `tasks`, or `nodes`.
 role: <string>
@@ -1161,39 +1031,9 @@ role:
 # The time after which the service discovery data is refreshed.
 [ refresh_interval: <duration> | default = 60s ]
 
-# Authentication information used to authenticate to the Docker daemon.
-# Note that `basic_auth` and `authorization` options are
-# mutually exclusive.
-# password and password_file are mutually exclusive.
-
-# Optional HTTP basic authentication information.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
-  [ <oauth2> ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 The [relabeling phase](#relabel_config) is the preferred and more powerful
@@ -1308,53 +1148,9 @@ filters:
   [ - name: <string>
       values: <string>, [...] ]
 
-# Authentication information used to authenticate to the EC2 API.
-# Note that `basic_auth`, `authorization` and `oauth2` options are
-# mutually exclusive.
-# `password` and `password_file` are mutually exclusive.
-
-# Optional HTTP basic authentication information, currently not supported by AWS.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration, currently not supported by AWS.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutuall exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration, currently not supported by AWS.
-oauth2:
-  [ <oauth2> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 The [relabeling phase](#relabel_config) is the preferred and more powerful
@@ -1583,51 +1379,9 @@ query:
 # The port to scrape metrics from.
 [ port: <int> | default = 80 ]
 
-# TLS configuration to connect to the PuppetDB.
-tls_config:
-  [ <tls_config> ]
-
-# basic_auth, authorization, and oauth2, are mutually exclusive.
-
-# Optional HTTP basic authentication information.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# `Authorization` HTTP header configuration.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials with the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
-  [ <oauth2> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 See [this example Prometheus configuration file](/documentation/examples/prometheus-puppetdb.yml)
@@ -1808,62 +1562,15 @@ The labels below are only available for targets with `role` set to `robot`:
 # One of robot or hcloud.
 role: <string>
 
-# Authentication information used to authenticate to the API server.
-# Note that `basic_auth` and `authorization` options are
-# mutually exclusive.
-# password and password_file are mutually exclusive.
-
-# Optional HTTP basic authentication information, required when role is robot
-# Role hcloud does not support basic auth.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration, required when role is
-# hcloud. Role robot does not support bearer token authentication.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
-  [ <oauth2> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
-
 # The port to scrape metrics from.
 [ port: <int> | default = 80 ]
 
 # The time after which the servers are refreshed.
 [ refresh_interval: <duration> | default = 60s ]
+
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 ### `<http_sd_config>`
@@ -1905,53 +1612,9 @@ url:
 # Refresh interval to re-query the endpoint.
 [ refresh_interval: <duration> | default = 60s ]
 
-# Authentication information used to authenticate to the API server.
-# Note that `basic_auth`, `authorization` and `oauth2` options are
-# mutually exclusive.
-# `password` and `password_file` are mutually exclusive.
-
-# Optional HTTP basic authentication information.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-oauth2:
-  [ <oauth2> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 ### `<ionos_sd_config>`
@@ -1985,62 +1648,15 @@ following meta labels are available on all targets during
 # The unique ID of the data center.
 datacenter_id: <string>
 
-# Authentication information used to authenticate to the API server.
-# Note that `basic_auth` and `authorization` options are
-# mutually exclusive.
-# password and password_file are mutually exclusive.
-
-# Optional HTTP basic authentication information, required when using IONOS
-# Cloud username and password as authentication method.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration, required when using IONOS
-# Cloud token as authentication method.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
-  [ <oauth2> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
-
 # The port to scrape metrics from.
 [ port: <int> | default = 80 ]
 
 # The time after which the servers are refreshed.
 [ refresh_interval: <duration> | default = 60s ]
+
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 ### `<kubernetes_sd_config>`
@@ -2158,6 +1774,8 @@ The `endpointslice` role discovers targets from existing endpointslices. For eac
 address referenced in the endpointslice object one target is discovered. If the endpoint is backed by a pod, all
 additional container ports of the pod, not bound to an endpoint port, are discovered as targets as well.
 
+The role requires the `discovery.k8s.io/v1` API version (available since Kubernetes v1.21).
+
 Available meta labels:
 
 * `__meta_kubernetes_namespace`: The namespace of the endpoints object.
@@ -2178,7 +1796,7 @@ Available meta labels:
   * `__meta_kubernetes_endpointslice_endpoint_topology_present_kubernetes_io_hostname`: Flag that shows if the referenced object has a kubernetes.io/hostname annotation.
   * `__meta_kubernetes_endpointslice_endpoint_hostname`: Hostname of the referenced endpoint.
   * `__meta_kubernetes_endpointslice_endpoint_node_name`: Name of the Node hosting the referenced endpoint.
-  * `__meta_kubernetes_endpointslice_endpoint_zone`: Zone the referenced endpoint exists in (only available when using the `discovery.k8s.io/v1` API group).
+  * `__meta_kubernetes_endpointslice_endpoint_zone`: Zone the referenced endpoint exists in.
   * `__meta_kubernetes_endpointslice_port`: Port of the referenced endpoint.
   * `__meta_kubernetes_endpointslice_port_name`: Named port of the referenced endpoint.
   * `__meta_kubernetes_endpointslice_port_protocol`: Protocol of the referenced endpoint.
@@ -2191,6 +1809,8 @@ The `ingress` role discovers a target for each path of each ingress.
 This is generally useful for blackbox monitoring of an ingress.
 The address will be set to the host specified in the ingress spec.
 
+The role requires the `networking.k8s.io/v1` API version (available since Kubernetes v1.19).
+
 Available meta labels:
 
 * `__meta_kubernetes_namespace`: The namespace of the ingress object.
@@ -2222,54 +1842,6 @@ role:
 # Note that api_server and kube_config are mutually exclusive.
 [ kubeconfig_file: <filename> ]
 
-# Optional authentication information used to authenticate to the API server.
-# Note that `basic_auth` and `authorization` options are mutually exclusive.
-# password and password_file are mutually exclusive.
-
-# Optional HTTP basic authentication information.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
-  [ <oauth2> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
-
 # Optional namespace discovery. If omitted, all namespaces are used.
 namespaces:
   own_namespace: <boolean>
@@ -2299,6 +1871,10 @@ attach_metadata:
 # Attaches node metadata to discovered targets. Valid for roles: pod, endpoints, endpointslice.
 # When set to true, Prometheus must have permissions to get Nodes.
 [ node: <boolean> | default = false ]
+
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 See [this example Prometheus configuration file](/documentation/examples/prometheus-kubernetes.yml)
@@ -2339,54 +1915,9 @@ server:
 # The time after which the monitoring assignments are refreshed.
 [ fetch_timeout: <duration> | default = 2m ]
 
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
-
-# Authentication information used to authenticate to the Docker daemon.
-# Note that `basic_auth` and `authorization` options are
-# mutually exclusive.
-# password and password_file are mutually exclusive.
-
-# Optional HTTP basic authentication information.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional the `Authorization` header configuration.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials with the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
-  [ <oauth2> ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 The [relabeling phase](#relabel_config) is the preferred and more powerful way
@@ -2440,53 +1971,9 @@ See below for the configuration options for Lightsail discovery:
 # instead be specified in the relabeling rule.
 [ port: <int> | default = 80 ]
 
-# Authentication information used to authenticate to the Lightsail API.
-# Note that `basic_auth`, `authorization` and `oauth2` options are
-# mutually exclusive.
-# `password` and `password_file` are mutually exclusive.
-
-# Optional HTTP basic authentication information, currently not supported by AWS.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration, currently not supported by AWS.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutuall exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration, currently not supported by AWS.
-oauth2:
-  [ <oauth2> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 ### `<linode_sd_config>`
@@ -2497,6 +1984,8 @@ This service discovery uses the public IPv4 address by default, by that
 can be changed with relabeling, as demonstrated in
 [the Prometheus linode-sd configuration file](/documentation/examples/prometheus-linode.yml).
 
+Linode APIv4 Token must be created with scopes: `linodes:read_only`, `ips:read_only`, and `events:read_only`.
+
 The following meta labels are available on targets during [relabeling](#relabel_config):
 
 * `__meta_linode_instance_id`: the id of the linode instance
@@ -2524,59 +2013,10 @@ The following meta labels are available on targets during [relabeling](#relabel_
 * `__meta_linode_ipv6_ranges`: a list of IPv6 ranges with mask assigned to the linode instance joined by the tag separator
 
 ```yaml
-# Authentication information used to authenticate to the API server.
-# Note that `basic_auth` and `authorization` options are
-# mutually exclusive.
-# password and password_file are mutually exclusive.
-# Note: Linode APIv4 Token must be created with scopes: 'linodes:read_only', 'ips:read_only', and 'events:read_only'
-
-# Optional HTTP basic authentication information, not currently supported by Linode APIv4.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional the `Authorization` header configuration.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials with the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
-  [ <oauth2> ]
 
 # Optional region to filter on.
 [ region: <string> ]
 
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
-
 # The port to scrape metrics from.
 [ port: <int> | default = 80 ]
@@ -2585,6 +2025,10 @@ tls_config:
 
 # The time after which the linode instances are refreshed.
 [ refresh_interval: <duration> | default = 60s ]
+
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 ### `<marathon_sd_config>`
@@ -2625,55 +2069,9 @@ servers:
 # It is mutually exclusive with `auth_token` and other authentication mechanisms.
 [ auth_token_file: <filename> ]
 
-# Sets the `Authorization` header on every request with the
-# configured username and password.
-# This is mutually exclusive with other authentication mechanisms.
-# password and password_file are mutually exclusive.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration.
-# NOTE: The current version of DC/OS marathon (v1.11.0) does not support
-# standard `Authentication` header, use `auth_token` or `auth_token_file`
-# instead.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
-  [ <oauth2> ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# TLS configuration for connecting to marathon servers
-tls_config:
-  [ <tls_config> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 By default every app listed in Marathon will be scraped by Prometheus. If not all
@@ -2735,53 +2133,9 @@ The following meta labels are available on targets during [relabeling](#relabel_
 [ server: <host> ]
 [ tag_separator: <string> | default = ,]
 
-# Authentication information used to authenticate to the nomad server.
-# Note that `basic_auth`, `authorization` and `oauth2` options are
-# mutually exclusive.
-# `password` and `password_file` are mutually exclusive.
-
-# Optional HTTP basic authentication information.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-oauth2:
-  [ <oauth2> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 ### `<serverset_sd_config>`
@@ -2919,54 +2273,12 @@ See below for the configuration options for Eureka discovery:
 # The URL to connect to the Eureka server.
 server: <string>
 
-# Sets the `Authorization` header on every request with the
-# configured username and password.
-# password and password_file are mutually exclusive.
-basic_auth:
-  [ username: <string> ]
-  [ password: <secret> ]
-  [ password_file: <string> ]
-
-# Optional `Authorization` header configuration.
-authorization:
-  # Sets the authentication type.
-  [ type: <string> | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: <secret> ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: <filename> ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
-  [ <oauth2> ]
-
-# Configures the scrape request's TLS settings.
-tls_config:
-  [ <tls_config> ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
 # Refresh interval to re-read the app instance list.
 [ refresh_interval: <duration> | default = 30s ]
+
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 See [the Prometheus eureka-sd configuration file](/documentation/examples/prometheus-eureka.yml)
@@ -3067,27 +2379,9 @@ tags_filter:
 # Refresh interval to re-read the targets list.
 [ refresh_interval: <duration> | default = 60s ]
 
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: <boolean> | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: <boolean> | default: true ]
-
-# Optional proxy URL.
-[ proxy_url: <string> ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: <string> ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: <boolean> | default: false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ <string>: [<secret>, ...] ] ]
-
-# TLS configuration.
-tls_config:
-  [ <tls_config> ]
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+[ <http_config> ]
 ```
 
 ### `<uyuni_sd_config>`
@@ -3127,49 +2421,9 @@ password:
 # Refresh interval to re-read the managed targets list.
[ refresh_interval: | default = 60s ] -# Optional HTTP basic authentication information, currently not supported by Uyuni. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration, currently not supported by Uyuni. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration, currently not supported by Uyuni. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` See [the Prometheus uyuni-sd configuration file](/documentation/examples/prometheus-uyuni.yml) @@ -3204,60 +2458,15 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_vultr_instance_allowed_bandwidth_gb` : Monthly bandwidth quota in GB. ```yaml -# Authentication information used to authenticate to the API server. -# Note that `basic_auth` and `authorization` options are -# mutually exclusive. -# password and password_file are mutually exclusive. - -# Optional HTTP basic authentication information, not currently supported by Vultr. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - -# TLS configuration. -tls_config: - [ ] - # The port to scrape metrics from. 
[ port: | default = 80 ] # The time after which the instances are refreshed. [ refresh_interval: | default = 60s ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` @@ -3409,25 +2618,6 @@ through the `__alerts_path__` label. # Configures the protocol scheme used for requests. [ scheme: | default = http ] -# Sets the `Authorization` header on every request with the -# configured username and password. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default: Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] - # Optionally configures AWS's Signature Verification 4 signing process to sign requests. # Cannot be set at the same time as basic_auth, authorization, oauth2, azuread or google_iam. # To use the default credentials from the AWS SDK, use `sigv4: {}`. @@ -3447,32 +2637,9 @@ sigv4: # AWS Role ARN, an alternative to using AWS API keys. [ role_arn: ] -# Optional OAuth 2.0 configuration. -# Cannot be used at the same time as basic_auth or authorization. -oauth2: - [ ] - -# Configures the scrape request's TLS settings. -tls_config: - [ ] - -# Optional proxy URL. -[ proxy_url: ] -# Comma-separated string that can contain IPs, CIDR notation, domain names -# that should be excluded from proxying. IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] # List of Azure service discovery configurations. azure_sd_configs: @@ -3643,24 +2810,11 @@ write_relabel_configs: # For the `io.prometheus.write.v2.Request` message, this option is noop (always true). [ send_native_histograms: | default = false ] -# Sets the `Authorization` header on every remote write request with the -# configured username and password. -# password and password_file are mutually exclusive. -basic_auth: - [ username: ] - [ password: ] - [ password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [ type: | default = Bearer ] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [ credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [ credentials_file: ] +# When enabled, remote-write will resolve the URL host name via DNS, choose one of the IP addresses at random, and connect to it. +# When disabled, remote-write relies on Go's standard behavior, which is to try to connect to each address in turn. 
+# The connection timeout applies to the whole operation, i.e. in the latter case it is spread over all attempts.
+# This is an experimental feature, and its behavior might still change, or even get removed.
+[ round_robin_dns: | default = false ]

# Optionally configures AWS's Signature Verification 4 signing process to
# sign requests. Cannot be set at the same time as basic_auth, authorization, oauth2, or azuread.
@@ -3681,11 +2835,6 @@ sigv4:
  # AWS Role ARN, an alternative to using AWS API keys.
  [ role_arn: ]

-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth, authorization, sigv4, azuread or google_iam.
-oauth2:
-  [ ]
-
# Optional AzureAD configuration.
# Cannot be used at the same time as basic_auth, authorization, oauth2, sigv4 or google_iam.
azuread:
@@ -3712,31 +2861,9 @@ azuread:
# Cannot be used at the same time as basic_auth, authorization, oauth2, sigv4 or azuread.
# To use the default credentials from the Google Cloud SDK, use `google_iam: {}`.
google_iam:
-  # Service account key with monitoring write permessions.
+  # Service account key with monitoring write permissions.
  credentials_file:

-# Configures the remote write request's TLS settings.
-tls_config:
-  [ ]
-
-# Optional proxy URL.
-[ proxy_url: ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying. IP and domain names can
-# contain port numbers.
-[ no_proxy: ]
-# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy)
-[ proxy_from_environment: | default = false ]
-# Specifies headers to send to proxies during CONNECT requests.
-[ proxy_connect_header:
-  [ : [, ...] ] ]
-
-# Configure whether HTTP requests follow HTTP 3xx redirects.
-[ follow_redirects: | default = true ]
-
-# Whether to enable HTTP2.
-[ enable_http2: | default = true ]
-
# Configures the queue used to write to remote storage.
queue_config:
  # Number of samples to buffer per shard before we block reading of more
@@ -3778,6 +2905,11 @@ metadata_config:
  [ send_interval: | default = 1m ]
  # Maximum number of samples per send.
  [ max_samples_per_send: | default = 500]
+
+# HTTP client settings, including authentication methods (such as basic auth and
+# authorization), proxy configurations, TLS options, custom HTTP headers, etc.
+# enable_http2 defaults to false for remote-write.
+[ ]
```

There is a list of
@@ -3812,54 +2944,12 @@ headers:
# the local storage should have complete data for.
[ read_recent: | default = false ]

-# Sets the `Authorization` header on every remote read request with the
-# configured username and password.
-# password and password_file are mutually exclusive.
-basic_auth:
-  [ username: ]
-  [ password: ]
-  [ password_file: ]
-
-# Optional `Authorization` header configuration.
-authorization:
-  # Sets the authentication type.
-  [ type: | default: Bearer ]
-  # Sets the credentials. It is mutually exclusive with
-  # `credentials_file`.
-  [ credentials: ]
-  # Sets the credentials to the credentials read from the configured file.
-  # It is mutually exclusive with `credentials`.
-  [ credentials_file: ]
-
-# Optional OAuth 2.0 configuration.
-# Cannot be used at the same time as basic_auth or authorization.
-oauth2:
-  [ ]
-
-# Configures the remote read request's TLS settings.
-tls_config:
-  [ ]
-
-# Optional proxy URL.
-[ proxy_url: ]
-# Comma-separated string that can contain IPs, CIDR notation, domain names
-# that should be excluded from proxying.
IP and domain names can -# contain port numbers. -[ no_proxy: ] -# Use proxy URL indicated by environment variables (HTTP_PROXY, https_proxy, HTTPs_PROXY, https_proxy, and no_proxy) -[ proxy_from_environment: | default: false ] -# Specifies headers to send to proxies during CONNECT requests. -[ proxy_connect_header: - [ : [, ...] ] ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[ follow_redirects: | default = true ] - -# Whether to enable HTTP2. -[ enable_http2: | default: true ] - # Whether to use the external labels as selectors for the remote read endpoint. [ filter_external_labels: | default = true ] + +# HTTP client settings, including authentication methods (such as basic auth and +# authorization), proxy configurations, TLS options, custom HTTP headers, etc. +[ ] ``` There is a list of @@ -3870,8 +2960,6 @@ with this feature. `tsdb` lets you configure the runtime-reloadable configuration settings of the TSDB. -NOTE: Out-of-order ingestion is an experimental feature, but you do not need any additional flag to enable it. Setting `out_of_order_time_window` to a positive duration enables it. - ```yaml # Configures how old an out-of-order/out-of-bounds sample can be w.r.t. the TSDB max time. # An out-of-order/out-of-bounds sample is ingested into the TSDB as long as the timestamp diff --git a/docs/configuration/recording_rules.md b/docs/configuration/recording_rules.md index 9aa226bbc0..9a8e7a70c9 100644 --- a/docs/configuration/recording_rules.md +++ b/docs/configuration/recording_rules.md @@ -89,6 +89,11 @@ name: # Offset the rule evaluation timestamp of this particular group by the specified duration into the past. [ query_offset: | default = global.rule_query_offset ] +# Labels to add or overwrite before storing the result for its rules. +# Labels defined in will override the key if it has a collision. +labels: + [ : ] + rules: [ - ... ] ``` diff --git a/docs/feature_flags.md b/docs/feature_flags.md index 7b07a04d0e..8c0e319f9c 100644 --- a/docs/feature_flags.md +++ b/docs/feature_flags.md @@ -11,23 +11,6 @@ Their behaviour can change in future releases which will be communicated via the You can enable them using the `--enable-feature` flag with a comma separated list of features. They may be enabled by default in future versions. -## Expand environment variables in external labels - -`--enable-feature=expand-external-labels` - -Replace `${var}` or `$var` in the [`external_labels`](configuration/configuration.md#configuration-file) -values according to the values of the current environment variables. References -to undefined variables are replaced by the empty string. -The `$` character can be escaped by using `$$`. - -## Remote Write Receiver - -`--enable-feature=remote-write-receiver` - -The remote write receiver allows Prometheus to accept remote write requests from other Prometheus servers. More details can be found [here](storage.md#overview). - -Activating the remote write receiver via a feature flag is deprecated. Use `--web.enable-remote-write-receiver` instead. This feature flag will be ignored in future versions of Prometheus. - ## Exemplars storage `--enable-feature=exemplar-storage` @@ -40,9 +23,8 @@ Exemplar storage is implemented as a fixed size circular buffer that stores exem `--enable-feature=memory-snapshot-on-shutdown` -This takes the snapshot of the chunks that are in memory along with the series information when shutting down and stores -it on disk. 
This will reduce the startup time since the memory state can be restored with this snapshot and m-mapped -chunks without the need of WAL replay. +This takes a snapshot of the chunks that are in memory along with the series information when shutting down and stores it on disk. This will reduce the startup time since the memory state can now be restored with this snapshot +and m-mapped chunks, while a WAL replay from disk is only needed for the parts of the WAL that are not part of the snapshot. ## Extra scrape metrics @@ -55,30 +37,6 @@ When enabled, for each instance scrape, Prometheus stores a sample in the follow to find out how close they are to reaching the limit with `scrape_samples_post_metric_relabeling / scrape_sample_limit`. Note that `scrape_sample_limit` can be zero if there is no limit configured, which means that the query above can return `+Inf` for targets with no limit (as we divide by zero). If you want to query only for targets that do have a sample limit use this query: `scrape_samples_post_metric_relabeling / (scrape_sample_limit > 0)`. - `scrape_body_size_bytes`. The uncompressed size of the most recent scrape response, if successful. Scrapes failing because `body_size_limit` is exceeded report `-1`, other scrape failures report `0`. -## New service discovery manager - -`--enable-feature=new-service-discovery-manager` - -When enabled, Prometheus uses a new service discovery manager that does not -restart unchanged discoveries upon reloading. This makes reloads faster and reduces -pressure on service discoveries' sources. - -Users are encouraged to test the new service discovery manager and report any -issues upstream. - -In future releases, this new service discovery manager will become the default and -this feature flag will be ignored. - -## Prometheus agent - -`--enable-feature=agent` - -When enabled, Prometheus runs in agent mode. The agent mode is limited to -discovery, scrape and remote write. - -This is useful when you do not need to query the Prometheus data locally, but -only from a central [remote endpoint](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage). - ## Per-step stats `--enable-feature=promql-per-step-stats` @@ -89,29 +47,6 @@ statistics. Currently this is limited to totalQueryableSamples. When disabled in either the engine or the query, per-step statistics are not computed at all. -## Auto GOMAXPROCS - -`--enable-feature=auto-gomaxprocs` - -When enabled, GOMAXPROCS variable is automatically set to match Linux container CPU quota. - -## Auto GOMEMLIMIT - -`--enable-feature=auto-gomemlimit` - -When enabled, the GOMEMLIMIT variable is automatically set to match the Linux container memory limit. If there is no container limit, or the process is running outside of containers, the system memory total is used. - -There is also an additional tuning flag, `--auto-gomemlimit.ratio`, which allows controlling how much of the memory is used for Prometheus. The remainder is reserved for memory outside the process. For example, kernel page cache. Page cache is important for Prometheus TSDB query performance. The default is `0.9`, which means 90% of the memory limit will be used for Prometheus. - -## No default scrape port - -`--enable-feature=no-default-scrape-port` - -When enabled, the default ports for HTTP (`:80`) or HTTPS (`:443`) will _not_ be added to -the address used to scrape a target (the value of the `__address_` label), contrary to the default behavior. 
-In addition, if a default HTTP or HTTPS port has already been added either in a static configuration or -by a service discovery mechanism and the respective scheme is specified (`http` or `https`), that port will be removed. - ## Native Histograms `--enable-feature=native-histograms` @@ -134,67 +69,7 @@ those classic histograms that do not come with a corresponding native histogram. However, if a native histogram is present, Prometheus will ignore the corresponding classic histogram, with the notable exception of exemplars, which are always ingested. To keep the classic histograms as well, enable -`scrape_classic_histograms` in the scrape job. - -_Note about the format of `le` and `quantile` label values:_ - -In certain situations, the protobuf parsing changes the number formatting of -the `le` labels of classic histograms and the `quantile` labels of -summaries. Typically, this happens if the scraped target is instrumented with -[client_golang](https://github.com/prometheus/client_golang) provided that -[promhttp.HandlerOpts.EnableOpenMetrics](https://pkg.go.dev/github.com/prometheus/client_golang/prometheus/promhttp#HandlerOpts) -is set to `false`. In such a case, integer label values are represented in the -text format as such, e.g. `quantile="1"` or `le="2"`. However, the protobuf parsing -changes the representation to float-like (following the OpenMetrics -specification), so the examples above become `quantile="1.0"` and `le="2.0"` after -ingestion into Prometheus, which changes the identity of the metric compared to -what was ingested before via the text format. - -The effect of this change is that alerts, recording rules and dashboards that -directly reference label values as whole numbers such as `le="1"` will stop -working. - -Aggregation by the `le` and `quantile` labels for vectors that contain the old and -new formatting will lead to unexpected results, and range vectors that span the -transition between the different formatting will contain additional series. -The most common use case for both is the quantile calculation via -`histogram_quantile`, e.g. -`histogram_quantile(0.95, sum by (le) (rate(histogram_bucket[10m])))`. -The `histogram_quantile` function already tries to mitigate the effects to some -extent, but there will be inaccuracies, in particular for shorter ranges that -cover only a few samples. - -Ways to deal with this change either globally or on a per metric basis: - -- Fix references to integer `le`, `quantile` label values, but otherwise do -nothing and accept that some queries that span the transition time will produce -inaccurate or unexpected results. -_This is the recommended solution, to get consistently normalized label values._ -Also Prometheus 3.0 is expected to enforce normalization of these label values. -- Use `metric_relabel_config` to retain the old labels when scraping targets. -This should **only** be applied to metrics that currently produce such labels. - - -```yaml - metric_relabel_configs: - - source_labels: - - quantile - target_label: quantile - regex: (\d+)\.0+ - - source_labels: - - le - - __name__ - target_label: le - regex: (\d+)\.0+;.*_bucket -``` - -## OTLP Receiver - -`--enable-feature=otlp-write-receiver` - -The OTLP receiver allows Prometheus to accept [OpenTelemetry](https://opentelemetry.io/) metrics writes. -Prometheus is best used as a Pull based system, and staleness, `up` metric, and other Pull enabled features -won't work when you push OTLP metrics. +`always_scrape_classic_histograms` in the scrape job. 
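+
+As a hedged sketch (the job name and target below are illustrative and not
+taken from this documentation), a scrape job that keeps ingesting classic
+histograms alongside native ones could look like this:
+
+```yaml
+scrape_configs:
+  - job_name: "example"
+    # Also ingest the classic series even when a native histogram is exposed.
+    always_scrape_classic_histograms: true
+    static_configs:
+      - targets: ["localhost:8080"]
+```
+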
## Experimental PromQL functions @@ -226,6 +101,12 @@ This has the potential to improve rule group evaluation latency and resource uti The number of concurrent rule evaluations can be configured with `--rules.max-concurrent-rule-evals`, which is set to `4` by default. +## Serve old Prometheus UI + +Fall back to serving the old (Prometheus 2.x) web UI instead of the new UI. The new UI that was released as part of Prometheus 3.0 is a complete rewrite and aims to be cleaner, less cluttered, and more modern under the hood. However, it is not fully feature complete and battle-tested yet, so some users may still prefer using the old UI. + +`--enable-feature=old-ui` + ## Metadata WAL Records `--enable-feature=metadata-wal-records` @@ -258,10 +139,15 @@ When enabled, Prometheus will change the way in which the `__name__` label is re This allows optionally preserving the `__name__` label via the `label_replace` and `label_join` functions, and helps prevent the "vector cannot contain metrics with the same labelset" error, which can happen when applying a regex-matcher to the `__name__` label. -## UTF-8 Name Support +## Auto Reload Config -`--enable-feature=utf8-names` +`--enable-feature=auto-reload-config` -When enabled, changes the metric and label name validation scheme inside Prometheus to allow the full UTF-8 character set. -By itself, this flag does not enable the request of UTF-8 names via content negotiation. -Users will also have to set `metric_name_validation_scheme` in scrape configs to enable the feature either on the global config or on a per-scrape config basis. +When enabled, Prometheus will automatically reload its configuration file at a +specified interval. The interval is defined by the +`--config.auto-reload-interval` flag, which defaults to `30s`. + +Configuration reloads are triggered by detecting changes in the checksum of the +main configuration file or any referenced files, such as rule and scrape +configurations. To ensure consistency and avoid issues during reloads, it's +recommended to update these files atomically. diff --git a/docs/migration.md b/docs/migration.md index cb88bbfd6f..e2d53472f3 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -3,198 +3,216 @@ title: Migration sort_rank: 10 --- -# Prometheus 2.0 migration guide +# Prometheus 3.0 migration guide -In line with our [stability promise](https://prometheus.io/blog/2016/07/18/prometheus-1-0-released/#fine-print), -the Prometheus 2.0 release contains a number of backwards incompatible changes. -This document offers guidance on migrating from Prometheus 1.8 to Prometheus 2.0 and newer versions. +In line with our [stability promise](https://prometheus.io/docs/prometheus/latest/stability/), +the Prometheus 3.0 release contains a number of backwards incompatible changes. +This document offers guidance on migrating from Prometheus 2.x to Prometheus 3.0 and newer versions. ## Flags -The format of Prometheus command line flags has changed. Instead of a -single dash, all flags now use a double dash. Common flags (`--config.file`, -`--web.listen-address` and `--web.external-url`) remain but -almost all storage-related flags have been removed. 
+- The following feature flags have been removed and they have been added to the
+  default behavior of Prometheus v3:
+  - `promql-at-modifier`
+  - `promql-negative-offset`
+  - `remote-write-receiver`
+  - `new-service-discovery-manager`
+  - `expand-external-labels`
+    - Environment variable references `${var}` or `$var` in external label values
+      are replaced according to the values of the current environment variables.
+    - References to undefined variables are replaced by the empty string.
+      The `$` character can be escaped by using `$$`.
+  - `no-default-scrape-port`
+    - Prometheus v3 will no longer add ports to scrape targets according to the
+      specified scheme. Targets will now appear in labels as configured.
+    - If you rely on scrape targets like
+      `https://example.com/metrics` or `http://example.com/metrics` to be
+      represented as
+      `https://example.com/metrics:443` and
+      `http://example.com/metrics:80` respectively, add them to your target URLs.
+  - `agent`
+    - Instead use the dedicated `--agent` CLI flag.
+  - `auto-gomemlimit`
+    - Prometheus v3 will automatically set `GOMEMLIMIT` to match the Linux
+      container memory limit. If there is no container limit, or the process is
+      running outside of containers, the system memory total is used. To disable
+      this, `--no-auto-gomemlimit` is available.
+  - `auto-gomaxprocs`
+    - Prometheus v3 will automatically set `GOMAXPROCS` to match the Linux
+      container CPU quota. To disable this, `--no-auto-gomaxprocs` is available.

-Some notable flags which have been removed:
+  Prometheus v3 will log a warning if you continue to pass these to
+  `--enable-feature`.

-- `-alertmanager.url` In Prometheus 2.0, the command line flags for configuring
-  a static Alertmanager URL have been removed. Alertmanager must now be
-  discovered via service discovery, see [Alertmanager service discovery](#alertmanager-service-discovery).
+## Configuration

-- `-log.format` In Prometheus 2.0 logs can only be streamed to standard error.
-
-- `-query.staleness-delta` has been renamed to `--query.lookback-delta`; Prometheus
-  2.0 introduces a new mechanism for handling staleness, see [staleness](querying/basics.md#staleness).
-
-- `-storage.local.*` Prometheus 2.0 introduces a new storage engine; as such all
-  flags relating to the old engine have been removed. For information on the
-  new engine, see [Storage](#storage).
-
-- `-storage.remote.*` Prometheus 2.0 has removed the deprecated remote
-  storage flags, and will fail to start if they are supplied. To write to
-  InfluxDB, Graphite, or OpenTSDB use the relevant storage adapter.
-
-## Alertmanager service discovery
-
-Alertmanager service discovery was introduced in Prometheus 1.4, allowing Prometheus
-to dynamically discover Alertmanager replicas using the same mechanism as scrape
-targets. In Prometheus 2.0, the command line flags for static Alertmanager config
-have been removed, so the following command line flag:
-
-```
-./prometheus -alertmanager.url=http://alertmanager:9093/
-```
-
-Would be replaced with the following in the `prometheus.yml` config file:
-
-```yaml
-alerting:
-  alertmanagers:
-  - static_configs:
-    - targets:
-      - alertmanager:9093
-```
-
-You can also use all the usual Prometheus service discovery integrations and
-relabeling in your Alertmanager configuration. This snippet instructs
-Prometheus to search for Kubernetes pods, in the `default` namespace, with the
-label `name: alertmanager` and with a non-empty port.
-
-```yaml
-alerting:
-  alertmanagers:
-  - kubernetes_sd_configs:
-      - role: pod
-    tls_config:
-      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
-    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
-    relabel_configs:
-    - source_labels: [__meta_kubernetes_pod_label_name]
-      regex: alertmanager
-      action: keep
-    - source_labels: [__meta_kubernetes_namespace]
-      regex: default
-      action: keep
-    - source_labels: [__meta_kubernetes_pod_container_port_number]
-      regex:
-      action: drop
-```
-
-## Recording rules and alerts
-
-The format for configuring alerting and recording rules has been changed to YAML.
-An example of a recording rule and alert in the old format:
-
-```
-job:request_duration_seconds:histogram_quantile99 =
-  histogram_quantile(0.99, sum by (le, job) (rate(request_duration_seconds_bucket[1m])))
-
-ALERT FrontendRequestLatency
-  IF job:request_duration_seconds:histogram_quantile99{job="frontend"} > 0.1
-  FOR 5m
-  ANNOTATIONS {
-    summary = "High frontend request latency",
-  }
-```
-
-Would look like this:
-
-```yaml
-groups:
-- name: example.rules
-  rules:
-  - record: job:request_duration_seconds:histogram_quantile99
-    expr: histogram_quantile(0.99, sum by (le, job) (rate(request_duration_seconds_bucket[1m])))
-  - alert: FrontendRequestLatency
-    expr: job:request_duration_seconds:histogram_quantile99{job="frontend"} > 0.1
-    for: 5m
-    annotations:
-      summary: High frontend request latency
-```
-
-To help with the change, the `promtool` tool has a mode to automate the rules conversion. Given a `.rules` file, it will output a `.rules.yml` file in the
-new format. For example:
-
-```
-$ promtool update rules example.rules
-```
-
-You will need to use `promtool` from [Prometheus 2.5](https://github.com/prometheus/prometheus/releases/tag/v2.5.0) as later versions no longer contain the above subcommand.
-
-## Storage
-
-The data format in Prometheus 2.0 has completely changed and is not backwards
-compatible with 1.8 and older versions. To retain access to your historic monitoring data we
-recommend you run a non-scraping Prometheus instance running at least version
-1.8.1 in parallel with your Prometheus 2.0 instance, and have the new server
-read existing data from the old one via the remote read protocol.
-
-Your Prometheus 1.8 instance should be started with the following flags and an
-config file containing only the `external_labels` setting (if any):
-
-```
-$ ./prometheus-1.8.1.linux-amd64/prometheus -web.listen-address ":9094" -config.file old.yml
-```
-
-Prometheus 2.0 can then be started (on the same machine) with the following flags:
-
-```
-$ ./prometheus-2.0.0.linux-amd64/prometheus --config.file prometheus.yml
-```
-
-Where `prometheus.yml` contains in addition to your full existing configuration, the stanza:
-
-```yaml
-remote_read:
-  - url: "http://localhost:9094/api/v1/read"
-```
+- The scrape job level configuration option `scrape_classic_histograms` has been
+  renamed to `always_scrape_classic_histograms`. If you use the
+  `--enable-feature=native-histograms` feature flag to ingest native histograms
+  and you also want to ingest classic histograms that an endpoint might expose
+  along with native histograms, be sure to add this configuration or change your
+  configuration from the old name.
+- The default of `http_config.enable_http2` in `remote_write` items has been
+  changed to `false`. In Prometheus v2 the remote write HTTP client would
+  default to using http2. In order to parallelize multiple remote write queues
+  across multiple sockets, it's preferable not to default to http2.
+  If you prefer to use http2 for remote write, you must now set
+  `http_config.enable_http2: true` in your `remote_write` configuration section.

## PromQL

-The following features have been removed from PromQL:
+- The `.` pattern in regular expressions in PromQL matches newline characters.
+  With this change, a regular expression like `.*` matches strings that include
+  `\n`. This applies to matchers in queries and relabel configs.
+  - For example, the following regular expressions now match the accompanying
+    strings, whereas in Prometheus v2 these combinations didn't match.
+    - `.*` additionally matches `foo\n` and `Foo\nBar`
+    - `foo.?bar` additionally matches `foo\nbar`
+    - `foo.+bar` additionally matches `foo\nbar`
+  - If you want Prometheus v3 to behave like v2, you will have to change your
+    regular expressions by replacing all `.` patterns with `[^\n]`, e.g.
+    `foo[^\n]*`.
+- Lookback and range selectors are left open and right closed (previously left
+  closed and right closed). This change affects queries when the evaluation time
+  perfectly aligns with the sample timestamps. For example, assume querying a
+  time series with evenly spaced samples exactly 1 minute apart. Before Prometheus
+  v3, a range query with `5m` would usually return 5 samples. But if the query
+  evaluation aligns perfectly with a scrape, it would return 6 samples. In
+  Prometheus v3 queries like this will always return 5 samples.
+  This change likely has few effects for everyday use, except for some subquery
+  use cases.
+  Query front-ends that align queries usually align subqueries to multiples of
+  the step size. These subqueries will likely be affected.
+  Tests are more likely to be affected. To fix those, either adjust the expected
+  number of samples or extend the range by less than one sample interval.
+- The `holt_winters` function has been renamed to `double_exponential_smoothing`
+  and is now guarded by the `promql-experimental-functions` feature flag.
+  If you want to keep using `holt_winters`, you have to do both of these things:
+  - Rename `holt_winters` to `double_exponential_smoothing` in your queries.
+  - Pass `--enable-feature=promql-experimental-functions` in your Prometheus
+    CLI invocation.

-- `drop_common_labels` function - the `without` aggregation modifier should be used
-  instead.
-- `keep_common` aggregation modifier - the `by` modifier should be used instead.
-- `count_scalar` function - use cases are better handled by `absent()` or correct
-  propagation of labels in operations.
+## Scrape protocols
+
+Prometheus v3 is more strict concerning the Content-Type header received when
+scraping. Prometheus v2 would default to the standard Prometheus text protocol
+if the target being scraped did not specify a Content-Type header or if the
+header was unparsable or unrecognised. This could lead to incorrect data being
+parsed in the scrape. Prometheus v3 will now fail the scrape in such cases.

-See [issue #3060](https://github.com/prometheus/prometheus/issues/3060) for more
-details.
+If a scrape target is not providing the correct Content-Type header, the
+fallback protocol can be specified using the `fallback_scrape_protocol`
+parameter.
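+
+As a hedged sketch (the job name and the chosen fallback value are
+illustrative), this could look like:
+
+```yaml
+scrape_configs:
+  - job_name: "legacy-exporter"
+    # Assume the Prometheus text format when no valid Content-Type is sent.
+    fallback_scrape_protocol: "PrometheusText0.0.4"
+```
+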
See the [Prometheus scrape_config documentation](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config).
+
+This is a breaking change as scrapes that may have succeeded with Prometheus v2
+may now fail if this fallback protocol is not specified.

## Miscellaneous

-### Prometheus non-root user
+### TSDB format and downgrade

-The Prometheus Docker image is now built to [run Prometheus
-as a non-root user](https://github.com/prometheus/prometheus/pull/2859). If you
-want the Prometheus UI/API to listen on a low port number (say, port 80), you'll
-need to override it. For Kubernetes, you would use the following YAML:
+The TSDB format has been changed slightly in Prometheus v2.55 in preparation for changes
+to the index format. Consequently, a Prometheus v3 TSDB can only be read by
+Prometheus v2.55 or newer. Keep that in mind when upgrading to v3 -- you will only be
+able to downgrade to v2.55, not lower, without losing your TSDB persistent data.
+
+As an extra safety measure, you could optionally consider upgrading to v2.55 first and
+confirming that Prometheus works as expected, before upgrading to v3.
+
+### TSDB storage contract
+
+TSDB-compatible storage is now expected to return results matching the specified
+selectors. This might impact some third-party implementations, most likely
+implementing `remote_read`.
+
+This contract is not explicitly enforced, but can cause undefined behavior.
+
+### UTF-8 names
+
+Prometheus v3 supports UTF-8 in metric and label names. This means metric and
+label names can change after upgrading according to what is exposed by
+endpoints. Furthermore, metric and label names that would have previously been
+flagged as invalid no longer will be.
+
+Users wishing to preserve the original validation behavior can update their
+Prometheus YAML configuration to specify the legacy validation scheme:
+
+```yaml
+global:
+  metric_name_validation_scheme: legacy
+```
+
+Or on a per-scrape basis:
+
+```yaml
+scrape_configs:
+  - job_name: job1
+    metric_name_validation_scheme: utf8
+  - job_name: job2
+    metric_name_validation_scheme: legacy
+```
+
+### Log message format
+
+Prometheus v3 has adopted `log/slog` over the previous `go-kit/log`. This
+results in a change of log message format.
An example of the old log format is:
+
+```
+ts=2024-10-23T22:01:06.074Z caller=main.go:627 level=info msg="No time or size retention was set so using the default time retention" duration=15d
+ts=2024-10-23T22:01:06.074Z caller=main.go:671 level=info msg="Starting Prometheus Server" mode=server version="(version=, branch=, revision=91d80252c3e528728b0f88d254dd720f6be07cb8-modified)"
+ts=2024-10-23T22:01:06.074Z caller=main.go:676 level=info build_context="(go=go1.23.0, platform=linux/amd64, user=, date=, tags=unknown)"
+ts=2024-10-23T22:01:06.074Z caller=main.go:677 level=info host_details="(Linux 5.15.0-124-generic #134-Ubuntu SMP Fri Sep 27 20:20:17 UTC 2024 x86_64 gigafips (none))"
+```
+
+A similar sequence in the new log format looks like this:
+
+```
+time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:640 msg="No time or size retention was set so using the default time retention" duration=15d
+time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:681 msg="Starting Prometheus Server" mode=server version="(version=, branch=, revision=7c7116fea8343795cae6da42960cacd0207a2af8)"
+time=2024-10-24T00:03:07.542+02:00 level=INFO source=/home/user/go/src/github.com/prometheus/prometheus/cmd/prometheus/main.go:686 msg="operational information" build_context="(go=go1.23.0, platform=linux/amd64, user=, date=, tags=unknown)" host_details="(Linux 5.15.0-124-generic #134-Ubuntu SMP Fri Sep 27 20:20:17 UTC 2024 x86_64 gigafips (none))" fd_limits="(soft=1048576, hard=1048576)" vm_limits="(soft=unlimited, hard=unlimited)"
+```
+
+### `le` and `quantile` label values
+
+In Prometheus v3, the values of the `le` label of classic histograms and the
+`quantile` label of summaries are normalized upon ingestion. In Prometheus v2
+the value of these labels depended on the scrape protocol (protobuf vs text
+format) in some situations. This led to label values changing based on the
+scrape protocol. E.g. a metric exposed as `my_classic_hist{le="1"}` would be
+ingested as `my_classic_hist{le="1"}` via the text format, but as
+`my_classic_hist{le="1.0"}` via protobuf. This changed the identity of the
+metric and caused problems when querying the metric.
+In Prometheus v3 these label values will always be normalized to a float-like
+representation. I.e., the above example will always result in
+`my_classic_hist{le="1.0"}` being ingested into Prometheus, regardless of the
+protocol. The effect of this change is that alerts, recording rules and
+dashboards that directly reference label values as whole numbers such as
+`le="1"` will stop working.
+
+Ways to deal with this change either globally or on a per-metric basis:
+
+- Fix references to integer `le`, `quantile` label values, but otherwise do
+nothing and accept that some queries that span the transition time will produce
+inaccurate or unexpected results.
+_This is the recommended solution._
+- Use `metric_relabel_config` to retain the old labels when scraping targets.
+This should **only** be applied to metrics that currently produce such labels.

```yaml
-apiVersion: v1
-kind: Pod
-metadata:
-  name: security-context-demo-2
-spec:
-  securityContext:
-    runAsUser: 0
-...
+  metric_relabel_configs:
+    - source_labels:
+        - quantile
+      target_label: quantile
+      regex: (\d+)\.0+
+    - source_labels:
+        - le
+        - __name__
+      target_label: le
+      regex: (\d+)\.0+;.*_bucket
```

-See [Configure a Security Context for a Pod or Container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/)
-for more details.
+### Disallow configuring Alertmanager with the v1 API
+
+Prometheus 3 no longer supports Alertmanager's v1 API. Effectively, Prometheus 3
+requires [Alertmanager 0.16.0](https://github.com/prometheus/alertmanager/releases/tag/v0.16.0) or later. Users with older Alertmanager
+versions or configurations that use `alerting: alertmanagers: [api_version: v1]`
+need to upgrade Alertmanager and change their configuration to use `api_version: v2`.

-If you're using Docker, then the following snippet would be used:
+# Prometheus 2.0 migration guide
-
-```
-docker run -p 9090:9090 prom/prometheus:latest
-```
-
-### Prometheus lifecycle
-
-If you use the Prometheus `/-/reload` HTTP endpoint to [automatically reload your
-Prometheus config when it changes](configuration/configuration.md),
-these endpoints are disabled by default for security reasons in Prometheus 2.0.
-To enable them, set the `--web.enable-lifecycle` flag.
+For the Prometheus 1.8 to 2.0 migration, please refer to the [Prometheus v2.55 documentation](https://prometheus.io/docs/prometheus/2.55/migration/).
diff --git a/docs/querying/api.md b/docs/querying/api.md
index efa244fbc8..87de463288 100644
--- a/docs/querying/api.md
+++ b/docs/querying/api.md
@@ -59,7 +59,7 @@ timestamps are always represented as Unix timestamps in seconds.
* ``: Prometheus [time series selectors](basics.md#time-series-selectors)
like `http_requests_total` or `http_requests_total{method=~"(GET|POST)"}` and need to be URL-encoded.
-* ``: [Prometheus duration strings](basics.md#time-durations).
+* ``: [the subset of Prometheus float literals using time units](basics.md#float-literals-and-time-durations).
For example, `5m` refers to a duration of 5 minutes.
* ``: boolean values (strings `true` and `false`).
@@ -239,6 +239,75 @@ $ curl 'http://localhost:9090/api/v1/format_query?query=foo/bar'
}
```

+## Parsing a PromQL expression into an abstract syntax tree (AST)
+
+This endpoint is **experimental** and might change in the future. It is currently only meant to be used by Prometheus' own web UI, and the endpoint name and exact format returned may change from one Prometheus version to another. It may also be removed if it is no longer needed by the UI.
+
+The following endpoint parses a PromQL expression and returns it as a JSON-formatted AST (abstract syntax tree) representation:
+
+```
+GET /api/v1/parse_query
+POST /api/v1/parse_query
+```
+
+URL query parameters:
+
+- `query=`: Prometheus expression query string.
+
+You can URL-encode these parameters directly in the request body by using the `POST` method and
+`Content-Type: application/x-www-form-urlencoded` header. This is useful when specifying a large
+query that may breach server-side URL character limits.
+
+The `data` section of the query result contains the AST of the parsed query expression as a JSON object.
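+
+As a hedged sketch of the `POST` form described above (the expression is
+illustrative):
+
+```
+$ curl http://localhost:9090/api/v1/parse_query \
+    --data-urlencode 'query=rate(http_requests_total[5m])'
+```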
+
+The following example parses the expression `foo/bar`:
+
+```json
+$ curl 'http://localhost:9090/api/v1/parse_query?query=foo/bar'
+{
+   "data" : {
+      "bool" : false,
+      "lhs" : {
+         "matchers" : [
+            {
+               "name" : "__name__",
+               "type" : "=",
+               "value" : "foo"
+            }
+         ],
+         "name" : "foo",
+         "offset" : 0,
+         "startOrEnd" : null,
+         "timestamp" : null,
+         "type" : "vectorSelector"
+      },
+      "matching" : {
+         "card" : "one-to-one",
+         "include" : [],
+         "labels" : [],
+         "on" : false
+      },
+      "op" : "/",
+      "rhs" : {
+         "matchers" : [
+            {
+               "name" : "__name__",
+               "type" : "=",
+               "value" : "bar"
+            }
+         ],
+         "name" : "bar",
+         "offset" : 0,
+         "startOrEnd" : null,
+         "timestamp" : null,
+         "type" : "vectorSelector"
+      },
+      "type" : "binaryExpr"
+   },
+   "status" : "success"
+}
+```
+
## Querying metadata

Prometheus offers a set of API endpoints to query metadata about series and their labels.
@@ -364,18 +433,40 @@ URL query parameters:
series from which to read the label values. Optional.
- `limit=`: Maximum number of returned series. Optional. 0 means disabled.
-
The `data` section of the JSON response is a list of string label values.

-This example queries for all label values for the `job` label:
+This example queries for all label values for the `http_status_code` label:

```json
-$ curl http://localhost:9090/api/v1/label/job/values
+$ curl http://localhost:9090/api/v1/label/http_status_code/values
{
   "status" : "success",
   "data" : [
-      "node",
-      "prometheus"
+      "200",
+      "504"
+   ]
+}
+```
+
+Label names can optionally be encoded using the Values Escaping method; this is necessary if a name includes the `/` character. To encode a name in this way:
+
+* Prepend the label with `U__`.
+* Letters, numbers, and colons appear as-is.
+* Convert single underscores to double underscores.
+* For all other characters, use the UTF-8 codepoint as a hex integer, surrounded
+  by underscores. So ` ` becomes `_20_` and a `.` becomes `_2e_`.
+
+  More information about text escaping can be found in the original UTF-8 [Proposal document](https://github.com/prometheus/proposals/blob/main/proposals/2023-08-21-utf8.md#text-escaping).
+
+This example queries for all label values for the `http.status_code` label:
+
+```json
+$ curl http://localhost:9090/api/v1/label/U__http_2e_status_code/values
+{
+   "status" : "success",
+   "data" : [
+      "200",
+      "404"
   ]
}
```
@@ -499,7 +590,7 @@ Instant vectors are returned as result type `vector`. The corresponding
Each series could have the `"value"` key, or the `"histogram"` key, but not both.
Series are not guaranteed to be returned in any particular order unless a function
-such as [`sort`](functions.md#sort) or [`sort_by_label`](functions.md#sort_by_label)`
+such as [`sort`](functions.md#sort) or [`sort_by_label`](functions.md#sort_by_label)
is used.

### Scalars
@@ -693,8 +784,10 @@ URL query parameters:
- `rule_name[]=`: only return rules with the given rule name. If the parameter is repeated, rules with any of the provided names are returned. If we've filtered out all the rules of a group, the group is not returned. When the parameter is absent or empty, no filtering is done.
- `rule_group[]=`: only return rules with the given rule group name. If the parameter is repeated, rules with any of the provided rule group names are returned. When the parameter is absent or empty, no filtering is done.
- `file[]=`: only return rules with the given filepath. If the parameter is repeated, rules with any of the provided filepaths are returned. When the parameter is absent or empty, no filtering is done.
-- `exclude_alerts=`: only return rules, do not return active alerts.
+- `exclude_alerts=`: only return rules, do not return active alerts.
- `match[]=`: only return rules that have configured labels that satisfy the label selectors. If the parameter is repeated, rules that match any of the sets of label selectors are returned. Note that matching is on the labels in the definition of each rule, not on the values after template expansion (for alerting rules). Optional.
+- `group_limit=`: The `group_limit` parameter allows you to specify a limit for the number of rule groups that is returned in a single response. If the total number of rule groups exceeds the specified `group_limit` value, the response will include a `groupNextToken` property. You can use the value of this `groupNextToken` property in subsequent requests in the `group_next_token` parameter to paginate over the remaining rule groups. The `groupNextToken` property will not be present in the final response, indicating that you have retrieved all the available rule groups. Please note that there are no guarantees regarding the consistency of the response if the rule groups are being modified during the pagination process.
+- `group_next_token`: the pagination token that was returned in the previous request when the `group_limit` property is set. The pagination token is used to iteratively paginate over a large number of rule groups. To use the `group_next_token` parameter, the `group_limit` parameter also needs to be present. If a rule group that coincides with the next token is removed while you are paginating over the rule groups, a response with status code 400 will be returned.

```json
$ curl http://localhost:9090/api/v1/rules
@@ -834,7 +927,7 @@ curl -G http://localhost:9091/api/v1/targets/metadata \
```

The following example returns metadata for all metrics for all targets with
-label `instance="127.0.0.1:9090`.
+label `instance="127.0.0.1:9090"`.

```json
curl -G http://localhost:9091/api/v1/targets/metadata \
@@ -1119,9 +1212,11 @@ The following endpoint returns various cardinality statistics about the Promethe
GET /api/v1/status/tsdb
```
URL query parameters:
+
- `limit=`: Limit the number of returned items to a given number for each set of statistics. By default, 10 items are returned.

-The `data` section of the query result consists of
+The `data` section of the query result consists of:
+
- **headStats**: This provides the following data about the head block of the TSDB:
  - **numSeries**: The number of series.
  - **chunkCount**: The number of chunks.
@@ -1197,13 +1292,13 @@ The following endpoint returns information about the WAL replay:
GET /api/v1/status/walreplay
```

-**read**: The number of segments replayed so far.
-**total**: The total number segments needed to be replayed.
-**progress**: The progress of the replay (0 - 100%).
-**state**: The state of the replay. Possible states:
-- **waiting**: Waiting for the replay to start.
-- **in progress**: The replay is in progress.
-- **done**: The replay has finished.
+- **read**: The number of segments replayed so far.
+- **total**: The total number of segments needed to be replayed.
+- **progress**: The progress of the replay (0 - 100%).
+- **state**: The state of the replay. Possible states:
+  - **waiting**: Waiting for the replay to start.
+  - **in progress**: The replay is in progress.
+  - **done**: The replay has finished.

```json
$ curl http://localhost:9090/api/v1/status/walreplay
@@ -1319,8 +1414,74 @@ is not considered an efficient way of ingesting samples.
Use it with caution for specific low-volume use cases. It is not suitable for replacing the ingestion via scraping. -Enable the OTLP receiver by the feature flag -`--enable-feature=otlp-write-receiver`. When enabled, the OTLP receiver +Enable the OTLP receiver by setting +`--web.enable-otlp-receiver`. When enabled, the OTLP receiver endpoint is `/api/v1/otlp/v1/metrics`. *New in v2.47* + +## Notifications + +The following endpoints provide information about active status notifications concerning the Prometheus server itself. +Notifications are used in the web UI. + +These endpoints are **experimental**. They may change in the future. + +### Active Notifications + +The `/api/v1/notifications` endpoint returns a list of all currently active notifications. + +``` +GET /api/v1/notifications +``` + +Example: + +``` +$ curl http://localhost:9090/api/v1/notifications +{ + "status": "success", + "data": [ + { + "text": "Prometheus is shutting down and gracefully stopping all operations.", + "date": "2024-10-07T12:33:08.551376578+02:00", + "active": true + } + ] +} +``` + +*New in v3.0* + +### Live Notifications + +The `/api/v1/notifications/live` endpoint streams live notifications as they occur, using [Server-Sent Events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events). Deleted notifications are sent with `active: false`. Active notifications will be sent when connecting to the endpoint. + +``` +GET /api/v1/notifications/live +``` + +Example: + +``` +$ curl http://localhost:9090/api/v1/notifications/live +data: { + "status": "success", + "data": [ + { + "text": "Prometheus is shutting down and gracefully stopping all operations.", + "date": "2024-10-07T12:33:08.551376578+02:00", + "active": true + } + ] +} +``` + +**Note:** The `/notifications/live` endpoint will return a `204 No Content` response if the maximum number of subscribers has been reached. You can set the maximum number of listeners with the flag `--web.max-notifications-subscribers`, which defaults to 16. + +``` +GET /api/v1/notifications/live +204 No Content +``` + +*New in v3.0* diff --git a/docs/querying/basics.md b/docs/querying/basics.md index 81ffb4e0f3..c1c76e7e04 100644 --- a/docs/querying/basics.md +++ b/docs/querying/basics.md @@ -35,8 +35,9 @@ evaluate to one of four types: Depending on the use-case (e.g. when graphing vs. displaying the output of an expression), only some of these types are legal as the result of a -user-specified expression. For example, an expression that returns an instant -vector is the only type which can be graphed. +user-specified expression. +For [instant queries](api.md#instant-queries), any of the above data types are allowed as the root of the expression. +[Range queries](api.md#range-queries) only support scalar-typed and instant-vector-typed expressions. _Notes about the experimental native histograms:_ @@ -68,9 +69,10 @@ Example: 'these are unescaped: \n \\ \t' `these are not unescaped: \n ' " \t` -### Float literals +### Float literals and time durations -Scalar float values can be written as literal integer or floating-point numbers in the format (whitespace only included for better readability): +Scalar float values can be written as literal integer or floating-point numbers +in the format (whitespace only included for better readability): [-+]?( [0-9]*\.?[0-9]+([eE][-+]?[0-9]+)? 
@@ -87,16 +89,53 @@ Examples: 0x8f -Inf NaN - -As of version 2.54, float literals can also be represented using the syntax of time durations, where the time duration is converted into a float value corresponding to the number of seconds the time duration represents. This is an experimental feature and might still change. +Additionally, underscores (`_`) can be used in between decimal or hexadecimal +digits to improve readability. Examples: - 1s # Equivalent to 1.0 - 2m # Equivalent to 120.0 - 1ms # Equivalent to 0.001 - + 1_000_000 + .123_456_789 + 0x_53_AB_F3_82 + +Float literals are also used to specify durations in seconds. For convenience, +decimal integer numbers may be combined with the following +time units: + +* `ms` – milliseconds +* `s` – seconds – 1s equals 1000ms +* `m` – minutes – 1m equals 60s (ignoring leap seconds) +* `h` – hours – 1h equals 60m +* `d` – days – 1d equals 24h (ignoring so-called daylight saving time) +* `w` – weeks – 1w equals 7d +* `y` – years – 1y equals 365d (ignoring leap days) + +Suffixing a decimal integer number with one of the units above is a different +representation of the equivalent number of seconds as a bare float literal. + +Examples: + + 1s # Equivalent to 1. + 2m # Equivalent to 120. + 1ms # Equivalent to 0.001. + -2h # Equivalent to -7200. + +The following examples do _not_ work: + + 0xABm # No suffixing of hexadecimal numbers. + 1.5h # Time units cannot be combined with a floating point. + +Infd # No suffixing of ±Inf or NaN. + +Multiple units can be combined by concatenation of suffixed integers. Units +must be ordered from the longest to the shortest. A given unit must only appear +once per float literal. + +Examples: + + 1h30m # Equivalent to 5400s and thus 5400. + 12h34m56s # Equivalent to 45296s and thus 45296. + 54s321ms # Equivalent to 54.321. ## Time series selectors @@ -109,8 +148,16 @@ single sample value for each at a given timestamp (point in time). In the simpl form, only a metric name is specified, which results in an instant vector containing elements for all time series that have this metric name. +The value returned will be that of the most recent sample at or before the +query's evaluation timestamp (in the case of an +[instant query](api.md#instant-queries)) +or the current step within the query (in the case of a +[range query](api.md#range-queries)). +The [`@` modifier](#modifier) allows overriding the timestamp relative to which +the selection takes place. Time series are only returned if their most recent sample is less than the [lookback period](#staleness) ago. + This example selects all time series that have the `http_requests_total` metric -name: +name, returning the most recent sample for each: http_requests_total @@ -131,7 +178,7 @@ against regular expressions. The following label matching operators exist: * `=~`: Select labels that regex-match the provided string. * `!~`: Select labels that do not regex-match the provided string. -Regex matches are fully anchored. A match of `env=~"foo"` is treated as `env=~"^foo$"`. +[Regex](#regular-expressions) matches are fully anchored. A match of `env=~"foo"` is treated as `env=~"^foo$"`. For example, this selects all `http_requests_total` time series for `staging`, `testing`, and `development` environments and HTTP methods other than `GET`. @@ -194,59 +241,25 @@ A workaround for this restriction is to use the `__name__` label: {__name__="on"} # Good! -All regular expressions in Prometheus use [RE2 -syntax](https://github.com/google/re2/wiki/Syntax). 
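+
+As an illustration of the fully anchored matching described above, the
+following two selectors differ (metric name and label values are hypothetical):
+
+    http_requests_total{env=~"dev"}     # anchored, matches only env="dev"
+    http_requests_total{env=~".*dev.*"} # also matches env="development"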
-
 ### Range Vector Selectors
 
 Range vector literals work like instant vector literals, except that they
-select a range of samples back from the current instant. Syntactically, a [time
-duration](#time-durations) is appended in square brackets (`[]`) at the end of
-a vector selector to specify how far back in time values should be fetched for
-each resulting range vector element. The range is a closed interval,
-i.e. samples with timestamps coinciding with either boundary of the range are
-still included in the selection.
+select a range of samples back from the current instant. Syntactically, a
+[float literal](#float-literals-and-time-durations) is appended in square
+brackets (`[]`) at the end of a vector selector to specify for how many seconds
+back in time values should be fetched for each resulting range vector element.
+Commonly, the float literal uses the syntax with one or more time units, e.g.
+`[5m]`. The range is a left-open and right-closed interval, i.e. samples with
+timestamps coinciding with the left boundary of the range are excluded from the
+selection, while samples coinciding with the right boundary of the range are
+included in the selection.
 
-In this example, we select all the values we have recorded within the last 5
-minutes for all time series that have the metric name `http_requests_total` and
-a `job` label set to `prometheus`:
+In this example, we select all the values recorded less than 5m ago for all
+time series that have the metric name `http_requests_total` and a `job` label
+set to `prometheus`:
 
     http_requests_total{job="prometheus"}[5m]
 
-### Time Durations
-
-Time durations are specified as a number, followed immediately by one of the
-following units:
-
-* `ms` - milliseconds
-* `s` - seconds
-* `m` - minutes
-* `h` - hours
-* `d` - days - assuming a day always has 24h
-* `w` - weeks - assuming a week always has 7d
-* `y` - years - assuming a year always has 365d<sup>1</sup>
-
-<sup>1</sup> For days in a year, the leap day is ignored, and conversely, for a minute, a leap second is ignored.
-
-Time durations can be combined by concatenation. Units must be ordered from the
-longest to the shortest. A given unit must only appear once in a time duration.
-
-Here are some examples of valid time durations:
-
-    5h
-    1h30m
-    5m
-    10s
-
-As of version 2.54, time durations can also be represented using the syntax of float literals, implying the number of seconds of the time duration. This is an experimental feature and might still change.
-
-Examples:
-
-    1.0 # Equivalent to 1s
-    0.001 # Equivalent to 1ms
-    120 # Equivalent to 2m
-
 ### Offset modifier
 
 The `offset` modifier allows changing the time offset for individual
@@ -329,7 +342,7 @@ Note that the `@` modifier allows a query to look ahead of its evaluation time.
 Subquery allows you to run an instant query for a given range and resolution.
 The result of a subquery is a range vector.
 
-Syntax: `<instant_query> '[' <range> ':' [<resolution>] ']' [ @ <timestamp> ] [ offset <duration> ]`
+Syntax: `<instant_query> '[' <range> ':' [<resolution>] ']' [ @ <timestamp> ] [ offset <duration> ]`
 
 * `<resolution>` is optional. Default is the global evaluation interval. For example, `rate(http_requests_total[5m])[30m:1m]` evaluates the inner `rate` expression at a 1m resolution over the most recent 30 minutes.
@@ -349,6 +362,12 @@ PromQL supports line comments that start with `#`. Example:
 
     # This is a comment
 
+## Regular expressions
+
+All regular expressions in Prometheus use [RE2 syntax](https://github.com/google/re2/wiki/Syntax).
+
+Regex matches are always fully anchored.
+
 ## Gotchas
 
 ### Staleness
 
@@ -358,8 +377,9 @@ independently of the actual present time series data.
 This is mainly to support cases like aggregation
 (`sum`, `avg`, and so on), where multiple aggregated time series do not
 precisely align in time. Because of their independence, Prometheus needs to
 assign a value at those timestamps for each relevant time
-series. It does so by taking the newest sample before this timestamp within the lookback period.
-The lookback period is 5 minutes by default.
+series. It does so by taking the newest sample that is less than the lookback period ago.
+The lookback period is 5 minutes by default, but can be
+[set with the `--query.lookback-delta` flag](../command-line/prometheus.md).
 
 If a target scrape or rule evaluation no longer returns a sample for a time
 series that was previously present, this time series will be marked as stale.
diff --git a/docs/querying/examples.md b/docs/querying/examples.md
index 8287ff6f62..1879a9945c 100644
--- a/docs/querying/examples.md
+++ b/docs/querying/examples.md
@@ -25,14 +25,11 @@ for the same vector, making it a [range vector](../basics/#range-vector-selectors).
 Note that an expression resulting in a range vector cannot be graphed directly,
 but viewed in the tabular ("Console") view of the expression browser.
 
-Using regular expressions, you could select time series only for jobs whose
+Using [regular expressions](./basics.md#regular-expressions), you could select time series only for jobs whose
 names match a certain pattern, in this case, all jobs that end with `server`:
 
     http_requests_total{job=~".*server"}
 
-All regular expressions in Prometheus use [RE2
-syntax](https://github.com/google/re2/wiki/Syntax).
-
 To select all HTTP status codes except 4xx ones, you could run:
 
     http_requests_total{status!~"4.."}
diff --git a/docs/querying/functions.md b/docs/querying/functions.md
index e13628c5c5..facdfce154 100644
--- a/docs/querying/functions.md
+++ b/docs/querying/functions.md
@@ -326,45 +326,70 @@ With native histograms, aggregating everything works as usual without any `by` clause:
 
     histogram_quantile(0.9, sum(rate(http_request_duration_seconds[10m])))
 
-The `histogram_quantile()` function interpolates quantile values by
-assuming a linear distribution within a bucket.
+In the (common) case that a quantile value does not coincide with a bucket
+boundary, the `histogram_quantile()` function interpolates the quantile value
+within the bucket the quantile value falls into. For classic histograms, for
+native histograms with custom bucket boundaries, and for the zero bucket of
+other native histograms, it assumes a uniform distribution of observations
+within the bucket (also called _linear interpolation_). For the
+non-zero-buckets of native histograms with a standard exponential bucketing
+schema, the interpolation is done under the assumption that the samples within
+the bucket are distributed in a way that they would uniformly populate the
+buckets in a hypothetical histogram with higher resolution. (This is also
+called _exponential interpolation_.)
 
 If `b` has 0 observations, `NaN` is returned. For φ < 0, `-Inf` is
 returned. For φ > 1, `+Inf` is returned. For φ = `NaN`, `NaN` is returned.
 
-The following is only relevant for classic histograms: If `b` contains
-fewer than two buckets, `NaN` is returned. The highest bucket must have an
-upper bound of `+Inf`. (Otherwise, `NaN` is returned.) If a quantile is located
-in the highest bucket, the upper bound of the second highest bucket is
-returned. A lower limit of the lowest bucket is assumed to be 0 if the upper
-bound of that bucket is greater than 0.
-In that case, the usual linear interpolation is applied within that
-bucket. Otherwise, the upper bound of the lowest bucket is returned for
-quantiles located in the lowest bucket.
+Special cases for classic histograms:
 
-You can use `histogram_quantile(0, v instant-vector)` to get the estimated minimum value stored in
-a histogram.
+* If `b` contains fewer than two buckets, `NaN` is returned.
+* The highest bucket must have an upper bound of `+Inf`. (Otherwise, `NaN` is
+  returned.)
+* If a quantile is located in the highest bucket, the upper bound of the second
+  highest bucket is returned.
+* The lower limit of the lowest bucket is assumed to be 0 if the upper bound of
+  that bucket is greater than 0. In that case, the usual linear interpolation
+  is applied within that bucket. Otherwise, the upper bound of the lowest
+  bucket is returned for quantiles located in the lowest bucket.
 
-You can use `histogram_quantile(1, v instant-vector)` to get the estimated maximum value stored in
-a histogram.
+Special cases for native histograms (relevant for the exact interpolation
+happening within the zero bucket):
 
-Buckets of classic histograms are cumulative. Therefore, the following should always be the case:
+* A zero bucket with finite width is assumed to contain no negative
+  observations if the histogram has observations in positive buckets, but none
+  in negative buckets.
+* A zero bucket with finite width is assumed to contain no positive
+  observations if the histogram has observations in negative buckets, but none
+  in positive buckets.
 
-* The counts in the buckets are monotonically increasing (strictly non-decreasing).
-* A lack of observations between the upper limits of two consecutive buckets results in equal counts
-in those two buckets.
+You can use `histogram_quantile(0, v instant-vector)` to get the estimated
+minimum value stored in a histogram.
 
-However, floating point precision issues (e.g. small discrepancies introduced by computing of buckets
-with `sum(rate(...))`) or invalid data might violate these assumptions. In that case,
-`histogram_quantile` would be unable to return meaningful results. To mitigate the issue,
-`histogram_quantile` assumes that tiny relative differences between consecutive buckets are happening
-because of floating point precision errors and ignores them. (The threshold to ignore a difference
-between two buckets is a trillionth (1e-12) of the sum of both buckets.) Furthermore, if there are
-non-monotonic bucket counts even after this adjustment, they are increased to the value of the
-previous buckets to enforce monotonicity. The latter is evidence for an actual issue with the input
-data and is therefore flagged with an informational annotation reading `input to histogram_quantile
-needed to be fixed for monotonicity`. If you encounter this annotation, you should find and remove
-the source of the invalid data.
+You can use `histogram_quantile(1, v instant-vector)` to get the estimated
+maximum value stored in a histogram.
+
+Buckets of classic histograms are cumulative. Therefore, the following should
+always be the case:
+
+* The counts in the buckets are monotonically increasing (strictly
+  non-decreasing).
+* A lack of observations between the upper limits of two consecutive buckets
+  results in equal counts in those two buckets.
+
+However, floating point precision issues (e.g. small discrepancies introduced
+by computing buckets with `sum(rate(...))`) or invalid data might violate
+these assumptions.
+In that case, `histogram_quantile` would be unable to return
+meaningful results. To mitigate the issue, `histogram_quantile` assumes that
+tiny relative differences between consecutive buckets are happening because of
+floating point precision errors and ignores them. (The threshold to ignore a
+difference between two buckets is a trillionth (1e-12) of the sum of both
+buckets.) Furthermore, if there are non-monotonic bucket counts even after this
+adjustment, they are increased to the value of the previous buckets to enforce
+monotonicity. The latter is evidence for an actual issue with the input data
+and is therefore flagged with an informational annotation reading `input to
+histogram_quantile needed to be fixed for monotonicity`. If you encounter this
+annotation, you should find and remove the source of the invalid data.
 
 ## `histogram_stddev()` and `histogram_stdvar()`
 
@@ -380,15 +405,22 @@ do not show up in the returned vector.
 
 Similarly, `histogram_stdvar(v instant-vector)` returns the estimated standard
 variance of observations in a native histogram.
 
-## `holt_winters()`
+## `double_exponential_smoothing()`
 
-`holt_winters(v range-vector, sf scalar, tf scalar)` produces a smoothed value
+**This function has to be enabled via the [feature flag](../feature_flags.md#experimental-promql-functions) `--enable-feature=promql-experimental-functions`.**
+
+`double_exponential_smoothing(v range-vector, sf scalar, tf scalar)` produces a smoothed value
 for time series based on the range in `v`. The lower the smoothing factor
 `sf`, the more importance is given to old data. The higher the trend factor
 `tf`, the more trends in the data are considered. Both `sf` and `tf` must be
 between 0 and 1.
+For additional details, refer to [NIST Engineering Statistics Handbook](https://www.itl.nist.gov/div898/handbook/pmc/section4/pmc433.htm).
+In Prometheus V2 this function was called `holt_winters`. This caused confusion
+since the Holt-Winters method usually refers to triple exponential smoothing.
+Double exponential smoothing as implemented here is also referred to as "Holt
+Linear".
 
-`holt_winters` should only be used with gauges.
+`double_exponential_smoothing` should only be used with gauges.
 
 ## `hour()`
 
@@ -432,6 +464,97 @@ by the number of seconds under the specified time range window, and should be
 used primarily for human readability. Use `rate` in recording rules so that
 increases are tracked consistently on a per-second basis.
 
+## `info()` (experimental)
+
+_The `info` function is an experiment to improve UX
+around including labels from [info metrics](https://grafana.com/blog/2021/08/04/how-to-use-promql-joins-for-more-effective-queries-of-prometheus-metrics-at-scale/#info-metrics).
+The behavior of this function may change in future versions of Prometheus,
+including its removal from PromQL. `info` has to be enabled via the
+[feature flag](../feature_flags.md#experimental-promql-functions) `--enable-feature=promql-experimental-functions`._
+
+`info(v instant-vector, [data-label-selector instant-vector])` finds, for each time
+series in `v`, all info series with matching _identifying_ labels (more on
+this later), and adds the union of their _data_ (i.e., non-identifying) labels
+to the time series. The second argument `data-label-selector` is optional.
+It is not a real instant vector, but uses a subset of its syntax.
+It must start and end with curly braces (`{ ... }`) and may only contain label matchers.
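+For example, the following (hypothetical) second argument would keep only info
+series that carry a non-empty `k8s_cluster_name` data label:
+
+```
+{k8s_cluster_name=~".+"}
+```
+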
+The label matchers are used to constrain which info series to consider +and which data labels to add to `v`. + +Identifying labels of an info series are the subset of labels that uniquely +identify the info series. The remaining labels are considered +_data labels_ (also called non-identifying). (Note that Prometheus's concept +of time series identity always includes _all_ the labels. For the sake of the `info` +function, we “logically” define info series identity in a different way than +in the conventional Prometheus view.) The identifying labels of an info series +are used to join it to regular (non-info) series, i.e. those series that have +the same labels as the identifying labels of the info series. The data labels, which are +the ones added to the regular series by the `info` function, effectively encode +metadata key value pairs. (This implies that a change in the data labels +in the conventional Prometheus view constitutes the end of one info series and +the beginning of a new info series, while the “logical” view of the `info` function is +that the same info series continues to exist, just with different “data”.) + +The conventional approach of adding data labels is sometimes called a “join query”, +as illustrated by the following example: + +``` + rate(http_server_request_duration_seconds_count[2m]) +* on (job, instance) group_left (k8s_cluster_name) + target_info +``` + +The core of the query is the expression `rate(http_server_request_duration_seconds_count[2m])`. +But to add data labels from an info metric, the user has to use elaborate +(and not very obvious) syntax to specify which info metric to use (`target_info`), what the +identifying labels are (`on (job, instance)`), and which data labels to add +(`group_left (k8s_cluster_name)`). + +This query is not only verbose and hard to write, it might also run into an “identity crisis”: +If any of the data labels of `target_info` changes, Prometheus sees that as a change of series +(as alluded to above, Prometheus just has no native concept of non-identifying labels). +If the old `target_info` series is not properly marked as stale (which can happen with certain ingestion paths), +the query above will fail for up to 5m (the lookback delta) because it will find a conflicting +match with both the old and the new version of `target_info`. + +The `info` function not only resolves this conflict in favor of the newer series, it also simplifies the syntax +because it knows about the available info series and what their identifying labels are. The example query +looks like this with the `info` function: + +``` +info( + rate(http_server_request_duration_seconds_count[2m]), + {k8s_cluster_name=~".+"} +) +``` + +The common case of adding _all_ data labels can be achieved by +omitting the 2nd argument of the `info` function entirely, simplifying +the example even more: + +``` +info(rate(http_server_request_duration_seconds_count[2m])) +``` + +While `info` normally automatically finds all matching info series, it's possible to +restrict them by providing a `__name__` label matcher, e.g. +`{__name__="target_info"}`. + +### Limitations + +In its current iteration, `info` defaults to considering only info series with +the name `target_info`. It also assumes that the identifying info series labels are +`instance` and `job`. `info` does support other info series names however, through +`__name__` label matchers. E.g., one can explicitly say to consider both +`target_info` and `build_info` as follows: +`{__name__=~"(target|build)_info"}`. 
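+Spelled out as a complete call, combining the pieces shown above into one
+sketch (using the same hypothetical metric names):
+
+```
+info(
+  rate(http_server_request_duration_seconds_count[2m]),
+  {__name__=~"(target|build)_info"}
+)
+```
+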
+However, the identifying labels always
+have to be `instance` and `job`.
+
+These limitations partially defeat the purpose of the `info` function.
+At the current stage, this is an experiment to find out how useful the approach
+turns out to be in practice. A final version of the `info` function will indeed
+consider all matching info series, with their appropriate identifying labels.
+
 ## `irate()`
 
 `irate(v range-vector)` calculates the per-second instant rate of increase of
@@ -475,7 +598,7 @@ label_join(up{job="api-server",src1="a",src2="b",src3="c"}, "foo", ",", "src1",
 
 ## `label_replace()`
 
 For each timeseries in `v`, `label_replace(v instant-vector, dst_label string, replacement string, src_label string, regex string)`
-matches the [regular expression](https://github.com/google/re2/wiki/Syntax) `regex` against the value of the label `src_label`. If it
+matches the [regular expression](./basics.md#regular-expressions) `regex` against the value of the label `src_label`. If it
 matches, the value of the label `dst_label` in the returned timeseries will be
 the expansion of `replacement`, together with the original labels in the input.
 Capturing groups in the regular expression can be referenced with `$1`, `$2`,
 etc. Named capturing groups in the regular expression can be referenced with `$name` (where `name` is the capturing group name). If the
 regular expression doesn't match then the timeseries is returned unchanged.
diff --git a/docs/querying/remote_read_api.md b/docs/querying/remote_read_api.md
index efbd08e984..76de112342 100644
--- a/docs/querying/remote_read_api.md
+++ b/docs/querying/remote_read_api.md
@@ -17,7 +17,8 @@ Requests are made to the following endpoint.
 
 ### Samples
 
-This returns a message that includes a list of raw samples.
+This returns a message that includes a list of raw samples matching the
+requested query.
 
 ### Streamed Chunks
 
diff --git a/docs/stability.md b/docs/stability.md
index 1fd2e51e0c..cb30b8ad99 100644
--- a/docs/stability.md
+++ b/docs/stability.md
@@ -9,7 +9,7 @@ Prometheus promises API stability within a major version, and strives to avoid
 breaking changes for key features. Some features, which are cosmetic, still
 under development, or depend on 3rd party services, are not covered by this.
 
-Things considered stable for 2.x:
+Things considered stable for 3.x:
 
 * The query language and data model
 * Alerting and recording rules
@@ -18,21 +18,25 @@ Things considered stable for 2.x:
 * Configuration file format (minus the service discovery remote read/write, see below)
 * Rule/alert file format
 * Console template syntax and semantics
-* Remote write sending, per the [1.0 specification](https://prometheus.io/docs/concepts/remote_write_spec/).
+* Remote write sending and receiving, per the [1.0 specification](https://prometheus.io/docs/concepts/remote_write_spec/)
+* Agent mode
+* OTLP receiver endpoint
 
-Things considered unstable for 2.x:
+Things considered unstable for 3.x:
 
 * Any feature listed as experimental or subject to change, including:
-  * The [`holt_winters` PromQL function](https://github.com/prometheus/prometheus/issues/2458)
-  * Remote write receiving, remote read and the remote read endpoint
+  * The [`double_exponential_smoothing` PromQL function](https://github.com/prometheus/prometheus/issues/2458)
+  * Remote read and the remote read endpoint
 * Server-side HTTPS and basic authentication
-* Service discovery integrations, with the exception of `static_configs` and `file_sd_configs`
+* Service discovery integrations, with the exception of `static_configs`, `file_sd_configs` and `http_sd_configs`
 * Go APIs of packages that are part of the server
 * HTML generated by the web UI
 * The metrics in the /metrics endpoint of Prometheus itself
 * Exact on-disk format. Potential changes, however, will be forward compatible and transparently handled by Prometheus
 * The format of the logs
 
+Prometheus 2.x stability guarantees can be found [in the 2.x documentation](https://prometheus.io/docs/prometheus/2.55/stability/).
+
 As long as you are not using any features marked as experimental/unstable, an
 upgrade within a major version can usually be performed without any operational
 adjustments and very little risk that anything will break. Any breaking changes
diff --git a/docs/storage.md b/docs/storage.md
index 55d4309d37..e625e9c225 100644
--- a/docs/storage.md
+++ b/docs/storage.md
@@ -87,10 +87,9 @@ or 31 days, whichever is smaller.
 
 Prometheus has several flags that configure local storage. The most important are:
 
 - `--storage.tsdb.path`: Where Prometheus writes its database. Defaults to `data/`.
-- `--storage.tsdb.retention.time`: How long to retain samples in storage. When this flag is
-  set, it overrides `storage.tsdb.retention`. If neither this flag nor `storage.tsdb.retention`
-  nor `storage.tsdb.retention.size` is set, the retention time defaults to `15d`.
-  Supported units: y, w, d, h, m, s, ms.
+- `--storage.tsdb.retention.time`: How long to retain samples in storage. If neither
+  this flag nor `storage.tsdb.retention.size` is set, the retention time defaults to
+  `15d`. Supported units: y, w, d, h, m, s, ms.
 - `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain.
   The oldest data will be removed first. Defaults to `0` or disabled. Units supported:
   B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Only
@@ -98,7 +97,6 @@ Prometheus has several flags that configure local storage. The most important are:
   chunks are counted in the total size. So the minimum requirement for the disk is the
   peak space taken by the `wal` (the WAL and Checkpoint) and `chunks_head`
   (m-mapped Head chunks) directory combined (peaks every 2 hours).
-- `--storage.tsdb.retention`: Deprecated in favor of `storage.tsdb.retention.time`.
 - `--storage.tsdb.wal-compression`: Enables compression of the write-ahead log (WAL).
   Depending on your data, you can expect the WAL size to be halved with little extra
   cpu load. This flag was introduced in 2.11.0 and enabled by default in 2.20.0.
@@ -117,13 +115,12 @@ time series you scrape (fewer targets or fewer series per target), or you
 can increase the scrape interval.
 However, reducing the number of series is likely more effective, due to
 compression of samples within a series.
 
-If your local storage becomes corrupted for whatever reason, the best
-strategy to address the problem is to shut down Prometheus then remove the
-entire storage directory. You can also try removing individual block directories,
-or the WAL directory to resolve the problem. Note that this means losing
-approximately two hours data per block directory. Again, Prometheus's local
-storage is not intended to be durable long-term storage; external solutions
-offer extended retention and data durability.
+If your local storage becomes corrupted to the point where Prometheus will not
+start, it is recommended to back up the storage directory and restore the
+corrupted block directories from your backups. If you do not have backups, the
+last resort is to remove the corrupted files. For example, you can try removing
+individual block directories or the write-ahead log (WAL) files. Note that this
+means losing the data for the time range those blocks or the WAL cover.
 
 CAUTION: Non-POSIX compliant filesystems are not supported for Prometheus'
 local storage as unrecoverable corruptions may happen. NFS filesystems
@@ -146,7 +143,7 @@ a buffer, ensuring that older entries will be removed before the allocated storage
 for Prometheus becomes full.
 
 At present, we recommend setting the retention size to, at most, 80-85% of your
-allocated Prometheus disk space. This increases the likelihood that older entires
+allocated Prometheus disk space. This increases the likelihood that older entries
 will be removed prior to hitting any disk limitations.
 
 ## Remote storage integrations
 
@@ -157,31 +154,27 @@ a set of interfaces that allow integrating with remote storage systems.
 
 ### Overview
 
-Prometheus integrates with remote storage systems in three ways:
+Prometheus integrates with remote storage systems in four ways:
 
-- Prometheus can write samples that it ingests to a remote URL in a standardized format.
-- Prometheus can receive samples from other Prometheus servers in a standardized format.
-- Prometheus can read (back) sample data from a remote URL in a standardized format.
+- Prometheus can write samples that it ingests to a remote URL in a [Remote Write format](https://prometheus.io/docs/specs/remote_write_spec_2_0/).
+- Prometheus can receive samples from other clients in a [Remote Write format](https://prometheus.io/docs/specs/remote_write_spec_2_0/).
+- Prometheus can read (back) sample data from a remote URL in a [Remote Read format](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto#L31).
+- Prometheus can return sample data requested by clients in a [Remote Read format](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto#L31).
 
 ![Remote read and write architecture](images/remote_integrations.png)
 
-The read and write protocols both use a snappy-compressed protocol buffer encoding over
-HTTP. The protocols are not considered as stable APIs yet and may change to use gRPC
-over HTTP/2 in the future, when all hops between Prometheus and the remote storage can
-safely be assumed to support HTTP/2.
+The remote read and write protocols both use a snappy-compressed protocol buffer encoding over
+HTTP. The read protocol is not yet considered a stable API.
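+
+As a quick way to exercise the write protocol locally, recent Prometheus
+releases also ship a `promtool push metrics` command that reads text-format
+samples and sends them to a remote-write endpoint. A sketch (the file name is
+hypothetical, and the receiving server must have the remote write receiver
+enabled, as described below):
+
+```
+$ promtool push metrics http://localhost:9090/api/v1/write ./metrics.prom
+```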
-For details on configuring remote storage integrations in Prometheus, see the +The write protocol has a [stable specification for 1.0 version](https://prometheus.io/docs/specs/remote_write_spec/) +and [experimental specification for 2.0 version](https://prometheus.io/docs/specs/remote_write_spec_2_0/), +both supported by Prometheus server. + +For details on configuring remote storage integrations in Prometheus as a client, see the [remote write](configuration/configuration.md#remote_write) and [remote read](configuration/configuration.md#remote_read) sections of the Prometheus configuration documentation. -The built-in remote write receiver can be enabled by setting the -`--web.enable-remote-write-receiver` command line flag. When enabled, -the remote write receiver endpoint is `/api/v1/write`. - -For details on the request and response messages, see the -[remote storage protocol buffer definitions](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto). - Note that on the read path, Prometheus only fetches raw series data for a set of label selectors and time ranges from the remote end. All PromQL evaluation on the raw data still happens in Prometheus itself. This means that remote read queries @@ -189,6 +182,11 @@ have some scalability limit, since all necessary data needs to be loaded into th querying Prometheus server first and then processed there. However, supporting fully distributed evaluation of PromQL was deemed infeasible for the time being. +Prometheus also serves both protocols. The built-in remote write receiver can be enabled +by setting the `--web.enable-remote-write-receiver` command line flag. When enabled, +the remote write receiver endpoint is `/api/v1/write`. The remote read endpoint is +available on [`/api/v1/read`](https://prometheus.io/docs/prometheus/latest/querying/remote_read_api/). + ### Existing integrations To learn more about existing integrations with remote storage systems, see the diff --git a/documentation/examples/custom-sd/adapter-usage/main.go b/documentation/examples/custom-sd/adapter-usage/main.go index 8ccbafe6f1..128132a8d2 100644 --- a/documentation/examples/custom-sd/adapter-usage/main.go +++ b/documentation/examples/custom-sd/adapter-usage/main.go @@ -18,6 +18,7 @@ import ( "encoding/json" "fmt" "io" + "log/slog" "net" "net/http" "os" @@ -26,10 +27,9 @@ import ( "time" "github.com/alecthomas/kingpin/v2" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" prom_discovery "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -41,7 +41,7 @@ var ( a = kingpin.New("sd adapter usage", "Tool to generate file_sd target files for unimplemented SD mechanisms.") outputFile = a.Flag("output.file", "Output file for file_sd compatible file.").Default("custom_sd.json").String() listenAddress = a.Flag("listen.address", "The address the Consul HTTP API is listening on for requests.").Default("localhost:8500").String() - logger log.Logger + logger *slog.Logger // addressLabel is the name for the label containing a target's address. 
addressLabel = model.MetaLabelPrefix + "consul_address" @@ -90,7 +90,7 @@ type discovery struct { address string refreshInterval int tagSeparator string - logger log.Logger + logger *slog.Logger oldSourceList map[string]bool } @@ -164,7 +164,7 @@ func (d *discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { var srvs map[string][]string resp, err := http.Get(fmt.Sprintf("http://%s/v1/catalog/services", d.address)) if err != nil { - level.Error(d.logger).Log("msg", "Error getting services list", "err", err) + d.logger.Error("Error getting services list", "err", err) time.Sleep(time.Duration(d.refreshInterval) * time.Second) continue } @@ -173,7 +173,7 @@ func (d *discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { io.Copy(io.Discard, resp.Body) resp.Body.Close() if err != nil { - level.Error(d.logger).Log("msg", "Error reading services list", "err", err) + d.logger.Error("Error reading services list", "err", err) time.Sleep(time.Duration(d.refreshInterval) * time.Second) continue } @@ -181,7 +181,7 @@ func (d *discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { err = json.Unmarshal(b, &srvs) resp.Body.Close() if err != nil { - level.Error(d.logger).Log("msg", "Error parsing services list", "err", err) + d.logger.Error("Error parsing services list", "err", err) time.Sleep(time.Duration(d.refreshInterval) * time.Second) continue } @@ -200,13 +200,13 @@ func (d *discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { } resp, err := http.Get(fmt.Sprintf("http://%s/v1/catalog/service/%s", d.address, name)) if err != nil { - level.Error(d.logger).Log("msg", "Error getting services nodes", "service", name, "err", err) + d.logger.Error("Error getting services nodes", "service", name, "err", err) break } tg, err := d.parseServiceNodes(resp, name) if err != nil { - level.Error(d.logger).Log("msg", "Error parsing services nodes", "service", name, "err", err) + d.logger.Error("Error parsing services nodes", "service", name, "err", err) break } tgs = append(tgs, tg) @@ -254,8 +254,7 @@ func main() { fmt.Println("err: ", err) return } - logger = log.NewSyncLogger(log.NewLogfmtLogger(os.Stdout)) - logger = log.With(logger, "ts", log.DefaultTimestampUTC, "caller", log.DefaultCaller) + logger = promslog.New(&promslog.Config{}) ctx := context.Background() @@ -272,7 +271,7 @@ func main() { } if err != nil { - level.Error(logger).Log("msg", "failed to create discovery metrics", "err", err) + logger.Error("failed to create discovery metrics", "err", err) os.Exit(1) } @@ -280,7 +279,7 @@ func main() { refreshMetrics := prom_discovery.NewRefreshMetrics(reg) metrics, err := prom_discovery.RegisterSDMetrics(reg, refreshMetrics) if err != nil { - level.Error(logger).Log("msg", "failed to register service discovery metrics", "err", err) + logger.Error("failed to register service discovery metrics", "err", err) os.Exit(1) } diff --git a/documentation/examples/custom-sd/adapter/adapter.go b/documentation/examples/custom-sd/adapter/adapter.go index dcf5a2b78c..b242c4eaa0 100644 --- a/documentation/examples/custom-sd/adapter/adapter.go +++ b/documentation/examples/custom-sd/adapter/adapter.go @@ -18,13 +18,12 @@ import ( "context" "encoding/json" "fmt" + "log/slog" "os" "path/filepath" "reflect" "sort" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" @@ -55,7 +54,7 @@ type Adapter struct { manager *discovery.Manager output string name string - logger log.Logger + 
logger        *slog.Logger
 }
 
 func mapToArray(m map[string]*customSD) []customSD {
@@ -106,7 +105,7 @@ func (a *Adapter) refreshTargetGroups(allTargetGroups map[string][]*targetgroup.
 			a.groups = tempGroups
 			err := a.writeOutput()
 			if err != nil {
-				level.Error(log.With(a.logger, "component", "sd-adapter")).Log("err", err)
+				a.logger.With("component", "sd-adapter").Error("failed to write output", "err", err)
 			}
 		}
 	}
@@ -163,7 +162,7 @@ func (a *Adapter) Run() {
 }
 
 // NewAdapter creates a new instance of Adapter.
-func NewAdapter(ctx context.Context, file, name string, d discovery.Discoverer, logger log.Logger, sdMetrics map[string]discovery.DiscovererMetrics, registerer prometheus.Registerer) *Adapter {
+func NewAdapter(ctx context.Context, file, name string, d discovery.Discoverer, logger *slog.Logger, sdMetrics map[string]discovery.DiscovererMetrics, registerer prometheus.Registerer) *Adapter {
 	return &Adapter{
 		ctx:  ctx,
 		disc: d,
diff --git a/documentation/examples/prometheus-otlp.yml b/documentation/examples/prometheus-otlp.yml
new file mode 100644
index 0000000000..f0a8ab8b11
--- /dev/null
+++ b/documentation/examples/prometheus-otlp.yml
@@ -0,0 +1,31 @@
+# my global config
+global:
+  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
+
+otlp:
+  # Recommended attributes to be promoted to labels.
+  promote_resource_attributes:
+    - service.instance.id
+    - service.name
+    - service.namespace
+    - cloud.availability_zone
+    - cloud.region
+    - container.name
+    - deployment.environment.name
+    - k8s.cluster.name
+    - k8s.container.name
+    - k8s.cronjob.name
+    - k8s.daemonset.name
+    - k8s.deployment.name
+    - k8s.job.name
+    - k8s.namespace.name
+    - k8s.pod.name
+    - k8s.replicaset.name
+    - k8s.statefulset.name
+  # Ingest OTLP data keeping UTF-8 characters in metric/label names.
+  translation_strategy: NoUTF8EscapingWithSuffixes
+
+storage:
+  # OTLP is a push-based protocol, so out-of-order samples are a common scenario.
+  tsdb:
+    out_of_order_time_window: 30m
diff --git a/documentation/examples/prometheus-ovhcloud.yml b/documentation/examples/prometheus-ovhcloud.yml
index 21facad1ca..b2cc60af25 100644
--- a/documentation/examples/prometheus-ovhcloud.yml
+++ b/documentation/examples/prometheus-ovhcloud.yml
@@ -1,4 +1,4 @@
-# An example scrape configuration for running Prometheus with Ovhcloud.
+# An example scrape configuration for running Prometheus with OVHcloud.
scrape_configs: - job_name: 'ovhcloud' ovhcloud_sd_configs: diff --git a/documentation/examples/remote_storage/go.mod b/documentation/examples/remote_storage/go.mod index e5e052469b..298236a8d5 100644 --- a/documentation/examples/remote_storage/go.mod +++ b/documentation/examples/remote_storage/go.mod @@ -1,17 +1,16 @@ module github.com/prometheus/prometheus/documentation/examples/remote_storage -go 1.21.0 +go 1.22.0 require ( github.com/alecthomas/kingpin/v2 v2.4.0 - github.com/go-kit/log v0.2.1 github.com/gogo/protobuf v1.3.2 github.com/golang/snappy v0.0.4 - github.com/influxdata/influxdb v1.11.5 - github.com/prometheus/client_golang v1.20.0 - github.com/prometheus/common v0.57.0 + github.com/influxdata/influxdb v1.11.8 + github.com/prometheus/client_golang v1.20.5 + github.com/prometheus/common v0.60.1 github.com/prometheus/prometheus v0.53.1 - github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.10.0 ) require ( @@ -26,6 +25,7 @@ require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/dennwc/varint v1.0.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-kit/log v0.2.1 // indirect github.com/go-logfmt/logfmt v0.6.0 // indirect github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect @@ -55,11 +55,11 @@ require ( go.opentelemetry.io/otel/trace v1.27.0 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.25.0 // indirect - golang.org/x/net v0.27.0 // indirect - golang.org/x/oauth2 v0.21.0 // indirect - golang.org/x/sys v0.22.0 // indirect - golang.org/x/text v0.16.0 // indirect + golang.org/x/crypto v0.27.0 // indirect + golang.org/x/net v0.29.0 // indirect + golang.org/x/oauth2 v0.23.0 // indirect + golang.org/x/sys v0.25.0 // indirect + golang.org/x/text v0.18.0 // indirect golang.org/x/time v0.5.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect google.golang.org/grpc v1.65.0 // indirect diff --git a/documentation/examples/remote_storage/go.sum b/documentation/examples/remote_storage/go.sum index 34c474ef89..bd11b9d04e 100644 --- a/documentation/examples/remote_storage/go.sum +++ b/documentation/examples/remote_storage/go.sum @@ -166,8 +166,8 @@ github.com/hetznercloud/hcloud-go/v2 v2.9.0 h1:s0N6R7Zoi2DPfMtUF5o9VeUBzTtHVY6MI github.com/hetznercloud/hcloud-go/v2 v2.9.0/go.mod h1:qtW/TuU7Bs16ibXl/ktJarWqU2LwHr7eGlwoilHxtgg= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= -github.com/influxdata/influxdb v1.11.5 h1:+em5VOl6lhAZubXj5o6SobCwvrRs3XDlBx/MUI4schI= -github.com/influxdata/influxdb v1.11.5/go.mod h1:k8sWREQl1/9t46VrkrH5adUM4UNGIt206ipO3plbkw8= +github.com/influxdata/influxdb v1.11.8 h1:lX8MJDfk91O7nqzzonQkjk87gOeQy9V/Xp3gpELhG1s= +github.com/influxdata/influxdb v1.11.8/go.mod h1:zRTAuk/Ie/V1LGxJUv8jfDmfv+ypz22lxfhc1MxC3rI= github.com/ionos-cloud/sdk-go/v6 v6.1.11 h1:J/uRN4UWO3wCyGOeDdMKv8LWRzKu6UIkLEaes38Kzh8= github.com/ionos-cloud/sdk-go/v6 v6.1.11/go.mod h1:EzEgRIDxBELvfoa/uBN0kOQaqovLjUWEB7iW4/Q+t4k= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= @@ -253,8 +253,8 @@ github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXP github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.7.1/go.mod 
h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.20.0 h1:jBzTZ7B099Rg24tny+qngoynol8LtVYlA2bqx3vEloI= -github.com/prometheus/client_golang v1.20.0/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -264,8 +264,8 @@ github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y8 github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.57.0 h1:Ro/rKjwdq9mZn1K5QPctzh+MA4Lp0BuYk5ZZEVhoNcY= -github.com/prometheus/common v0.57.0/go.mod h1:7uRPFSUTbfZWsJ7MHY56sqt7hLQu3bxXHDnNhl8E9qI= +github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= +github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= @@ -293,8 +293,8 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/vultr/govultr/v2 v2.17.2 h1:gej/rwr91Puc/tgh+j33p/BLR16UrIPnSr+AIwYWZQs= github.com/vultr/govultr/v2 v2.17.2/go.mod h1:ZFOKGWmgjytfyjeyAdhQlSWwTjh2ig+X49cAp50dzXI= github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= @@ -323,8 +323,8 @@ golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnf golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30= -golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M= +golang.org/x/crypto v0.27.0 
h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= +golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= golang.org/x/exp v0.0.0-20240119083558-1b970713d09a h1:Q8/wZp0KX97QFTc2ywcOE0YRjZPVIx+MXInMzdvQqcA= golang.org/x/exp v0.0.0-20240119083558-1b970713d09a/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -344,20 +344,20 @@ golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81R golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= -golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= +golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= +golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= -golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= +golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -373,17 +373,17 @@ golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.25.0 
h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= +golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= -golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= +golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= +golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= +golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= +golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go b/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go index 36242a8f4d..b02560dbab 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/graphite/client.go @@ -16,19 +16,19 @@ package graphite import ( "bytes" "fmt" + "log/slog" "math" "net" "sort" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" ) // Client allows sending batches of Prometheus samples to Graphite. type Client struct { - logger log.Logger + logger *slog.Logger address string transport string @@ -37,9 +37,9 @@ type Client struct { } // NewClient creates a new Client. 
-func NewClient(logger log.Logger, address, transport string, timeout time.Duration, prefix string) *Client { +func NewClient(logger *slog.Logger, address, transport string, timeout time.Duration, prefix string) *Client { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } return &Client{ logger: logger, @@ -93,7 +93,7 @@ func (c *Client) Write(samples model.Samples) error { t := float64(s.Timestamp.UnixNano()) / 1e9 v := float64(s.Value) if math.IsNaN(v) || math.IsInf(v, 0) { - level.Debug(c.logger).Log("msg", "Cannot send value to Graphite, skipping sample", "value", v, "sample", s) + c.logger.Debug("Cannot send value to Graphite, skipping sample", "value", v, "sample", s) continue } fmt.Fprintf(&buf, "%s %f %f\n", k, v, t) diff --git a/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go b/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go index e84ed9e129..6ae40f8173 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/influxdb/client.go @@ -17,22 +17,22 @@ import ( "encoding/json" "errors" "fmt" + "log/slog" "math" "os" "strings" - "github.com/go-kit/log" - "github.com/go-kit/log/level" influx "github.com/influxdata/influxdb/client/v2" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/prompb" ) // Client allows sending batches of Prometheus samples to InfluxDB. type Client struct { - logger log.Logger + logger *slog.Logger client influx.Client database string @@ -41,16 +41,16 @@ type Client struct { } // NewClient creates a new Client. -func NewClient(logger log.Logger, conf influx.HTTPConfig, db, rp string) *Client { +func NewClient(logger *slog.Logger, conf influx.HTTPConfig, db, rp string) *Client { c, err := influx.NewHTTPClient(conf) // Currently influx.NewClient() *should* never return an error. 
if err != nil { - level.Error(logger).Log("err", err) + logger.Error("Error creating influx HTTP client", "err", err) os.Exit(1) } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } return &Client{ @@ -84,7 +84,7 @@ func (c *Client) Write(samples model.Samples) error { for _, s := range samples { v := float64(s.Value) if math.IsNaN(v) || math.IsInf(v, 0) { - level.Debug(c.logger).Log("msg", "Cannot send to InfluxDB, skipping sample", "value", v, "sample", s) + c.logger.Debug("Cannot send to InfluxDB, skipping sample", "value", v, "sample", s) c.ignoredSamples.Inc() continue } diff --git a/documentation/examples/remote_storage/remote_storage_adapter/main.go b/documentation/examples/remote_storage/remote_storage_adapter/main.go index bb348aba7f..7f62990d2e 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/main.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/main.go @@ -17,6 +17,7 @@ package main import ( "fmt" "io" + "log/slog" "net/http" _ "net/http/pprof" "net/url" @@ -26,16 +27,14 @@ import ( "time" "github.com/alecthomas/kingpin/v2" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" influx "github.com/influxdata/influxdb/client/v2" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/common/model" - "github.com/prometheus/common/promlog" - "github.com/prometheus/common/promlog/flag" + "github.com/prometheus/common/promslog" + "github.com/prometheus/common/promslog/flag" "github.com/prometheus/prometheus/documentation/examples/remote_storage/remote_storage_adapter/graphite" "github.com/prometheus/prometheus/documentation/examples/remote_storage/remote_storage_adapter/influxdb" @@ -57,7 +56,7 @@ type config struct { remoteTimeout time.Duration listenAddr string telemetryPath string - promlogConfig promlog.Config + promslogConfig promslog.Config } var ( @@ -105,11 +104,11 @@ func main() { cfg := parseFlags() http.Handle(cfg.telemetryPath, promhttp.Handler()) - logger := promlog.New(&cfg.promlogConfig) + logger := promslog.New(&cfg.promslogConfig) writers, readers := buildClients(logger, cfg) if err := serve(logger, cfg.listenAddr, writers, readers); err != nil { - level.Error(logger).Log("msg", "Failed to listen", "addr", cfg.listenAddr, "err", err) + logger.Error("Failed to listen", "addr", cfg.listenAddr, "err", err) os.Exit(1) } } @@ -120,7 +119,7 @@ func parseFlags() *config { cfg := &config{ influxdbPassword: os.Getenv("INFLUXDB_PW"), - promlogConfig: promlog.Config{}, + promslogConfig: promslog.Config{}, } a.Flag("graphite-address", "The host:port of the Graphite server to send samples to. None, if empty."). @@ -146,7 +145,7 @@ func parseFlags() *config { a.Flag("web.telemetry-path", "Address to listen on for web endpoints."). 
Default("/metrics").StringVar(&cfg.telemetryPath) - flag.AddFlags(a, &cfg.promlogConfig) + flag.AddFlags(a, &cfg.promslogConfig) _, err := a.Parse(os.Args[1:]) if err != nil { @@ -168,19 +167,19 @@ type reader interface { Name() string } -func buildClients(logger log.Logger, cfg *config) ([]writer, []reader) { +func buildClients(logger *slog.Logger, cfg *config) ([]writer, []reader) { var writers []writer var readers []reader if cfg.graphiteAddress != "" { c := graphite.NewClient( - log.With(logger, "storage", "Graphite"), + logger.With("storage", "Graphite"), cfg.graphiteAddress, cfg.graphiteTransport, cfg.remoteTimeout, cfg.graphitePrefix) writers = append(writers, c) } if cfg.opentsdbURL != "" { c := opentsdb.NewClient( - log.With(logger, "storage", "OpenTSDB"), + logger.With("storage", "OpenTSDB"), cfg.opentsdbURL, cfg.remoteTimeout, ) @@ -189,7 +188,7 @@ func buildClients(logger log.Logger, cfg *config) ([]writer, []reader) { if cfg.influxdbURL != "" { url, err := url.Parse(cfg.influxdbURL) if err != nil { - level.Error(logger).Log("msg", "Failed to parse InfluxDB URL", "url", cfg.influxdbURL, "err", err) + logger.Error("Failed to parse InfluxDB URL", "url", cfg.influxdbURL, "err", err) os.Exit(1) } conf := influx.HTTPConfig{ @@ -199,7 +198,7 @@ func buildClients(logger log.Logger, cfg *config) ([]writer, []reader) { Timeout: cfg.remoteTimeout, } c := influxdb.NewClient( - log.With(logger, "storage", "InfluxDB"), + logger.With("storage", "InfluxDB"), conf, cfg.influxdbDatabase, cfg.influxdbRetentionPolicy, @@ -208,15 +207,15 @@ func buildClients(logger log.Logger, cfg *config) ([]writer, []reader) { writers = append(writers, c) readers = append(readers, c) } - level.Info(logger).Log("msg", "Starting up...") + logger.Info("Starting up...") return writers, readers } -func serve(logger log.Logger, addr string, writers []writer, readers []reader) error { +func serve(logger *slog.Logger, addr string, writers []writer, readers []reader) error { http.HandleFunc("/write", func(w http.ResponseWriter, r *http.Request) { req, err := remote.DecodeWriteRequest(r.Body) if err != nil { - level.Error(logger).Log("msg", "Read error", "err", err.Error()) + logger.Error("Read error", "err", err.Error()) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -238,21 +237,21 @@ func serve(logger log.Logger, addr string, writers []writer, readers []reader) e http.HandleFunc("/read", func(w http.ResponseWriter, r *http.Request) { compressed, err := io.ReadAll(r.Body) if err != nil { - level.Error(logger).Log("msg", "Read error", "err", err.Error()) + logger.Error("Read error", "err", err.Error()) http.Error(w, err.Error(), http.StatusInternalServerError) return } reqBuf, err := snappy.Decode(nil, compressed) if err != nil { - level.Error(logger).Log("msg", "Decode error", "err", err.Error()) + logger.Error("Decode error", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } var req prompb.ReadRequest if err := proto.Unmarshal(reqBuf, &req); err != nil { - level.Error(logger).Log("msg", "Unmarshal error", "err", err.Error()) + logger.Error("Unmarshal error", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -267,7 +266,7 @@ func serve(logger log.Logger, addr string, writers []writer, readers []reader) e var resp *prompb.ReadResponse resp, err = reader.Read(&req) if err != nil { - level.Warn(logger).Log("msg", "Error executing query", "query", req, "storage", reader.Name(), "err", err) + logger.Warn("Error executing query", "query", 
req, "storage", reader.Name(), "err", err) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -283,7 +282,7 @@ func serve(logger log.Logger, addr string, writers []writer, readers []reader) e compressed = snappy.Encode(nil, data) if _, err := w.Write(compressed); err != nil { - level.Warn(logger).Log("msg", "Error writing response", "storage", reader.Name(), "err", err) + logger.Warn("Error writing response", "storage", reader.Name(), "err", err) } }) @@ -309,12 +308,12 @@ func protoToSamples(req *prompb.WriteRequest) model.Samples { return samples } -func sendSamples(logger log.Logger, w writer, samples model.Samples) { +func sendSamples(logger *slog.Logger, w writer, samples model.Samples) { begin := time.Now() err := w.Write(samples) duration := time.Since(begin).Seconds() if err != nil { - level.Warn(logger).Log("msg", "Error sending samples to remote storage", "err", err, "storage", w.Name(), "num_samples", len(samples)) + logger.Warn("Error sending samples to remote storage", "err", err, "storage", w.Name(), "num_samples", len(samples)) failedSamples.WithLabelValues(w.Name()).Add(float64(len(samples))) } sentSamples.WithLabelValues(w.Name()).Add(float64(len(samples))) diff --git a/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go b/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go index abb1d0b7d3..433c70527a 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client.go @@ -19,13 +19,12 @@ import ( "encoding/json" "fmt" "io" + "log/slog" "math" "net/http" "net/url" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" ) @@ -36,14 +35,14 @@ const ( // Client allows sending batches of Prometheus samples to OpenTSDB. type Client struct { - logger log.Logger + logger *slog.Logger url string timeout time.Duration } // NewClient creates a new Client. 
-func NewClient(logger log.Logger, url string, timeout time.Duration) *Client { +func NewClient(logger *slog.Logger, url string, timeout time.Duration) *Client { return &Client{ logger: logger, url: url, @@ -78,7 +77,7 @@ func (c *Client) Write(samples model.Samples) error { for _, s := range samples { v := float64(s.Value) if math.IsNaN(v) || math.IsInf(v, 0) { - level.Debug(c.logger).Log("msg", "Cannot send value to OpenTSDB, skipping sample", "value", v, "sample", s) + c.logger.Debug("Cannot send value to OpenTSDB, skipping sample", "value", v, "sample", s) continue } metric := TagValue(s.Metric[model.MetricNameLabel]) diff --git a/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client_test.go b/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client_test.go index a30448e766..bc9703c88c 100644 --- a/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client_test.go +++ b/documentation/examples/remote_storage/remote_storage_adapter/opentsdb/client_test.go @@ -43,14 +43,14 @@ func TestMarshalStoreSamplesRequest(t *testing.T) { Value: 3.1415, Tags: tagsFromMetric(metric), } - expectedJSON := []byte(`{"metric":"test_.metric","timestamp":4711,"value":3.1415,"tags":{"many_chars":"abc_21ABC_.012-3_2145_C3_B667_7E89./","testlabel":"test_.value"}}`) + expectedJSON := `{"metric":"test_.metric","timestamp":4711,"value":3.1415,"tags":{"many_chars":"abc_21ABC_.012-3_2145_C3_B667_7E89./","testlabel":"test_.value"}}` resultingJSON, err := json.Marshal(request) require.NoError(t, err, "Marshal(request) resulted in err.") - require.Equal(t, expectedJSON, resultingJSON) + require.JSONEq(t, expectedJSON, string(resultingJSON)) var unmarshaledRequest StoreSamplesRequest - err = json.Unmarshal(expectedJSON, &unmarshaledRequest) + err = json.Unmarshal([]byte(expectedJSON), &unmarshaledRequest) require.NoError(t, err, "Unmarshal(expectedJSON, &unmarshaledRequest) resulted in err.") require.Equal(t, request, unmarshaledRequest) } diff --git a/documentation/prometheus-mixin/alerts.libsonnet b/documentation/prometheus-mixin/alerts.libsonnet index 563daab801..9a6de90d82 100644 --- a/documentation/prometheus-mixin/alerts.libsonnet +++ b/documentation/prometheus-mixin/alerts.libsonnet @@ -84,8 +84,8 @@ severity: 'warning', }, annotations: { - summary: 'Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.', - description: '{{ printf "%%.1f" $value }}%% errors while sending alerts from Prometheus %(prometheusName)s to Alertmanager {{$labels.alertmanager}}.' % $._config, + summary: 'More than 1% of alerts sent by Prometheus to a specific Alertmanager were affected by errors.', + description: '{{ printf "%%.1f" $value }}%% of alerts sent by Prometheus %(prometheusName)s to Alertmanager {{$labels.alertmanager}} were affected by errors.' 
% $._config, }, }, { diff --git a/go.mod b/go.mod index 9a4a35c4d1..ca76db8249 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,12 @@ module github.com/prometheus/prometheus -go 1.21.0 +go 1.22.0 -toolchain go1.22.5 +toolchain go1.23.0 require ( - github.com/Azure/azure-sdk-for-go/sdk/azcore v1.13.0 - github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.7.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4 v4.3.0 github.com/Code-Hex/go-generics-cache v1.5.1 @@ -17,34 +17,32 @@ require ( github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 github.com/cespare/xxhash/v2 v2.3.0 github.com/dennwc/varint v1.0.0 - github.com/digitalocean/godo v1.119.0 - github.com/docker/docker v27.1.1+incompatible - github.com/edsrzf/mmap-go v1.1.0 - github.com/envoyproxy/go-control-plane v0.12.0 + github.com/digitalocean/godo v1.131.0 + github.com/docker/docker v27.3.1+incompatible + github.com/edsrzf/mmap-go v1.2.0 + github.com/envoyproxy/go-control-plane v0.13.1 github.com/envoyproxy/protoc-gen-validate v1.1.0 github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/fsnotify/fsnotify v1.7.0 - github.com/go-kit/log v0.2.1 - github.com/go-logfmt/logfmt v0.6.0 github.com/go-openapi/strfmt v0.23.0 - github.com/go-zookeeper/zk v1.0.3 + github.com/go-zookeeper/zk v1.0.4 github.com/gogo/protobuf v1.3.2 github.com/golang/snappy v0.0.4 github.com/google/go-cmp v0.6.0 github.com/google/pprof v0.0.0-20240711041743-f6c9dda6c6da github.com/google/uuid v1.6.0 - github.com/gophercloud/gophercloud v1.14.0 + github.com/gophercloud/gophercloud v1.14.1 github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc github.com/grpc-ecosystem/grpc-gateway v1.16.0 - github.com/hashicorp/consul/api v1.29.4 + github.com/hashicorp/consul/api v1.30.0 github.com/hashicorp/nomad/api v0.0.0-20240717122358-3d93bd3778f3 - github.com/hetznercloud/hcloud-go/v2 v2.13.1 + github.com/hetznercloud/hcloud-go/v2 v2.17.0 github.com/ionos-cloud/sdk-go/v6 v6.2.1 github.com/json-iterator/go v1.1.12 - github.com/klauspost/compress v1.17.9 + github.com/klauspost/compress v1.17.11 github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b - github.com/linode/linodego v1.38.0 - github.com/miekg/dns v1.1.61 + github.com/linode/linodego v1.43.0 + github.com/miekg/dns v1.1.62 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1 @@ -52,52 +50,52 @@ require ( github.com/oklog/ulid v1.3.1 github.com/ovh/go-ovh v1.6.0 github.com/prometheus/alertmanager v0.27.0 - github.com/prometheus/client_golang v1.20.2 + github.com/prometheus/client_golang v1.20.5 github.com/prometheus/client_model v0.6.1 - github.com/prometheus/common v0.56.0 + github.com/prometheus/common v0.60.1 github.com/prometheus/common/assets v0.2.0 - github.com/prometheus/common/sigv4 v0.1.0 - github.com/prometheus/exporter-toolkit v0.11.0 - github.com/scaleway/scaleway-sdk-go v1.0.0-beta.29 + github.com/prometheus/exporter-toolkit v0.13.1 + github.com/prometheus/sigv4 v0.1.0 + github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30 github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c - github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.10.0 github.com/vultr/govultr/v2 v2.17.2 - 
go.opentelemetry.io/collector/pdata v1.12.0 - go.opentelemetry.io/collector/semconv v0.105.0 - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 - go.opentelemetry.io/otel v1.28.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0 - go.opentelemetry.io/otel/sdk v1.28.0 - go.opentelemetry.io/otel/trace v1.28.0 + go.opentelemetry.io/collector/pdata v1.18.0 + go.opentelemetry.io/collector/semconv v0.112.0 + go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 + go.opentelemetry.io/otel v1.31.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.31.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 + go.opentelemetry.io/otel/sdk v1.31.0 + go.opentelemetry.io/otel/trace v1.31.0 go.uber.org/atomic v1.11.0 - go.uber.org/automaxprocs v1.5.3 + go.uber.org/automaxprocs v1.6.0 go.uber.org/goleak v1.3.0 go.uber.org/multierr v1.11.0 - golang.org/x/oauth2 v0.22.0 - golang.org/x/sync v0.7.0 - golang.org/x/sys v0.22.0 - golang.org/x/text v0.16.0 - golang.org/x/time v0.5.0 - golang.org/x/tools v0.23.0 - google.golang.org/api v0.190.0 - google.golang.org/genproto/googleapis/api v0.0.0-20240725223205-93522f1f2a9f - google.golang.org/grpc v1.65.0 - google.golang.org/protobuf v1.34.2 + golang.org/x/oauth2 v0.23.0 + golang.org/x/sync v0.9.0 + golang.org/x/sys v0.27.0 + golang.org/x/text v0.20.0 + golang.org/x/tools v0.27.0 + google.golang.org/api v0.204.0 + google.golang.org/genproto/googleapis/api v0.0.0-20241015192408-796eee8c2d53 + google.golang.org/grpc v1.67.1 + google.golang.org/protobuf v1.35.2 gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 - k8s.io/api v0.29.3 - k8s.io/apimachinery v0.29.3 - k8s.io/client-go v0.29.3 + k8s.io/api v0.31.3 + k8s.io/apimachinery v0.31.3 + k8s.io/client-go v0.31.3 k8s.io/klog v1.0.0 k8s.io/klog/v2 v2.130.1 ) require ( - cloud.google.com/go/auth v0.7.3 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.3 // indirect - cloud.google.com/go/compute/metadata v0.5.0 // indirect + cloud.google.com/go/auth v0.10.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.5 // indirect + cloud.google.com/go/compute/metadata v0.5.2 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect github.com/Microsoft/go-winio v0.6.1 // indirect @@ -106,7 +104,7 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cilium/ebpf v0.11.0 // indirect - github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b // indirect + github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 // indirect github.com/containerd/cgroups/v3 v3.0.3 // indirect github.com/containerd/log v0.1.0 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect @@ -115,11 +113,10 @@ require ( github.com/docker/go-connections v0.4.0 // indirect github.com/docker/go-units v0.5.0 // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect - github.com/evanphx/json-patch v5.6.0+incompatible // indirect github.com/fatih/color v1.16.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/ghodss/yaml v1.0.0 // 
indirect - github.com/go-kit/kit v0.12.0 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/analysis v0.22.2 // indirect @@ -130,20 +127,20 @@ require ( github.com/go-openapi/spec v0.20.14 // indirect github.com/go-openapi/swag v0.22.9 // indirect github.com/go-openapi/validate v0.23.0 // indirect - github.com/go-resty/resty/v2 v2.13.1 // indirect + github.com/go-resty/resty/v2 v2.15.3 // indirect github.com/godbus/dbus/v5 v5.0.4 // indirect github.com/golang-jwt/jwt/v5 v5.2.1 // indirect - github.com/golang/glog v1.2.1 // indirect + github.com/golang/glog v1.2.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/gnostic-models v0.6.8 // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/google/s2a-go v0.1.8 // indirect - github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect github.com/googleapis/gax-go/v2 v2.13.0 // indirect github.com/gorilla/websocket v1.5.0 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect github.com/hashicorp/cronexpr v1.1.2 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect @@ -163,6 +160,8 @@ require ( github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mdlayher/socket v0.4.1 // indirect + github.com/mdlayher/vsock v1.2.1 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect @@ -176,35 +175,34 @@ require ( github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect github.com/pkg/errors v0.9.1 // indirect + github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/objx v0.5.2 // indirect + github.com/x448/float16 v0.8.4 // indirect github.com/xhit/go-str2duration/v2 v2.1.0 // indirect go.mongodb.org/mongo-driver v1.14.0 // indirect go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/otel/metric v1.28.0 // indirect + go.opentelemetry.io/otel/metric v1.31.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect - golang.org/x/crypto v0.25.0 // indirect + golang.org/x/crypto v0.29.0 // indirect golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect - golang.org/x/mod v0.19.0 // indirect - golang.org/x/net v0.27.0 // indirect - golang.org/x/term v0.22.0 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240730163845-b1a4ccb954bf // indirect + golang.org/x/mod v0.22.0 // indirect + golang.org/x/net v0.31.0 // indirect + golang.org/x/term v0.26.0 // indirect + golang.org/x/time v0.7.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect 
gotest.tools/v3 v3.0.3 // indirect k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect - k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect - sigs.k8s.io/yaml v1.3.0 // indirect -) - -replace ( - k8s.io/klog => github.com/simonpasquier/klog-gokit v0.3.0 - k8s.io/klog/v2 => github.com/simonpasquier/klog-gokit/v3 v3.3.0 + sigs.k8s.io/yaml v1.4.0 // indirect ) // Exclude linodego v1.0.0 as it is no longer published on github. diff --git a/go.sum b/go.sum index 7e763af265..ccf1d9e376 100644 --- a/go.sum +++ b/go.sum @@ -1,45 +1,17 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= -cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= -cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= -cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= -cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= -cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= -cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= -cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= -cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= -cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= -cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= -cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= -cloud.google.com/go/auth v0.7.3 h1:98Vr+5jMaCZ5NZk6e/uBgf60phTk/XN84r8QEWB9yjY= -cloud.google.com/go/auth v0.7.3/go.mod h1:HJtWUx1P5eqjy/f6Iq5KeytNpbAcGolPhOgyop2LlzA= -cloud.google.com/go/auth/oauth2adapt v0.2.3 h1:MlxF+Pd3OmSudg/b1yZ5lJwoXCEaeedAguodky1PcKI= -cloud.google.com/go/auth/oauth2adapt v0.2.3/go.mod h1:tMQXOfZzFuNuUxOypHlQEXgdfX5cuhwU+ffUuXRJE8I= -cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= -cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= -cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= -cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= -cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= -cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/compute/metadata v0.5.0 h1:Zr0eK8JbFv6+Wi4ilXAR8FJ3wyNdpxHKJNPos6LTZOY= -cloud.google.com/go/compute/metadata v0.5.0/go.mod h1:aHnloV2TPI38yx4s9+wAZhHykWvVCfu7hQbF+9CWoiY= -cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= -cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= -cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= -cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= -cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= -cloud.google.com/go/pubsub v1.3.1/go.mod 
h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= -cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= -cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= -cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= -cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= -cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= -dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.13.0 h1:GJHeeA2N7xrG3q30L2UXDyuWRzDM900/65j70wcM4Ww= -github.com/Azure/azure-sdk-for-go/sdk/azcore v1.13.0/go.mod h1:l38EPgmsp71HHLq9j7De57JcKOWPyhrsW1Awm1JS6K0= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0 h1:tfLQ34V6F7tVSwoTf/4lH5sE0o6eCJuNDTmH09nDpbc= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.7.0/go.mod h1:9kIvujWAA58nmPmWB1m23fyWic1kYZMxD9CxaWn4Qpg= +cloud.google.com/go/auth v0.10.0 h1:tWlkvFAh+wwTOzXIjrwM64karR1iTBZ/GRr0S/DULYo= +cloud.google.com/go/auth v0.10.0/go.mod h1:xxA5AqpDrvS+Gkmo9RqrGGRh6WSNKKOXhY3zNOr38tI= +cloud.google.com/go/auth/oauth2adapt v0.2.5 h1:2p29+dePqsCHPP1bqDJcKj4qxRyYCcbzKpFyKGt3MTk= +cloud.google.com/go/auth/oauth2adapt v0.2.5/go.mod h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8= +cloud.google.com/go/compute/metadata v0.5.2 h1:UxK4uu/Tn+I3p2dYWTfiX4wva7aYlKixAHn3fyqngqo= +cloud.google.com/go/compute/metadata v0.5.2/go.mod h1:C66sj2AluDcIqakBq/M8lw8/ybHgOZqin2obFxa/E5k= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 h1:JZg6HRh6W6U4OLl6lk7BZ7BLisIzM9dG1R50zUk9C/M= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0/go.mod h1:YL1xnZ6QejvQHWJrX/AvhFl4WW4rqHVoKspWNVwFk0M= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.0 h1:B/dfvscEQtew9dVuoxqxrUKKv8Ih2f55PydknDamU+g= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.8.0/go.mod h1:fiPSssYvltE08HJchL04dOy+RD4hgrjph0cwGGMntdI= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.0 h1:+m0M/LFxN43KvULkDNfdXOgrjtg6UYJPFBJyuEcRCAw= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.0/go.mod h1:PwOyop78lveYMRs6oCxjiVyBdyCgIYH6XHIVZO9/SFQ= github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 h1:ywEEhmNahHBihViHepv3xPBn1663uRv2t2q/ESv9seY= github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0/go.mod h1:iZDifYGJTIgIIkYRNWPENUnqx6bJ2xnSDFI2tjwZNuY= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5 v5.7.0 h1:LkHbJbgF3YyvC53aqYGR+wWQDn2Rdp9AQdGndf9QvY4= @@ -52,49 +24,37 @@ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1. 
github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.1.1/go.mod h1:c/wcGeGx5FUPbM/JltUYHZcKmigwyVLJlDq+4HdtXaw= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJTmL004Abzc5wDB5VtZG2PJk5ndYDgVacGqfirKxjM= +github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE= github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 h1:XHOnouVk1mxXfQidrMEnLlPk9UMeRtyBTnEFtxkV0kU= github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/Code-Hex/go-generics-cache v1.5.1 h1:6vhZGc5M7Y/YD8cIUcY8kcuQLB4cHR7U+0KMqAA0KcU= github.com/Code-Hex/go-generics-cache v1.5.1/go.mod h1:qxcC9kRVrct9rHeiYpFWSoW1vxyillCVzX13KZG8dl4= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/KimMachineGun/automemlimit v0.6.1 h1:ILa9j1onAAMadBsyyUJv5cack8Y1WT26yLj/V+ulKp8= github.com/KimMachineGun/automemlimit v0.6.1/go.mod h1:T7xYht7B8r6AG/AqFcUdc7fzd2bIdBKmepfP2S1svPY= -github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= -github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= -github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI= -github.com/VividCortex/gohistogram v1.0.0/go.mod h1:Pf5mBqqDxYaXu3hDrrU+w6nw50o/4+TcAqDqk/vUH7g= -github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c= github.com/alecthomas/kingpin/v2 v2.4.0 h1:f48lwail6p8zpO1bC4TxtqACaGqHYA22qkHjHpqDjYY= github.com/alecthomas/kingpin/v2 v2.4.0/go.mod h1:0gyi0zQnjuFk8xrkNKamJoyUo382HRL7ATRpFZCw6tE= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= -github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 h1:t3eaIm0rUkzbrIewtiFmMK5RXHej2XnoXNhxVsAYUfg= github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= -github.com/apache/thrift v0.13.0/go.mod 
h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA= github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/aryann/difflib v0.0.0-20170710044230-e206f873d14a/go.mod h1:DAHtR1m6lCRdSC2Tm3DSWRPvIPr6xNKyeHdqDQSQT+A= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= -github.com/aws/aws-lambda-go v1.13.3/go.mod h1:4UKl9IzQMoD+QF79YdCuzCwp8VbmG4VAQwij/eHl5CU= -github.com/aws/aws-sdk-go v1.27.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= -github.com/aws/aws-sdk-go v1.38.35/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= github.com/aws/aws-sdk-go v1.55.5 h1:KKUZBfBoyqy5d3swXyiC7Q76ic40rYcbqH7qjh59kzU= github.com/aws/aws-sdk-go v1.55.5/go.mod h1:eRwEWoyTWFMVYVQzKMNHWP5/RV4xIUGMQfXQHfHkpNU= -github.com/aws/aws-sdk-go-v2 v0.18.0/go.mod h1:JWVYvqSMppoMJC0x5wdwiImzgXTI9FuZwxzkQq9wy+g= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 h1:6df1vn4bBlDDo4tARvBm7l6KA9iVMnE3NWizDeWSrps= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3/go.mod h1:CIWtjkly68+yqLPbvwwR/fjNJA/idrtULjZWh2v1ys0= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= @@ -102,39 +62,26 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= -github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ= -github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= -github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= -github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/cilium/ebpf v0.11.0 h1:V8gS/bTCCjX9uUnkUFUpPsksM8n1lXBAvHcpiFk1X2Y= github.com/cilium/ebpf v0.11.0/go.mod h1:WE7CZAnqOL2RouJ4f1uyNhqr2P4CCvXFIqdRDUgWsVs= github.com/circonus-labs/circonus-gometrics 
v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= -github.com/clbanning/x2j v0.0.0-20191024224557-825249438eec/go.mod h1:jMjuTZXRI4dUb/I5gc9Hdhagfvm9+RyrPryS/auMzxE= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b h1:ga8SEFjZ60pxLcmhnThWgvH2wg8376yUJmPhEH4H3kw= -github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= -github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= -github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd/go.mod h1:sE/e/2PUdi/liOCUjSTXgM1o87ZssimdTWN964YiIeI= +github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 h1:N+3sFI5GUjRKBi+i0TxYVST9h4Ie192jJWpHvthBBgg= +github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/containerd/cgroups/v3 v3.0.3 h1:S5ByHZ/h9PMe5IOQoN7E+nMc2UcLEM/V48DGDJ9kip0= github.com/containerd/cgroups/v3 v3.0.3/go.mod h1:8HBe7V3aWGLFPd/k03swSIsGjZhHI2WzJmticMgVuz0= github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= -github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= -github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= -github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -142,39 +89,32 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE= github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA= -github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/digitalocean/godo v1.119.0 h1:dmFNQwSIAcH3z+FVovHLkazKDC2uA8oOlGvg5+H4vRw= -github.com/digitalocean/godo v1.119.0/go.mod h1:WQVH83OHUy6gC4gXpEVQKtxTd4L5oCp+5OialidkPLY= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= +github.com/digitalocean/godo v1.131.0 h1:0WHymufAV5avpodT0h5/pucUVfO4v7biquOIqhLeROY= +github.com/digitalocean/godo 
v1.131.0/go.mod h1:PU8JB6I1XYkQIdHFop8lLAY9ojp6M0XcU0TWaQSxbrc= github.com/distribution/reference v0.5.0 h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0= github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= github.com/dnaeon/go-vcr v1.2.0 h1:zHCHvJYTMh1N7xnV7zf1m1GPBF9Ad0Jk/whtQ1663qI= github.com/dnaeon/go-vcr v1.2.0/go.mod h1:R4UdLID7HZT3taECzJs4YgbbH6PIGXB6W/sc5OLb6RQ= -github.com/docker/docker v27.1.1+incompatible h1:hO/M4MtV36kzKldqnA37IWhebRA+LnqqcqDja6kVaKY= -github.com/docker/docker v27.1.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker v27.3.1+incompatible h1:KttF0XoteNTicmUtBO0L2tP+J7FGRFTjaEF4k6WdhfI= +github.com/docker/docker v27.3.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= -github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= -github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= -github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= -github.com/edsrzf/mmap-go v1.1.0 h1:6EUwBLQ/Mcr1EYLE4Tn1VdW1A4ckqCQWZBw8Hr0kjpQ= -github.com/edsrzf/mmap-go v1.1.0/go.mod h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q= +github.com/edsrzf/mmap-go v1.2.0 h1:hXLYlkbaPzt1SaQk+anYwKSRNhufIDCchSPkUD6dD84= +github.com/edsrzf/mmap-go v1.2.0/go.mod h1:19H/e8pUPLicwkyNgOykDXkJ9F0MHE+Z52B8EIth78Q= github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= -github.com/envoyproxy/go-control-plane v0.6.9/go.mod h1:SBwIajubJHhxtWwsL9s8ss4safvEdbitLhGGK48rN6g= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= -github.com/envoyproxy/go-control-plane v0.12.0 h1:4X+VP1GHd1Mhj6IB5mMeGbLCleqxjletLK6K0rbxyZI= -github.com/envoyproxy/go-control-plane v0.12.0/go.mod h1:ZBTaoJ23lqITozF0M6G4/IragXCQKCnYbmlmtHvwRG0= +github.com/envoyproxy/go-control-plane v0.13.1 h1:vPfJZCkob6yTMEgS+0TwfTUfbHjfy/6vOJ8hUWX/uXE= +github.com/envoyproxy/go-control-plane v0.13.1/go.mod h1:X45hY0mufo6Fd0KW3rqsGvQMw58jvjymeCzBU3mWyHw= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM= github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4= -github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= -github.com/evanphx/json-patch v5.6.0+incompatible/go.mod 
h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb h1:IT4JYU7k4ikYg1SCxNI1/Tieq/NFvh6dzLdgi7eu0tM= github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb/go.mod h1:bH6Xx7IW64qjjJq8M2u4dxNaBiDfKK+z/3eGDpXEQhc= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= @@ -184,31 +124,19 @@ github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4= -github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA= github.com/frankban/quicktest v1.14.5/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= -github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= +github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= -github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= -github.com/go-kit/kit v0.10.0/go.mod h1:xUsJbQ/Fp4kEt7AFgCuvyX4a71u8h9jB8tj/ORgOZ7o= -github.com/go-kit/kit v0.12.0 h1:e4o3o3IsBfAKQh5Qbbiqyfu97Ku7jrO/JbohvztANh4= -github.com/go-kit/kit v0.12.0/go.mod h1:lHd+EkCZPIwYItmGDDRdhinkzX2A1sj+M9biaEaizzs= -github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= -github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= -github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= -github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= -github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi4= -github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= +github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ 
-232,61 +160,43 @@ github.com/go-openapi/swag v0.22.9 h1:XX2DssF+mQKM2DHsbgZK74y/zj4mo9I99+89xUmuZC github.com/go-openapi/swag v0.22.9/go.mod h1:3/OXnFfnMAwBD099SwYRk7GD3xOrr1iL7d/XNLXVVwE= github.com/go-openapi/validate v0.23.0 h1:2l7PJLzCis4YUGEoW6eoQw3WhyM65WSIcjX6SQnlfDw= github.com/go-openapi/validate v0.23.0/go.mod h1:EeiAZ5bmpSIOJV1WLfyYF9qp/B1ZgSaEpHTJHtN5cbE= -github.com/go-resty/resty/v2 v2.13.1 h1:x+LHXBI2nMB1vqndymf26quycC4aggYJ7DECYbiz03g= -github.com/go-resty/resty/v2 v2.13.1/go.mod h1:GznXlLxkq6Nh4sU59rPmUw3VtgpO3aS96ORAI6Q7d+0= -github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= +github.com/go-resty/resty/v2 v2.15.3 h1:bqff+hcqAflpiF591hhJzNdkRsFhlB96CYfBwSFvql8= +github.com/go-resty/resty/v2 v2.15.3/go.mod h1:0fHAoK7JoBy/Ch36N8VFeMsK7xQOHhvWaC3iOktwmIU= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= -github.com/go-zookeeper/zk v1.0.3 h1:7M2kwOsc//9VeeFiPtf+uSJlVpU66x9Ba5+8XK7/TDg= -github.com/go-zookeeper/zk v1.0.3/go.mod h1:nOB03cncLtlp4t+UAkGSV+9beXP/akpekBwL+UX1Qcw= +github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= +github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= +github.com/go-zookeeper/zk v1.0.4 h1:DPzxraQx7OrPyXq2phlGlNSIyWEsAox0RJmjTseMV6I= +github.com/go-zookeeper/zk v1.0.4/go.mod h1:nOB03cncLtlp4t+UAkGSV+9beXP/akpekBwL+UX1Qcw= github.com/godbus/dbus/v5 v5.0.4 h1:9349emZab16e7zQvpmsbtjc18ykshndd8y2PG3sgJbA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= -github.com/golang/glog v1.2.1 h1:OptwRhECazUx5ix5TTWC3EZhsZEHWcYWY4FQHTIubm4= -github.com/golang/glog v1.2.1/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= -github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/glog v1.2.2 h1:1+mZ9upx1Dh6FmUTFR1naJ77miKiXgALjWOZ3NVFPmY= +github.com/golang/glog v1.2.2/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da 
h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= -github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= -github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= -github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.1 h1:gK4Kx5IaGY9CD5sPJ36FHiBJ6ZXl0kilRiiCj+jdYp4= github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA= github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= @@ -295,13 +205,9 @@ github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5a github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 
-github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= @@ -310,54 +216,29 @@ github.com/google/go-querystring v1.1.0/go.mod h1:Kcdr2DB4koayq7X8pmAG4sNG59So17 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= -github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= -github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= -github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= -github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20240711041743-f6c9dda6c6da h1:xRmpO92tb8y+Z85iUOMOicpCfaYcv7o3Cg3wKrIpg8g= github.com/google/pprof v0.0.0-20240711041743-f6c9dda6c6da/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= -github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= -github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs= -github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= -github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= -github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/googleapis/enterprise-certificate-proxy v0.3.4 h1:XYIDZApgAnrN1c855gTgghdIA6Stxb52D5RnLI1SLyw= +github.com/googleapis/enterprise-certificate-proxy v0.3.4/go.mod h1:YKe7cfqYXjKGpGvmSg28/fFvhNzinZQm8DGnaburhGA= github.com/googleapis/gax-go/v2 v2.13.0 
h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s= github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= -github.com/gophercloud/gophercloud v1.14.0 h1:Bt9zQDhPrbd4qX7EILGmy+i7GP35cc+AAL2+wIJpUE8= -github.com/gophercloud/gophercloud v1.14.0/go.mod h1:aAVqcocTSXh2vYFZ1JTvx4EQmfgzxRcNupUfxZbBNDM= -github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= -github.com/gorilla/context v1.1.1/go.mod h1:kBGZzfjB9CEq2AlWe17Uuf7NDRt0dE0s8S51q0aT7Yg= -github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= -github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/gophercloud/gophercloud v1.14.1 h1:DTCNaTVGl8/cFu58O1JwWgis9gtISAFONqpMKNg/Vpw= +github.com/gophercloud/gophercloud v1.14.1/go.mod h1:aAVqcocTSXh2vYFZ1JTvx4EQmfgzxRcNupUfxZbBNDM= github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc h1:GN2Lv3MGO7AS6PrRoT6yV5+wkrOpcszoIsO4+4ds248= github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk= -github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= -github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= -github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= -github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE= -github.com/hashicorp/consul/api v1.29.4 h1:P6slzxDLBOxUSj3fWo2o65VuKtbtOXFi7TSSgtXutuE= -github.com/hashicorp/consul/api v1.29.4/go.mod h1:HUlfw+l2Zy68ceJavv2zAyArl2fqhGWnMycyt56sBgg= -github.com/hashicorp/consul/proto-public v0.6.2 h1:+DA/3g/IiKlJZb88NBn0ZgXrxJp2NlvCZdEyl+qxvL0= -github.com/hashicorp/consul/proto-public v0.6.2/go.mod h1:cXXbOg74KBNGajC+o8RlA502Esf0R9prcoJgiOX/2Tg= -github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I= +github.com/hashicorp/consul/api v1.30.0 h1:ArHVMMILb1nQv8vZSGIwwQd2gtc+oSQZ6CalyiyH2XQ= +github.com/hashicorp/consul/api v1.30.0/go.mod h1:B2uGchvaXVW2JhFoS8nqTxMD5PBykr4ebY4JWHTTeLM= github.com/hashicorp/consul/sdk v0.16.1 h1:V8TxTnImoPD5cj0U9Spl0TUxcytjcbbJeADFF07KdHg= github.com/hashicorp/consul/sdk v0.16.1/go.mod h1:fSXvwxB2hmh1FMZCNl6PwX0Q/1wdWtHJcZ7Ea5tns0s= github.com/hashicorp/cronexpr v1.1.2 h1:wG/ZYIKT+RT3QkOdgYc+xsKWVRgnxJ1OJtjjy84fJ9A= @@ -366,7 +247,6 @@ github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brv github.com/hashicorp/errwrap v1.1.0 
h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= @@ -384,7 +264,6 @@ github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9 github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= -github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc= github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= @@ -395,71 +274,51 @@ github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/b github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= -github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= -github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.6.0 h1:uL2shRDx7RTrOrTCUZEGP/wJUFiUI8QT6E7z5o8jga4= github.com/hashicorp/golang-lru v0.6.0/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= -github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= -github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/memberlist v0.5.0 h1:EtYPN8DpAURiapus508I4n9CzHs2W+8NZGbmmR/prTM= github.com/hashicorp/memberlist v0.5.0/go.mod h1:yvyXLpo0QaGE59Y7hDTsTzDD25JYBZ4mHgHUZ8lrOI0= github.com/hashicorp/nomad/api v0.0.0-20240717122358-3d93bd3778f3 h1:fgVfQ4AC1avVOnu2cfms8VAiD8lUq3vWI8mTocOXN/w= github.com/hashicorp/nomad/api v0.0.0-20240717122358-3d93bd3778f3/go.mod h1:svtxn6QnrQ69P23VvIWMR34tg3vmwLz4UdUzm1dSCgE= -github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= github.com/hashicorp/serf v0.10.1 h1:Z1H2J60yRKvfDYAOZLd2MU0ND4AH/WDz7xYHDWQsIPY= github.com/hashicorp/serf v0.10.1/go.mod h1:yL2t6BqATOLGc5HF7qbFkTfXoPIY0WZdWHfEvMqbG+4= 
-github.com/hetznercloud/hcloud-go/v2 v2.13.1 h1:jq0GP4QaYE5d8xR/Zw17s9qoaESRJMXfGmtD1a/qckQ= -github.com/hetznercloud/hcloud-go/v2 v2.13.1/go.mod h1:dhix40Br3fDiBhwaSG/zgaYOFFddpfBm/6R1Zz0IiF0= -github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= -github.com/hudl/fargo v1.3.0/go.mod h1:y3CKSmjA+wD2gak7sUSXTAoopbhU08POFhmITJgmKTg= -github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/hetznercloud/hcloud-go/v2 v2.17.0 h1:ge0w2piey9SV6XGyU/wQ6HBR24QyMbJ3wLzezplqR68= +github.com/hetznercloud/hcloud-go/v2 v2.17.0/go.mod h1:zfyZ4Orx+mPpYDzWAxXR7DHGL50nnlZ5Edzgs1o6f/s= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= -github.com/influxdata/influxdb1-client v0.0.0-20191209144304-8bf82d3c094d/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= github.com/ionos-cloud/sdk-go/v6 v6.2.1 h1:mxxN+frNVmbFrmmFfXnBC3g2USYJrl6mc1LW2iNYbFY= github.com/ionos-cloud/sdk-go/v6 v6.2.1/go.mod h1:SXrO9OGyWjd2rZhAhEpdYN6VUAODzzqRdqA9BCviQtI= github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww= github.com/jarcoal/httpmock v1.3.1/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg= -github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= -github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= -github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= -github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= -github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= 
github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4dN7jwJOQ1U= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= -github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= +github.com/keybase/go-keychain v0.0.0-20231219164618-57a3676c3af6 h1:IsMZxCuZqKuao2vNdfD82fjjgPLfyHLpR41Z88viRWs= +github.com/keybase/go-keychain v0.0.0-20231219164618-57a3676c3af6/go.mod h1:3VeWNIJaW+O5xpRQbPp0Ybqu1vJd/pm7s2F473HRrkw= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b h1:udzkj9S/zlT5X367kqJis0QP7YMxobob6zhzq6Yre00= github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b/go.mod h1:pcaDhQK0/NJZEvtCO0qQPPropqV0sJOJ6YW7X+9kRwM= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= @@ -470,11 +329,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM= -github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= -github.com/linode/linodego v1.38.0 h1:wP3oW9OhGc6vhze8NPf2knbwH4TzSbrjzuCd9okjbTY= -github.com/linode/linodego v1.38.0/go.mod h1:L7GXKFD3PoN2xSEtFc04wIXP5WK65O10jYQx0PQISWQ= -github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= +github.com/linode/linodego v1.43.0 h1:sGeBB3caZt7vKBoPS5p4AVzmlG4JoqQOdigIibx3egk= +github.com/linode/linodego v1.43.0/go.mod h1:n4TMFu1UVNala+icHqrTEFFaicYSF74cSAUG5zkTwfA= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= @@ -485,7 +341,6 @@ github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= 
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= @@ -493,27 +348,23 @@ github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27k github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/maxatome/go-testdeep v1.12.0 h1:Ql7Go8Tg0C1D/uMMX59LAoYK7LffeJQ6X2T04nTH68g= github.com/maxatome/go-testdeep v1.12.0/go.mod h1:lPZc/HAcJMP92l7yI6TRz1aZN5URwUBUAfUNvrclaNM= -github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg= +github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U= +github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA= +github.com/mdlayher/vsock v1.2.1 h1:pC1mTJTvjo1r9n9fbm7S1j04rCgCzhCOS5DY0zqHlnQ= +github.com/mdlayher/vsock v1.2.1/go.mod h1:NRfCibel++DgeMD8z/hP+PPTjlNJsdPOmxcnENvE+SE= github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= -github.com/miekg/dns v1.1.61 h1:nLxbwF3XxhwVSm8g9Dghm9MHPaUZuqhPiGL+675ZmEs= -github.com/miekg/dns v1.1.61/go.mod h1:mnAarhS3nWaW+NVP2wTkYVIZyHNJ098SJZUki3eykwQ= -github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= +github.com/miekg/dns v1.1.62 h1:cN8OuEF1/x5Rq6Np+h1epln8OiyPWV+lROx9LxcGgIQ= +github.com/miekg/dns v1.1.62/go.mod h1:mvDlcItzm+br7MToIKqkglaGhlFMHJ9DTNNWONWXbNQ= github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= -github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/go-testing-interface v1.14.1 h1:jrgshOhYAUVNMAJiKbEu7EqAwgJJ2JqpQmpLJOu07cU= github.com/mitchellh/go-testing-interface v1.14.1/go.mod h1:gfgS7OtZj6MA4U1UrDRp04twqAjfvlZyCfX3sDjEym8= -github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= -github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= @@ -534,64 +385,37 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8m github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod 
h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= -github.com/nats-io/jwt v0.3.2/go.mod h1:/euKqTS1ZD+zzjYrY7pseZrTtWQSjujC7xjPc8wL6eU= -github.com/nats-io/nats-server/v2 v2.1.2/go.mod h1:Afk+wRZqkMQs/p45uXdrVLuab3gwv3Z8C4HTBu8GD/k= -github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= -github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= -github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= -github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1 h1:dOYG7LS/WK00RWZc8XGgcUTlTxpp3mKhdR2Q9z9HbXM= github.com/nsf/jsondiff v0.0.0-20230430225905-43f6cf3098c1/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8= -github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= -github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= github.com/oklog/run v1.1.0 h1:GEenZ1cK0+q0+wsJew9qUg/DyD8k3JzYsZAi5gYi2mA= github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DVU= github.com/oklog/ulid v1.3.1 h1:EGfNDEx6MqHz8B3uNV6QAib1UR2Lm97sHi3ocA6ESJ4= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= -github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= -github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.7.0 h1:WSHQ+IS43OoUrWtD1/bbclrwK8TTH5hzp+umCiuxHgs= -github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo/v2 v2.13.0 h1:0jY9lJquiL8fcf3M4LAXN5aMlS/b2BV86HFFPCPMgE4= -github.com/onsi/ginkgo/v2 v2.13.0/go.mod h1:TE309ZR8s5FsKKpuB1YAQYBzCaAfUgatB/xlT/ETL/o= -github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/onsi/gomega v1.29.0 h1:KIA/t2t5UBzoirT4H9tsML45GEbo3ouUnBHsCfD2tVg= -github.com/onsi/gomega v1.29.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= -github.com/op/go-logging v0.0.0-20160315200505-970db520ece7/go.mod h1:HzydrMdWErDVzsI23lYNej1Htcns9BCg93Dk0bBINWk= +github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= +github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= +github.com/onsi/gomega v1.19.0 h1:4ieX6qQjPP/BfC3mpsAtIGGlxTWPeA3Inl/7DtXw1tw= +github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM= github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0= github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= -github.com/opentracing-contrib/go-observer v0.0.0-20170622124052-a52f23424492/go.mod 
h1:Ngi6UdF0k5OKD5t5wlmGhe/EDKPoUM3BXZSSfIuJbis= -github.com/opentracing/basictracer-go v1.0.0/go.mod h1:QfBfYuafItcjQuMwinw9GhYKwFXS9KnPs5lxoYwgW74= -github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/openzipkin-contrib/zipkin-go-opentracing v0.4.5/go.mod h1:/wsWhb9smxSfWAKL3wpBW7V8scJMt8N8gnaMCS9E/cA= -github.com/openzipkin/zipkin-go v0.1.6/go.mod h1:QgAqvLzwWbR/WpD4A3cGpPtJrZXNIiJc5AZX7/PBEpw= -github.com/openzipkin/zipkin-go v0.2.1/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= -github.com/openzipkin/zipkin-go v0.2.2/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4= github.com/ovh/go-ovh v1.6.0 h1:ixLOwxQdzYDx296sXcgS35TOPEahJkpjMGtzPadCjQI= github.com/ovh/go-ovh v1.6.0/go.mod h1:cTVDnl94z4tl8pP1uZ/8jlVxntjSIf09bNcQ5TJSC7c= -github.com/pact-foundation/pact-go v1.0.4/go.mod h1:uExwJY4kCzNPcHRj+hCR/HBbOOIwwtUjcrb0b5/5kLM= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= -github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= -github.com/performancecopilot/speed v3.0.0+incompatible/go.mod h1:/CLtqpZ5gBg1M9iaPbIdPPGyKcA8hKdoy6hAWba7Yac= -github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= -github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -602,84 +426,52 @@ github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P github.com/prometheus/alertmanager v0.27.0 h1:V6nTa2J5V4s8TG4C4HtrBP/WNSebCCTYGGv4qecA/+I= github.com/prometheus/alertmanager v0.27.0/go.mod h1:8Ia/R3urPmbzJ8OsdvmZvIprDwvwmYCmUbwBL+jlPOE= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= 
-github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod h1:p2iRAGwDERtqlqzRXnrOVns+ignqQo//hLXqYxZYVNs= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= -github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeDPbaTKGT+JTgUa3og= github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= -github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= -github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.20.2 h1:5ctymQzZlyOON1666svgwn3s6IKWgfbjsejTMiXIyjg= -github.com/prometheus/client_golang v1.20.2/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= -github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.1.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.2.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA= github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= -github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= -github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= -github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.56.0 h1:UffReloqkBtvtQEYDg2s+uDPGRrJyC6vZWPGXf6OhPY= -github.com/prometheus/common v0.56.0/go.mod h1:7uRPFSUTbfZWsJ7MHY56sqt7hLQu3bxXHDnNhl8E9qI= +github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= +github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM= github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI= -github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= -github.com/prometheus/common/sigv4 v0.1.0/go.mod h1:2Jkxxk9yYvCkE5G1sQT7GuEXm57JrvHu9k5YwTjsNtI= -github.com/prometheus/exporter-toolkit v0.11.0 h1:yNTsuZ0aNCNFQ3aFTD2uhPOvr4iD7fdBvKPAEGkNf+g= -github.com/prometheus/exporter-toolkit v0.11.0/go.mod h1:BVnENhnNecpwoTLiABx7mrPB/OLRIgN74qlQbV+FK1Q= +github.com/prometheus/exporter-toolkit v0.13.1 
h1:Evsh0gWQo2bdOHlnz9+0Nm7/OFfIwhE2Ws4A2jIlR04= +github.com/prometheus/exporter-toolkit v0.13.1/go.mod h1:ujdv2YIOxtdFxxqtloLpbqmxd5J0Le6IITUvIRSWjj0= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= -github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= -github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= -github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= -github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= -github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= +github.com/prometheus/sigv4 v0.1.0 h1:FgxH+m1qf9dGQ4w8Dd6VkthmpFQfGTzUeavMoQeG1LA= +github.com/prometheus/sigv4 v0.1.0/go.mod h1:doosPW9dOitMzYe2I2BN0jZqUuBrGPbXrNsTScN18iU= +github.com/redis/go-redis/v9 v9.6.1 h1:HHDteefn6ZkTtY5fGUE8tj8uy85AHk6zP7CpzIAM0y4= +github.com/redis/go-redis/v9 v9.6.1/go.mod h1:0C0c6ycQsdpVNQpxb1njEQIqkx5UcsM8FJCQLgE9+RA= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= -github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= -github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E= -github.com/scaleway/scaleway-sdk-go v1.0.0-beta.29 h1:BkTk4gynLjguayxrYxZoMZjBnAOh7ntQvUkOFmkMqPU= -github.com/scaleway/scaleway-sdk-go v1.0.0-beta.29/go.mod h1:fCa7OJZ/9DRTnOKmxvT6pn+LPWUptQAmHF/SBJUGEcg= +github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30 h1:yoKAVkEVwAqbGbR8n87rHQ1dulL25rKloGadb3vm770= +github.com/scaleway/scaleway-sdk-go v1.0.0-beta.30/go.mod h1:sH0u6fq6x4R5M7WxkoQFY/o7UaiItec0o1LinLCJNq8= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/shoenig/test v1.7.1 h1:UJcjSAI3aUKx52kfcfhblgyhZceouhvvs3OYdWgn+PY= github.com/shoenig/test v1.7.1/go.mod h1:UxJ6u/x2v/TNs/LoLxBNJRV9DiwBBKYxXSyczsBHFoI= github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c h1:aqg5Vm5dwtvL+YgDpBcK1ITf3o96N/K7/wsRXQnUTEs= github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c/go.mod h1:owqhoLW1qZoYLZzLnBw+QkPP9WZnjlSWihhxAJC1+/M= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= 
-github.com/simonpasquier/klog-gokit v0.3.0 h1:TkFK21cbwDRS+CiystjqbAiq5ubJcVTk9hLUck5Ntcs= -github.com/simonpasquier/klog-gokit v0.3.0/go.mod h1:+SUlDQNrhVtGt2FieaqNftzzk8P72zpWlACateWxA9k= -github.com/simonpasquier/klog-gokit/v3 v3.3.0 h1:HMzH999kO5gEgJTaWWO+xjncW5oycspcsBnjn9b853Q= -github.com/simonpasquier/klog-gokit/v3 v3.3.0/go.mod h1:uSbnWC3T7kt1dQyY9sjv0Ao1SehMAJdVnUNSKhjaDsg= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= -github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= -github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= -github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= -github.com/sony/gobreaker v0.4.1/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY= -github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= -github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= -github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= -github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= @@ -695,420 +487,183 @@ github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1F github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= -github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= -github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/vultr/govultr/v2 v2.17.2 h1:gej/rwr91Puc/tgh+j33p/BLR16UrIPnSr+AIwYWZQs= github.com/vultr/govultr/v2 v2.17.2/go.mod 
h1:ZFOKGWmgjytfyjeyAdhQlSWwTjh2ig+X49cAp50dzXI= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= -github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= -github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= -go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= go.mongodb.org/mongo-driver v1.14.0 h1:P98w8egYRjYe3XDjxhYJagTokP/H6HzlsnojRgZRd80= go.mongodb.org/mongo-driver v1.14.0/go.mod h1:Vzb0Mk/pa7e6cWw85R4F/endUC3u0U9jGcNU603k65c= -go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= -go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= -go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= -go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= -go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= -go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/collector/pdata v1.12.0 h1:Xx5VK1p4VO0md8MWm2icwC1MnJ7f8EimKItMWw46BmA= -go.opentelemetry.io/collector/pdata v1.12.0/go.mod h1:MYeB0MmMAxeM0hstCFrCqWLzdyeYySim2dG6pDT6nYI= -go.opentelemetry.io/collector/semconv v0.105.0 h1:8p6dZ3JfxFTjbY38d8xlQGB1TQ3nPUvs+D0RERniZ1g= -go.opentelemetry.io/collector/semconv v0.105.0/go.mod h1:yMVUCNoQPZVq/IPfrHrnntZTWsLf5YGZ7qwKulIl5hw= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 h1:4K4tsIXefpVJtvA/8srF4V4y0akAoPHkIslgAkjixJA= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0/go.mod h1:jjdQuTGVsXV4vSs+CJ2qYDeDPf9yIJV23qlIzBm73Vg= -go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= -go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 h1:3Q/xZUyC1BBkualc9ROb4G8qkH90LXEIICcs5zv1OYY= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0/go.mod h1:s75jGIWA9OfCMzF0xr+ZgfrB5FEbbV7UuYo32ahUiFI= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0 h1:R3X6ZXmNPRR8ul6i3WgFURCHzaXjHdm0karRG/+dj3s= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0/go.mod h1:QWFXnDavXWwMx2EEcZsf3yxgEKAqsxQ+Syjp+seyInw= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0 h1:j9+03ymgYhPKmeXGk5Zu+cIZOlVzd9Zv7QIiyItjFBU= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0/go.mod 
h1:Y5+XiUG4Emn1hTfciPzGPJaSI+RpDts6BnCIir0SLqk= -go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= -go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= -go.opentelemetry.io/otel/sdk v1.28.0 h1:b9d7hIry8yZsgtbmM0DKyPWMMUMlK9NEKuIG4aBqWyE= -go.opentelemetry.io/otel/sdk v1.28.0/go.mod h1:oYj7ClPUA7Iw3m+r7GeEjz0qckQRJK2B8zjcZEfu7Pg= -go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= -go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.opentelemetry.io/collector/pdata v1.18.0 h1:/yg2rO2dxqDM2p6GutsMCxXN6sKlXwyIz/ZYyUPONBg= +go.opentelemetry.io/collector/pdata v1.18.0/go.mod h1:Ox1YVLe87cZDB/TL30i4SUz1cA5s6AM6SpFMfY61ICs= +go.opentelemetry.io/collector/semconv v0.112.0 h1:JPQyvZhlNLVSuVI+FScONaiFygB7h7NTZceUEKIQUEc= +go.opentelemetry.io/collector/semconv v0.112.0/go.mod h1:zCJ5njhWpejR+A40kiEoeFm1xq1uzyZwMnRNX6/D82A= +go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0 h1:4BZHA+B1wXEQoGNHxW8mURaLhcdGwvRnmhGbm+odRbc= +go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.56.0/go.mod h1:3qi2EEwMgB4xnKgPLqsDP3j9qxnHDZeHsnAxfjQqTko= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 h1:UP6IpuHFkUgOQL9FFQFrZ+5LiwhhYRbi7VZSIx6Nj5s= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM= +go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= +go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0 h1:K0XaT3DwHAcV4nKLzcQvwAgSyisUghWoY20I7huthMk= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.31.0/go.mod h1:B5Ki776z/MBnVha1Nzwp5arlzBbE3+1jk+pGmaP5HME= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.31.0 h1:FFeLy03iVTXP6ffeN2iXrxfGsZGCjVx0/4KlizjyBwU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.31.0/go.mod h1:TMu73/k1CP8nBUpDLc71Wj/Kf7ZS9FK5b53VapRsP9o= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0 h1:lUsI2TYsQw2r1IASwoROaCnjdj2cvC2+Jbxvk6nHnWU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.31.0/go.mod h1:2HpZxxQurfGxJlJDblybejHB6RX6pmExPNe517hREw4= +go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE= +go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY= +go.opentelemetry.io/otel/sdk v1.31.0 h1:xLY3abVHYZ5HSfOg3l2E5LUj2Cwva5Y7yGxnSW9H5Gk= +go.opentelemetry.io/otel/sdk v1.31.0/go.mod h1:TfRbMdhvxIIr/B2N2LQW2S5v9m3gOQ/08KsbbO5BPT0= +go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys= +go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= -go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= -go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= -go.uber.org/automaxprocs v1.5.3 h1:kWazyxZUrS3Gs4qUpbwo5kEIMGe/DAvi5Z4tl2NW4j8= -go.uber.org/automaxprocs 
v1.5.3/go.mod h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnwa1WM0= +go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= +go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= -go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= -go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= -go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= -go.uber.org/zap v1.13.0/go.mod h1:zwrFLgMcdUuIBviXEYEH1YKNaOBnKXsx2IPda5bBwHM= golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= -golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= -golang.org/x/crypto v0.25.0 h1:ypSNr+bnYL2YhwoMt2zPxHFmbAN1KZs/njMG3hxUp30= -golang.org/x/crypto v0.25.0/go.mod h1:T+wALwcMOSE0kXgUAnPAHqTLW+XHgcELELW8VaDgm/M= +golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= +golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= -golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= -golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= -golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= -golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= 
-golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= -golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20240119083558-1b970713d09a h1:Q8/wZp0KX97QFTc2ywcOE0YRjZPVIx+MXInMzdvQqcA= golang.org/x/exp v0.0.0-20240119083558-1b970713d09a/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08= -golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= -golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= -golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= -golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= -golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= -golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= -golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= -golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= -golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.19.0 h1:fEdghXQSo20giMthA7cd28ZC+jts4amQ3YMXiP5oMQ8= -golang.org/x/mod v0.19.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= +golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181201002055-351d144fa1fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190125091013-d26f9f9a57f3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190923162816-aa69164e4478/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210410081132-afb366fc7cd1/go.mod 
h1:9tjilg8BloeKEkVJvy7fQ90B1CfIiPueXVOjqfkSzI8= -golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= -golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= -golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= -golang.org/x/net v0.27.0 h1:5K3Njcw06/l2y9vpGCSdcxWOYHOUk3dVNGDXN+FvAys= -golang.org/x/net v0.27.0/go.mod h1:dDi0PyhWNoiUOrAS8uXv/vnScO4wnHQO4mj9fn/RytE= +golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo= +golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= -golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= -golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= +golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= -golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.9.0 
h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= +golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod 
h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= -golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= -golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= -golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= -golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= -golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= +golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4= -golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI= -golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= -golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= -golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= +golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= +golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools 
v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= -golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools 
v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= -golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= -golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= -golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= -golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.23.0 h1:SGsXPZ+2l4JsgaCKkx+FQ9YZ5XEtA1GZYuoDjenLjvg= -golang.org/x/tools v0.23.0/go.mod h1:pnu6ufv6vQkll6szChhK3C3L/ruaIv5eBeztNG8wtsI= +golang.org/x/tools v0.27.0 h1:qEKojBykQkQ4EynWy4S8Weg69NumxKdn40Fce3uc/8o= +golang.org/x/tools v0.27.0/go.mod h1:sUi0ZgbwW9ZPAq26Ekut+weQPR5eIM6GQLQ1Yjm1H0Q= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= -google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= -google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= -google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= -google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= -google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= -google.golang.org/api v0.190.0 
h1:ASM+IhLY1zljNdLu19W1jTmU6A+gMk6M46Wlur61s+Q= -google.golang.org/api v0.190.0/go.mod h1:QIr6I9iedBLnfqoD6L6Vze1UvS5Hzj5r2aUBOaZnLHo= +google.golang.org/api v0.204.0 h1:3PjmQQEDkR/ENVZZwIYB4W/KzYtN8OrqnNcHWpeR8E4= +google.golang.org/api v0.204.0/go.mod h1:69y8QSoKIbL9F94bWgWAq6wGqGwyjBgi2y8rAK8zLag= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= -google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= -google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= -google.golang.org/genproto v0.0.0-20190530194941-fb225487d101/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s= -google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= -google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= -google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= -google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= -google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= 
-google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= -google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= -google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto/googleapis/api v0.0.0-20240725223205-93522f1f2a9f h1:b1Ln/PG8orm0SsBbHZWke8dDp2lrCD4jSmfglFpTZbk= -google.golang.org/genproto/googleapis/api v0.0.0-20240725223205-93522f1f2a9f/go.mod h1:AHT0dDg3SoMOgZGnZk29b5xTbPHMoEC8qthmBLJCpys= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240730163845-b1a4ccb954bf h1:liao9UHurZLtiEwBgT9LMOnKYsHze6eA6w1KQCMVN2Q= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240730163845-b1a4ccb954bf/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= -google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= +google.golang.org/genproto/googleapis/api v0.0.0-20241015192408-796eee8c2d53 h1:fVoAXEKA4+yufmbdVYv+SE73+cPZbbbe8paLsHfkK+U= +google.golang.org/genproto/googleapis/api v0.0.0-20241015192408-796eee8c2d53/go.mod h1:riSXTwQ4+nqmPGtobMFyW5FqVAmIs0St6VPp4Ug7CE4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 h1:zciRKQ4kBpFgpfC5QQCVtnnNAcLIqweL7plyZRQHVpI= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= -google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM= -google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= -google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= -google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= -google.golang.org/grpc v1.22.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= -google.golang.org/grpc v1.23.1/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= -google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60= -google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= -google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc= -google.golang.org/grpc v1.65.0/go.mod 
h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -1117,36 +672,26 @@ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzi google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= -google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= +google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= -gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= -gopkg.in/gcfg.v1 v1.2.3/go.mod h1:yesOnuUOFQAhST5vPY4nbZsb/huCgGGXlipJsBn0b3o= +gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= +gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= -gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= -gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod 
h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= @@ -1155,32 +700,25 @@ gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= -honnef.co/go/tools v0.0.0-20180728063816-88497007e858/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= -honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= -k8s.io/api v0.29.3 h1:2ORfZ7+bGC3YJqGpV0KSDDEVf8hdGQ6A03/50vj8pmw= -k8s.io/api v0.29.3/go.mod h1:y2yg2NTyHUUkIoTC+phinTnEa3KFM6RZ3szxt014a80= -k8s.io/apimachinery v0.29.3 h1:2tbx+5L7RNvqJjn7RIuIKu9XTsIZ9Z5wX2G22XAa5EU= -k8s.io/apimachinery v0.29.3/go.mod h1:hx/S4V2PNW4OMg3WizRrHutyB5la0iCUbZym+W0EQIU= -k8s.io/client-go v0.29.3 h1:R/zaZbEAxqComZ9FHeQwOh3Y1ZUs7FaHKZdQtIc2WZg= -k8s.io/client-go v0.29.3/go.mod h1:tkDisCvgPfiRpxGnOORfkljmS+UrW+WtXAy2fTvXJB0= +k8s.io/api v0.31.3 h1:umzm5o8lFbdN/hIXbrK9oRpOproJO62CV1zqxXrLgk8= +k8s.io/api v0.31.3/go.mod h1:UJrkIp9pnMOI9K2nlL6vwpxRzzEX5sWgn8kGQe92kCE= +k8s.io/apimachinery v0.31.3 h1:6l0WhcYgasZ/wk9ktLq5vLaoXJJr5ts6lkaQzgeYPq4= +k8s.io/apimachinery v0.31.3/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= +k8s.io/client-go v0.31.3 h1:CAlZuM+PH2cm+86LOBemaJI/lQ5linJ6UFxKX/SoG+4= +k8s.io/client-go v0.31.3/go.mod h1:2CgjPUTpv3fE5dNygAr2NcM8nhHzXvxB8KL5gYc3kJs= +k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8= +k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I= +k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= +k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= -k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -rsc.io/binaryregexp v0.2.0/go.mod 
h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= -rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= -rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= +k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= -sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= -sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= -sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= -sourcegraph.com/sourcegraph/appdash v0.0.0-20190731080439-ebfcffb1b5c0/go.mod h1:hI742Nqp5OhwiqlzhgfbWU4mW4yO10fP+LoT9WOswdU= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/model/histogram/float_histogram.go b/model/histogram/float_histogram.go index 2a37ea66d4..e5519a56d6 100644 --- a/model/histogram/float_histogram.go +++ b/model/histogram/float_histogram.go @@ -14,6 +14,7 @@ package histogram import ( + "errors" "fmt" "math" "strings" @@ -230,6 +231,17 @@ func (h *FloatHistogram) TestExpression() string { res = append(res, fmt.Sprintf("custom_values:%g", m.CustomValues)) } + switch m.CounterResetHint { + case UnknownCounterReset: + // Unknown is the default, don't add anything. + case CounterReset: + res = append(res, "counter_reset_hint:reset") + case NotCounterReset: + res = append(res, "counter_reset_hint:not_reset") + case GaugeType: + res = append(res, "counter_reset_hint:gauge") + } + addBuckets := func(kind, bucketsKey, offsetKey string, buckets []float64, spans []Span) []string { if len(spans) > 1 { panic(fmt.Sprintf("histogram with multiple %s spans not supported", kind)) @@ -293,6 +305,14 @@ func (h *FloatHistogram) Div(scalar float64) *FloatHistogram { h.ZeroCount /= scalar h.Count /= scalar h.Sum /= scalar + // Division by zero removes all buckets. + if scalar == 0 { + h.PositiveBuckets = nil + h.NegativeBuckets = nil + h.PositiveSpans = nil + h.NegativeSpans = nil + return h + } for i := range h.PositiveBuckets { h.PositiveBuckets[i] /= scalar } @@ -342,7 +362,7 @@ func (h *FloatHistogram) Add(other *FloatHistogram) (*FloatHistogram, error) { default: // All other cases shouldn't actually happen. // They are a direct collision of CounterReset and NotCounterReset. - // Conservatively set the CounterResetHint to "unknown" and isse a warning. + // Conservatively set the CounterResetHint to "unknown" and issue a warning. h.CounterResetHint = UnknownCounterReset // TODO(trevorwhitney): Actually issue the warning as soon as the plumbing for it is in place } @@ -658,7 +678,7 @@ func detectReset(currIt, prevIt *floatBucketIterator) bool { if !currIt.Next() { // Reached end of currIt early, therefore // previous histogram has a bucket that the - // current one does not have. Unlass all + // current one does not have. Unless all // remaining buckets in the previous histogram // are unpopulated, this is a reset. 
for { @@ -765,16 +785,16 @@ func (h *FloatHistogram) Validate() error { return fmt.Errorf("custom buckets: %w", err) } if h.ZeroCount != 0 { - return fmt.Errorf("custom buckets: must have zero count of 0") + return errors.New("custom buckets: must have zero count of 0") } if h.ZeroThreshold != 0 { - return fmt.Errorf("custom buckets: must have zero threshold of 0") + return errors.New("custom buckets: must have zero threshold of 0") } if len(h.NegativeSpans) > 0 { - return fmt.Errorf("custom buckets: must not have negative spans") + return errors.New("custom buckets: must not have negative spans") } if len(h.NegativeBuckets) > 0 { - return fmt.Errorf("custom buckets: must not have negative buckets") + return errors.New("custom buckets: must not have negative buckets") } } else { if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { @@ -788,7 +808,7 @@ func (h *FloatHistogram) Validate() error { return fmt.Errorf("negative side: %w", err) } if h.CustomValues != nil { - return fmt.Errorf("histogram with exponential schema must not have custom bounds") + return errors.New("histogram with exponential schema must not have custom bounds") } } err := checkHistogramBuckets(h.PositiveBuckets, &pCount, false) @@ -891,7 +911,7 @@ func (h *FloatHistogram) trimBucketsInZeroBucket() { // reconcileZeroBuckets finds a zero bucket large enough to include the zero // buckets of both histograms (the receiving histogram and the other histogram) // with a zero threshold that is not within a populated bucket in either -// histogram. This method modifies the receiving histogram accourdingly, but +// histogram. This method modifies the receiving histogram accordingly, but // leaves the other histogram as is. Instead, it returns the zero count the // other histogram would have if it were modified. 
func (h *FloatHistogram) reconcileZeroBuckets(other *FloatHistogram) float64 { @@ -929,10 +949,10 @@ func (h *FloatHistogram) floatBucketIterator( positive bool, absoluteStartValue float64, targetSchema int32, ) floatBucketIterator { if h.UsesCustomBuckets() && targetSchema != h.Schema { - panic(fmt.Errorf("cannot merge from custom buckets schema to exponential schema")) + panic(errors.New("cannot merge from custom buckets schema to exponential schema")) } if !h.UsesCustomBuckets() && IsCustomBucketsSchema(targetSchema) { - panic(fmt.Errorf("cannot merge from exponential buckets schema to custom schema")) + panic(errors.New("cannot merge from exponential buckets schema to custom schema")) } if targetSchema > h.Schema { panic(fmt.Errorf("cannot merge from schema %d to %d", h.Schema, targetSchema)) diff --git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go index 1558a6d679..34988e9d39 100644 --- a/model/histogram/float_histogram_test.go +++ b/model/histogram/float_histogram_test.go @@ -131,6 +131,54 @@ func TestFloatHistogramMul(t *testing.T) { NegativeBuckets: []float64{9, 3, 15, 18}, }, }, + { + "negation", + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: 11, + Count: 30, + Sum: 23, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{1, 0, 3, 4, 7}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{3, 1, 5, 6}, + }, + -1, + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: -11, + Count: -30, + Sum: -23, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{-1, 0, -3, -4, -7}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{-3, -1, -5, -6}, + }, + }, + { + "negative multiplier", + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: 11, + Count: 30, + Sum: 23, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{1, 0, 3, 4, 7}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{3, 1, 5, 6}, + }, + -2, + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: -22, + Count: -60, + Sum: -46, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{-2, 0, -6, -8, -14}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{-6, -2, -10, -12}, + }, + }, { "no-op with custom buckets", &FloatHistogram{ @@ -351,14 +399,10 @@ func TestFloatHistogramDiv(t *testing.T) { }, 0, &FloatHistogram{ - ZeroThreshold: 0.01, - ZeroCount: math.Inf(1), - Count: math.Inf(1), - Sum: math.Inf(1), - PositiveSpans: []Span{{-2, 1}, {2, 3}}, - PositiveBuckets: []float64{math.Inf(1), math.Inf(1), math.Inf(1), math.Inf(1)}, - NegativeSpans: []Span{{3, 2}, {3, 2}}, - NegativeBuckets: []float64{math.Inf(1), math.Inf(1), math.Inf(1), math.Inf(1)}, + ZeroThreshold: 0.01, + Count: math.Inf(1), + Sum: math.Inf(1), + ZeroCount: math.Inf(1), }, }, { @@ -409,6 +453,54 @@ func TestFloatHistogramDiv(t *testing.T) { NegativeBuckets: []float64{1.5, 0.5, 2.5, 3}, }, }, + { + "negation", + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: 5.5, + Count: 3493.3, + Sum: 2349209.324, + PositiveSpans: []Span{{-2, 1}, {2, 3}}, + PositiveBuckets: []float64{1, 3.3, 4.2, 0.1}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{3.1, 3, 1.234e5, 1000}, + }, + -1, + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: -5.5, + Count: -3493.3, + Sum: -2349209.324, + PositiveSpans: []Span{{-2, 1}, {2, 3}}, + PositiveBuckets: []float64{-1, -3.3, -4.2, -0.1}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{-3.1, -3, -1.234e5, 
-1000}, + }, + }, + { + "negative half", + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: 11, + Count: 30, + Sum: 23, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{1, 0, 3, 4, 7}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{3, 1, 5, 6}, + }, + -2, + &FloatHistogram{ + ZeroThreshold: 0.01, + ZeroCount: -5.5, + Count: -15, + Sum: -11.5, + PositiveSpans: []Span{{-2, 2}, {1, 3}}, + PositiveBuckets: []float64{-0.5, 0, -1.5, -2, -3.5}, + NegativeSpans: []Span{{3, 2}, {3, 2}}, + NegativeBuckets: []float64{-1.5, -0.5, -2.5, -3}, + }, + }, { "no-op with custom buckets", &FloatHistogram{ diff --git a/model/histogram/histogram.go b/model/histogram/histogram.go index e4b99ec420..778aefe282 100644 --- a/model/histogram/histogram.go +++ b/model/histogram/histogram.go @@ -14,6 +14,7 @@ package histogram import ( + "errors" "fmt" "math" "slices" @@ -432,16 +433,16 @@ func (h *Histogram) Validate() error { return fmt.Errorf("custom buckets: %w", err) } if h.ZeroCount != 0 { - return fmt.Errorf("custom buckets: must have zero count of 0") + return errors.New("custom buckets: must have zero count of 0") } if h.ZeroThreshold != 0 { - return fmt.Errorf("custom buckets: must have zero threshold of 0") + return errors.New("custom buckets: must have zero threshold of 0") } if len(h.NegativeSpans) > 0 { - return fmt.Errorf("custom buckets: must not have negative spans") + return errors.New("custom buckets: must not have negative spans") } if len(h.NegativeBuckets) > 0 { - return fmt.Errorf("custom buckets: must not have negative buckets") + return errors.New("custom buckets: must not have negative buckets") } } else { if err := checkHistogramSpans(h.PositiveSpans, len(h.PositiveBuckets)); err != nil { @@ -455,7 +456,7 @@ func (h *Histogram) Validate() error { return fmt.Errorf("negative side: %w", err) } if h.CustomValues != nil { - return fmt.Errorf("histogram with exponential schema must not have custom bounds") + return errors.New("histogram with exponential schema must not have custom bounds") } } err := checkHistogramBuckets(h.PositiveBuckets, &pCount, true) diff --git a/model/labels/labels_common.go b/model/labels/labels_common.go index d7bdc1e076..99529a3836 100644 --- a/model/labels/labels_common.go +++ b/model/labels/labels_common.go @@ -230,5 +230,5 @@ func contains(s []Label, n string) bool { } func yoloString(b []byte) string { - return *((*string)(unsafe.Pointer(&b))) + return unsafe.String(unsafe.SliceData(b), len(b)) } diff --git a/model/labels/labels_stringlabels.go b/model/labels/labels_stringlabels.go index c8bce51234..c64bb990e0 100644 --- a/model/labels/labels_stringlabels.go +++ b/model/labels/labels_stringlabels.go @@ -16,7 +16,6 @@ package labels import ( - "reflect" "slices" "strings" "unsafe" @@ -299,10 +298,8 @@ func Equal(ls, o Labels) bool { func EmptyLabels() Labels { return Labels{} } -func yoloBytes(s string) (b []byte) { - *(*string)(unsafe.Pointer(&b)) = s - (*reflect.SliceHeader)(unsafe.Pointer(&b)).Cap = len(s) - return +func yoloBytes(s string) []byte { + return unsafe.Slice(unsafe.StringData(s), len(s)) } // New returns a sorted Labels from the given labels. @@ -338,8 +335,8 @@ func Compare(a, b Labels) int { } i := 0 // First, go 8 bytes at a time. Data strings are expected to be 8-byte aligned. 
- sp := unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&shorter)).Data) - lp := unsafe.Pointer((*reflect.StringHeader)(unsafe.Pointer(&longer)).Data) + sp := unsafe.Pointer(unsafe.StringData(shorter)) + lp := unsafe.Pointer(unsafe.StringData(longer)) for ; i < len(shorter)-8; i += 8 { if *(*uint64)(unsafe.Add(sp, i)) != *(*uint64)(unsafe.Add(lp, i)) { break diff --git a/model/labels/labels_test.go b/model/labels/labels_test.go index 9208908311..3a4040776c 100644 --- a/model/labels/labels_test.go +++ b/model/labels/labels_test.go @@ -954,7 +954,7 @@ func TestMarshaling(t *testing.T) { expectedJSON := "{\"aaa\":\"111\",\"bbb\":\"2222\",\"ccc\":\"33333\"}" b, err := json.Marshal(lbls) require.NoError(t, err) - require.Equal(t, expectedJSON, string(b)) + require.JSONEq(t, expectedJSON, string(b)) var gotJ Labels err = json.Unmarshal(b, &gotJ) @@ -964,7 +964,7 @@ func TestMarshaling(t *testing.T) { expectedYAML := "aaa: \"111\"\nbbb: \"2222\"\nccc: \"33333\"\n" b, err = yaml.Marshal(lbls) require.NoError(t, err) - require.Equal(t, expectedYAML, string(b)) + require.YAMLEq(t, expectedYAML, string(b)) var gotY Labels err = yaml.Unmarshal(b, &gotY) @@ -980,7 +980,7 @@ func TestMarshaling(t *testing.T) { b, err = json.Marshal(f) require.NoError(t, err) expectedJSONFromStruct := "{\"a_labels\":" + expectedJSON + "}" - require.Equal(t, expectedJSONFromStruct, string(b)) + require.JSONEq(t, expectedJSONFromStruct, string(b)) var gotFJ foo err = json.Unmarshal(b, &gotFJ) @@ -990,7 +990,7 @@ func TestMarshaling(t *testing.T) { b, err = yaml.Marshal(f) require.NoError(t, err) expectedYAMLFromStruct := "a_labels:\n aaa: \"111\"\n bbb: \"2222\"\n ccc: \"33333\"\n" - require.Equal(t, expectedYAMLFromStruct, string(b)) + require.YAMLEq(t, expectedYAMLFromStruct, string(b)) var gotFY foo err = yaml.Unmarshal(b, &gotFY) diff --git a/model/labels/regexp.go b/model/labels/regexp.go index d2151d83dd..bfd9034059 100644 --- a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -63,13 +63,13 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) { // available, even if the string matcher is faster. m.matchString = m.stringMatcher.Matches } else { - parsed, err := syntax.Parse(v, syntax.Perl) + parsed, err := syntax.Parse(v, syntax.Perl|syntax.DotNL) if err != nil { return nil, err } // Simplify the syntax tree to run faster. parsed = parsed.Simplify() - m.re, err = regexp.Compile("^(?:" + parsed.String() + ")$") + m.re, err = regexp.Compile("^(?s:" + parsed.String() + ")$") if err != nil { return nil, err } @@ -802,7 +802,7 @@ type equalMultiStringMapMatcher struct { func (m *equalMultiStringMapMatcher) add(s string) { if !m.caseSensitive { - s = toNormalisedLower(s) + s = toNormalisedLower(s, nil) // Don't pass a stack buffer here - it will always escape to heap. 
} m.values[s] = struct{}{} @@ -840,15 +840,24 @@ func (m *equalMultiStringMapMatcher) setMatches() []string { } func (m *equalMultiStringMapMatcher) Matches(s string) bool { - if !m.caseSensitive { - s = toNormalisedLower(s) + if len(m.values) > 0 { + sNorm := s + var a [32]byte + if !m.caseSensitive { + sNorm = toNormalisedLower(s, a[:]) + } + if _, ok := m.values[sNorm]; ok { + return true + } } - if _, ok := m.values[s]; ok { - return true - } if m.minPrefixLen > 0 && len(s) >= m.minPrefixLen { - for _, matcher := range m.prefixes[s[:m.minPrefixLen]] { + prefix := s[:m.minPrefixLen] + var a [32]byte + if !m.caseSensitive { + prefix = toNormalisedLower(s[:m.minPrefixLen], a[:]) + } + for _, matcher := range m.prefixes[prefix] { if matcher.Matches(s) { return true } @@ -859,22 +868,37 @@ func (m *equalMultiStringMapMatcher) Matches(s string) bool { // toNormalisedLower normalise the input string using "Unicode Normalization Form D" and then convert // it to lower case. -func toNormalisedLower(s string) string { - var buf []byte +func toNormalisedLower(s string, a []byte) string { for i := 0; i < len(s); i++ { c := s[i] if c >= utf8.RuneSelf { return strings.Map(unicode.ToLower, norm.NFKD.String(s)) } if 'A' <= c && c <= 'Z' { - if buf == nil { - buf = []byte(s) - } - buf[i] = c + 'a' - 'A' + return toNormalisedLowerSlow(s, i, a) } } - if buf == nil { - return s + return s +} + +// toNormalisedLowerSlow is split from toNormalisedLower because having a call +// to `copy` slows it down even when it is not called. +func toNormalisedLowerSlow(s string, i int, a []byte) string { + var buf []byte + if cap(a) > len(s) { + buf = a[:len(s)] + copy(buf, s) + } else { + buf = []byte(s) + } + for ; i < len(s); i++ { + c := s[i] + if c >= utf8.RuneSelf { + return strings.Map(unicode.ToLower, norm.NFKD.String(s)) + } + if 'A' <= c && c <= 'Z' { + buf[i] = c + 'a' - 'A' + } } return yoloString(buf) } diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index 24875e64ef..e9745502ff 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -121,7 +121,7 @@ func TestFastRegexMatcher_MatchString(t *testing.T) { t.Parallel() m, err := NewFastRegexMatcher(r) require.NoError(t, err) - re := regexp.MustCompile("^(?:" + r + ")$") + re := regexp.MustCompile("^(?s:" + r + ")$") require.Equal(t, re.MatchString(v), m.MatchString(v)) }) } @@ -167,7 +167,7 @@ func TestOptimizeConcatRegex(t *testing.T) { } for _, c := range cases { - parsed, err := syntax.Parse(c.regex, syntax.Perl) + parsed, err := syntax.Parse(c.regex, syntax.Perl|syntax.DotNL) require.NoError(t, err) prefix, suffix, contains := optimizeConcatRegex(parsed) @@ -248,7 +248,7 @@ func TestFindSetMatches(t *testing.T) { c := c t.Run(c.pattern, func(t *testing.T) { t.Parallel() - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matches, actualCaseSensitive := findSetMatches(parsed) require.Equal(t, c.expMatches, matches) @@ -333,7 +333,8 @@ func BenchmarkToNormalizedLower(b *testing.B) { } b.ResetTimer() for n := 0; n < b.N; n++ { - toNormalisedLower(inputs[n%len(inputs)]) + var a [256]byte + toNormalisedLower(inputs[n%len(inputs)], a[:]) } }) } @@ -348,15 +349,15 @@ func TestStringMatcherFromRegexp(t *testing.T) { pattern string exp StringMatcher }{ - {".*", anyStringWithoutNewlineMatcher{}}, - {".*?", anyStringWithoutNewlineMatcher{}}, + {".*", trueMatcher{}}, + {".*?", trueMatcher{}}, {"(?s:.*)", trueMatcher{}}, - {"(.*)", 
anyStringWithoutNewlineMatcher{}}, - {"^.*$", anyStringWithoutNewlineMatcher{}}, - {".+", &anyNonEmptyStringMatcher{matchNL: false}}, + {"(.*)", trueMatcher{}}, + {"^.*$", trueMatcher{}}, + {".+", &anyNonEmptyStringMatcher{matchNL: true}}, {"(?s:.+)", &anyNonEmptyStringMatcher{matchNL: true}}, - {"^.+$", &anyNonEmptyStringMatcher{matchNL: false}}, - {"(.+)", &anyNonEmptyStringMatcher{matchNL: false}}, + {"^.+$", &anyNonEmptyStringMatcher{matchNL: true}}, + {"(.+)", &anyNonEmptyStringMatcher{matchNL: true}}, {"", emptyStringMatcher{}}, {"^$", emptyStringMatcher{}}, {"^foo$", &equalStringMatcher{s: "foo", caseSensitive: true}}, @@ -366,23 +367,23 @@ func TestStringMatcherFromRegexp(t *testing.T) { {`(?i:((foo1|foo2|bar)))`, orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})}, {"^((?i:foo|oo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})}, {"(?i:(foo1|foo2|bar))", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})}, - {".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"(.*)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"(.+)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: anyStringWithoutNewlineMatcher{}}}, - {"^.+foo.+", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: &anyNonEmptyStringMatcher{matchNL: false}}}, - {"^(.*)(foo)(.*)$", &containsStringMatcher{substrings: []string{"foo"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"^(.*)(foo|foobar)(.*)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringWithoutNewlineMatcher{}, right: anyStringWithoutNewlineMatcher{}}}, - {"^(.*)(foo|foobar)(.+)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}}, - {"^(.*)(bar|b|buzz)(.+)$", &containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}}, + {".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"(.*)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"(.+)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: trueMatcher{}}}, + {"^.+foo.+", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: 
&anyNonEmptyStringMatcher{matchNL: true}}}, + {"^(.*)(foo)(.*)$", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"^(.*)(foo|foobar)(.*)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"^(.*)(foo|foobar)(.+)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}}, + {"^(.*)(bar|b|buzz)(.+)$", &containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}}, {"10\\.0\\.(1|2)\\.+", nil}, - {"10\\.0\\.(1|2).+", &containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}}, - {"^.+foo", &literalSuffixStringMatcher{left: &anyNonEmptyStringMatcher{}, suffix: "foo", suffixCaseSensitive: true}}, - {"foo-.*$", &literalPrefixSensitiveStringMatcher{prefix: "foo-", right: anyStringWithoutNewlineMatcher{}}}, - {"(prometheus|api_prom)_api_v1_.+", &containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: false}}}, - {"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{&containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: anyStringWithoutNewlineMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: false}}, &equalStringMatcher{s: "foo", caseSensitive: true}})}, - {"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "fobar", caseSensitive: true}}), &literalSuffixStringMatcher{suffix: "foo", suffixCaseSensitive: true, left: &anyNonEmptyStringMatcher{matchNL: false}}})}, - {"(.+)/(gateway|cortex-gw|cortex-gw-internal)", &containsStringMatcher{substrings: []string{"/gateway", "/cortex-gw", "/cortex-gw-internal"}, left: &anyNonEmptyStringMatcher{matchNL: false}, right: nil}}, + {"10\\.0\\.(1|2).+", &containsStringMatcher{substrings: []string{"10.0.1", "10.0.2"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: true}}}, + {"^.+foo", &literalSuffixStringMatcher{left: &anyNonEmptyStringMatcher{matchNL: true}, suffix: "foo", suffixCaseSensitive: true}}, + {"foo-.*$", &literalPrefixSensitiveStringMatcher{prefix: "foo-", right: trueMatcher{}}}, + {"(prometheus|api_prom)_api_v1_.+", &containsStringMatcher{substrings: []string{"prometheus_api_v1_", "api_prom_api_v1_"}, left: nil, right: &anyNonEmptyStringMatcher{matchNL: true}}}, + {"^((.*)(bar|b|buzz)(.+)|foo)$", orStringMatcher([]StringMatcher{&containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}, &equalStringMatcher{s: "foo", caseSensitive: true}})}, + {"((fo(bar))|.+foo)", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "fobar", caseSensitive: true}}), &literalSuffixStringMatcher{suffix: "foo", suffixCaseSensitive: true, left: &anyNonEmptyStringMatcher{matchNL: true}}})}, + {"(.+)/(gateway|cortex-gw|cortex-gw-internal)", &containsStringMatcher{substrings: []string{"/gateway", "/cortex-gw", "/cortex-gw-internal"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: nil}}, // we don't support case insensitive matching for contains. // This is because there's no strings.IndexOfFold function. // We can revisit later if this is really popular by using strings.ToUpper. 
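
Note: the expectation changes above follow from one semantic shift - anchored matchers are now parsed with `syntax.Perl|syntax.DotNL` and compiled as `^(?s:pattern)$` instead of `^(?:pattern)$`, so `.` also matches newlines, and the newline-excluding matchers (`anyStringWithoutNewlineMatcher`, `matchNL: false`) give way to `trueMatcher{}` and `matchNL: true`. A minimal standalone sketch of the difference (illustrative only, not part of the diff):

    package main

    import (
    	"fmt"
    	"regexp"
    )

    func main() {
    	// Old anchoring: "." does not cross newlines.
    	withoutS := regexp.MustCompile("^(?:.*)$")
    	// New anchoring: the s flag makes "." match "\n" as well.
    	withS := regexp.MustCompile("^(?s:.*)$")

    	input := "foo\nbar"
    	fmt.Println(withoutS.MatchString(input)) // false: ".*" stops at "\n"
    	fmt.Println(withS.MatchString(input))    // true: "(?s:.*)" spans the whole input
    }
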
@@ -393,15 +394,15 @@ func TestStringMatcherFromRegexp(t *testing.T) { {".*foo.*bar.*", nil}, {`\d*`, nil}, {".", nil}, - {"/|/bar.*", &literalPrefixSensitiveStringMatcher{prefix: "/", right: orStringMatcher{emptyStringMatcher{}, &literalPrefixSensitiveStringMatcher{prefix: "bar", right: anyStringWithoutNewlineMatcher{}}}}}, + {"/|/bar.*", &literalPrefixSensitiveStringMatcher{prefix: "/", right: orStringMatcher{emptyStringMatcher{}, &literalPrefixSensitiveStringMatcher{prefix: "bar", right: trueMatcher{}}}}}, // This one is not supported because `stringMatcherFromRegexp` is not reentrant for syntax.OpConcat. // It would make the code too complex to handle it. {"(.+)/(foo.*|bar$)", nil}, // Case sensitive alternate with same literal prefix and .* suffix. - {"(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixSensitiveStringMatcher{prefix: "xyz-016a-ixb-", right: orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "dp", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixSensitiveStringMatcher{prefix: "op", right: anyStringWithoutNewlineMatcher{}}}}}, + {"(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixSensitiveStringMatcher{prefix: "xyz-016a-ixb-", right: orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "dp", right: trueMatcher{}}, &literalPrefixSensitiveStringMatcher{prefix: "op", right: trueMatcher{}}}}}, // Case insensitive alternate with same literal prefix and .* suffix. - {"(?i:(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*))", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: anyStringWithoutNewlineMatcher{}}}}}, - {"(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: anyStringWithoutNewlineMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: anyStringWithoutNewlineMatcher{}}}}}, + {"(?i:(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*))", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: trueMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: trueMatcher{}}}}}, + {"(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", &literalPrefixInsensitiveStringMatcher{prefix: "XYZ-016A-IXB-", right: orStringMatcher{&literalPrefixInsensitiveStringMatcher{prefix: "DP", right: trueMatcher{}}, &literalPrefixInsensitiveStringMatcher{prefix: "OP", right: trueMatcher{}}}}}, // Concatenated variable length selectors are not supported. {"foo.*.*", nil}, {"foo.+.+", nil}, @@ -410,15 +411,15 @@ func TestStringMatcherFromRegexp(t *testing.T) { {"aaa.?.?", nil}, {"aaa.?.*", nil}, // Regexps with ".?". 
- {"ext.?|xfs", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: false}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, + {"ext.?|xfs", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, {"(?s)(ext.?|xfs)", orStringMatcher{&literalPrefixSensitiveStringMatcher{prefix: "ext", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}, &equalStringMatcher{s: "xfs", caseSensitive: true}}}, - {"foo.?", &literalPrefixSensitiveStringMatcher{prefix: "foo", right: &zeroOrOneCharacterStringMatcher{matchNL: false}}}, + {"foo.?", &literalPrefixSensitiveStringMatcher{prefix: "foo", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}}, {"f.?o", nil}, } { c := c t.Run(c.pattern, func(t *testing.T) { t.Parallel() - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matches := stringMatcherFromRegexp(parsed) require.Equal(t, c.exp, matches) @@ -437,16 +438,16 @@ func TestStringMatcherFromRegexp_LiteralPrefix(t *testing.T) { { pattern: "(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", expectedLiteralPrefixMatchers: 3, - expectedMatches: []string{"xyz-016a-ixb-dp", "xyz-016a-ixb-dpXXX", "xyz-016a-ixb-op", "xyz-016a-ixb-opXXX"}, - expectedNotMatches: []string{"XYZ-016a-ixb-dp", "xyz-016a-ixb-d", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp", "xyz-016a-ixb-dp\n"}, + expectedMatches: []string{"xyz-016a-ixb-dp", "xyz-016a-ixb-dpXXX", "xyz-016a-ixb-op", "xyz-016a-ixb-opXXX", "xyz-016a-ixb-dp\n"}, + expectedNotMatches: []string{"XYZ-016a-ixb-dp", "xyz-016a-ixb-d", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp"}, }, // Case insensitive. { pattern: "(?i)(xyz-016a-ixb-dp.*|xyz-016a-ixb-op.*)", expectedLiteralPrefixMatchers: 3, - expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dpXXX", "xyz-016a-ixb-op", "XYZ-016a-ixb-opXXX"}, - expectedNotMatches: []string{"xyz-016a-ixb-d", "xyz", "dp", "xyz-016a-ixb-dp\n"}, + expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dpXXX", "xyz-016a-ixb-op", "XYZ-016a-ixb-opXXX", "xyz-016a-ixb-dp\n"}, + expectedNotMatches: []string{"xyz-016a-ixb-d", "xyz", "dp"}, }, // Nested literal prefixes, case sensitive. @@ -474,13 +475,13 @@ func TestStringMatcherFromRegexp_LiteralPrefix(t *testing.T) { }, } { t.Run(c.pattern, func(t *testing.T) { - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matcher := stringMatcherFromRegexp(parsed) require.NotNil(t, matcher) - re := regexp.MustCompile("^" + c.pattern + "$") + re := regexp.MustCompile("^(?s:" + c.pattern + ")$") // Pre-condition check: ensure it contains literalPrefixSensitiveStringMatcher or literalPrefixInsensitiveStringMatcher. 
numPrefixMatchers := 0 @@ -523,16 +524,16 @@ func TestStringMatcherFromRegexp_LiteralSuffix(t *testing.T) { { pattern: "(.*xyz-016a-ixb-dp|.*xyz-016a-ixb-op)", expectedLiteralSuffixMatchers: 2, - expectedMatches: []string{"xyz-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "xyz-016a-ixb-op", "XXXxyz-016a-ixb-op"}, - expectedNotMatches: []string{"XYZ-016a-ixb-dp", "yz-016a-ixb-dp", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp", "\nxyz-016a-ixb-dp"}, + expectedMatches: []string{"xyz-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "xyz-016a-ixb-op", "XXXxyz-016a-ixb-op", "\nxyz-016a-ixb-dp"}, + expectedNotMatches: []string{"XYZ-016a-ixb-dp", "yz-016a-ixb-dp", "XYZ-016a-ixb-op", "xyz-016a-ixb-o", "xyz", "dp"}, }, // Case insensitive. { pattern: "(?i)(.*xyz-016a-ixb-dp|.*xyz-016a-ixb-op)", expectedLiteralSuffixMatchers: 2, - expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "XyZ-016a-ixb-op", "XXXxyz-016a-ixb-op"}, - expectedNotMatches: []string{"yz-016a-ixb-dp", "xyz-016a-ixb-o", "xyz", "dp", "\nxyz-016a-ixb-dp"}, + expectedMatches: []string{"xyz-016a-ixb-dp", "XYZ-016a-ixb-dp", "XXXxyz-016a-ixb-dp", "XyZ-016a-ixb-op", "XXXxyz-016a-ixb-op", "\nxyz-016a-ixb-dp"}, + expectedNotMatches: []string{"yz-016a-ixb-dp", "xyz-016a-ixb-o", "xyz", "dp"}, }, // Nested literal suffixes, case sensitive. @@ -552,13 +553,13 @@ func TestStringMatcherFromRegexp_LiteralSuffix(t *testing.T) { }, } { t.Run(c.pattern, func(t *testing.T) { - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matcher := stringMatcherFromRegexp(parsed) require.NotNil(t, matcher) - re := regexp.MustCompile("^" + c.pattern + "$") + re := regexp.MustCompile("^(?s:" + c.pattern + ")$") // Pre-condition check: ensure it contains literalSuffixStringMatcher. numSuffixMatchers := 0 @@ -598,26 +599,26 @@ func TestStringMatcherFromRegexp_Quest(t *testing.T) { { pattern: "test.?", expectedZeroOrOneMatchers: 1, - expectedMatches: []string{"test", "test!"}, - expectedNotMatches: []string{"test\n", "tes", "test!!"}, + expectedMatches: []string{"test\n", "test", "test!"}, + expectedNotMatches: []string{"tes", "test!!"}, }, { pattern: ".?test", expectedZeroOrOneMatchers: 1, - expectedMatches: []string{"test", "!test"}, - expectedNotMatches: []string{"\ntest", "tes", "test!"}, + expectedMatches: []string{"\ntest", "test", "!test"}, + expectedNotMatches: []string{"tes", "test!"}, }, { pattern: "(aaa.?|bbb.?)", expectedZeroOrOneMatchers: 2, - expectedMatches: []string{"aaa", "aaaX", "bbb", "bbbX"}, - expectedNotMatches: []string{"aa", "aaaXX", "aaa\n", "bb", "bbbXX", "bbb\n"}, + expectedMatches: []string{"aaa", "aaaX", "bbb", "bbbX", "aaa\n", "bbb\n"}, + expectedNotMatches: []string{"aa", "aaaXX", "bb", "bbbXX"}, }, { pattern: ".*aaa.?", expectedZeroOrOneMatchers: 1, - expectedMatches: []string{"aaa", "Xaaa", "aaaX", "XXXaaa", "XXXaaaX"}, - expectedNotMatches: []string{"aa", "aaaXX", "XXXaaaXXX", "XXXaaa\n"}, + expectedMatches: []string{"aaa", "Xaaa", "aaaX", "XXXaaa", "XXXaaaX", "XXXaaa\n"}, + expectedNotMatches: []string{"aa", "aaaXX", "XXXaaaXXX"}, }, // Match newline. 
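
Note: the `toNormalisedLower(s, a[:])` signature threaded through these tests (the `[256]byte` buffer in the benchmark, the `nil` argument in `TestToNormalisedLower` below) follows a common Go allocation-avoidance pattern: the caller hands in a stack-allocated array as scratch space so small inputs need no heap allocation. A rough sketch of the idea under simplified assumptions (ASCII only, and ending with an ordinary `string` conversion where the real code avoids that final copy via an unsafe string):

    package main

    import "fmt"

    // asciiLowerInto lowercases s, writing into buf when it is large enough
    // so that small inputs can reuse the caller's stack array.
    func asciiLowerInto(s string, buf []byte) string {
    	var b []byte
    	if cap(buf) >= len(s) {
    		b = buf[:len(s)] // reuse caller-provided scratch space
    	} else {
    		b = make([]byte, len(s)) // fall back to a heap allocation
    	}
    	for i := 0; i < len(s); i++ {
    		c := s[i]
    		if 'A' <= c && c <= 'Z' {
    			c += 'a' - 'A'
    		}
    		b[i] = c
    	}
    	return string(b) // the real code hands the bytes back without this copy
    }

    func main() {
    	var a [32]byte // stack scratch buffer, as in the Matches() change above
    	fmt.Println(asciiLowerInto("FooBar", a[:])) // foobar
    }
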
@@ -632,18 +633,18 @@ func TestStringMatcherFromRegexp_Quest(t *testing.T) { { pattern: "(aaa.?|((?s).?bbb.+))", expectedZeroOrOneMatchers: 2, - expectedMatches: []string{"aaa", "aaaX", "bbbX", "XbbbX", "bbbXXX", "\nbbbX"}, - expectedNotMatches: []string{"aa", "aaa\n", "Xbbb", "\nbbb"}, + expectedMatches: []string{"aaa", "aaaX", "bbbX", "XbbbX", "bbbXXX", "\nbbbX", "aaa\n"}, + expectedNotMatches: []string{"aa", "Xbbb", "\nbbb"}, }, } { t.Run(c.pattern, func(t *testing.T) { - parsed, err := syntax.Parse(c.pattern, syntax.Perl) + parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) require.NoError(t, err) matcher := stringMatcherFromRegexp(parsed) require.NotNil(t, matcher) - re := regexp.MustCompile("^" + c.pattern + "$") + re := regexp.MustCompile("^(?s:" + c.pattern + ")$") // Pre-condition check: ensure it contains zeroOrOneCharacterStringMatcher. numZeroOrOneMatchers := 0 @@ -1112,7 +1113,7 @@ func BenchmarkOptimizeEqualOrPrefixStringMatchers(b *testing.B) { } b.Logf("regexp: %s", re) - parsed, err := syntax.Parse(re, syntax.Perl) + parsed, err := syntax.Parse(re, syntax.Perl|syntax.DotNL) require.NoError(b, err) unoptimized := stringMatcherFromRegexpInternal(parsed) @@ -1390,6 +1391,6 @@ func TestToNormalisedLower(t *testing.T) { "ſſAſſa": "ssassa", } for input, expectedOutput := range testCases { - require.Equal(t, expectedOutput, toNormalisedLower(input)) + require.Equal(t, expectedOutput, toNormalisedLower(input, nil)) } } diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go index a880465969..2bec6cfabb 100644 --- a/model/relabel/relabel.go +++ b/model/relabel/relabel.go @@ -16,6 +16,8 @@ package relabel import ( "crypto/md5" "encoding/binary" + "encoding/json" + "errors" "fmt" "strconv" "strings" @@ -83,20 +85,20 @@ func (a *Action) UnmarshalYAML(unmarshal func(interface{}) error) error { type Config struct { // A list of labels from which values are taken and concatenated // with the configured separator in order. - SourceLabels model.LabelNames `yaml:"source_labels,flow,omitempty"` + SourceLabels model.LabelNames `yaml:"source_labels,flow,omitempty" json:"sourceLabels,omitempty"` // Separator is the string between concatenated values from the source labels. - Separator string `yaml:"separator,omitempty"` + Separator string `yaml:"separator,omitempty" json:"separator,omitempty"` // Regex against which the concatenation is matched. - Regex Regexp `yaml:"regex,omitempty"` + Regex Regexp `yaml:"regex,omitempty" json:"regex,omitempty"` // Modulus to take of the hash of concatenated values from the source labels. - Modulus uint64 `yaml:"modulus,omitempty"` + Modulus uint64 `yaml:"modulus,omitempty" json:"modulus,omitempty"` // TargetLabel is the label to which the resulting string is written in a replacement. // Regexp interpolation is allowed for the replace action. - TargetLabel string `yaml:"target_label,omitempty"` + TargetLabel string `yaml:"target_label,omitempty" json:"targetLabel,omitempty"` // Replacement is the regex replacement pattern to be used. - Replacement string `yaml:"replacement,omitempty"` + Replacement string `yaml:"replacement,omitempty" json:"replacement,omitempty"` // Action is the action to be performed for the relabeling. - Action Action `yaml:"action,omitempty"` + Action Action `yaml:"action,omitempty" json:"action,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. 
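The json struct tags added above, combined with the Regexp JSON methods introduced further down in this file, should make relabel.Config round-trippable through encoding/json. A minimal sketch under that assumption (values hypothetical, error handling elided):

package main

import (
	"encoding/json"
	"fmt"

	"github.com/prometheus/prometheus/model/relabel"
)

func main() {
	in := `{"sourceLabels":["job"],"regex":".*foo.*","targetLabel":"matched","replacement":"yes","action":"replace"}`
	var cfg relabel.Config
	if err := json.Unmarshal([]byte(in), &cfg); err != nil {
		panic(err)
	}
	out, err := json.Marshal(&cfg)
	if err != nil {
		panic(err)
	}
	// The regex should survive the round trip as ".*foo.*".
	fmt.Println(string(out))
}

The new TestRegexp_JSONUnmarshalThenMarshal test later in this diff checks the same round-trip property on the Regexp field itself.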
@@ -114,10 +116,10 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error { func (c *Config) Validate() error { if c.Action == "" { - return fmt.Errorf("relabel action cannot be empty") + return errors.New("relabel action cannot be empty") } if c.Modulus == 0 && c.Action == HashMod { - return fmt.Errorf("relabel configuration for hashmod requires non-zero modulus") + return errors.New("relabel configuration for hashmod requires non-zero modulus") } if (c.Action == Replace || c.Action == HashMod || c.Action == Lowercase || c.Action == Uppercase || c.Action == KeepEqual || c.Action == DropEqual) && c.TargetLabel == "" { return fmt.Errorf("relabel configuration for %s action requires 'target_label' value", c.Action) @@ -171,7 +173,7 @@ type Regexp struct { // NewRegexp creates a new anchored Regexp and returns an error if the // passed-in regular expression does not compile. func NewRegexp(s string) (Regexp, error) { - regex, err := regexp.Compile("^(?:" + s + ")$") + regex, err := regexp.Compile("^(?s:" + s + ")$") return Regexp{Regexp: regex}, err } @@ -206,6 +208,25 @@ func (re Regexp) MarshalYAML() (interface{}, error) { return nil, nil } +// UnmarshalJSON implements the json.Unmarshaler interface. +func (re *Regexp) UnmarshalJSON(b []byte) error { + var s string + if err := json.Unmarshal(b, &s); err != nil { + return err + } + r, err := NewRegexp(s) + if err != nil { + return err + } + *re = r + return nil +} + +// MarshalJSON implements the json.Marshaler interface. +func (re Regexp) MarshalJSON() ([]byte, error) { + return json.Marshal(re.String()) +} + // IsZero implements the yaml.IsZeroer interface. func (re Regexp) IsZero() bool { return re.Regexp == DefaultRelabelConfig.Regex.Regexp @@ -218,8 +239,8 @@ func (re Regexp) String() string { } str := re.Regexp.String() - // Trim the anchor `^(?:` prefix and `)$` suffix. - return str[4 : len(str)-2] + // Trim the anchor `^(?s:` prefix and `)$` suffix. + return str[5 : len(str)-2] } // Process returns a relabeled version of the given label set. The relabel configurations @@ -277,6 +298,13 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { return false } case Replace: + // Fast path to add or delete label pair. + if val == "" && cfg.Regex == DefaultRelabelConfig.Regex && + !varInRegexTemplate(cfg.TargetLabel) && !varInRegexTemplate(cfg.Replacement) { + lb.Set(cfg.TargetLabel, cfg.Replacement) + break + } + indexes := cfg.Regex.FindStringSubmatchIndex(val) // If there is no match no replacement must take place. 
if indexes == nil { @@ -326,3 +354,7 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { return true } + +func varInRegexTemplate(template string) bool { + return strings.Contains(template, "$") +} diff --git a/model/relabel/relabel_test.go b/model/relabel/relabel_test.go index fc9952134d..6f234675c6 100644 --- a/model/relabel/relabel_test.go +++ b/model/relabel/relabel_test.go @@ -14,6 +14,7 @@ package relabel import ( + "encoding/json" "strconv" "testing" @@ -569,6 +570,29 @@ func TestRelabel(t *testing.T) { }, drop: true, }, + { + input: labels.FromMap(map[string]string{ + "a": "line1\nline2", + "b": "bar", + "c": "baz", + }), + relabel: []*Config{ + { + SourceLabels: model.LabelNames{"a"}, + Regex: MustNewRegexp("line1.*line2"), + TargetLabel: "d", + Separator: ";", + Replacement: "match${1}", + Action: Replace, + }, + }, + output: labels.FromMap(map[string]string{ + "a": "line1\nline2", + "b": "bar", + "c": "baz", + "d": "match", + }), + }, } for _, test := range tests { @@ -838,6 +862,34 @@ func BenchmarkRelabel(b *testing.B) { "__scrape_timeout__", "10s", "job", "kubernetes-pods"), }, + { + name: "static label pair", + config: ` + - replacement: wwwwww + target_label: wwwwww + - replacement: yyyyyyyyyyyy + target_label: xxxxxxxxx + - replacement: xxxxxxxxx + target_label: yyyyyyyyyyyy + - source_labels: ["something"] + target_label: with_source_labels + replacement: value + - replacement: dropped + target_label: ${0} + - replacement: ${0} + target_label: dropped`, + lbls: labels.FromStrings( + "abcdefg01", "hijklmn1", + "abcdefg02", "hijklmn2", + "abcdefg03", "hijklmn3", + "abcdefg04", "hijklmn4", + "abcdefg05", "hijklmn5", + "abcdefg06", "hijklmn6", + "abcdefg07", "hijklmn7", + "abcdefg08", "hijklmn8", + "job", "foo", + ), + }, } for i := range tests { err := yaml.UnmarshalStrict([]byte(tests[i].config), &tests[i].cfgs) @@ -913,3 +965,35 @@ func TestRegexp_ShouldMarshalAndUnmarshalZeroValue(t *testing.T) { require.NoError(t, err) require.Nil(t, unmarshalled.Regexp) } + +func TestRegexp_JSONUnmarshalThenMarshal(t *testing.T) { + tests := []struct { + name string + input string + }{ + { + name: "Empty regex", + input: `{"regex":""}`, + }, + { + name: "string literal", + input: `{"regex":"foo"}`, + }, + { + name: "regex", + input: `{"regex":".*foo.*"}`, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var unmarshalled Config + err := json.Unmarshal([]byte(test.input), &unmarshalled) + require.NoError(t, err) + + marshalled, err := json.Marshal(&unmarshalled) + require.NoError(t, err) + + require.Equal(t, test.input, string(marshalled)) + }) + } +} diff --git a/model/rulefmt/rulefmt.go b/model/rulefmt/rulefmt.go index bfb85ce740..bb36a21208 100644 --- a/model/rulefmt/rulefmt.go +++ b/model/rulefmt/rulefmt.go @@ -111,6 +111,20 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { ) } + for k, v := range g.Labels { + if !model.LabelName(k).IsValid() || k == model.MetricNameLabel { + errs = append( + errs, fmt.Errorf("invalid label name: %s", k), + ) + } + + if !model.LabelValue(v).IsValid() { + errs = append( + errs, fmt.Errorf("invalid label value: %s", v), + ) + } + } + set[g.Name] = struct{}{} for i, r := range g.Rules { @@ -136,11 +150,12 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { // RuleGroup is a list of sequentially evaluated recording and alerting rules. 
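+// The group-level `labels` added below are attached to every rule in the
+// group; on a name clash the rule's own label presumably takes precedence.
+// Their names and values are validated in Validate above.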
type RuleGroup struct { - Name string `yaml:"name"` - Interval model.Duration `yaml:"interval,omitempty"` - QueryOffset *model.Duration `yaml:"query_offset,omitempty"` - Limit int `yaml:"limit,omitempty"` - Rules []RuleNode `yaml:"rules"` + Name string `yaml:"name"` + Interval model.Duration `yaml:"interval,omitempty"` + QueryOffset *model.Duration `yaml:"query_offset,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []RuleNode `yaml:"rules"` + Labels map[string]string `yaml:"labels,omitempty"` } // Rule describes an alerting or recording rule. @@ -169,14 +184,14 @@ type RuleNode struct { func (r *RuleNode) Validate() (nodes []WrappedError) { if r.Record.Value != "" && r.Alert.Value != "" { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("only one of 'record' and 'alert' must be set"), + err: errors.New("only one of 'record' and 'alert' must be set"), node: &r.Record, nodeAlt: &r.Alert, }) } if r.Record.Value == "" && r.Alert.Value == "" { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("one of 'record' or 'alert' must be set"), + err: errors.New("one of 'record' or 'alert' must be set"), node: &r.Record, nodeAlt: &r.Alert, }) @@ -184,7 +199,7 @@ func (r *RuleNode) Validate() (nodes []WrappedError) { if r.Expr.Value == "" { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("field 'expr' must be set in rule"), + err: errors.New("field 'expr' must be set in rule"), node: &r.Expr, }) } else if _, err := parser.ParseExpr(r.Expr.Value); err != nil { @@ -196,19 +211,19 @@ func (r *RuleNode) Validate() (nodes []WrappedError) { if r.Record.Value != "" { if len(r.Annotations) > 0 { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("invalid field 'annotations' in recording rule"), + err: errors.New("invalid field 'annotations' in recording rule"), node: &r.Record, }) } if r.For != 0 { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("invalid field 'for' in recording rule"), + err: errors.New("invalid field 'for' in recording rule"), node: &r.Record, }) } if r.KeepFiringFor != 0 { nodes = append(nodes, WrappedError{ - err: fmt.Errorf("invalid field 'keep_firing_for' in recording rule"), + err: errors.New("invalid field 'keep_firing_for' in recording rule"), node: &r.Record, }) } diff --git a/model/rulefmt/rulefmt_test.go b/model/rulefmt/rulefmt_test.go index ef5008f4bf..73ea174594 100644 --- a/model/rulefmt/rulefmt_test.go +++ b/model/rulefmt/rulefmt_test.go @@ -85,9 +85,8 @@ func TestParseFileFailure(t *testing.T) { for _, c := range table { _, errs := ParseFile(filepath.Join("testdata", c.filename)) - require.NotNil(t, errs, "Expected error parsing %s but got none", c.filename) - require.Error(t, errs[0]) - require.Containsf(t, errs[0].Error(), c.errMsg, "Expected error for %s.", c.filename) + require.NotEmpty(t, errs, "Expected error parsing %s but got none", c.filename) + require.ErrorContainsf(t, errs[0], c.errMsg, "Expected error for %s.", c.filename) } } @@ -108,6 +107,23 @@ groups: severity: "page" annotations: summary: "Instance {{ $labels.instance }} down" +`, + shouldPass: true, + }, + { + ruleString: ` +groups: +- name: example + labels: + team: myteam + rules: + - alert: InstanceDown + expr: up == 0 + for: 5m + labels: + severity: "page" + annotations: + summary: "Instance {{ $labels.instance }} down" `, shouldPass: true, }, @@ -259,8 +275,7 @@ func TestError(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := tt.error.Error() - require.Equal(t, tt.want, got) + require.EqualError(t, tt.error, tt.want) }) } } @@ -308,8 
+323,7 @@ func TestWrappedError(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := tt.wrappedError.Error() - require.Equal(t, tt.want, got) + require.EqualError(t, tt.wrappedError, tt.want) }) } } diff --git a/model/textparse/benchmark_test.go b/model/textparse/benchmark_test.go new file mode 100644 index 0000000000..bd0d5089ac --- /dev/null +++ b/model/textparse/benchmark_test.go @@ -0,0 +1,185 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "bytes" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "testing" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/labels" + + "github.com/prometheus/common/expfmt" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" +) + +type newParser func([]byte, *labels.SymbolTable) Parser + +var newTestParserFns = map[string]newParser{ + "promtext": NewPromParser, + "promproto": func(b []byte, st *labels.SymbolTable) Parser { + return NewProtobufParser(b, true, st) + }, + "omtext": func(b []byte, st *labels.SymbolTable) Parser { + return NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) + }, + "omtext_with_nhcb": func(b []byte, st *labels.SymbolTable) Parser { + p := NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) + return NewNHCBParser(p, st, false) + }, +} + +// BenchmarkParse benchmarks parsing, mimicking how scrape/scrape.go#append uses it. +// Typically used as follows: +/* + export bench=v1 && go test ./model/textparse/... \ + -run '^$' -bench '^BenchmarkParse' \ + -benchtime 2s -count 6 -cpu 2 -benchmem -timeout 999m \ + | tee ${bench}.txt +*/ +// For profiles, add -memprofile=${bench}.mem.pprof -cpuprofile=${bench}.cpu.pprof +// options. +// +// NOTE(bwplotka): Previous iterations of this benchmark had different cases for isolated +// Series, Series+Metrics with and without reuse, Series+CT. Those cases are sometimes +// good to know if you are working on a certain optimization, but it does not +// make sense to persist such cases for everybody (e.g. for CI one day). +// For local iteration, feel free to adjust cases/comment out code etc. +// +// NOTE(bwplotka): Do not try to conclude "what parser (OM, proto, prom) is the fastest" +// as the testdata has different amounts and types of metrics and features (e.g. exemplars). +func BenchmarkParse(b *testing.B) { + for _, bcase := range []struct { + dataFile string // Localized to "./testdata". + dataProto []byte + parser string + + compareToExpfmtFormat expfmt.FormatType + }{ + {dataFile: "promtestdata.txt", parser: "promtext", compareToExpfmtFormat: expfmt.TypeTextPlain}, + {dataFile: "promtestdata.nometa.txt", parser: "promtext", compareToExpfmtFormat: expfmt.TypeTextPlain}, + + // We don't pass compareToExpfmtFormat: expfmt.TypeProtoDelim as expfmt does not support GAUGE_HISTOGRAM, see https://github.com/prometheus/common/issues/430.
+ {dataProto: createTestProtoBuf(b).Bytes(), parser: "promproto"}, + + // We don't pass compareToExpfmtFormat: expfmt.TypeOpenMetrics as expfmt does not support OM exemplars, see https://github.com/prometheus/common/issues/703. + {dataFile: "omtestdata.txt", parser: "omtext"}, + {dataFile: "promtestdata.txt", parser: "omtext"}, // Compare how omtext parser deals with Prometheus text format vs promtext. + + // NHCB. + {dataFile: "omhistogramdata.txt", parser: "omtext"}, // Measure OM parser baseline for histograms. + {dataFile: "omhistogramdata.txt", parser: "omtext_with_nhcb"}, // Measure NHCB over OM parser. + } { + var buf []byte + dataCase := bcase.dataFile + if len(bcase.dataProto) > 0 { + dataCase = "createTestProtoBuf()" + buf = bcase.dataProto + } else { + f, err := os.Open(filepath.Join("testdata", bcase.dataFile)) + require.NoError(b, err) + b.Cleanup(func() { + _ = f.Close() + }) + buf, err = io.ReadAll(f) + require.NoError(b, err) + } + b.Run(fmt.Sprintf("data=%v/parser=%v", dataCase, bcase.parser), func(b *testing.B) { + newParserFn := newTestParserFns[bcase.parser] + var ( + res labels.Labels + e exemplar.Exemplar + ) + + b.SetBytes(int64(len(buf))) + b.ReportAllocs() + b.ResetTimer() + + st := labels.NewSymbolTable() + for i := 0; i < b.N; i++ { + p := newParserFn(buf, st) + + Inner: + for { + t, err := p.Next() + switch t { + case EntryInvalid: + if errors.Is(err, io.EOF) { + break Inner + } + b.Fatal(err) + case EntryType: + _, _ = p.Type() + continue + case EntryHelp: + _, _ = p.Help() + continue + case EntryUnit: + _, _ = p.Unit() + continue + case EntryComment: + continue + case EntryHistogram: + _, _, _, _ = p.Histogram() + case EntrySeries: + _, _, _ = p.Series() + default: + b.Fatal("not implemented entry", t) + } + + _ = p.Metric(&res) + _ = p.CreatedTimestamp() + for hasExemplar := p.Exemplar(&e); hasExemplar; hasExemplar = p.Exemplar(&e) { + } + } + } + }) + + b.Run(fmt.Sprintf("data=%v/parser=xpfmt", dataCase), func(b *testing.B) { + if bcase.compareToExpfmtFormat == expfmt.TypeUnknown { + b.Skip("compareToExpfmtFormat not set") + } + + b.SetBytes(int64(len(buf))) + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + decSamples := make(model.Vector, 0, 50) + sdec := expfmt.SampleDecoder{ + Dec: expfmt.NewDecoder(bytes.NewReader(buf), expfmt.NewFormat(bcase.compareToExpfmtFormat)), + Opts: &expfmt.DecodeOptions{ + Timestamp: model.TimeFromUnixNano(0), + }, + } + + for { + if err := sdec.Decode(&decSamples); err != nil { + if errors.Is(err, io.EOF) { + break + } + b.Fatal(err) + } + decSamples = decSamples[:0] + } + } + }) + } +} diff --git a/model/textparse/interface.go b/model/textparse/interface.go index 0b5d9281e4..2682855281 100644 --- a/model/textparse/interface.go +++ b/model/textparse/interface.go @@ -14,6 +14,8 @@ package textparse import ( + "errors" + "fmt" "mime" "github.com/prometheus/common/model" @@ -23,8 +25,7 @@ import ( "github.com/prometheus/prometheus/model/labels" ) -// Parser parses samples from a byte slice of samples in the official -// Prometheus and OpenMetrics text exposition formats. +// Parser parses samples from a byte slice of samples in different exposition formats. type Parser interface { // Series returns the bytes of a series with a simple float64 as a // value, the timestamp if set, and the value of the current sample. @@ -58,6 +59,8 @@ type Parser interface { // Metric writes the labels of the current sample into the passed labels. // It returns the string from which the metric was parsed. 
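+	// Note that label values may be normalised rather than returned verbatim;
+	// e.g. the tests in this patch expect a scraped quantile="0" to surface
+	// as quantile="0.0".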
+ // The values of the "le" labels of classic histograms and "quantile" labels + // of summaries should follow the OpenMetrics formatting rules. Metric(l *labels.Labels) string // Exemplar writes the exemplar of the current sample into the passed @@ -69,6 +72,8 @@ type Parser interface { // CreatedTimestamp returns the created timestamp (in milliseconds) for the // current sample. It returns nil if it is unknown e.g. if it wasn't set, // if the scrape protocol or metric type does not support created timestamps. + // Assume the CreatedTimestamp returned pointer is only valid until + // the Next iteration. CreatedTimestamp() *int64 // Next advances the parser to the next sample. @@ -76,26 +81,65 @@ type Parser interface { Next() (Entry, error) } -// New returns a new parser of the byte slice. -// -// This function always returns a valid parser, but might additionally -// return an error if the content type cannot be parsed. -func New(b []byte, contentType string, parseClassicHistograms bool, st *labels.SymbolTable) (Parser, error) { +// extractMediaType returns the mediaType of a required parser. It tries first to +// extract a valid and supported mediaType from contentType. If that fails, +// the provided fallbackType (possibly an empty string) is returned, together with +// an error. fallbackType is used as-is without further validation. +func extractMediaType(contentType, fallbackType string) (string, error) { if contentType == "" { - return NewPromParser(b, st), nil + if fallbackType == "" { + return "", errors.New("non-compliant scrape target sending blank Content-Type and no fallback_scrape_protocol specified for target") + } + return fallbackType, fmt.Errorf("non-compliant scrape target sending blank Content-Type, using fallback_scrape_protocol %q", fallbackType) } + // We have a contentType, parse it. mediaType, _, err := mime.ParseMediaType(contentType) if err != nil { - return NewPromParser(b, st), err + if fallbackType == "" { + retErr := fmt.Errorf("cannot parse Content-Type %q and no fallback_scrape_protocol for target", contentType) + return "", errors.Join(retErr, err) + } + retErr := fmt.Errorf("could not parse received Content-Type %q, using fallback_scrape_protocol %q", contentType, fallbackType) + return fallbackType, errors.Join(retErr, err) } + + // We have a valid media type, either we recognise it and can use it + // or we have to error. + switch mediaType { + case "application/openmetrics-text", "application/vnd.google.protobuf", "text/plain": + return mediaType, nil + } + // We're here because we have no recognised mediaType. + if fallbackType == "" { + return "", fmt.Errorf("received unsupported Content-Type %q and no fallback_scrape_protocol specified for target", contentType) + } + return fallbackType, fmt.Errorf("received unsupported Content-Type %q, using fallback_scrape_protocol %q", contentType, fallbackType) +} + +// New returns a new parser of the byte slice. +// +// This function no longer guarantees to return a valid parser. +// +// It only returns a valid parser if the supplied contentType and fallbackType allow. +// An error may also be returned if fallbackType had to be used or there was some +// other error parsing the supplied Content-Type. +// If the returned parser is nil then the scrape must fail. 
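+// For illustration (hypothetical arguments, not an exhaustive contract):
+//
+//	p, err := New(buf, "text/html", "text/plain", false, false, st) // PromParser plus a warning error.
+//	p, err := New(buf, "text/html", "", false, false, st)           // nil parser; the scrape must fail.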
+func New(b []byte, contentType, fallbackType string, parseClassicHistograms, skipOMCTSeries bool, st *labels.SymbolTable) (Parser, error) { + mediaType, err := extractMediaType(contentType, fallbackType) + // err may be nil or something we want to warn about. + switch mediaType { case "application/openmetrics-text": - return NewOpenMetricsParser(b, st), nil + return NewOpenMetricsParser(b, st, func(o *openMetricsParserOptions) { + o.SkipCTSeries = skipOMCTSeries + }), err case "application/vnd.google.protobuf": - return NewProtobufParser(b, parseClassicHistograms, st), nil + return NewProtobufParser(b, parseClassicHistograms, st), err + case "text/plain": + return NewPromParser(b, st), err default: - return NewPromParser(b, st), nil + return nil, err } } diff --git a/model/textparse/interface_test.go b/model/textparse/interface_test.go index c644565628..72c8284f2d 100644 --- a/model/textparse/interface_test.go +++ b/model/textparse/interface_test.go @@ -14,16 +14,28 @@ package textparse import ( + "errors" + "io" "testing" + "github.com/google/go-cmp/cmp" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/util/testutil" ) func TestNewParser(t *testing.T) { t.Parallel() + requireNilParser := func(t *testing.T, p Parser) { + require.Nil(t, p) + } + requirePromParser := func(t *testing.T, p Parser) { require.NotNil(t, p) _, ok := p.(*PromParser) @@ -36,34 +48,83 @@ func TestNewParser(t *testing.T) { require.True(t, ok) } + requireProtobufParser := func(t *testing.T, p Parser) { + require.NotNil(t, p) + _, ok := p.(*ProtobufParser) + require.True(t, ok) + } + for name, tt := range map[string]*struct { - contentType string - validateParser func(*testing.T, Parser) - err string + contentType string + fallbackScrapeProtocol config.ScrapeProtocol + validateParser func(*testing.T, Parser) + err string }{ "empty-string": { - validateParser: requirePromParser, + validateParser: requireNilParser, + err: "non-compliant scrape target sending blank Content-Type and no fallback_scrape_protocol specified for target", + }, + "empty-string-fallback-text-plain": { + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText0_0_4, + err: "non-compliant scrape target sending blank Content-Type, using fallback_scrape_protocol \"text/plain\"", }, "invalid-content-type-1": { contentType: "invalid/", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "expected token after slash", }, + "invalid-content-type-1-fallback-text-plain": { + contentType: "invalid/", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText0_0_4, + err: "expected token after slash", + }, + "invalid-content-type-1-fallback-openmetrics": { + contentType: "invalid/", + validateParser: requireOpenMetricsParser, + fallbackScrapeProtocol: config.OpenMetricsText0_0_1, + err: "expected token after slash", + }, + "invalid-content-type-1-fallback-protobuf": { + contentType: "invalid/", + validateParser: requireProtobufParser, + fallbackScrapeProtocol: config.PrometheusProto, + err: "expected token after slash", + }, "invalid-content-type-2": { contentType: "invalid/invalid/invalid", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "unexpected content after media subtype", }, + 
"invalid-content-type-2-fallback-text-plain": { + contentType: "invalid/invalid/invalid", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText1_0_0, + err: "unexpected content after media subtype", + }, "invalid-content-type-3": { contentType: "/", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "no media type", }, + "invalid-content-type-3-fallback-text-plain": { + contentType: "/", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText1_0_0, + err: "no media type", + }, "invalid-content-type-4": { contentType: "application/openmetrics-text; charset=UTF-8; charset=utf-8", - validateParser: requirePromParser, + validateParser: requireNilParser, err: "duplicate parameter name", }, + "invalid-content-type-4-fallback-open-metrics": { + contentType: "application/openmetrics-text; charset=UTF-8; charset=utf-8", + validateParser: requireOpenMetricsParser, + fallbackScrapeProtocol: config.OpenMetricsText1_0_0, + err: "duplicate parameter name", + }, "openmetrics": { contentType: "application/openmetrics-text", validateParser: requireOpenMetricsParser, @@ -80,27 +141,129 @@ func TestNewParser(t *testing.T) { contentType: "text/plain", validateParser: requirePromParser, }, + "protobuf": { + contentType: "application/vnd.google.protobuf", + validateParser: requireProtobufParser, + }, "plain-text-with-version": { contentType: "text/plain; version=0.0.4", validateParser: requirePromParser, }, "some-other-valid-content-type": { contentType: "text/html", - validateParser: requirePromParser, + validateParser: requireNilParser, + err: "received unsupported Content-Type \"text/html\" and no fallback_scrape_protocol specified for target", + }, + "some-other-valid-content-type-fallback-text-plain": { + contentType: "text/html", + validateParser: requirePromParser, + fallbackScrapeProtocol: config.PrometheusText0_0_4, + err: "received unsupported Content-Type \"text/html\", using fallback_scrape_protocol \"text/plain\"", }, } { t.Run(name, func(t *testing.T) { tt := tt // Copy to local variable before going parallel. t.Parallel() - p, err := New([]byte{}, tt.contentType, false, labels.NewSymbolTable()) + fallbackProtoMediaType := tt.fallbackScrapeProtocol.HeaderMediaType() + + p, err := New([]byte{}, tt.contentType, fallbackProtoMediaType, false, false, labels.NewSymbolTable()) tt.validateParser(t, p) if tt.err == "" { require.NoError(t, err) } else { - require.Error(t, err) - require.Contains(t, err.Error(), tt.err) + require.ErrorContains(t, err, tt.err) } }) } } + +// parsedEntry represents data that is parsed for each entry. +type parsedEntry struct { + // In all but EntryComment, EntryInvalid. + m string + + // In EntryHistogram. + shs *histogram.Histogram + fhs *histogram.FloatHistogram + + // In EntrySeries. + v float64 + + // In EntrySeries and EntryHistogram. + lset labels.Labels + t *int64 + es []exemplar.Exemplar + ct *int64 + + // In EntryType. + typ model.MetricType + // In EntryHelp. + help string + // In EntryUnit. + unit string + // In EntryComment. 
+ comment string +} + +func requireEntries(t *testing.T, exp, got []parsedEntry) { + t.Helper() + + testutil.RequireEqualWithOptions(t, exp, got, []cmp.Option{ + cmp.AllowUnexported(parsedEntry{}), + }) +} + +func testParse(t *testing.T, p Parser) (ret []parsedEntry) { + t.Helper() + + for { + et, err := p.Next() + if errors.Is(err, io.EOF) { + break + } + require.NoError(t, err) + + var got parsedEntry + var m []byte + switch et { + case EntryInvalid: + t.Fatal("entry invalid not expected") + case EntrySeries, EntryHistogram: + if et == EntrySeries { + m, got.t, got.v = p.Series() + got.m = string(m) + } else { + m, got.t, got.shs, got.fhs = p.Histogram() + got.m = string(m) + } + + p.Metric(&got.lset) + // Parser reuses int pointer. + if ct := p.CreatedTimestamp(); ct != nil { + got.ct = int64p(*ct) + } + for e := (exemplar.Exemplar{}); p.Exemplar(&e); { + got.es = append(got.es, e) + } + case EntryType: + m, got.typ = p.Type() + got.m = string(m) + + case EntryHelp: + m, h := p.Help() + got.m = string(m) + got.help = string(h) + + case EntryUnit: + m, u := p.Unit() + got.m = string(m) + got.unit = string(u) + + case EntryComment: + got.comment = string(p.Comment()) + } + ret = append(ret, got) + } + return ret +} diff --git a/model/textparse/nhcbparse.go b/model/textparse/nhcbparse.go new file mode 100644 index 0000000000..ff756965f4 --- /dev/null +++ b/model/textparse/nhcbparse.go @@ -0,0 +1,380 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "errors" + "io" + "math" + "strconv" + "strings" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/util/convertnhcb" +) + +type collectionState int + +const ( + stateStart collectionState = iota + stateCollecting + stateEmitting +) + +// The NHCBParser wraps a Parser and converts classic histograms to native +// histograms with custom buckets. +// +// Since the Parser interface is line-based, this parser needs to keep track +// of the last classic histogram series it saw to collate them into a +// single native histogram. +// +// Note: +// - Only series that have the histogram metadata type are considered for +// conversion. +// - The classic series are also returned if keepClassicHistograms is true. +type NHCBParser struct { + // The parser we're wrapping. + parser Parser + // Option to keep classic histograms along with converted histograms. + keepClassicHistograms bool + + // Labels builder. + builder labels.ScratchBuilder + + // State of the parser. + state collectionState + + // Caches the values from the underlying parser. + // For Series and Histogram. + bytes []byte + ts *int64 + value float64 + h *histogram.Histogram + fh *histogram.FloatHistogram + // For Metric. + lset labels.Labels + metricString string + // For Type.
+ bName []byte + typ model.MetricType + + // Caches the entry itself if we are inserting a converted NHCB + // halfway through. + entry Entry + err error + + // Caches the values and metric for the inserted converted NHCB. + bytesNHCB []byte + hNHCB *histogram.Histogram + fhNHCB *histogram.FloatHistogram + lsetNHCB labels.Labels + exemplars []exemplar.Exemplar + ctNHCB *int64 + metricStringNHCB string + + // Collates values from the classic histogram series to build + // the converted histogram later. + tempLsetNHCB labels.Labels + tempNHCB convertnhcb.TempHistogram + tempExemplars []exemplar.Exemplar + tempExemplarCount int + tempCT *int64 + + // Remembers the last base histogram metric name (assuming it's + // a classic histogram) so we can tell if the next float series + // is part of the same classic histogram. + lastHistogramName string + lastHistogramLabelsHash uint64 + lastHistogramExponential bool + // Reused buffer for hashing labels. + hBuffer []byte +} + +func NewNHCBParser(p Parser, st *labels.SymbolTable, keepClassicHistograms bool) Parser { + return &NHCBParser{ + parser: p, + keepClassicHistograms: keepClassicHistograms, + builder: labels.NewScratchBuilderWithSymbolTable(st, 16), + tempNHCB: convertnhcb.NewTempHistogram(), + } +} + +func (p *NHCBParser) Series() ([]byte, *int64, float64) { + return p.bytes, p.ts, p.value +} + +func (p *NHCBParser) Histogram() ([]byte, *int64, *histogram.Histogram, *histogram.FloatHistogram) { + if p.state == stateEmitting { + return p.bytesNHCB, p.ts, p.hNHCB, p.fhNHCB + } + return p.bytes, p.ts, p.h, p.fh +} + +func (p *NHCBParser) Help() ([]byte, []byte) { + return p.parser.Help() +} + +func (p *NHCBParser) Type() ([]byte, model.MetricType) { + return p.bName, p.typ +} + +func (p *NHCBParser) Unit() ([]byte, []byte) { + return p.parser.Unit() +} + +func (p *NHCBParser) Comment() []byte { + return p.parser.Comment() +} + +func (p *NHCBParser) Metric(l *labels.Labels) string { + if p.state == stateEmitting { + *l = p.lsetNHCB + return p.metricStringNHCB + } + *l = p.lset + return p.metricString +} + +func (p *NHCBParser) Exemplar(ex *exemplar.Exemplar) bool { + if p.state == stateEmitting { + if len(p.exemplars) == 0 { + return false + } + *ex = p.exemplars[0] + p.exemplars = p.exemplars[1:] + return true + } + return p.parser.Exemplar(ex) +} + +func (p *NHCBParser) CreatedTimestamp() *int64 { + switch p.state { + case stateStart: + if p.entry == EntrySeries || p.entry == EntryHistogram { + return p.parser.CreatedTimestamp() + } + case stateCollecting: + return p.tempCT + case stateEmitting: + return p.ctNHCB + } + return nil +} + +func (p *NHCBParser) Next() (Entry, error) { + if p.state == stateEmitting { + p.state = stateStart + if p.entry == EntrySeries { + isNHCB := p.handleClassicHistogramSeries(p.lset) + if isNHCB && !p.keepClassicHistograms { + // Do not return the classic histogram series if it was converted to NHCB and we are not keeping classic histograms. + return p.Next() + } + } + return p.entry, p.err + } + + p.entry, p.err = p.parser.Next() + if p.err != nil { + if errors.Is(p.err, io.EOF) && p.processNHCB() { + return EntryHistogram, nil + } + return EntryInvalid, p.err + } + switch p.entry { + case EntrySeries: + p.bytes, p.ts, p.value = p.parser.Series() + p.metricString = p.parser.Metric(&p.lset) + // Check the label set to see if we can continue or need to emit the NHCB. + var isNHCB bool + if p.compareLabels() { + // Labels differ. Check if we can emit the NHCB. 
+ if p.processNHCB() { + return EntryHistogram, nil + } + isNHCB = p.handleClassicHistogramSeries(p.lset) + } else { + // Labels are the same. Check if after an exponential histogram. + if p.lastHistogramExponential { + isNHCB = false + } else { + isNHCB = p.handleClassicHistogramSeries(p.lset) + } + } + if isNHCB && !p.keepClassicHistograms { + // Do not return the classic histogram series if it was converted to NHCB and we are not keeping classic histograms. + return p.Next() + } + return p.entry, p.err + case EntryHistogram: + p.bytes, p.ts, p.h, p.fh = p.parser.Histogram() + p.metricString = p.parser.Metric(&p.lset) + p.storeExponentialLabels() + case EntryType: + p.bName, p.typ = p.parser.Type() + } + if p.processNHCB() { + return EntryHistogram, nil + } + return p.entry, p.err +} + +// Return true if labels have changed and we should emit the NHCB. +func (p *NHCBParser) compareLabels() bool { + if p.state != stateCollecting { + return false + } + if p.typ != model.MetricTypeHistogram { + // Different metric type. + return true + } + _, name := convertnhcb.GetHistogramMetricBaseName(p.lset.Get(labels.MetricName)) + if p.lastHistogramName != name { + // Different metric name. + return true + } + nextHash, _ := p.lset.HashWithoutLabels(p.hBuffer, labels.BucketLabel) + // Different label values. + return p.lastHistogramLabelsHash != nextHash +} + +// Save the label set of the classic histogram without suffix and bucket `le` label. +func (p *NHCBParser) storeClassicLabels(name string) { + p.lastHistogramName = name + p.lastHistogramLabelsHash, _ = p.lset.HashWithoutLabels(p.hBuffer, labels.BucketLabel) + p.lastHistogramExponential = false +} + +func (p *NHCBParser) storeExponentialLabels() { + p.lastHistogramName = p.lset.Get(labels.MetricName) + p.lastHistogramLabelsHash, _ = p.lset.HashWithoutLabels(p.hBuffer) + p.lastHistogramExponential = true +} + +// handleClassicHistogramSeries collates the classic histogram series to be converted to NHCB +// if it is actually a classic histogram series (and not a normal float series) and if there +// isn't already a native histogram with the same name (assuming it is always processed +// right before the classic histograms) and returns true if the collation was done. +func (p *NHCBParser) handleClassicHistogramSeries(lset labels.Labels) bool { + if p.typ != model.MetricTypeHistogram { + return false + } + mName := lset.Get(labels.MetricName) + // Sanity check to ensure that the TYPE metadata entry name is the same as the base name. + suffixType, name := convertnhcb.GetHistogramMetricBaseName(mName) + if name != string(p.bName) { + return false + } + switch suffixType { + case convertnhcb.SuffixBucket: + if !lset.Has(labels.BucketLabel) { + // This should not really happen. 
+ return false + } + le, err := strconv.ParseFloat(lset.Get(labels.BucketLabel), 64) + if err == nil && !math.IsNaN(le) { + p.processClassicHistogramSeries(lset, name, func(hist *convertnhcb.TempHistogram) { + _ = hist.SetBucketCount(le, p.value) + }) + return true + } + case convertnhcb.SuffixCount: + p.processClassicHistogramSeries(lset, name, func(hist *convertnhcb.TempHistogram) { + _ = hist.SetCount(p.value) + }) + return true + case convertnhcb.SuffixSum: + p.processClassicHistogramSeries(lset, name, func(hist *convertnhcb.TempHistogram) { + _ = hist.SetSum(p.value) + }) + return true + } + return false +} + +func (p *NHCBParser) processClassicHistogramSeries(lset labels.Labels, name string, updateHist func(*convertnhcb.TempHistogram)) { + if p.state != stateCollecting { + p.storeClassicLabels(name) + p.tempCT = p.parser.CreatedTimestamp() + p.state = stateCollecting + p.tempLsetNHCB = convertnhcb.GetHistogramMetricBase(lset, name) + } + p.storeExemplars() + updateHist(&p.tempNHCB) +} + +func (p *NHCBParser) storeExemplars() { + for ex := p.nextExemplarPtr(); p.parser.Exemplar(ex); ex = p.nextExemplarPtr() { + p.tempExemplarCount++ + } +} + +func (p *NHCBParser) nextExemplarPtr() *exemplar.Exemplar { + switch { + case p.tempExemplarCount == len(p.tempExemplars)-1: + // Reuse the previously allocated exemplar; it was not filled up. + case len(p.tempExemplars) == cap(p.tempExemplars): + // Let the runtime grow the slice. + p.tempExemplars = append(p.tempExemplars, exemplar.Exemplar{}) + default: + // Take the next element into use. + p.tempExemplars = p.tempExemplars[:len(p.tempExemplars)+1] + } + return &p.tempExemplars[len(p.tempExemplars)-1] +} + +func (p *NHCBParser) swapExemplars() { + p.exemplars = p.tempExemplars[:p.tempExemplarCount] + p.tempExemplars = p.tempExemplars[:0] +} + +// processNHCB converts the collated classic histogram series to NHCB and caches the info + // to be returned to callers. Returns true if the conversion was successful. +func (p *NHCBParser) processNHCB() bool { + if p.state != stateCollecting { + return false + } + h, fh, err := p.tempNHCB.Convert() + if err == nil { + if h != nil { + if err := h.Validate(); err != nil { + return false + } + p.hNHCB = h + p.fhNHCB = nil + } else if fh != nil { + if err := fh.Validate(); err != nil { + return false + } + p.hNHCB = nil + p.fhNHCB = fh + } + p.metricStringNHCB = p.tempLsetNHCB.Get(labels.MetricName) + strings.ReplaceAll(p.tempLsetNHCB.DropMetricName().String(), ", ", ",") + p.bytesNHCB = []byte(p.metricStringNHCB) + p.lsetNHCB = p.tempLsetNHCB + p.swapExemplars() + p.ctNHCB = p.tempCT + p.state = stateEmitting + } else { + p.state = stateStart + } + p.tempNHCB.Reset() + p.tempExemplarCount = 0 + p.tempCT = nil + return err == nil +} diff --git a/model/textparse/nhcbparse_test.go b/model/textparse/nhcbparse_test.go new file mode 100644 index 0000000000..859bcc1cb7 --- /dev/null +++ b/model/textparse/nhcbparse_test.go @@ -0,0 +1,984 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "bytes" + "encoding/binary" + "strconv" + "testing" + + "github.com/gogo/protobuf/proto" + "github.com/stretchr/testify/require" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" +) + +func TestNHCBParserOnOMParser(t *testing.T) { + // The input is taken originally from TestOpenMetricsParse, with additional tests for the NHCBParser. + + input := `# HELP go_gc_duration_seconds A summary of the GC invocation durations. +# TYPE go_gc_duration_seconds summary +# UNIT go_gc_duration_seconds seconds +go_gc_duration_seconds{quantile="0"} 4.9351e-05 +go_gc_duration_seconds{quantile="0.25"} 7.424100000000001e-05 +go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05 +# HELP nohelp1 +# HELP help2 escape \ \n \\ \" \x chars +# UNIT nounit +go_gc_duration_seconds{quantile="1.0",a="b"} 8.3835e-05 +go_gc_duration_seconds_count 99 +some:aggregate:rate5m{a_b="c"} 1 +# HELP go_goroutines Number of goroutines that currently exist. +# TYPE go_goroutines gauge +go_goroutines 33 123.123 +# TYPE hh histogram +hh_bucket{le="+Inf"} 1 +# TYPE gh gaugehistogram +gh_bucket{le="+Inf"} 1 +# TYPE hhh histogram +hhh_bucket{le="+Inf"} 1 # {id="histogram-bucket-test"} 4 +hhh_count 1 # {id="histogram-count-test"} 4 +# TYPE ggh gaugehistogram +ggh_bucket{le="+Inf"} 1 # {id="gaugehistogram-bucket-test",xx="yy"} 4 123.123 +ggh_count 1 # {id="gaugehistogram-count-test",xx="yy"} 4 123.123 +# TYPE smr_seconds summary +smr_seconds_count 2.0 # {id="summary-count-test"} 1 123.321 +smr_seconds_sum 42.0 # {id="summary-sum-test"} 1 123.321 +# TYPE ii info +ii{foo="bar"} 1 +# TYPE ss stateset +ss{ss="foo"} 1 +ss{ss="bar"} 0 +ss{A="a"} 0 +# TYPE un unknown +_metric_starting_with_underscore 1 +testmetric{_label_starting_with_underscore="foo"} 1 +testmetric{label="\"bar\""} 1 +# HELP foo Counter with and without labels to certify CT is parsed for both cases +# TYPE foo counter +foo_total 17.0 1520879607.789 # {id="counter-test"} 5 +foo_created 1520872607.123 +foo_total{a="b"} 17.0 1520879607.789 # {id="counter-test"} 5 +foo_created{a="b"} 1520872607.123 +# HELP bar Summary with CT at the end, making sure we find CT even if it's multiple lines a far +# TYPE bar summary +bar_count 17.0 +bar_sum 324789.3 +bar{quantile="0.95"} 123.7 +bar{quantile="0.99"} 150.0 +bar_created 1520872608.124 +# HELP baz Histogram with the same objective as above's summary +# TYPE baz histogram +baz_bucket{le="0.0"} 0 +baz_bucket{le="+Inf"} 17 +baz_count 17 +baz_sum 324789.3 +baz_created 1520872609.125 +# HELP fizz_created Gauge which shouldn't be parsed as CT +# TYPE fizz_created gauge +fizz_created 17.0 +# HELP something Histogram with _created between buckets and summary +# TYPE something histogram +something_count 18 +something_sum 324789.4 +something_created 1520430001 +something_bucket{le="0.0"} 1 +something_bucket{le="+Inf"} 18 +something_count{a="b"} 9 +something_sum{a="b"} 42123.0 +something_bucket{a="b",le="0.0"} 8 +something_bucket{a="b",le="+Inf"} 9 +something_created{a="b"} 1520430002 +# HELP yum Summary with _created between sum and quantiles +# TYPE yum summary +yum_count 20 +yum_sum 324789.5 +yum_created 1520430003 +yum{quantile="0.95"} 123.7 +yum{quantile="0.99"} 150.0 +# HELP foobar Summary 
with _created as the first line +# TYPE foobar summary +foobar_count 21 +foobar_created 1520430004 +foobar_sum 324789.6 +foobar{quantile="0.95"} 123.8 +foobar{quantile="0.99"} 150.1` + + input += "\n# HELP metric foo\x00bar" + input += "\nnull_byte_metric{a=\"abc\x00\"} 1" + input += "\n# EOF\n" + + exp := []parsedEntry{ + { + m: "go_gc_duration_seconds", + help: "A summary of the GC invocation durations.", + }, { + m: "go_gc_duration_seconds", + typ: model.MetricTypeSummary, + }, { + m: "go_gc_duration_seconds", + unit: "seconds", + }, { + m: `go_gc_duration_seconds{quantile="0"}`, + v: 4.9351e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), + }, { + m: `go_gc_duration_seconds{quantile="0.25"}`, + v: 7.424100000000001e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.25"), + }, { + m: `go_gc_duration_seconds{quantile="0.5",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.5", "a", "b"), + }, { + m: "nohelp1", + help: "", + }, { + m: "help2", + help: "escape \\ \n \\ \" \\x chars", + }, { + m: "nounit", + unit: "", + }, { + m: `go_gc_duration_seconds{quantile="1.0",a="b"}`, + v: 8.3835e-05, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"), + }, { + m: `go_gc_duration_seconds_count`, + v: 99, + lset: labels.FromStrings("__name__", "go_gc_duration_seconds_count"), + }, { + m: `some:aggregate:rate5m{a_b="c"}`, + v: 1, + lset: labels.FromStrings("__name__", "some:aggregate:rate5m", "a_b", "c"), + }, { + m: "go_goroutines", + help: "Number of goroutines that currently exist.", + }, { + m: "go_goroutines", + typ: model.MetricTypeGauge, + }, { + m: `go_goroutines`, + v: 33, + t: int64p(123123), + lset: labels.FromStrings("__name__", "go_goroutines"), + }, { + m: "hh", + typ: model.MetricTypeHistogram, + }, { + m: `hh{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1, + Sum: 0.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + // Custom values are empty as we do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "hh"), + }, { + m: "gh", + typ: model.MetricTypeGaugeHistogram, + }, { + m: `gh_bucket{le="+Inf"}`, + v: 1, + lset: labels.FromStrings("__name__", "gh_bucket", "le", "+Inf"), + }, { + m: "hhh", + typ: model.MetricTypeHistogram, + }, { + m: `hhh{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1, + Sum: 0.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{1}, + // Custom values are empty as we do not store the +Inf boundary. 
+ }, + lset: labels.FromStrings("__name__", "hhh"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "histogram-bucket-test"), Value: 4}, + {Labels: labels.FromStrings("id", "histogram-count-test"), Value: 4}, + }, + }, { + m: "ggh", + typ: model.MetricTypeGaugeHistogram, + }, { + m: `ggh_bucket{le="+Inf"}`, + v: 1, + lset: labels.FromStrings("__name__", "ggh_bucket", "le", "+Inf"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "gaugehistogram-bucket-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}}, + }, { + m: `ggh_count`, + v: 1, + lset: labels.FromStrings("__name__", "ggh_count"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "gaugehistogram-count-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}}, + }, { + m: "smr_seconds", + typ: model.MetricTypeSummary, + }, { + m: `smr_seconds_count`, + v: 2, + lset: labels.FromStrings("__name__", "smr_seconds_count"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "summary-count-test"), Value: 1, HasTs: true, Ts: 123321}}, + }, { + m: `smr_seconds_sum`, + v: 42, + lset: labels.FromStrings("__name__", "smr_seconds_sum"), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "summary-sum-test"), Value: 1, HasTs: true, Ts: 123321}}, + }, { + m: "ii", + typ: model.MetricTypeInfo, + }, { + m: `ii{foo="bar"}`, + v: 1, + lset: labels.FromStrings("__name__", "ii", "foo", "bar"), + }, { + m: "ss", + typ: model.MetricTypeStateset, + }, { + m: `ss{ss="foo"}`, + v: 1, + lset: labels.FromStrings("__name__", "ss", "ss", "foo"), + }, { + m: `ss{ss="bar"}`, + v: 0, + lset: labels.FromStrings("__name__", "ss", "ss", "bar"), + }, { + m: `ss{A="a"}`, + v: 0, + lset: labels.FromStrings("A", "a", "__name__", "ss"), + }, { + m: "un", + typ: model.MetricTypeUnknown, + }, { + m: "_metric_starting_with_underscore", + v: 1, + lset: labels.FromStrings("__name__", "_metric_starting_with_underscore"), + }, { + m: "testmetric{_label_starting_with_underscore=\"foo\"}", + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "_label_starting_with_underscore", "foo"), + }, { + m: "testmetric{label=\"\\\"bar\\\"\"}", + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`), + }, { + m: "foo", + help: "Counter with and without labels to certify CT is parsed for both cases", + }, { + m: "foo", + typ: model.MetricTypeCounter, + }, { + m: "foo_total", + v: 17, + lset: labels.FromStrings("__name__", "foo_total"), + t: int64p(1520879607789), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "counter-test"), Value: 5}}, + ct: int64p(1520872607123), + }, { + m: `foo_total{a="b"}`, + v: 17.0, + lset: labels.FromStrings("__name__", "foo_total", "a", "b"), + t: int64p(1520879607789), + es: []exemplar.Exemplar{{Labels: labels.FromStrings("id", "counter-test"), Value: 5}}, + ct: int64p(1520872607123), + }, { + m: "bar", + help: "Summary with CT at the end, making sure we find CT even if it's multiple lines a far", + }, { + m: "bar", + typ: model.MetricTypeSummary, + }, { + m: "bar_count", + v: 17.0, + lset: labels.FromStrings("__name__", "bar_count"), + ct: int64p(1520872608124), + }, { + m: "bar_sum", + v: 324789.3, + lset: labels.FromStrings("__name__", "bar_sum"), + ct: int64p(1520872608124), + }, { + m: `bar{quantile="0.95"}`, + v: 123.7, + lset: labels.FromStrings("__name__", "bar", "quantile", "0.95"), + ct: int64p(1520872608124), + }, { + m: `bar{quantile="0.99"}`, + v: 150.0, + lset: labels.FromStrings("__name__", "bar", "quantile", "0.99"), + ct: int64p(1520872608124), + }, { + 
m: "baz", + help: "Histogram with the same objective as above's summary", + }, { + m: "baz", + typ: model.MetricTypeHistogram, + }, { + m: `baz{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 17, + Sum: 324789.3, + PositiveSpans: []histogram.Span{{Offset: 1, Length: 1}}, // The first bucket has 0 count so we don't store it and Offset is 1. + PositiveBuckets: []int64{17}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "baz"), + ct: int64p(1520872609125), + }, { + m: "fizz_created", + help: "Gauge which shouldn't be parsed as CT", + }, { + m: "fizz_created", + typ: model.MetricTypeGauge, + }, { + m: `fizz_created`, + v: 17, + lset: labels.FromStrings("__name__", "fizz_created"), + }, { + m: "something", + help: "Histogram with _created between buckets and summary", + }, { + m: "something", + typ: model.MetricTypeHistogram, + }, { + m: `something{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 18, + Sum: 324789.4, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, + PositiveBuckets: []int64{1, 16}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "something"), + ct: int64p(1520430001000), + }, { + m: `something{a="b"}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 9, + Sum: 42123.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, + PositiveBuckets: []int64{8, -7}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "something", "a", "b"), + ct: int64p(1520430002000), + }, { + m: "yum", + help: "Summary with _created between sum and quantiles", + }, { + m: "yum", + typ: model.MetricTypeSummary, + }, { + m: `yum_count`, + v: 20, + lset: labels.FromStrings("__name__", "yum_count"), + ct: int64p(1520430003000), + }, { + m: `yum_sum`, + v: 324789.5, + lset: labels.FromStrings("__name__", "yum_sum"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.95"}`, + v: 123.7, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.95"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.99"}`, + v: 150.0, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.99"), + ct: int64p(1520430003000), + }, { + m: "foobar", + help: "Summary with _created as the first line", + }, { + m: "foobar", + typ: model.MetricTypeSummary, + }, { + m: `foobar_count`, + v: 21, + lset: labels.FromStrings("__name__", "foobar_count"), + ct: int64p(1520430004000), + }, { + m: `foobar_sum`, + v: 324789.6, + lset: labels.FromStrings("__name__", "foobar_sum"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.95"}`, + v: 123.8, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.95"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.99"}`, + v: 150.1, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.99"), + ct: int64p(1520430004000), + }, { + m: "metric", + help: "foo\x00bar", + }, { + m: "null_byte_metric{a=\"abc\x00\"}", + v: 1, + lset: labels.FromStrings("__name__", "null_byte_metric", "a", "abc\x00"), + }, + } + + p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + p = NewNHCBParser(p, labels.NewSymbolTable(), false) + got := testParse(t, p) + requireEntries(t, exp, got) +} + +func TestNHCBParserOMParser_MultipleHistograms(t *testing.T) { + // The input is taken originally from TestOpenMetricsParse, 
with additional tests for the NHCBParser. + + input := `# HELP something Histogram with _created between buckets and summary +# TYPE something histogram +something_count 18 +something_sum 324789.4 +something_bucket{le="0.0"} 1 # {id="something-test"} -2.0 +something_bucket{le="1.0"} 16 # {id="something-test"} 0.5 +something_bucket{le="+Inf"} 18 # {id="something-test"} 8 +something_count{a="b"} 9 +something_sum{a="b"} 42123.0 +something_bucket{a="b",le="0.0"} 8 # {id="something-test"} 0.0 123.321 +something_bucket{a="b",le="1.0"} 8 +something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000 +# EOF +` + + exp := []parsedEntry{ + { + m: "something", + help: "Histogram with _created between buckets and summary", + }, { + m: "something", + typ: model.MetricTypeHistogram, + }, { + m: `something{}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 18, + Sum: 324789.4, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 3}}, + PositiveBuckets: []int64{1, 14, -13}, + CustomValues: []float64{0.0, 1.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "something"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "something-test"), Value: -2.0}, + {Labels: labels.FromStrings("id", "something-test"), Value: 0.5}, + {Labels: labels.FromStrings("id", "something-test"), Value: 8.0}, + }, + }, { + m: `something{a="b"}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 9, + Sum: 42123.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 3}}, + PositiveBuckets: []int64{8, -8, 1}, + CustomValues: []float64{0.0, 1.0}, // We do not store the +Inf boundary. + }, + lset: labels.FromStrings("__name__", "something", "a", "b"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "something-test"), Value: 0.0, HasTs: true, Ts: 123321}, + {Labels: labels.FromStrings("id", "something-test"), Value: 2e100, HasTs: true, Ts: 123000}, + }, + }, + } + + p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + + p = NewNHCBParser(p, labels.NewSymbolTable(), false) + got := testParse(t, p) + requireEntries(t, exp, got) +} + +// Verify the requirement tables from +// https://github.com/prometheus/prometheus/issues/13532 . +// "classic" means the option "always_scrape_classic_histograms". +// "nhcb" means the option "convert_classic_histograms_to_nhcb". +// +// Case 1. Only classic histogram is exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | YES | NO | NO |. +// | classic=true, nhcb=false | YES | NO | NO |. +// | classic=false, nhcb=true | NO | NO | YES |. +// | classic=true, nhcb=true | YES | NO | YES |. +// +// Case 2. Both classic and exponential histograms are exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | NO | YES | NO |. +// | classic=true, nhcb=false | YES | YES | NO |. +// | classic=false, nhcb=true | NO | YES | NO |. +// | classic=true, nhcb=true | YES | YES | NO |. +// +// Case 3. Only exponential histogram is exposed. +// +// | Scrape Config | Expect classic | Expect exponential | Expect NHCB |. +// | classic=false, nhcb=false | NO | YES | NO |. +// | classic=true, nhcb=false | NO | YES | NO |. +// | classic=false, nhcb=true | NO | YES | NO |. +// | classic=true, nhcb=true | NO | YES | NO |. 
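+//
+// In short (summarising the tables above): an NHCB is only ever emitted when
+// the target exposes no exponential histogram and
+// convert_classic_histograms_to_nhcb is set; an exposed exponential histogram
+// always takes precedence.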
+func TestNHCBParser_NoNHCBWhenExponential(t *testing.T) {
+	type requirement struct {
+		expectClassic     bool
+		expectExponential bool
+		expectNHCB        bool
+	}
+
+	cases := []map[string]requirement{
+		// Case 1.
+		{
+			"classic=false, nhcb=false": {expectClassic: true, expectExponential: false, expectNHCB: false},
+			"classic=true, nhcb=false":  {expectClassic: true, expectExponential: false, expectNHCB: false},
+			"classic=false, nhcb=true":  {expectClassic: false, expectExponential: false, expectNHCB: true},
+			"classic=true, nhcb=true":   {expectClassic: true, expectExponential: false, expectNHCB: true},
+		},
+		// Case 2.
+		{
+			"classic=false, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false},
+			"classic=true, nhcb=false":  {expectClassic: true, expectExponential: true, expectNHCB: false},
+			"classic=false, nhcb=true":  {expectClassic: false, expectExponential: true, expectNHCB: false},
+			"classic=true, nhcb=true":   {expectClassic: true, expectExponential: true, expectNHCB: false},
+		},
+		// Case 3.
+		{
+			"classic=false, nhcb=false": {expectClassic: false, expectExponential: true, expectNHCB: false},
+			"classic=true, nhcb=false":  {expectClassic: false, expectExponential: true, expectNHCB: false},
+			"classic=false, nhcb=true":  {expectClassic: false, expectExponential: true, expectNHCB: false},
+			"classic=true, nhcb=true":   {expectClassic: false, expectExponential: true, expectNHCB: false},
+		},
+	}
+
+	// parserFactory creates a Parser from the "keep classic histograms" option.
+	type parserFactory func(bool) Parser
+
+	type testCase struct {
+		name    string
+		parser  parserFactory
+		classic bool
+		nhcb    bool
+		exp     []parsedEntry
+	}
+
+	type parserOptions struct {
+		useUTF8sep          bool
+		hasCreatedTimeStamp bool
+	}
+	// Each entry defines the parser name, the Parser factory, the test cases
+	// that parser supports, and the parser options.
+ parsers := []func() (string, parserFactory, []int, parserOptions){ + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + inputBuf := createTestProtoBufHistogram(t) + return NewProtobufParser(inputBuf.Bytes(), keepClassic, labels.NewSymbolTable()) + } + return "ProtoBuf", factory, []int{1, 2, 3}, parserOptions{useUTF8sep: true, hasCreatedTimeStamp: true} + }, + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + input := createTestOpenMetricsHistogram() + return NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + } + return "OpenMetrics", factory, []int{1}, parserOptions{hasCreatedTimeStamp: true} + }, + func() (string, parserFactory, []int, parserOptions) { + factory := func(keepClassic bool) Parser { + input := createTestPromHistogram() + return NewPromParser([]byte(input), labels.NewSymbolTable()) + } + return "Prometheus", factory, []int{1}, parserOptions{} + }, + } + + testCases := []testCase{} + for _, parser := range parsers { + for _, classic := range []bool{false, true} { + for _, nhcb := range []bool{false, true} { + parserName, parser, supportedCases, options := parser() + requirementName := "classic=" + strconv.FormatBool(classic) + ", nhcb=" + strconv.FormatBool(nhcb) + tc := testCase{ + name: "parser=" + parserName + ", " + requirementName, + parser: parser, + classic: classic, + nhcb: nhcb, + exp: []parsedEntry{}, + } + for _, caseNumber := range supportedCases { + caseI := cases[caseNumber-1] + req, ok := caseI[requirementName] + require.True(t, ok, "Case %d does not have requirement %s", caseNumber, requirementName) + metric := "test_histogram" + strconv.Itoa(caseNumber) + tc.exp = append(tc.exp, parsedEntry{ + m: metric, + help: "Test histogram " + strconv.Itoa(caseNumber), + }) + tc.exp = append(tc.exp, parsedEntry{ + m: metric, + typ: model.MetricTypeHistogram, + }) + + var ct *int64 + if options.hasCreatedTimeStamp { + ct = int64p(1000) + } + + var bucketForMetric func(string) string + if options.useUTF8sep { + bucketForMetric = func(s string) string { + return "_bucket\xffle\xff" + s + } + } else { + bucketForMetric = func(s string) string { + return "_bucket{le=\"" + s + "\"}" + } + } + + if req.expectExponential { + // Always expect exponential histogram first. + exponentialSeries := []parsedEntry{ + { + m: metric, + shs: &histogram.Histogram{ + Schema: 3, + Count: 175, + Sum: 0.0008280461746287094, + ZeroThreshold: 2.938735877055719e-39, + ZeroCount: 2, + PositiveSpans: []histogram.Span{{Offset: -161, Length: 1}, {Offset: 8, Length: 3}}, + NegativeSpans: []histogram.Span{{Offset: -162, Length: 1}, {Offset: 23, Length: 4}}, + PositiveBuckets: []int64{1, 2, -1, -1}, + NegativeBuckets: []int64{1, 3, -2, -1, 1}, + }, + lset: labels.FromStrings("__name__", metric), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, exponentialSeries...) + } + if req.expectClassic { + // Always expect classic histogram series after exponential. 
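+					// (With the protobuf parser, the native histogram sample is
+					// emitted first; the classic _count/_sum/_bucket series follow.)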
+ classicSeries := []parsedEntry{ + { + m: metric + "_count", + v: 175, + lset: labels.FromStrings("__name__", metric+"_count"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + "_sum", + v: 0.0008280461746287094, + lset: labels.FromStrings("__name__", metric+"_sum"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0004899999999999998"), + v: 2, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0004899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0003899999999999998"), + v: 4, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0003899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("-0.0002899999999999998"), + v: 16, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "-0.0002899999999999998"), + t: int64p(1234568), + ct: ct, + }, + { + m: metric + bucketForMetric("+Inf"), + v: 175, + lset: labels.FromStrings("__name__", metric+"_bucket", "le", "+Inf"), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, classicSeries...) + } + if req.expectNHCB { + // Always expect NHCB series after classic. + nhcbSeries := []parsedEntry{ + { + m: metric + "{}", + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 175, + Sum: 0.0008280461746287094, + PositiveSpans: []histogram.Span{{Length: 4}}, + PositiveBuckets: []int64{2, 0, 10, 147}, + CustomValues: []float64{-0.0004899999999999998, -0.0003899999999999998, -0.0002899999999999998}, + }, + lset: labels.FromStrings("__name__", metric), + t: int64p(1234568), + ct: ct, + }, + } + tc.exp = append(tc.exp, nhcbSeries...) + } + } + testCases = append(testCases, tc) + } + } + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + p := tc.parser(tc.classic) + if tc.nhcb { + p = NewNHCBParser(p, labels.NewSymbolTable(), tc.classic) + } + got := testParse(t, p) + requireEntries(t, tc.exp, got) + }) + } +} + +func createTestProtoBufHistogram(t *testing.T) *bytes.Buffer { + testMetricFamilies := []string{`name: "test_histogram1" +help: "Test histogram 1" +type: HISTOGRAM +metric: < + histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 + bucket: < + cumulative_count: 2 + upper_bound: -0.0004899999999999998 + > + bucket: < + cumulative_count: 4 + upper_bound: -0.0003899999999999998 + > + bucket: < + cumulative_count: 16 + upper_bound: -0.0002899999999999998 + > + > + timestamp_ms: 1234568 +>`, `name: "test_histogram2" +help: "Test histogram 2" +type: HISTOGRAM +metric: < + histogram: < + created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 + bucket: < + cumulative_count: 2 + upper_bound: -0.0004899999999999998 + > + bucket: < + cumulative_count: 4 + upper_bound: -0.0003899999999999998 + > + bucket: < + cumulative_count: 16 + upper_bound: -0.0002899999999999998 + > + schema: 3 + zero_threshold: 2.938735877055719e-39 + zero_count: 2 + negative_span: < + offset: -162 + length: 1 + > + negative_span: < + offset: 23 + length: 4 + > + negative_delta: 1 + negative_delta: 3 + negative_delta: -2 + negative_delta: -1 + negative_delta: 1 + positive_span: < + offset: -161 + length: 1 + > + positive_span: < + offset: 8 + length: 3 + > + positive_delta: 1 + positive_delta: 2 + positive_delta: -1 + positive_delta: -1 + > + timestamp_ms: 1234568 +>`, `name: "test_histogram3" +help: "Test histogram 3" +type: HISTOGRAM +metric: < + histogram: < + 
created_timestamp: < + seconds: 1 + nanos: 1 + > + sample_count: 175 + sample_sum: 0.0008280461746287094 + schema: 3 + zero_threshold: 2.938735877055719e-39 + zero_count: 2 + negative_span: < + offset: -162 + length: 1 + > + negative_span: < + offset: 23 + length: 4 + > + negative_delta: 1 + negative_delta: 3 + negative_delta: -2 + negative_delta: -1 + negative_delta: 1 + positive_span: < + offset: -161 + length: 1 + > + positive_span: < + offset: 8 + length: 3 + > + positive_delta: 1 + positive_delta: 2 + positive_delta: -1 + positive_delta: -1 + > + timestamp_ms: 1234568 +> +`} + + varintBuf := make([]byte, binary.MaxVarintLen32) + buf := &bytes.Buffer{} + + for _, tmf := range testMetricFamilies { + pb := &dto.MetricFamily{} + // From text to proto message. + require.NoError(t, proto.UnmarshalText(tmf, pb)) + // From proto message to binary protobuf. + protoBuf, err := proto.Marshal(pb) + require.NoError(t, err) + + // Write first length, then binary protobuf. + varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf))) + buf.Write(varintBuf[:varintLength]) + buf.Write(protoBuf) + } + + return buf +} + +func createTestOpenMetricsHistogram() string { + return `# HELP test_histogram1 Test histogram 1 +# TYPE test_histogram1 histogram +test_histogram1_count 175 1234.568 +test_histogram1_sum 0.0008280461746287094 1234.568 +test_histogram1_bucket{le="-0.0004899999999999998"} 2 1234.568 +test_histogram1_bucket{le="-0.0003899999999999998"} 4 1234.568 +test_histogram1_bucket{le="-0.0002899999999999998"} 16 1234.568 +test_histogram1_bucket{le="+Inf"} 175 1234.568 +test_histogram1_created 1 +# EOF` +} + +func createTestPromHistogram() string { + return `# HELP test_histogram1 Test histogram 1 +# TYPE test_histogram1 histogram +test_histogram1_count 175 1234568 +test_histogram1_sum 0.0008280461746287094 1234768 +test_histogram1_bucket{le="-0.0004899999999999998"} 2 1234568 +test_histogram1_bucket{le="-0.0003899999999999998"} 4 1234568 +test_histogram1_bucket{le="-0.0002899999999999998"} 16 1234568 +test_histogram1_bucket{le="+Inf"} 175 1234568` +} + +func TestNHCBParserErrorHandling(t *testing.T) { + input := `# HELP something Histogram with non cumulative buckets +# TYPE something histogram +something_count 18 +something_sum 324789.4 +something_created 1520430001 +something_bucket{le="0.0"} 18 +something_bucket{le="+Inf"} 1 +something_count{a="b"} 9 +something_sum{a="b"} 42123 +something_created{a="b"} 1520430002 +something_bucket{a="b",le="0.0"} 1 +something_bucket{a="b",le="+Inf"} 9 +# EOF` + exp := []parsedEntry{ + { + m: "something", + help: "Histogram with non cumulative buckets", + }, + { + m: "something", + typ: model.MetricTypeHistogram, + }, + // The parser should skip the series with non-cumulative buckets. + { + m: `something{a="b"}`, + shs: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 9, + Sum: 42123.0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, + PositiveBuckets: []int64{1, 7}, + CustomValues: []float64{0.0}, // We do not store the +Inf boundary. 
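+				// PositiveBuckets are delta-encoded: the absolute bucket counts
+				// here are 1 and 1+7=8, which together match Count: 9.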
+ }, + lset: labels.FromStrings("__name__", "something", "a", "b"), + ct: int64p(1520430002000), + }, + } + + p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) + p = NewNHCBParser(p, labels.NewSymbolTable(), false) + got := testParse(t, p) + requireEntries(t, exp, got) +} diff --git a/model/textparse/openmetricslex.l b/model/textparse/openmetricslex.l index 9afbbbd8bd..09106c52ce 100644 --- a/model/textparse/openmetricslex.l +++ b/model/textparse/openmetricslex.l @@ -69,6 +69,7 @@ S [ ] {S}#{S}\{ l.state = sExemplar; return tComment {L}({L}|{D})* return tLName +\"(\\.|[^\\"\n])*\" l.state = sExemplar; return tQString \} l.state = sEValue; return tBraceClose = l.state = sEValue; return tEqual \"(\\.|[^\\"\n])*\" l.state = sExemplar; return tLValue diff --git a/model/textparse/openmetricslex.l.go b/model/textparse/openmetricslex.l.go index c8789ef60d..c0b2fcdb4d 100644 --- a/model/textparse/openmetricslex.l.go +++ b/model/textparse/openmetricslex.l.go @@ -53,9 +53,9 @@ yystate0: case 8: // start condition: sExemplar goto yystart57 case 9: // start condition: sEValue - goto yystart62 + goto yystart65 case 10: // start condition: sETimestamp - goto yystart68 + goto yystart71 } yystate1: @@ -538,125 +538,153 @@ yystart57: switch { default: goto yyabort - case c == ',': + case c == '"': goto yystate58 - case c == '=': - goto yystate59 - case c == '}': + case c == ',': goto yystate61 + case c == '=': + goto yystate62 + case c == '}': + goto yystate64 case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate60 + goto yystate63 } yystate58: c = l.next() - goto yyrule26 + switch { + default: + goto yyabort + case c == '"': + goto yystate59 + case c == '\\': + goto yystate60 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate58 + } yystate59: c = l.next() - goto yyrule24 + goto yyrule23 yystate60: c = l.next() switch { default: - goto yyrule22 - case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': - goto yystate60 + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate58 } yystate61: c = l.next() - goto yyrule23 + goto yyrule27 yystate62: c = l.next() -yystart62: - switch { - default: - goto yyabort - case c == ' ': - goto yystate63 - case c == '"': - goto yystate65 - } + goto yyrule25 yystate63: c = l.next() switch { default: - goto yyabort - case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate64 + goto yyrule22 + case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate63 } yystate64: c = l.next() - switch { - default: - goto yyrule27 - case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate64 - } + goto yyrule24 yystate65: c = l.next() +yystart65: switch { default: goto yyabort - case c == '"': + case c == ' ': goto yystate66 - case c == '\\': - goto yystate67 - case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': - goto yystate65 + case c == '"': + goto yystate68 } yystate66: c = l.next() - goto yyrule25 + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' 
&& c <= 'ÿ': + goto yystate67 + } yystate67: c = l.next() switch { default: - goto yyabort - case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': - goto yystate65 + goto yyrule28 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate67 } yystate68: c = l.next() -yystart68: switch { default: goto yyabort - case c == ' ': - goto yystate70 - case c == '\n': + case c == '"': goto yystate69 + case c == '\\': + goto yystate70 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate68 } yystate69: c = l.next() - goto yyrule29 + goto yyrule26 yystate70: c = l.next() switch { default: goto yyabort - case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate71 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate68 } yystate71: c = l.next() +yystart71: switch { default: - goto yyrule28 + goto yyabort + case c == ' ': + goto yystate73 + case c == '\n': + goto yystate72 + } + +yystate72: + c = l.next() + goto yyrule30 + +yystate73: + c = l.next() + switch { + default: + goto yyabort case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': - goto yystate71 + goto yystate74 + } + +yystate74: + c = l.next() + switch { + default: + goto yyrule29 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate74 } yyrule1: // #{S} @@ -782,39 +810,45 @@ yyrule22: // {L}({L}|{D})* { return tLName } -yyrule23: // \} +yyrule23: // \"(\\.|[^\\"\n])*\" + { + l.state = sExemplar + return tQString + goto yystate0 + } +yyrule24: // \} { l.state = sEValue return tBraceClose goto yystate0 } -yyrule24: // = +yyrule25: // = { l.state = sEValue return tEqual goto yystate0 } -yyrule25: // \"(\\.|[^\\"\n])*\" +yyrule26: // \"(\\.|[^\\"\n])*\" { l.state = sExemplar return tLValue goto yystate0 } -yyrule26: // , +yyrule27: // , { return tComma } -yyrule27: // {S}[^ \n]+ +yyrule28: // {S}[^ \n]+ { l.state = sETimestamp return tValue goto yystate0 } -yyrule28: // {S}[^ \n]+ +yyrule29: // {S}[^ \n]+ { return tTimestamp } -yyrule29: // \n +yyrule30: // \n if true { // avoid go vet determining the below panic will not be reached l.state = sInit return tLinebreak @@ -859,10 +893,10 @@ yyabort: // no lexem recognized goto yystate57 } if false { - goto yystate62 + goto yystate65 } if false { - goto yystate68 + goto yystate71 } } diff --git a/model/textparse/openmetricsparse.go b/model/textparse/openmetricsparse.go index 5f0415d3ee..16e805f3a9 100644 --- a/model/textparse/openmetricsparse.go +++ b/model/textparse/openmetricsparse.go @@ -17,13 +17,16 @@ package textparse import ( + "bytes" "errors" "fmt" "io" "math" + "strconv" "strings" "unicode/utf8" + "github.com/cespare/xxhash/v2" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/exemplar" @@ -72,15 +75,16 @@ func (l *openMetricsLexer) Error(es string) { // OpenMetrics text exposition format. // This is based on the working draft https://docs.google.com/document/u/1/d/1KwV0mAXwwbvvifBvDKH_LU1YjyXE_wxCkHNoCGq1GX0/edit type OpenMetricsParser struct { - l *openMetricsLexer - builder labels.ScratchBuilder - series []byte - text []byte - mtype model.MetricType - val float64 - ts int64 - hasTS bool - start int + l *openMetricsLexer + builder labels.ScratchBuilder + series []byte + mfNameLen int // length of metric family name to get from series. 
+ text []byte + mtype model.MetricType + val float64 + ts int64 + hasTS bool + start int // offsets is a list of offsets into series that describe the positions // of the metric name and label names and values for this series. // p.offsets[0] is the start character of the metric name. @@ -95,7 +99,15 @@ type OpenMetricsParser struct { exemplarTs int64 hasExemplarTs bool - skipCTSeries bool + // Created timestamp parsing state. + ct int64 + ctHashSet uint64 + // ignoreExemplar instructs the parser to not overwrite exemplars (to keep them while peeking ahead). + ignoreExemplar bool + // visitedMFName is the metric family name of the last visited metric when peeking ahead + // for _created series during the execution of the CreatedTimestamp method. + visitedMFName []byte + skipCTSeries bool } type openMetricsParserOptions struct { @@ -201,7 +213,7 @@ func (p *OpenMetricsParser) Metric(l *labels.Labels) string { label := unreplace(s[a:b]) c := p.offsets[i+2] - p.start d := p.offsets[i+3] - p.start - value := unreplace(s[c:d]) + value := normalizeFloatsInLabelValues(p.mtype, label, unreplace(s[c:d])) p.builder.Add(label, value) } @@ -252,87 +264,144 @@ func (p *OpenMetricsParser) Exemplar(e *exemplar.Exemplar) bool { // CreatedTimestamp returns the created timestamp for a current Metric if exists or nil. // NOTE(Maniktherana): Might use additional CPU/mem resources due to deep copy of parser required for peeking given 1.0 OM specification on _created series. func (p *OpenMetricsParser) CreatedTimestamp() *int64 { - if !TypeRequiresCT(p.mtype) { + if !typeRequiresCT(p.mtype) { // Not a CT supported metric type, fast path. + p.ctHashSet = 0 // Use ctHashSet as a single way of telling "empty cache" return nil } var ( - currLset labels.Labels - buf []byte - peekWithoutNameLsetHash uint64 + buf []byte + currName []byte ) - p.Metric(&currLset) - currFamilyLsetHash, buf := currLset.HashWithoutLabels(buf, labels.MetricName, "le", "quantile") - // Search for the _created line for the currFamilyLsetHash using ephemeral parser until - // we see EOF or new metric family. We have to do it as we don't know where (and if) - // that CT line is. - // TODO(bwplotka): Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. - peek := deepCopy(p) - for { - eType, err := peek.Next() - if err != nil { - // This means peek will give error too later on, so def no CT line found. - // This might result in partial scrape with wrong/missing CT, but only - // spec improvement would help. - // TODO(bwplotka): Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. - return nil - } - if eType != EntrySeries { - // Assume we hit different family, no CT line found. - return nil - } - - var peekedLset labels.Labels - peek.Metric(&peekedLset) - peekedName := peekedLset.Get(model.MetricNameLabel) - if !strings.HasSuffix(peekedName, "_created") { - // Not a CT line, search more. - continue - } - - // We got a CT line here, but let's search if CT line is actually for our series, edge case. - peekWithoutNameLsetHash, _ = peekedLset.HashWithoutLabels(buf, labels.MetricName, "le", "quantile") - if peekWithoutNameLsetHash != currFamilyLsetHash { - // CT line for a different series, for our series no CT. - return nil - } - ct := int64(peek.val) - return &ct + if len(p.series) > 1 && p.series[0] == '{' && p.series[1] == '"' { + // special case for UTF-8 encoded metric family names. 
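+		// e.g. a series like {"http.status",code="200"} 1 carries its metric
+		// family name quoted inside the braces.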
+ currName = p.series[p.offsets[0]-p.start : p.mfNameLen+2] + } else { + currName = p.series[p.offsets[0]-p.start : p.mfNameLen] } -} -// TypeRequiresCT returns true if the metric type requires a _created timestamp. -func TypeRequiresCT(t model.MetricType) bool { - switch t { - case model.MetricTypeCounter, model.MetricTypeSummary, model.MetricTypeHistogram: - return true - default: - return false + currHash := p.seriesHash(&buf, currName) + // Check cache, perhaps we fetched something already. + if currHash == p.ctHashSet && p.ct > 0 { + return &p.ct } -} -// deepCopy creates a copy of a parser without re-using the slices' original memory addresses. -func deepCopy(p *OpenMetricsParser) OpenMetricsParser { - newB := make([]byte, len(p.l.b)) - copy(newB, p.l.b) - - newLexer := &openMetricsLexer{ - b: newB, + // Create a new lexer to reset the parser once this function is done executing. + resetLexer := &openMetricsLexer{ + b: p.l.b, i: p.l.i, start: p.l.start, err: p.l.err, state: p.l.state, } - newParser := OpenMetricsParser{ - l: newLexer, - builder: p.builder, - mtype: p.mtype, - val: p.val, - skipCTSeries: false, + p.skipCTSeries = false + + p.ignoreExemplar = true + savedStart := p.start + defer func() { + p.ignoreExemplar = false + p.start = savedStart + p.l = resetLexer + }() + + for { + eType, err := p.Next() + if err != nil { + // This means p.Next() will give error too later on, so def no CT line found. + // This might result in partial scrape with wrong/missing CT, but only + // spec improvement would help. + // TODO: Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. + p.resetCTParseValues() + return nil + } + if eType != EntrySeries { + // Assume we hit different family, no CT line found. + p.resetCTParseValues() + return nil + } + + peekedName := p.series[p.offsets[0]-p.start : p.offsets[1]-p.start] + if len(peekedName) < 8 || string(peekedName[len(peekedName)-8:]) != "_created" { + // Not a CT line, search more. + continue + } + + // Remove _created suffix. + peekedHash := p.seriesHash(&buf, peekedName[:len(peekedName)-8]) + if peekedHash != currHash { + // Found CT line for a different series, for our series no CT. + p.resetCTParseValues() + return nil + } + + // All timestamps in OpenMetrics are Unix Epoch in seconds. Convert to milliseconds. + // https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#timestamps + ct := int64(p.val * 1000.0) + p.setCTParseValues(ct, currHash, currName, true) + return &ct + } +} + +var ( + leBytes = []byte{108, 101} + quantileBytes = []byte{113, 117, 97, 110, 116, 105, 108, 101} +) + +// seriesHash generates a hash based on the metric family name and the offsets +// of label names and values from the parsed OpenMetrics data. It skips quantile +// and le labels for summaries and histograms respectively. +func (p *OpenMetricsParser) seriesHash(offsetsArr *[]byte, metricFamilyName []byte) uint64 { + // Iterate through p.offsets to find the label names and values. + for i := 2; i < len(p.offsets); i += 4 { + lStart := p.offsets[i] - p.start + lEnd := p.offsets[i+1] - p.start + label := p.series[lStart:lEnd] + // Skip quantile and le labels for summaries and histograms. + if p.mtype == model.MetricTypeSummary && bytes.Equal(label, quantileBytes) { + continue + } + if p.mtype == model.MetricTypeHistogram && bytes.Equal(label, leBytes) { + continue + } + *offsetsArr = append(*offsetsArr, p.series[lStart:lEnd]...) 
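+		// Append the label value right after its name, so the hash input is
+		// built from complete name/value pairs plus the metric family name.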
+ vStart := p.offsets[i+2] - p.start + vEnd := p.offsets[i+3] - p.start + *offsetsArr = append(*offsetsArr, p.series[vStart:vEnd]...) + } + + *offsetsArr = append(*offsetsArr, metricFamilyName...) + hashedOffsets := xxhash.Sum64(*offsetsArr) + + // Reset the offsets array for later reuse. + *offsetsArr = (*offsetsArr)[:0] + return hashedOffsets +} + +// setCTParseValues sets the parser to the state after CreatedTimestamp method was called and CT was found. +// This is useful to prevent re-parsing the same series again and early return the CT value. +func (p *OpenMetricsParser) setCTParseValues(ct int64, ctHashSet uint64, mfName []byte, skipCTSeries bool) { + p.ct = ct + p.ctHashSet = ctHashSet + p.visitedMFName = mfName + p.skipCTSeries = skipCTSeries // Do we need to set it? +} + +// resetCTParseValues resets the parser to the state before CreatedTimestamp method was called. +func (p *OpenMetricsParser) resetCTParseValues() { + p.ctHashSet = 0 + p.skipCTSeries = true +} + +// typeRequiresCT returns true if the metric type requires a _created timestamp. +func typeRequiresCT(t model.MetricType) bool { + switch t { + case model.MetricTypeCounter, model.MetricTypeSummary, model.MetricTypeHistogram: + return true + default: + return false } - return newParser } // nextToken returns the next token from the openMetricsLexer. @@ -356,10 +425,12 @@ func (p *OpenMetricsParser) Next() (Entry, error) { p.start = p.l.i p.offsets = p.offsets[:0] - p.eOffsets = p.eOffsets[:0] - p.exemplar = p.exemplar[:0] - p.exemplarVal = 0 - p.hasExemplarTs = false + if !p.ignoreExemplar { + p.eOffsets = p.eOffsets[:0] + p.exemplar = p.exemplar[:0] + p.exemplarVal = 0 + p.hasExemplarTs = false + } switch t := p.nextToken(); t { case tEOFWord: @@ -378,6 +449,7 @@ func (p *OpenMetricsParser) Next() (Entry, error) { mStart++ mEnd-- } + p.mfNameLen = mEnd - mStart p.offsets = append(p.offsets, mStart, mEnd) default: return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2) @@ -483,6 +555,16 @@ func (p *OpenMetricsParser) Next() (Entry, error) { func (p *OpenMetricsParser) parseComment() error { var err error + + if p.ignoreExemplar { + for t := p.nextToken(); t != tLinebreak; t = p.nextToken() { + if t == tEOF { + return errors.New("data does not end with # EOF") + } + } + return nil + } + // Parse the labels. p.eOffsets, err = p.parseLVals(p.eOffsets, true) if err != nil { @@ -591,10 +673,9 @@ func (p *OpenMetricsParser) parseLVals(offsets []int, isExemplar bool) ([]int, e // isCreatedSeries returns true if the current series is a _created series. func (p *OpenMetricsParser) isCreatedSeries() bool { - var newLbs labels.Labels - p.Metric(&newLbs) - name := newLbs.Get(model.MetricNameLabel) - if TypeRequiresCT(p.mtype) && strings.HasSuffix(name, "_created") { + metricName := p.series[p.offsets[0]-p.start : p.offsets[1]-p.start] + // check length so the metric is longer than len("_created") + if typeRequiresCT(p.mtype) && len(metricName) >= 8 && string(metricName[len(metricName)-8:]) == "_created" { return true } return false @@ -663,3 +744,15 @@ func (p *OpenMetricsParser) getFloatValue(t token, after string) (float64, error } return val, nil } + +// normalizeFloatsInLabelValues ensures that values of the "le" labels of classic histograms and "quantile" labels +// of summaries follow OpenMetrics formatting rules. 
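+//
+// For example (illustrative calls):
+//
+//	normalizeFloatsInLabelValues(model.MetricTypeHistogram, "le", "1")     // "1.0"
+//	normalizeFloatsInLabelValues(model.MetricTypeHistogram, "le", "+Inf")  // "+Inf"
+//	normalizeFloatsInLabelValues(model.MetricTypeSummary, "quantile", "0") // "0.0"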
+func normalizeFloatsInLabelValues(t model.MetricType, l, v string) string { + if (t == model.MetricTypeSummary && l == model.QuantileLabel) || (t == model.MetricTypeHistogram && l == model.BucketLabel) { + f, err := strconv.ParseFloat(v, 64) + if err == nil { + return formatOpenMetricsFloat(f) + } + } + return v +} diff --git a/model/textparse/openmetricsparse_test.go b/model/textparse/openmetricsparse_test.go index cadaabc99f..a09c56a7ba 100644 --- a/model/textparse/openmetricsparse_test.go +++ b/model/textparse/openmetricsparse_test.go @@ -14,7 +14,7 @@ package textparse import ( - "errors" + "fmt" "io" "testing" @@ -69,32 +69,57 @@ testmetric{label="\"bar\""} 1 # HELP foo Counter with and without labels to certify CT is parsed for both cases # TYPE foo counter foo_total 17.0 1520879607.789 # {id="counter-test"} 5 -foo_created 1000 +foo_created 1520872607.123 foo_total{a="b"} 17.0 1520879607.789 # {id="counter-test"} 5 -foo_created{a="b"} 1000 +foo_created{a="b"} 1520872607.123 +foo_total{le="c"} 21.0 +foo_created{le="c"} 1520872621.123 +foo_total{le="1"} 10.0 # HELP bar Summary with CT at the end, making sure we find CT even if it's multiple lines a far # TYPE bar summary bar_count 17.0 bar_sum 324789.3 bar{quantile="0.95"} 123.7 bar{quantile="0.99"} 150.0 -bar_created 1520430000 +bar_created 1520872608.124 # HELP baz Histogram with the same objective as above's summary # TYPE baz histogram baz_bucket{le="0.0"} 0 baz_bucket{le="+Inf"} 17 baz_count 17 baz_sum 324789.3 -baz_created 1520430000 +baz_created 1520872609.125 # HELP fizz_created Gauge which shouldn't be parsed as CT # TYPE fizz_created gauge -fizz_created 17.0` +fizz_created 17.0 +# HELP something Histogram with _created between buckets and summary +# TYPE something histogram +something_count 18 +something_sum 324789.4 +something_created 1520430001 +something_bucket{le="0.0"} 1 +something_bucket{le="1"} 2 +something_bucket{le="+Inf"} 18 +# HELP yum Summary with _created between sum and quantiles +# TYPE yum summary +yum_count 20 +yum_sum 324789.5 +yum_created 1520430003 +yum{quantile="0.95"} 123.7 +yum{quantile="0.99"} 150.0 +# HELP foobar Summary with _created as the first line +# TYPE foobar summary +foobar_count 21 +foobar_created 1520430004 +foobar_sum 324789.6 +foobar{quantile="0.95"} 123.8 +foobar{quantile="0.99"} 150.1` input += "\n# HELP metric foo\x00bar" input += "\nnull_byte_metric{a=\"abc\x00\"} 1" input += "\n# EOF\n" - exp := []expectedParse{ + exp := []parsedEntry{ { m: "go_gc_duration_seconds", help: "A summary of the GC invocation durations.", @@ -107,7 +132,7 @@ fizz_created 17.0` }, { m: `go_gc_duration_seconds{quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), }, { m: `go_gc_duration_seconds{quantile="0.25"}`, v: 7.424100000000001e-05, @@ -169,12 +194,16 @@ fizz_created 17.0` m: `hhh_bucket{le="+Inf"}`, v: 1, lset: labels.FromStrings("__name__", "hhh_bucket", "le", "+Inf"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "histogram-bucket-test"), Value: 4}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "histogram-bucket-test"), Value: 4}, + }, }, { m: `hhh_count`, v: 1, lset: labels.FromStrings("__name__", "hhh_count"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "histogram-count-test"), Value: 4}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "histogram-count-test"), Value: 4}, + }, }, { m: "ggh", typ: 
model.MetricTypeGaugeHistogram, @@ -182,12 +211,16 @@ fizz_created 17.0` m: `ggh_bucket{le="+Inf"}`, v: 1, lset: labels.FromStrings("__name__", "ggh_bucket", "le", "+Inf"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "gaugehistogram-bucket-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "gaugehistogram-bucket-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}, + }, }, { m: `ggh_count`, v: 1, lset: labels.FromStrings("__name__", "ggh_count"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "gaugehistogram-count-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "gaugehistogram-count-test", "xx", "yy"), Value: 4, HasTs: true, Ts: 123123}, + }, }, { m: "smr_seconds", typ: model.MetricTypeSummary, @@ -195,12 +228,16 @@ fizz_created 17.0` m: `smr_seconds_count`, v: 2, lset: labels.FromStrings("__name__", "smr_seconds_count"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "summary-count-test"), Value: 1, HasTs: true, Ts: 123321}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "summary-count-test"), Value: 1, HasTs: true, Ts: 123321}, + }, }, { m: `smr_seconds_sum`, v: 42, lset: labels.FromStrings("__name__", "smr_seconds_sum"), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "summary-sum-test"), Value: 1, HasTs: true, Ts: 123321}, + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "summary-sum-test"), Value: 1, HasTs: true, Ts: 123321}, + }, }, { m: "ii", typ: model.MetricTypeInfo, @@ -249,15 +286,28 @@ fizz_created 17.0` v: 17, lset: labels.FromStrings("__name__", "foo_total"), t: int64p(1520879607789), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "counter-test"), Value: 5}, - ct: int64p(1000), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "counter-test"), Value: 5}, + }, + ct: int64p(1520872607123), }, { m: `foo_total{a="b"}`, v: 17.0, lset: labels.FromStrings("__name__", "foo_total", "a", "b"), t: int64p(1520879607789), - e: &exemplar.Exemplar{Labels: labels.FromStrings("id", "counter-test"), Value: 5}, - ct: int64p(1000), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id", "counter-test"), Value: 5}, + }, + ct: int64p(1520872607123), + }, { + m: `foo_total{le="c"}`, + v: 21.0, + lset: labels.FromStrings("__name__", "foo_total", "le", "c"), + ct: int64p(1520872621123), + }, { + m: `foo_total{le="1"}`, + v: 10.0, + lset: labels.FromStrings("__name__", "foo_total", "le", "1"), }, { m: "bar", help: "Summary with CT at the end, making sure we find CT even if it's multiple lines a far", @@ -268,22 +318,22 @@ fizz_created 17.0` m: "bar_count", v: 17.0, lset: labels.FromStrings("__name__", "bar_count"), - ct: int64p(1520430000), + ct: int64p(1520872608124), }, { m: "bar_sum", v: 324789.3, lset: labels.FromStrings("__name__", "bar_sum"), - ct: int64p(1520430000), + ct: int64p(1520872608124), }, { m: `bar{quantile="0.95"}`, v: 123.7, lset: labels.FromStrings("__name__", "bar", "quantile", "0.95"), - ct: int64p(1520430000), + ct: int64p(1520872608124), }, { m: `bar{quantile="0.99"}`, v: 150.0, lset: labels.FromStrings("__name__", "bar", "quantile", "0.99"), - ct: int64p(1520430000), + ct: int64p(1520872608124), }, { m: "baz", help: "Histogram with the same objective as above's summary", @@ -294,22 +344,22 @@ fizz_created 17.0` m: `baz_bucket{le="0.0"}`, v: 0, lset: labels.FromStrings("__name__", "baz_bucket", "le", "0.0"), - ct: int64p(1520430000), + ct: 
int64p(1520872609125), }, { m: `baz_bucket{le="+Inf"}`, v: 17, lset: labels.FromStrings("__name__", "baz_bucket", "le", "+Inf"), - ct: int64p(1520430000), + ct: int64p(1520872609125), }, { m: `baz_count`, v: 17, lset: labels.FromStrings("__name__", "baz_count"), - ct: int64p(1520430000), + ct: int64p(1520872609125), }, { m: `baz_sum`, v: 324789.3, lset: labels.FromStrings("__name__", "baz_sum"), - ct: int64p(1520430000), + ct: int64p(1520872609125), }, { m: "fizz_created", help: "Gauge which shouldn't be parsed as CT", @@ -320,6 +370,89 @@ fizz_created 17.0` m: `fizz_created`, v: 17, lset: labels.FromStrings("__name__", "fizz_created"), + }, { + m: "something", + help: "Histogram with _created between buckets and summary", + }, { + m: "something", + typ: model.MetricTypeHistogram, + }, { + m: `something_count`, + v: 18, + lset: labels.FromStrings("__name__", "something_count"), + ct: int64p(1520430001000), + }, { + m: `something_sum`, + v: 324789.4, + lset: labels.FromStrings("__name__", "something_sum"), + ct: int64p(1520430001000), + }, { + m: `something_bucket{le="0.0"}`, + v: 1, + lset: labels.FromStrings("__name__", "something_bucket", "le", "0.0"), + ct: int64p(1520430001000), + }, { + m: `something_bucket{le="1"}`, + v: 2, + lset: labels.FromStrings("__name__", "something_bucket", "le", "1.0"), + ct: int64p(1520430001000), + }, { + m: `something_bucket{le="+Inf"}`, + v: 18, + lset: labels.FromStrings("__name__", "something_bucket", "le", "+Inf"), + ct: int64p(1520430001000), + }, { + m: "yum", + help: "Summary with _created between sum and quantiles", + }, { + m: "yum", + typ: model.MetricTypeSummary, + }, { + m: `yum_count`, + v: 20, + lset: labels.FromStrings("__name__", "yum_count"), + ct: int64p(1520430003000), + }, { + m: `yum_sum`, + v: 324789.5, + lset: labels.FromStrings("__name__", "yum_sum"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.95"}`, + v: 123.7, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.95"), + ct: int64p(1520430003000), + }, { + m: `yum{quantile="0.99"}`, + v: 150.0, + lset: labels.FromStrings("__name__", "yum", "quantile", "0.99"), + ct: int64p(1520430003000), + }, { + m: "foobar", + help: "Summary with _created as the first line", + }, { + m: "foobar", + typ: model.MetricTypeSummary, + }, { + m: `foobar_count`, + v: 21, + lset: labels.FromStrings("__name__", "foobar_count"), + ct: int64p(1520430004000), + }, { + m: `foobar_sum`, + v: 324789.6, + lset: labels.FromStrings("__name__", "foobar_sum"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.95"}`, + v: 123.8, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.95"), + ct: int64p(1520430004000), + }, { + m: `foobar{quantile="0.99"}`, + v: 150.1, + lset: labels.FromStrings("__name__", "foobar", "quantile", "0.99"), + ct: int64p(1520430004000), }, { m: "metric", help: "foo\x00bar", @@ -331,7 +464,8 @@ fizz_created 17.0` } p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) - checkParseResultsWithCT(t, p, exp, true) + got := testParse(t, p) + requireEntries(t, exp, got) } func TestUTF8OpenMetricsParse(t *testing.T) { @@ -346,17 +480,20 @@ func TestUTF8OpenMetricsParse(t *testing.T) { # UNIT "go.gc_duration_seconds" seconds {"go.gc_duration_seconds",quantile="0"} 4.9351e-05 {"go.gc_duration_seconds",quantile="0.25"} 7.424100000000001e-05 -{"go.gc_duration_seconds_created"} 12313 +{"go.gc_duration_seconds_created"} 1520872607.123 {"go.gc_duration_seconds",quantile="0.5",a="b"} 8.3835e-05 
{"http.status",q="0.9",a="b"} 8.3835e-05 {"http.status",q="0.9",a="b"} 8.3835e-05 {q="0.9","http.status",a="b"} 8.3835e-05 {"go.gc_duration_seconds_sum"} 0.004304266 -{"Heizölrückstoßabdämpfung 10€ metric with \"interesting\" {character\nchoices}","strange©™\n'quoted' \"name\""="6"} 10.0` +{"Heizölrückstoßabdämpfung 10€ metric with \"interesting\" {character\nchoices}","strange©™\n'quoted' \"name\""="6"} 10.0 +quotedexemplar_count 1 # {"id.thing"="histogram-count-test"} 4 +quotedexemplar2_count 1 # {"id.thing"="histogram-count-test",other="hello"} 4 +` - input += "\n# EOF\n" + input += "# EOF\n" - exp := []expectedParse{ + exp := []parsedEntry{ { m: "go.gc_duration_seconds", help: "A summary of the GC invocation durations.", @@ -369,13 +506,13 @@ func TestUTF8OpenMetricsParse(t *testing.T) { }, { m: `{"go.gc_duration_seconds",quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"), - ct: int64p(12313), + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.0"), + ct: int64p(1520872607123), }, { m: `{"go.gc_duration_seconds",quantile="0.25"}`, v: 7.424100000000001e-05, lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.25"), - ct: int64p(12313), + ct: int64p(1520872607123), }, { m: `{"go.gc_duration_seconds",quantile="0.5",a="b"}`, v: 8.3835e-05, @@ -401,11 +538,26 @@ func TestUTF8OpenMetricsParse(t *testing.T) { v: 10.0, lset: labels.FromStrings("__name__", `Heizölrückstoßabdämpfung 10€ metric with "interesting" {character choices}`, "strange©™\n'quoted' \"name\"", "6"), + }, { + m: `quotedexemplar_count`, + v: 1, + lset: labels.FromStrings("__name__", "quotedexemplar_count"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id.thing", "histogram-count-test"), Value: 4}, + }, + }, { + m: `quotedexemplar2_count`, + v: 1, + lset: labels.FromStrings("__name__", "quotedexemplar2_count"), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("id.thing", "histogram-count-test", "other", "hello"), Value: 4}, + }, }, } p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) - checkParseResultsWithCT(t, p, exp, true) + got := testParse(t, p) + requireEntries(t, exp, got) } func TestOpenMetricsParseErrors(t *testing.T) { @@ -699,7 +851,7 @@ func TestOpenMetricsParseErrors(t *testing.T) { } for i, c := range cases { - p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable()) + p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) var err error for err == nil { _, err = p.Next() @@ -764,231 +916,121 @@ func TestOMNullByteHandling(t *testing.T) { } for i, c := range cases { - p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable()) + p := NewOpenMetricsParser([]byte(c.input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped()) var err error for err == nil { _, err = p.Next() } if c.err == "" { - require.Equal(t, io.EOF, err, "test %d", i) + require.ErrorIs(t, err, io.EOF, "test %d", i) continue } - require.Equal(t, c.err, err.Error(), "test %d", i) + require.EqualError(t, err, c.err, "test %d", i) } } -// While not desirable, there are cases were CT fails to parse and -// these tests show them. +// TestCTParseFailures tests known failure edge cases, we know does not work due +// current OM spec limitations or clients with broken OM format. // TODO(maniktherana): Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this. 
func TestCTParseFailures(t *testing.T) { - input := `# HELP something Histogram with _created between buckets and summary -# TYPE something histogram -something_count 17 -something_sum 324789.3 -something_created 1520430001 -something_bucket{le="0.0"} 0 -something_bucket{le="+Inf"} 17 -# HELP thing Histogram with _created as first line + for _, tcase := range []struct { + name string + input string + expected []parsedEntry + }{ + { + name: "_created line is a first one", + input: `# HELP thing histogram with _created as first line # TYPE thing histogram -thing_created 1520430002 +thing_created 1520872607.123 thing_count 17 thing_sum 324789.3 thing_bucket{le="0.0"} 0 thing_bucket{le="+Inf"} 17 -# HELP yum Summary with _created between sum and quantiles -# TYPE yum summary -yum_count 17.0 -yum_sum 324789.3 -yum_created 1520430003 -yum{quantile="0.95"} 123.7 -yum{quantile="0.99"} 150.0 -# HELP foobar Summary with _created as the first line -# TYPE foobar summary -foobar_created 1520430004 -foobar_count 17.0 -foobar_sum 324789.3 -foobar{quantile="0.95"} 123.7 -foobar{quantile="0.99"} 150.0` - - input += "\n# EOF\n" - - int64p := func(x int64) *int64 { return &x } - - type expectCT struct { - m string - ct *int64 - typ model.MetricType - help string - isErr bool - } - - exp := []expectCT{ - { - m: "something", - help: "Histogram with _created between buckets and summary", - isErr: false, - }, { - m: "something", - typ: model.MetricTypeHistogram, - isErr: false, - }, { - m: `something_count`, - ct: int64p(1520430001), - isErr: false, - }, { - m: `something_sum`, - ct: int64p(1520430001), - isErr: false, - }, { - m: `something_bucket{le="0.0"}`, - ct: int64p(1520430001), - isErr: true, - }, { - m: `something_bucket{le="+Inf"}`, - ct: int64p(1520430001), - isErr: true, - }, { - m: "thing", - help: "Histogram with _created as first line", - isErr: false, - }, { - m: "thing", - typ: model.MetricTypeHistogram, - isErr: false, - }, { - m: `thing_count`, - ct: int64p(1520430002), - isErr: true, - }, { - m: `thing_sum`, - ct: int64p(1520430002), - isErr: true, - }, { - m: `thing_bucket{le="0.0"}`, - ct: int64p(1520430002), - isErr: true, - }, { - m: `thing_bucket{le="+Inf"}`, - ct: int64p(1520430002), - isErr: true, - }, { - m: "yum", - help: "Summary with _created between summary and quantiles", - isErr: false, - }, { - m: "yum", - typ: model.MetricTypeSummary, - isErr: false, - }, { - m: "yum_count", - ct: int64p(1520430003), - isErr: false, - }, { - m: "yum_sum", - ct: int64p(1520430003), - isErr: false, - }, { - m: `yum{quantile="0.95"}`, - ct: int64p(1520430003), - isErr: true, - }, { - m: `yum{quantile="0.99"}`, - ct: int64p(1520430003), - isErr: true, - }, { - m: "foobar", - help: "Summary with _created as the first line", - isErr: false, - }, { - m: "foobar", - typ: model.MetricTypeSummary, - isErr: false, - }, { - m: "foobar_count", - ct: int64p(1520430004), - isErr: true, - }, { - m: "foobar_sum", - ct: int64p(1520430004), - isErr: true, - }, { - m: `foobar{quantile="0.95"}`, - ct: int64p(1520430004), - isErr: true, - }, { - m: `foobar{quantile="0.99"}`, - ct: int64p(1520430004), - isErr: true, +# HELP thing_c counter with _created as first line +# TYPE thing_c counter +thing_c_created 1520872607.123 +thing_c_total 14123.232 +# EOF +`, + expected: []parsedEntry{ + { + m: "thing", + help: "histogram with _created as first line", + }, + { + m: "thing", + typ: model.MetricTypeHistogram, + }, + { + m: `thing_count`, + ct: nil, // Should be int64p(1520872607123). 
+				},
+				{
+					m:  `thing_sum`,
+					ct: nil, // Should be int64p(1520872607123).
+				},
+				{
+					m:  `thing_bucket{le="0.0"}`,
+					ct: nil, // Should be int64p(1520872607123).
+				},
+				{
+					m:  `thing_bucket{le="+Inf"}`,
+					ct: nil, // Should be int64p(1520872607123).
+				},
+				{
+					m:    "thing_c",
+					help: "counter with _created as first line",
+				},
+				{
+					m:   "thing_c",
+					typ: model.MetricTypeCounter,
+				},
+				{
+					m:  `thing_c_total`,
+					ct: nil, // Should be int64p(1520872607123).
+				},
+			},
+		},
-	}
-
-	p := NewOpenMetricsParser([]byte(input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped())
-	i := 0
-
-	var res labels.Labels
-	for {
-		et, err := p.Next()
-		if errors.Is(err, io.EOF) {
-			break
-		}
-		require.NoError(t, err)
-
-		switch et {
-		case EntrySeries:
-			p.Metric(&res)
-
-			if ct := p.CreatedTimestamp(); exp[i].isErr {
-				require.Nil(t, ct)
-			} else {
-				require.Equal(t, *exp[i].ct, *ct)
-			}
-		default:
-			i++
-			continue
-		}
-		i++
+		{
+			// TODO(bwplotka): Is this behaviour correct? If yes, let's move it to the OK tests above.
+			name: "maybe counter with no meta",
+			input: `foo_total 17.0
+foo_created 1520872607.123
+foo_total{a="b"} 17.0
+foo_created{a="b"} 1520872608.123
+# EOF
+`,
+			expected: []parsedEntry{
+				{
+					m: `foo_total`,
+				},
+				{
+					m: `foo_created`,
+				},
+				{
+					m: `foo_total{a="b"}`,
+				},
+				{
+					m: `foo_created{a="b"}`,
+				},
+			},
+		},
+	} {
+		t.Run(fmt.Sprintf("case=%v", tcase.name), func(t *testing.T) {
+			p := NewOpenMetricsParser([]byte(tcase.input), labels.NewSymbolTable(), WithOMParserCTSeriesSkipped())
+			got := testParse(t, p)
+			resetValAndLset(got) // Keep this test focused on the metric, basic entries, and CT only.
+			requireEntries(t, tcase.expected, got)
+		})
 	}
 }

-func TestDeepCopy(t *testing.T) {
-	input := []byte(`# HELP go_goroutines A gauge goroutines.
-# TYPE go_goroutines gauge
-go_goroutines 33 123.123
-# TYPE go_gc_duration_seconds summary
-go_gc_duration_seconds
-go_gc_duration_seconds_created`)
-
-	st := labels.NewSymbolTable()
-	parser := NewOpenMetricsParser(input, st, WithOMParserCTSeriesSkipped()).(*OpenMetricsParser)
-
-	// Modify the original parser state
-	_, err := parser.Next()
-	require.NoError(t, err)
-	require.Equal(t, "go_goroutines", string(parser.l.b[parser.offsets[0]:parser.offsets[1]]))
-	require.True(t, parser.skipCTSeries)
-
-	// Create a deep copy of the parser
-	copyParser := deepCopy(parser)
-	etype, err := copyParser.Next()
-	require.NoError(t, err)
-	require.Equal(t, EntryType, etype)
-	require.True(t, parser.skipCTSeries)
-	require.False(t, copyParser.skipCTSeries)
-
-	// Modify the original parser further
-	parser.Next()
-	parser.Next()
-	parser.Next()
-	require.Equal(t, "go_gc_duration_seconds", string(parser.l.b[parser.offsets[0]:parser.offsets[1]]))
-	require.Equal(t, "summary", string(parser.mtype))
-	require.False(t, copyParser.skipCTSeries)
-	require.True(t, parser.skipCTSeries)
-
-	// Ensure the copy remains unchanged
-	copyParser.Next()
-	copyParser.Next()
-	require.Equal(t, "go_gc_duration_seconds", string(copyParser.l.b[copyParser.offsets[0]:copyParser.offsets[1]]))
-	require.False(t, copyParser.skipCTSeries)
+func resetValAndLset(e []parsedEntry) {
+	for i := range e {
+		e[i].v = 0
+		e[i].lset = labels.EmptyLabels()
+	}
 }
diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go
index a611f3aea7..17b0c3db8b 100644
--- a/model/textparse/promparse.go
+++ b/model/textparse/promparse.go
@@ -239,7 +239,8 @@ func (p *PromParser) Metric(l *labels.Labels) string {
 		label := unreplace(s[a:b])
 		c := p.offsets[i+2] - p.start
 		d := p.offsets[i+3] - p.start
- 
value := unreplace(s[c:d]) + value := normalizeFloatsInLabelValues(p.mtype, label, unreplace(s[c:d])) + p.builder.Add(label, value) } @@ -502,13 +503,13 @@ func unreplace(s string) string { } func yoloString(b []byte) string { - return *((*string)(unsafe.Pointer(&b))) + return unsafe.String(unsafe.SliceData(b), len(b)) } func parseFloat(s string) (float64, error) { // Keep to pre-Go 1.13 float formats. if strings.ContainsAny(s, "pP_") { - return 0, fmt.Errorf("unsupported character in float") + return 0, errors.New("unsupported character in float") } return strconv.ParseFloat(s, 64) } diff --git a/model/textparse/promparse_test.go b/model/textparse/promparse_test.go index 7971d23b7e..e8cf66f539 100644 --- a/model/textparse/promparse_test.go +++ b/model/textparse/promparse_test.go @@ -14,37 +14,15 @@ package textparse import ( - "bytes" - "errors" "io" - "os" - "strings" "testing" - "github.com/klauspost/compress/gzip" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/prometheus/common/expfmt" - "github.com/prometheus/common/model" - - "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/util/testutil" ) -type expectedParse struct { - lset labels.Labels - m string - t *int64 - v float64 - typ model.MetricType - help string - unit string - comment string - e *exemplar.Exemplar - ct *int64 -} - func TestPromParse(t *testing.T) { input := `# HELP go_gc_duration_seconds A summary of the GC invocation durations. # TYPE go_gc_duration_seconds summary @@ -53,6 +31,13 @@ go_gc_duration_seconds{quantile="0.25",} 7.424100000000001e-05 go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05 go_gc_duration_seconds{quantile="0.8", a="b"} 8.3835e-05 go_gc_duration_seconds{ quantile="0.9", a="b"} 8.3835e-05 +# HELP prometheus_http_request_duration_seconds Histogram of latencies for HTTP requests. 
+# TYPE prometheus_http_request_duration_seconds histogram +prometheus_http_request_duration_seconds_bucket{handler="/",le="1"} 423 +prometheus_http_request_duration_seconds_bucket{handler="/",le="2"} 1423 +prometheus_http_request_duration_seconds_bucket{handler="/",le="+Inf"} 1423 +prometheus_http_request_duration_seconds_sum{handler="/"} 2000 +prometheus_http_request_duration_seconds_count{handler="/"} 1423 # Hrandom comment starting with prefix of HELP # wind_speed{A="2",c="3"} 12345 @@ -72,13 +57,12 @@ some:aggregate:rate5m{a_b="c"} 1 go_goroutines 33 123123 _metric_starting_with_underscore 1 testmetric{_label_starting_with_underscore="foo"} 1 -testmetric{label="\"bar\""} 1` +testmetric{label="\"bar\""} 1 +testmetric{le="10"} 1` input += "\n# HELP metric foo\x00bar" input += "\nnull_byte_metric{a=\"abc\x00\"} 1" - int64p := func(x int64) *int64 { return &x } - - exp := []expectedParse{ + exp := []parsedEntry{ { m: "go_gc_duration_seconds", help: "A summary of the GC invocation durations.", @@ -88,7 +72,7 @@ testmetric{label="\"bar\""} 1` }, { m: `go_gc_duration_seconds{quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"), }, { m: `go_gc_duration_seconds{quantile="0.25",}`, v: 7.424100000000001e-05, @@ -105,6 +89,32 @@ testmetric{label="\"bar\""} 1` m: `go_gc_duration_seconds{ quantile="0.9", a="b"}`, v: 8.3835e-05, lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.9", "a", "b"), + }, { + m: "prometheus_http_request_duration_seconds", + help: "Histogram of latencies for HTTP requests.", + }, { + m: "prometheus_http_request_duration_seconds", + typ: model.MetricTypeHistogram, + }, { + m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="1"}`, + v: 423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "1.0"), + }, { + m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="2"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "2.0"), + }, { + m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="+Inf"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "+Inf"), + }, { + m: `prometheus_http_request_duration_seconds_sum{handler="/"}`, + v: 2000, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_sum", "handler", "/"), + }, { + m: `prometheus_http_request_duration_seconds_count{handler="/"}`, + v: 1423, + lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_count", "handler", "/"), }, { comment: "# Hrandom comment starting with prefix of HELP", }, { @@ -140,7 +150,7 @@ testmetric{label="\"bar\""} 1` v: 8.3835e-05, lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "1.0", "a", "b"), }, { - // NOTE: Unlike OpenMetrics, Promparse allows spaces between label terms. This appears to be unintended and should probably be fixed. + // NOTE: Unlike OpenMetrics, PromParser allows spaces between label terms. This appears to be unintended and should probably be fixed. 
m: `go_gc_duration_seconds { quantile = "2.0" a = "b" }`, v: 8.3835e-05, lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "2.0", "a", "b"), @@ -175,6 +185,10 @@ testmetric{label="\"bar\""} 1` m: "testmetric{label=\"\\\"bar\\\"\"}", v: 1, lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`), + }, { + m: `testmetric{le="10"}`, + v: 1, + lset: labels.FromStrings("__name__", "testmetric", "le", "10"), }, { m: "metric", help: "foo\x00bar", @@ -186,80 +200,8 @@ testmetric{label="\"bar\""} 1` } p := NewPromParser([]byte(input), labels.NewSymbolTable()) - checkParseResults(t, p, exp) -} - -func checkParseResults(t *testing.T, p Parser, exp []expectedParse) { - checkParseResultsWithCT(t, p, exp, false) -} - -func checkParseResultsWithCT(t *testing.T, p Parser, exp []expectedParse, ctLinesRemoved bool) { - i := 0 - - var res labels.Labels - - for { - et, err := p.Next() - if errors.Is(err, io.EOF) { - break - } - require.NoError(t, err) - - switch et { - case EntrySeries: - m, ts, v := p.Series() - - p.Metric(&res) - - if ctLinesRemoved { - // Are CT series skipped? - _, typ := p.Type() - if TypeRequiresCT(typ) && strings.HasSuffix(res.Get(labels.MetricName), "_created") { - t.Fatalf("we exped created lines skipped") - } - } - - require.Equal(t, exp[i].m, string(m)) - require.Equal(t, exp[i].t, ts) - require.Equal(t, exp[i].v, v) - testutil.RequireEqual(t, exp[i].lset, res) - - var e exemplar.Exemplar - found := p.Exemplar(&e) - if exp[i].e == nil { - require.False(t, found) - } else { - require.True(t, found) - testutil.RequireEqual(t, *exp[i].e, e) - } - if ct := p.CreatedTimestamp(); ct != nil { - require.Equal(t, *exp[i].ct, *ct) - } else { - require.Nil(t, exp[i].ct) - } - - case EntryType: - m, typ := p.Type() - require.Equal(t, exp[i].m, string(m)) - require.Equal(t, exp[i].typ, typ) - - case EntryHelp: - m, h := p.Help() - require.Equal(t, exp[i].m, string(m)) - require.Equal(t, exp[i].help, string(h)) - - case EntryUnit: - m, u := p.Unit() - require.Equal(t, exp[i].m, string(m)) - require.Equal(t, exp[i].unit, string(u)) - - case EntryComment: - require.Equal(t, exp[i].comment, string(p.Comment())) - } - - i++ - } - require.Len(t, exp, i) + got := testParse(t, p) + requireEntries(t, exp, got) } func TestUTF8PromParse(t *testing.T) { @@ -283,7 +225,7 @@ func TestUTF8PromParse(t *testing.T) { {"go.gc_duration_seconds_count"} 99 {"Heizölrückstoßabdämpfung 10€ metric with \"interesting\" {character\nchoices}","strange©™\n'quoted' \"name\""="6"} 10.0` - exp := []expectedParse{ + exp := []parsedEntry{ { m: "go.gc_duration_seconds", help: "A summary of the GC invocation durations.", @@ -293,7 +235,7 @@ func TestUTF8PromParse(t *testing.T) { }, { m: `{"go.gc_duration_seconds",quantile="0"}`, v: 4.9351e-05, - lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"), + lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.0"), }, { m: `{"go.gc_duration_seconds",quantile="0.25",}`, v: 7.424100000000001e-05, @@ -339,7 +281,8 @@ choices}`, "strange©™\n'quoted' \"name\"", "6"), } p := NewPromParser([]byte(input), labels.NewSymbolTable()) - checkParseResults(t, p, exp) + got := testParse(t, p) + requireEntries(t, exp, got) } func TestPromParseErrors(t *testing.T) { @@ -423,8 +366,7 @@ func TestPromParseErrors(t *testing.T) { for err == nil { _, err = p.Next() } - require.Error(t, err) - require.Equal(t, c.err, err.Error(), "test %d", i) + require.EqualError(t, err, c.err, "test %d", i) } } @@ -483,194 +425,6 @@ func 
TestPromNullByteHandling(t *testing.T) { continue } - require.Error(t, err) - require.Equal(t, c.err, err.Error(), "test %d", i) - } -} - -const ( - promtestdataSampleCount = 410 -) - -func BenchmarkParse(b *testing.B) { - for parserName, parser := range map[string]func([]byte, *labels.SymbolTable) Parser{ - "prometheus": NewPromParser, - "openmetrics": func(b []byte, st *labels.SymbolTable) Parser { - return NewOpenMetricsParser(b, st) - }, - } { - for _, fn := range []string{"promtestdata.txt", "promtestdata.nometa.txt"} { - f, err := os.Open(fn) - require.NoError(b, err) - defer f.Close() - - buf, err := io.ReadAll(f) - require.NoError(b, err) - - b.Run(parserName+"/no-decode-metric/"+fn, func(b *testing.B) { - total := 0 - - b.SetBytes(int64(len(buf) / promtestdataSampleCount)) - b.ReportAllocs() - b.ResetTimer() - - st := labels.NewSymbolTable() - for i := 0; i < b.N; i += promtestdataSampleCount { - p := parser(buf, st) - - Outer: - for i < b.N { - t, err := p.Next() - switch t { - case EntryInvalid: - if errors.Is(err, io.EOF) { - break Outer - } - b.Fatal(err) - case EntrySeries: - m, _, _ := p.Series() - total += len(m) - i++ - } - } - } - _ = total - }) - b.Run(parserName+"/decode-metric/"+fn, func(b *testing.B) { - total := 0 - - b.SetBytes(int64(len(buf) / promtestdataSampleCount)) - b.ReportAllocs() - b.ResetTimer() - - st := labels.NewSymbolTable() - for i := 0; i < b.N; i += promtestdataSampleCount { - p := parser(buf, st) - - Outer: - for i < b.N { - t, err := p.Next() - switch t { - case EntryInvalid: - if errors.Is(err, io.EOF) { - break Outer - } - b.Fatal(err) - case EntrySeries: - m, _, _ := p.Series() - - var res labels.Labels - p.Metric(&res) - - total += len(m) - i++ - } - } - } - _ = total - }) - b.Run(parserName+"/decode-metric-reuse/"+fn, func(b *testing.B) { - total := 0 - var res labels.Labels - - b.SetBytes(int64(len(buf) / promtestdataSampleCount)) - b.ReportAllocs() - b.ResetTimer() - - st := labels.NewSymbolTable() - for i := 0; i < b.N; i += promtestdataSampleCount { - p := parser(buf, st) - - Outer: - for i < b.N { - t, err := p.Next() - switch t { - case EntryInvalid: - if errors.Is(err, io.EOF) { - break Outer - } - b.Fatal(err) - case EntrySeries: - m, _, _ := p.Series() - - p.Metric(&res) - - total += len(m) - i++ - } - } - } - _ = total - }) - b.Run("expfmt-text/"+fn, func(b *testing.B) { - if parserName != "prometheus" { - b.Skip() - } - b.SetBytes(int64(len(buf) / promtestdataSampleCount)) - b.ReportAllocs() - b.ResetTimer() - - total := 0 - - for i := 0; i < b.N; i += promtestdataSampleCount { - decSamples := make(model.Vector, 0, 50) - sdec := expfmt.SampleDecoder{ - Dec: expfmt.NewDecoder(bytes.NewReader(buf), expfmt.NewFormat(expfmt.TypeTextPlain)), - Opts: &expfmt.DecodeOptions{ - Timestamp: model.TimeFromUnixNano(0), - }, - } - - for { - if err = sdec.Decode(&decSamples); err != nil { - break - } - total += len(decSamples) - decSamples = decSamples[:0] - } - } - _ = total - }) - } - } -} - -func BenchmarkGzip(b *testing.B) { - for _, fn := range []string{"promtestdata.txt", "promtestdata.nometa.txt"} { - b.Run(fn, func(b *testing.B) { - f, err := os.Open(fn) - require.NoError(b, err) - defer f.Close() - - var buf bytes.Buffer - gw := gzip.NewWriter(&buf) - - n, err := io.Copy(gw, f) - require.NoError(b, err) - require.NoError(b, gw.Close()) - - gbuf, err := io.ReadAll(&buf) - require.NoError(b, err) - - k := b.N / promtestdataSampleCount - - b.ReportAllocs() - b.SetBytes(n / promtestdataSampleCount) - b.ResetTimer() - - total := 0 - - for i 
:= 0; i < k; i++ { - gr, err := gzip.NewReader(bytes.NewReader(gbuf)) - require.NoError(b, err) - - d, err := io.ReadAll(gr) - require.NoError(b, err) - require.NoError(b, gr.Close()) - - total += len(d) - } - _ = total - }) + require.EqualError(t, err, c.err, "test %d", i) } } diff --git a/model/textparse/protobufparse.go b/model/textparse/protobufparse.go index e384a75fca..a77e1d728f 100644 --- a/model/textparse/protobufparse.go +++ b/model/textparse/protobufparse.go @@ -20,7 +20,9 @@ import ( "fmt" "io" "math" + "strconv" "strings" + "sync" "unicode/utf8" "github.com/gogo/protobuf/proto" @@ -34,6 +36,15 @@ import ( dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" ) +// floatFormatBufPool is exclusively used in formatOpenMetricsFloat. +var floatFormatBufPool = sync.Pool{ + New: func() interface{} { + // To contain at most 17 digits and additional syntax for a float64. + b := make([]byte, 0, 24) + return &b + }, +} + // ProtobufParser is a very inefficient way of unmarshaling the old Prometheus // protobuf format and then present it as it if were parsed by a // Prometheus-2-style text parser. This is only done so that we can easily plug @@ -457,6 +468,12 @@ func (p *ProtobufParser) Next() (Entry, error) { p.state = EntryHelp case EntryHelp: + if p.mf.Unit != "" { + p.state = EntryUnit + } else { + p.state = EntryType + } + case EntryUnit: p.state = EntryType case EntryType: t := p.mf.GetType() @@ -604,7 +621,7 @@ func readDelimited(b []byte, mf *dto.MetricFamily) (n int, err error) { return totalLength, mf.Unmarshal(b[varIntLength:totalLength]) } -// formatOpenMetricsFloat works like the usual Go string formatting of a fleat +// formatOpenMetricsFloat works like the usual Go string formatting of a float // but appends ".0" if the resulting number would otherwise contain neither a // "." nor an "e". func formatOpenMetricsFloat(f float64) string { @@ -623,11 +640,15 @@ func formatOpenMetricsFloat(f float64) string { case math.IsInf(f, -1): return "-Inf" } - s := fmt.Sprint(f) - if strings.ContainsAny(s, "e.") { - return s + bp := floatFormatBufPool.Get().(*[]byte) + defer floatFormatBufPool.Put(bp) + + *bp = strconv.AppendFloat((*bp)[:0], f, 'g', -1, 64) + if bytes.ContainsAny(*bp, "e.") { + return string(*bp) } - return s + ".0" + *bp = append(*bp, '.', '0') + return string(*bp) } // isNativeHistogram returns false iff the provided histograms has no spans at diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index cf34ae52df..065459a69a 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -16,8 +16,6 @@ package textparse import ( "bytes" "encoding/binary" - "errors" - "io" "testing" "github.com/gogo/protobuf/proto" @@ -27,12 +25,12 @@ import ( "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/util/testutil" - dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" ) -func createTestProtoBuf(t *testing.T) *bytes.Buffer { +func createTestProtoBuf(t testing.TB) *bytes.Buffer { + t.Helper() + testMetricFamilies := []string{ `name: "go_build_info" help: "Build information about the main Go module." @@ -411,6 +409,49 @@ metric: < > > +`, + `name: "test_histogram3" +help: "Similar histogram as before but now with integer buckets." 
+type: HISTOGRAM +metric: < + histogram: < + sample_count: 6 + sample_sum: 50 + bucket: < + cumulative_count: 2 + upper_bound: -20 + > + bucket: < + cumulative_count: 4 + upper_bound: 20 + exemplar: < + label: < + name: "dummyID" + value: "59727" + > + value: 15 + timestamp: < + seconds: 1625851153 + nanos: 146848499 + > + > + > + bucket: < + cumulative_count: 6 + upper_bound: 30 + exemplar: < + label: < + name: "dummyID" + value: "5617" + > + value: 25 + > + > + schema: 0 + zero_threshold: 0 + > +> + `, `name: "test_histogram_family" help: "Test histogram metric family with two very simple histograms." @@ -783,32 +824,17 @@ metric: < } func TestProtobufParse(t *testing.T) { - type parseResult struct { - lset labels.Labels - m string - t int64 - v float64 - typ model.MetricType - help string - unit string - comment string - shs *histogram.Histogram - fhs *histogram.FloatHistogram - e []exemplar.Exemplar - ct int64 - } - inputBuf := createTestProtoBuf(t) scenarios := []struct { name string parser Parser - expected []parseResult + expected []parsedEntry }{ { name: "ignore classic buckets of native histograms", parser: NewProtobufParser(inputBuf.Bytes(), false, labels.NewSymbolTable()), - expected: []parseResult{ + expected: []parsedEntry{ { m: "go_build_info", help: "Build information about the main Go module.", @@ -830,6 +856,9 @@ func TestProtobufParse(t *testing.T) { { m: "go_memstats_alloc_bytes_total", help: "Total number of bytes allocated, even if freed.", + }, + { + m: "go_memstats_alloc_bytes_total", unit: "bytes", }, { @@ -842,7 +871,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "go_memstats_alloc_bytes_total", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "42"), Value: 12, HasTs: true, Ts: 1625851151233}, }, }, @@ -856,7 +885,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "something_untyped", - t: 1234567, + t: int64p(1234567), v: 42, lset: labels.FromStrings( "__name__", "something_untyped", @@ -872,7 +901,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -893,7 +922,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -907,7 +936,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_gauge_histogram", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, Count: 175, @@ -929,7 +958,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_gauge_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -943,7 +972,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_float_histogram", - t: 1234568, + t: int64p(1234568), fhs: &histogram.FloatHistogram{ Count: 175.0, ZeroCount: 2.0, @@ -964,7 +993,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_float_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -978,7 +1007,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_gauge_float_histogram", - t: 1234568, + t: 
int64p(1234568), fhs: &histogram.FloatHistogram{ CounterResetHint: histogram.GaugeType, Count: 175.0, @@ -1000,7 +1029,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_gauge_float_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -1041,7 +1070,7 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_histogram2_bucket", "le", "-0.00038", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00038, HasTs: true, Ts: 1625851153146}, }, }, @@ -1052,7 +1081,7 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_histogram2_bucket", "le", "1.0", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.000295, HasTs: false}, }, }, @@ -1064,6 +1093,66 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, + { + m: "test_histogram3", + help: "Similar histogram as before but now with integer buckets.", + }, + { + m: "test_histogram3", + typ: model.MetricTypeHistogram, + }, + { + m: "test_histogram3_count", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_count", + ), + }, + { + m: "test_histogram3_sum", + v: 50, + lset: labels.FromStrings( + "__name__", "test_histogram3_sum", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff-20.0", + v: 2, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "-20.0", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff20.0", + v: 4, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "20.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "59727"), Value: 15, HasTs: true, Ts: 1625851153146}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff30.0", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "30.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "5617"), Value: 25, HasTs: false}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff+Inf", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "+Inf", + ), + }, { m: "test_histogram_family", help: "Test histogram metric family with two very simple histograms.", @@ -1235,7 +1324,7 @@ func TestProtobufParse(t *testing.T) { { m: "test_counter_with_createdtimestamp", v: 42, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_counter_with_createdtimestamp", ), @@ -1251,7 +1340,7 @@ func TestProtobufParse(t *testing.T) { { m: "test_summary_with_createdtimestamp_count", v: 42, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_count", ), @@ -1259,7 +1348,7 @@ func TestProtobufParse(t *testing.T) { { m: "test_summary_with_createdtimestamp_sum", v: 1.234, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_sum", ), @@ -1274,7 +1363,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram_with_createdtimestamp", - ct: 1000, + ct: int64p(1000), shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, PositiveSpans: []histogram.Span{}, @@ -1294,7 +1383,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_gaugehistogram_with_createdtimestamp", - ct: 1000, + ct: int64p(1000), shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, PositiveSpans: []histogram.Span{}, @@ -1314,7 
+1403,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram_with_native_histogram_exemplars", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -1335,7 +1424,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, {Labels: labels.FromStrings("dummyID", "59772"), Value: -0.00052, HasTs: true, Ts: 1625851160156}, }, @@ -1350,7 +1439,7 @@ func TestProtobufParse(t *testing.T) { }, { m: "test_histogram_with_native_histogram_exemplars2", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -1371,7 +1460,7 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, @@ -1380,16 +1469,16 @@ func TestProtobufParse(t *testing.T) { { name: "parse classic and native buckets", parser: NewProtobufParser(inputBuf.Bytes(), true, labels.NewSymbolTable()), - expected: []parseResult{ - { // 0 + expected: []parsedEntry{ + { m: "go_build_info", help: "Build information about the main Go module.", }, - { // 1 + { m: "go_build_info", typ: model.MetricTypeGauge, }, - { // 2 + { m: "go_build_info\xFFchecksum\xFF\xFFpath\xFFgithub.com/prometheus/client_golang\xFFversion\xFF(devel)", v: 1, lset: labels.FromStrings( @@ -1399,51 +1488,55 @@ func TestProtobufParse(t *testing.T) { "version", "(devel)", ), }, - { // 3 + { m: "go_memstats_alloc_bytes_total", help: "Total number of bytes allocated, even if freed.", }, - { // 4 + { + m: "go_memstats_alloc_bytes_total", + unit: "bytes", + }, + { m: "go_memstats_alloc_bytes_total", typ: model.MetricTypeCounter, }, - { // 5 + { m: "go_memstats_alloc_bytes_total", v: 1.546544e+06, lset: labels.FromStrings( "__name__", "go_memstats_alloc_bytes_total", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "42"), Value: 12, HasTs: true, Ts: 1625851151233}, }, }, - { // 6 + { m: "something_untyped", help: "Just to test the untyped type.", }, - { // 7 + { m: "something_untyped", typ: model.MetricTypeUnknown, }, - { // 8 + { m: "something_untyped", - t: 1234567, + t: int64p(1234567), v: 42, lset: labels.FromStrings( "__name__", "something_untyped", ), }, - { // 9 + { m: "test_histogram", help: "Test histogram with many buckets removed to keep it manageable in size.", }, - { // 10 + { m: "test_histogram", typ: model.MetricTypeHistogram, }, - { // 11 + { m: "test_histogram", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -1464,79 +1557,79 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 12 + { m: "test_histogram_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_count", ), }, - { // 13 + { m: "test_histogram_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_histogram_sum", ), }, - { // 14 + { m: 
"test_histogram_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_histogram_bucket", "le", "-0.0004899999999999998", ), }, - { // 15 + { m: "test_histogram_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_histogram_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 16 + { m: "test_histogram_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_histogram_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 17 + { m: "test_histogram_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_bucket", "le", "+Inf", ), }, - { // 18 + { m: "test_gauge_histogram", help: "Like test_histogram but as gauge histogram.", }, - { // 19 + { m: "test_gauge_histogram", typ: model.MetricTypeGaugeHistogram, }, - { // 20 + { m: "test_gauge_histogram", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, Count: 175, @@ -1558,79 +1651,79 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_gauge_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 21 + { m: "test_gauge_histogram_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_gauge_histogram_count", ), }, - { // 22 + { m: "test_gauge_histogram_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_gauge_histogram_sum", ), }, - { // 23 + { m: "test_gauge_histogram_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_gauge_histogram_bucket", "le", "-0.0004899999999999998", ), }, - { // 24 + { m: "test_gauge_histogram_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_gauge_histogram_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 25 + { m: "test_gauge_histogram_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_gauge_histogram_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 26 + { m: "test_gauge_histogram_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_gauge_histogram_bucket", "le", "+Inf", ), }, - { // 27 + { m: "test_float_histogram", help: "Test float histogram with many buckets removed to keep it manageable in size.", }, - { // 28 + { m: "test_float_histogram", typ: model.MetricTypeHistogram, }, - { // 29 + { m: "test_float_histogram", - t: 1234568, + t: int64p(1234568), fhs: 
&histogram.FloatHistogram{ Count: 175.0, ZeroCount: 2.0, @@ -1651,79 +1744,79 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_float_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 30 + { m: "test_float_histogram_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_float_histogram_count", ), }, - { // 31 + { m: "test_float_histogram_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_float_histogram_sum", ), }, - { // 32 + { m: "test_float_histogram_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_float_histogram_bucket", "le", "-0.0004899999999999998", ), }, - { // 33 + { m: "test_float_histogram_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_float_histogram_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 34 + { m: "test_float_histogram_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_float_histogram_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 35 + { m: "test_float_histogram_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_float_histogram_bucket", "le", "+Inf", ), }, - { // 36 + { m: "test_gauge_float_histogram", help: "Like test_float_histogram but as gauge histogram.", }, - { // 37 + { m: "test_gauge_float_histogram", typ: model.MetricTypeGaugeHistogram, }, - { // 38 + { m: "test_gauge_float_histogram", - t: 1234568, + t: int64p(1234568), fhs: &histogram.FloatHistogram{ CounterResetHint: histogram.GaugeType, Count: 175.0, @@ -1745,91 +1838,91 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_gauge_float_histogram", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 39 + { m: "test_gauge_float_histogram_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_count", ), }, - { // 40 + { m: "test_gauge_float_histogram_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_sum", ), }, - { // 41 + { m: "test_gauge_float_histogram_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_bucket", "le", "-0.0004899999999999998", ), }, - { // 42 + { m: "test_gauge_float_histogram_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 43 + { m: 
"test_gauge_float_histogram_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 44 + { m: "test_gauge_float_histogram_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_gauge_float_histogram_bucket", "le", "+Inf", ), }, - { // 45 + { m: "test_histogram2", help: "Similar histogram as before but now without sparse buckets.", }, - { // 46 + { m: "test_histogram2", typ: model.MetricTypeHistogram, }, - { // 47 + { m: "test_histogram2_count", v: 175, lset: labels.FromStrings( "__name__", "test_histogram2_count", ), }, - { // 48 + { m: "test_histogram2_sum", v: 0.000828, lset: labels.FromStrings( "__name__", "test_histogram2_sum", ), }, - { // 49 + { m: "test_histogram2_bucket\xffle\xff-0.00048", v: 2, lset: labels.FromStrings( @@ -1837,29 +1930,29 @@ func TestProtobufParse(t *testing.T) { "le", "-0.00048", ), }, - { // 50 + { m: "test_histogram2_bucket\xffle\xff-0.00038", v: 4, lset: labels.FromStrings( "__name__", "test_histogram2_bucket", "le", "-0.00038", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00038, HasTs: true, Ts: 1625851153146}, }, }, - { // 51 + { m: "test_histogram2_bucket\xffle\xff1.0", v: 16, lset: labels.FromStrings( "__name__", "test_histogram2_bucket", "le", "1.0", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.000295, HasTs: false}, }, }, - { // 52 + { m: "test_histogram2_bucket\xffle\xff+Inf", v: 175, lset: labels.FromStrings( @@ -1867,15 +1960,75 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, - { // 53 + { + m: "test_histogram3", + help: "Similar histogram as before but now with integer buckets.", + }, + { + m: "test_histogram3", + typ: model.MetricTypeHistogram, + }, + { + m: "test_histogram3_count", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_count", + ), + }, + { + m: "test_histogram3_sum", + v: 50, + lset: labels.FromStrings( + "__name__", "test_histogram3_sum", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff-20.0", + v: 2, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "-20.0", + ), + }, + { + m: "test_histogram3_bucket\xffle\xff20.0", + v: 4, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "20.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "59727"), Value: 15, HasTs: true, Ts: 1625851153146}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff30.0", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "30.0", + ), + es: []exemplar.Exemplar{ + {Labels: labels.FromStrings("dummyID", "5617"), Value: 25, HasTs: false}, + }, + }, + { + m: "test_histogram3_bucket\xffle\xff+Inf", + v: 6, + lset: labels.FromStrings( + "__name__", "test_histogram3_bucket", + "le", "+Inf", + ), + }, + { m: "test_histogram_family", help: "Test histogram metric family with two very simple histograms.", }, - { // 54 + { m: "test_histogram_family", typ: model.MetricTypeHistogram, }, - { // 55 + { m: "test_histogram_family\xfffoo\xffbar", shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, @@ -1893,7 +2046,7 @@ func TestProtobufParse(t 
*testing.T) { "foo", "bar", ), }, - { // 56 + { m: "test_histogram_family_count\xfffoo\xffbar", v: 5, lset: labels.FromStrings( @@ -1901,7 +2054,7 @@ func TestProtobufParse(t *testing.T) { "foo", "bar", ), }, - { // 57 + { m: "test_histogram_family_sum\xfffoo\xffbar", v: 12.1, lset: labels.FromStrings( @@ -1909,7 +2062,7 @@ func TestProtobufParse(t *testing.T) { "foo", "bar", ), }, - { // 58 + { m: "test_histogram_family_bucket\xfffoo\xffbar\xffle\xff1.1", v: 2, lset: labels.FromStrings( @@ -1918,7 +2071,7 @@ func TestProtobufParse(t *testing.T) { "le", "1.1", ), }, - { // 59 + { m: "test_histogram_family_bucket\xfffoo\xffbar\xffle\xff2.2", v: 3, lset: labels.FromStrings( @@ -1927,7 +2080,7 @@ func TestProtobufParse(t *testing.T) { "le", "2.2", ), }, - { // 60 + { m: "test_histogram_family_bucket\xfffoo\xffbar\xffle\xff+Inf", v: 5, lset: labels.FromStrings( @@ -1936,7 +2089,7 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, - { // 61 + { m: "test_histogram_family\xfffoo\xffbaz", shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, @@ -1954,7 +2107,7 @@ func TestProtobufParse(t *testing.T) { "foo", "baz", ), }, - { // 62 + { m: "test_histogram_family_count\xfffoo\xffbaz", v: 6, lset: labels.FromStrings( @@ -1962,7 +2115,7 @@ func TestProtobufParse(t *testing.T) { "foo", "baz", ), }, - { // 63 + { m: "test_histogram_family_sum\xfffoo\xffbaz", v: 13.1, lset: labels.FromStrings( @@ -1970,7 +2123,7 @@ func TestProtobufParse(t *testing.T) { "foo", "baz", ), }, - { // 64 + { m: "test_histogram_family_bucket\xfffoo\xffbaz\xffle\xff1.1", v: 1, lset: labels.FromStrings( @@ -1979,7 +2132,7 @@ func TestProtobufParse(t *testing.T) { "le", "1.1", ), }, - { // 65 + { m: "test_histogram_family_bucket\xfffoo\xffbaz\xffle\xff2.2", v: 5, lset: labels.FromStrings( @@ -1988,7 +2141,7 @@ func TestProtobufParse(t *testing.T) { "le", "2.2", ), }, - { // 66 + { m: "test_histogram_family_bucket\xfffoo\xffbaz\xffle\xff+Inf", v: 6, lset: labels.FromStrings( @@ -1997,15 +2150,15 @@ func TestProtobufParse(t *testing.T) { "le", "+Inf", ), }, - { // 67 + { m: "test_float_histogram_with_zerothreshold_zero", help: "Test float histogram with a zero threshold of zero.", }, - { // 68 + { m: "test_float_histogram_with_zerothreshold_zero", typ: model.MetricTypeHistogram, }, - { // 69 + { m: "test_float_histogram_with_zerothreshold_zero", fhs: &histogram.FloatHistogram{ Count: 5.0, @@ -2021,15 +2174,15 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_float_histogram_with_zerothreshold_zero", ), }, - { // 70 + { m: "rpc_durations_seconds", help: "RPC latency distributions.", }, - { // 71 + { m: "rpc_durations_seconds", typ: model.MetricTypeSummary, }, - { // 72 + { m: "rpc_durations_seconds_count\xffservice\xffexponential", v: 262, lset: labels.FromStrings( @@ -2037,7 +2190,7 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 73 + { m: "rpc_durations_seconds_sum\xffservice\xffexponential", v: 0.00025551262820703587, lset: labels.FromStrings( @@ -2045,7 +2198,7 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 74 + { m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.5", v: 6.442786329648548e-07, lset: labels.FromStrings( @@ -2054,7 +2207,7 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 75 + { m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.9", v: 1.9435742936658396e-06, lset: labels.FromStrings( @@ -2063,7 +2216,7 @@ func TestProtobufParse(t 
*testing.T) { "service", "exponential", ), }, - { // 76 + { m: "rpc_durations_seconds\xffservice\xffexponential\xffquantile\xff0.99", v: 4.0471608667037015e-06, lset: labels.FromStrings( @@ -2072,37 +2225,37 @@ func TestProtobufParse(t *testing.T) { "service", "exponential", ), }, - { // 77 + { m: "without_quantiles", help: "A summary without quantiles.", }, - { // 78 + { m: "without_quantiles", typ: model.MetricTypeSummary, }, - { // 79 + { m: "without_quantiles_count", v: 42, lset: labels.FromStrings( "__name__", "without_quantiles_count", ), }, - { // 80 + { m: "without_quantiles_sum", v: 1.234, lset: labels.FromStrings( "__name__", "without_quantiles_sum", ), }, - { // 81 + { m: "empty_histogram", help: "A histogram without observations and with a zero threshold of zero but with a no-op span to identify it as a native histogram.", }, - { // 82 + { m: "empty_histogram", typ: model.MetricTypeHistogram, }, - { // 83 + { m: "empty_histogram", shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, @@ -2113,57 +2266,57 @@ func TestProtobufParse(t *testing.T) { "__name__", "empty_histogram", ), }, - { // 84 + { m: "test_counter_with_createdtimestamp", help: "A counter with a created timestamp.", }, - { // 85 + { m: "test_counter_with_createdtimestamp", typ: model.MetricTypeCounter, }, - { // 86 + { m: "test_counter_with_createdtimestamp", v: 42, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_counter_with_createdtimestamp", ), }, - { // 87 + { m: "test_summary_with_createdtimestamp", help: "A summary with a created timestamp.", }, - { // 88 + { m: "test_summary_with_createdtimestamp", typ: model.MetricTypeSummary, }, - { // 89 + { m: "test_summary_with_createdtimestamp_count", v: 42, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_count", ), }, - { // 90 + { m: "test_summary_with_createdtimestamp_sum", v: 1.234, - ct: 1000, + ct: int64p(1000), lset: labels.FromStrings( "__name__", "test_summary_with_createdtimestamp_sum", ), }, - { // 91 + { m: "test_histogram_with_createdtimestamp", help: "A histogram with a created timestamp.", }, - { // 92 + { m: "test_histogram_with_createdtimestamp", typ: model.MetricTypeHistogram, }, - { // 93 + { m: "test_histogram_with_createdtimestamp", - ct: 1000, + ct: int64p(1000), shs: &histogram.Histogram{ CounterResetHint: histogram.UnknownCounterReset, PositiveSpans: []histogram.Span{}, @@ -2173,17 +2326,17 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_histogram_with_createdtimestamp", ), }, - { // 94 + { m: "test_gaugehistogram_with_createdtimestamp", help: "A gauge histogram with a created timestamp.", }, - { // 95 + { m: "test_gaugehistogram_with_createdtimestamp", typ: model.MetricTypeGaugeHistogram, }, - { // 96 + { m: "test_gaugehistogram_with_createdtimestamp", - ct: 1000, + ct: int64p(1000), shs: &histogram.Histogram{ CounterResetHint: histogram.GaugeType, PositiveSpans: []histogram.Span{}, @@ -2193,17 +2346,17 @@ func TestProtobufParse(t *testing.T) { "__name__", "test_gaugehistogram_with_createdtimestamp", ), }, - { // 97 + { m: "test_histogram_with_native_histogram_exemplars", help: "A histogram with native histogram exemplars.", }, - { // 98 + { m: "test_histogram_with_native_histogram_exemplars", typ: model.MetricTypeHistogram, }, - { // 99 + { m: "test_histogram_with_native_histogram_exemplars", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2224,80 +2377,80 @@ func 
TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, {Labels: labels.FromStrings("dummyID", "59772"), Value: -0.00052, HasTs: true, Ts: 1625851160156}, }, }, - { // 100 + { m: "test_histogram_with_native_histogram_exemplars_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_count", ), }, - { // 101 + { m: "test_histogram_with_native_histogram_exemplars_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_sum", ), }, - { // 102 + { m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_bucket", "le", "-0.0004899999999999998", ), }, - { // 103 + { m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_bucket", "le", "-0.0003899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59727"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 104 + { m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_bucket", "le", "-0.0002899999999999998", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "5617"), Value: -0.00029, HasTs: false}, }, }, - { // 105 + { m: "test_histogram_with_native_histogram_exemplars_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars_bucket", "le", "+Inf", ), }, - { // 106 + { m: "test_histogram_with_native_histogram_exemplars2", help: "Another histogram with native histogram exemplars.", }, - { // 107 + { m: "test_histogram_with_native_histogram_exemplars2", typ: model.MetricTypeHistogram, }, - { // 108 + { m: "test_histogram_with_native_histogram_exemplars2", - t: 1234568, + t: int64p(1234568), shs: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2318,56 +2471,56 @@ func TestProtobufParse(t *testing.T) { lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2", ), - e: []exemplar.Exemplar{ + es: []exemplar.Exemplar{ {Labels: labels.FromStrings("dummyID", "59780"), Value: -0.00039, HasTs: true, Ts: 1625851155146}, }, }, - { // 109 + { m: "test_histogram_with_native_histogram_exemplars2_count", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_count", ), }, - { // 110 + { m: "test_histogram_with_native_histogram_exemplars2_sum", - t: 1234568, + t: int64p(1234568), v: 0.0008280461746287094, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_sum", ), }, - { // 111 + { m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff-0.0004899999999999998", - t: 1234568, + t: int64p(1234568), v: 2, lset: labels.FromStrings( 
"__name__", "test_histogram_with_native_histogram_exemplars2_bucket", "le", "-0.0004899999999999998", ), }, - { // 112 + { m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff-0.0003899999999999998", - t: 1234568, + t: int64p(1234568), v: 4, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", "le", "-0.0003899999999999998", ), }, - { // 113 + { m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff-0.0002899999999999998", - t: 1234568, + t: int64p(1234568), v: 16, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", "le", "-0.0002899999999999998", ), }, - { // 114 + { m: "test_histogram_with_native_histogram_exemplars2_bucket\xffle\xff+Inf", - t: 1234568, + t: int64p(1234568), v: 175, lset: labels.FromStrings( "__name__", "test_histogram_with_native_histogram_exemplars2_bucket", @@ -2381,94 +2534,11 @@ func TestProtobufParse(t *testing.T) { for _, scenario := range scenarios { t.Run(scenario.name, func(t *testing.T) { var ( - i int - res labels.Labels p = scenario.parser exp = scenario.expected ) - - for { - et, err := p.Next() - if errors.Is(err, io.EOF) { - break - } - require.NoError(t, err) - - switch et { - case EntrySeries: - m, ts, v := p.Series() - - var e exemplar.Exemplar - p.Metric(&res) - eFound := p.Exemplar(&e) - ct := p.CreatedTimestamp() - require.Equal(t, exp[i].m, string(m), "i: %d", i) - if ts != nil { - require.Equal(t, exp[i].t, *ts, "i: %d", i) - } else { - require.Equal(t, int64(0), exp[i].t, "i: %d", i) - } - require.Equal(t, exp[i].v, v, "i: %d", i) - testutil.RequireEqual(t, exp[i].lset, res, "i: %d", i) - if len(exp[i].e) == 0 { - require.False(t, eFound, "i: %d", i) - } else { - require.True(t, eFound, "i: %d", i) - testutil.RequireEqual(t, exp[i].e[0], e, "i: %d", i) - require.False(t, p.Exemplar(&e), "too many exemplars returned, i: %d", i) - } - if exp[i].ct != 0 { - require.NotNilf(t, ct, "i: %d", i) - require.Equal(t, exp[i].ct, *ct, "i: %d", i) - } else { - require.Nilf(t, ct, "i: %d", i) - } - - case EntryHistogram: - m, ts, shs, fhs := p.Histogram() - p.Metric(&res) - require.Equal(t, exp[i].m, string(m), "i: %d", i) - if ts != nil { - require.Equal(t, exp[i].t, *ts, "i: %d", i) - } else { - require.Equal(t, int64(0), exp[i].t, "i: %d", i) - } - testutil.RequireEqual(t, exp[i].lset, res, "i: %d", i) - require.Equal(t, exp[i].m, string(m), "i: %d", i) - if shs != nil { - require.Equal(t, exp[i].shs, shs, "i: %d", i) - } else { - require.Equal(t, exp[i].fhs, fhs, "i: %d", i) - } - j := 0 - for e := (exemplar.Exemplar{}); p.Exemplar(&e); j++ { - testutil.RequireEqual(t, exp[i].e[j], e, "i: %d", i) - e = exemplar.Exemplar{} - } - require.Len(t, exp[i].e, j, "not enough exemplars found, i: %d", i) - - case EntryType: - m, typ := p.Type() - require.Equal(t, exp[i].m, string(m), "i: %d", i) - require.Equal(t, exp[i].typ, typ, "i: %d", i) - - case EntryHelp: - m, h := p.Help() - require.Equal(t, exp[i].m, string(m), "i: %d", i) - require.Equal(t, exp[i].help, string(h), "i: %d", i) - - case EntryUnit: - m, u := p.Unit() - require.Equal(t, exp[i].m, string(m), "i: %d", i) - require.Equal(t, exp[i].unit, string(u), "i: %d", i) - - case EntryComment: - require.Equal(t, exp[i].comment, string(p.Comment()), "i: %d", i) - } - - i++ - } - require.Len(t, exp, i) + got := testParse(t, p) + requireEntries(t, exp, got) }) } } diff --git a/model/textparse/testdata/omhistogramdata.txt b/model/textparse/testdata/omhistogramdata.txt new file mode 100644 index 
0000000000..1876168355 --- /dev/null +++ b/model/textparse/testdata/omhistogramdata.txt @@ -0,0 +1,45 @@ +# HELP golang_manual_histogram_seconds This is a histogram with manually selected parameters +# TYPE golang_manual_histogram_seconds histogram +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.005"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.01"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.025"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.05"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.1"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.25"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="0.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="1.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="2.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="10.0"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5001",le="+Inf"} 1 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5001"} 10.0 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5001"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.005"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.01"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.025"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.05"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.1"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.25"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="1.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="2.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="10.0"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="+Inf"} 1 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5002"} 10.0 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5002"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.005"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.01"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.025"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.05"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.1"} 0 
+golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.25"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="1.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="2.5"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="10.0"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="+Inf"} 1 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5003"} 10.0 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5003"} 1 +# EOF \ No newline at end of file diff --git a/model/textparse/testdata/omtestdata.txt b/model/textparse/testdata/omtestdata.txt new file mode 100644 index 0000000000..0f5f78b8b9 --- /dev/null +++ b/model/textparse/testdata/omtestdata.txt @@ -0,0 +1,64 @@ +# HELP go_build_info Build information about the main Go module. +# TYPE go_build_info gauge +go_build_info{checksum="",path="",version=""} 1.0 +# HELP promhttp_metric_handler_errors Total number of internal errors encountered by the promhttp metric handler. +# TYPE promhttp_metric_handler_errors counter +promhttp_metric_handler_errors_total{cause="encoding"} 0.0 +promhttp_metric_handler_errors_created{cause="encoding"} 1.726839813016397e+09 +promhttp_metric_handler_errors_total{cause="gathering"} 0.0 +promhttp_metric_handler_errors_created{cause="gathering"} 1.726839813016395e+09 +# HELP rpc_durations_histogram_seconds RPC latency distributions. 
+# TYPE rpc_durations_histogram_seconds histogram +rpc_durations_histogram_seconds_bucket{le="-0.00099"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.00089"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0007899999999999999"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0006899999999999999"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0005899999999999998"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0004899999999999998"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0003899999999999998"} 0 +rpc_durations_histogram_seconds_bucket{le="-0.0002899999999999998"} 3 # {dummyID="17783"} -0.0003825067330956884 1.7268398142239082e+09 +rpc_durations_histogram_seconds_bucket{le="-0.0001899999999999998"} 5 # {dummyID="84741"} -0.00020178290006788965 1.726839814829977e+09 +rpc_durations_histogram_seconds_bucket{le="-8.999999999999979e-05"} 5 +rpc_durations_histogram_seconds_bucket{le="1.0000000000000216e-05"} 8 # {dummyID="19206"} -4.6156147425468016e-05 1.7268398151337721e+09 +rpc_durations_histogram_seconds_bucket{le="0.00011000000000000022"} 9 # {dummyID="3974"} 9.528436760156754e-05 1.726839814526797e+09 +rpc_durations_histogram_seconds_bucket{le="0.00021000000000000023"} 11 # {dummyID="29640"} 0.00017459624183458996 1.7268398139220061e+09 +rpc_durations_histogram_seconds_bucket{le="0.0003100000000000002"} 15 # {dummyID="9818"} 0.0002791130914009552 1.7268398149821382e+09 +rpc_durations_histogram_seconds_bucket{le="0.0004100000000000002"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0005100000000000003"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0006100000000000003"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0007100000000000003"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0008100000000000004"} 15 +rpc_durations_histogram_seconds_bucket{le="0.0009100000000000004"} 15 +rpc_durations_histogram_seconds_bucket{le="+Inf"} 15 +rpc_durations_histogram_seconds_sum -8.452185437166741e-05 +rpc_durations_histogram_seconds_count 15 +rpc_durations_histogram_seconds_created 1.726839813016302e+09 +# HELP rpc_durations_seconds RPC latency distributions. +# TYPE rpc_durations_seconds summary +rpc_durations_seconds{service="exponential",quantile="0.5"} 7.689368882420941e-07 +rpc_durations_seconds{service="exponential",quantile="0.9"} 1.6537614174305048e-06 +rpc_durations_seconds{service="exponential",quantile="0.99"} 2.0965499063061924e-06 +rpc_durations_seconds_sum{service="exponential"} 2.0318666372575776e-05 +rpc_durations_seconds_count{service="exponential"} 22 +rpc_durations_seconds_created{service="exponential"} 1.7268398130168908e+09 +rpc_durations_seconds{service="normal",quantile="0.5"} -5.066758674917046e-06 +rpc_durations_seconds{service="normal",quantile="0.9"} 0.0002935723711788224 +rpc_durations_seconds{service="normal",quantile="0.99"} 0.0003023094636293776 +rpc_durations_seconds_sum{service="normal"} -8.452185437166741e-05 +rpc_durations_seconds_count{service="normal"} 15 +rpc_durations_seconds_created{service="normal"} 1.726839813016714e+09 +rpc_durations_seconds{service="uniform",quantile="0.5"} 9.005014931474918e-05 +rpc_durations_seconds{service="uniform",quantile="0.9"} 0.00017801230208182325 +rpc_durations_seconds{service="uniform",quantile="0.99"} 0.00018641524538180192 +rpc_durations_seconds_sum{service="uniform"} 0.0011666095700533677 +rpc_durations_seconds_count{service="uniform"} 11 +rpc_durations_seconds_created{service="uniform"} 1.72683981301684e+09 +# HELP rpc_requests Total number of RPC requests received. 
+# TYPE rpc_requests counter +rpc_requests_total{service="exponential"} 22.0 +rpc_requests_created{service="exponential"} 1.726839813016893e+09 +rpc_requests_total{service="normal"} 15.0 +rpc_requests_created{service="normal"} 1.726839813016717e+09 +rpc_requests_total{service="uniform"} 11.0 +rpc_requests_created{service="uniform"} 1.7268398130168471e+09 +# EOF diff --git a/model/textparse/promtestdata.nometa.txt b/model/textparse/testdata/promtestdata.nometa.txt similarity index 100% rename from model/textparse/promtestdata.nometa.txt rename to model/textparse/testdata/promtestdata.nometa.txt diff --git a/model/textparse/promtestdata.txt b/model/textparse/testdata/promtestdata.txt similarity index 100% rename from model/textparse/promtestdata.txt rename to model/textparse/testdata/promtestdata.txt diff --git a/notifier/notifier.go b/notifier/notifier.go index 218e4cb8c7..956fd4652a 100644 --- a/notifier/notifier.go +++ b/notifier/notifier.go @@ -16,25 +16,28 @@ package notifier import ( "bytes" "context" + "crypto/md5" + "encoding/hex" "encoding/json" "fmt" "io" + "log/slog" "net/http" "net/url" "path" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/go-openapi/strfmt" "github.com/prometheus/alertmanager/api/v2/models" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" - "github.com/prometheus/common/sigv4" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" + "github.com/prometheus/sigv4" "go.uber.org/atomic" + "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery/targetgroup" @@ -117,7 +120,7 @@ type Manager struct { stopRequested chan struct{} alertmanagers map[string]*alertmanagerSet - logger log.Logger + logger *slog.Logger } // Options are the configurable parameters of a Handler. @@ -157,7 +160,7 @@ func newAlertMetrics(r prometheus.Registerer, queueCap int, queueLen, alertmanag Namespace: namespace, Subsystem: subsystem, Name: "errors_total", - Help: "Total number of errors sending alert notifications.", + Help: "Total number of sent alerts affected by errors.", }, []string{alertmanagerLabel}, ), @@ -218,12 +221,12 @@ func do(ctx context.Context, client *http.Client, req *http.Request) (*http.Resp } // NewManager is the manager constructor. -func NewManager(o *Options, logger log.Logger) *Manager { +func NewManager(o *Options, logger *slog.Logger) *Manager { if o.Do == nil { o.Do = do } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } n := &Manager{ @@ -257,6 +260,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error { n.opts.RelabelConfigs = conf.AlertingConfig.AlertRelabelConfigs amSets := make(map[string]*alertmanagerSet) + // configToAlertmanagers maps each unique AlertmanagerConfig to its existing alertmanager set, + // so that known alertmanagers are kept and reused without waiting for SD updates when the config is applied.
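Concretely, the fingerprint comes from the configHash helper added to alertmanagerSet at the end of this file's diff: YAML-marshal the config and hash the bytes, so identical configs map to the same key. A self-contained sketch of that idea, where the config struct is a stand-in for *config.AlertmanagerConfig rather than Prometheus's real type:

```go
package main

import (
	"crypto/md5"
	"encoding/hex"
	"fmt"

	"gopkg.in/yaml.v2"
)

// config is a stand-in for *config.AlertmanagerConfig; any struct that
// marshals deterministically to YAML behaves the same way here.
type config struct {
	APIVersion string `yaml:"api_version"`
	Timeout    string `yaml:"timeout"`
}

// configHash mirrors the helper added below: marshal the config to YAML
// and fingerprint it, so equal configs yield equal keys.
func configHash(c config) (string, error) {
	b, err := yaml.Marshal(c)
	if err != nil {
		return "", err
	}
	h := md5.Sum(b)
	return hex.EncodeToString(h[:]), nil
}

func main() {
	h1, _ := configHash(config{APIVersion: "v2", Timeout: "10s"})
	h2, _ := configHash(config{APIVersion: "v2", Timeout: "10s"})
	fmt.Println(h1 == h2) // true: same config, same hash, existing set reused
}
```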
+ configToAlertmanagers := make(map[string]*alertmanagerSet, len(n.alertmanagers)) + for _, oldAmSet := range n.alertmanagers { + hash, err := oldAmSet.configHash() + if err != nil { + return err + } + configToAlertmanagers[hash] = oldAmSet + } for k, cfg := range conf.AlertingConfig.AlertmanagerConfigs.ToMap() { ams, err := newAlertmanagerSet(cfg, n.logger, n.metrics) @@ -264,6 +277,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error { return err } + hash, err := ams.configHash() + if err != nil { + return err + } + + if oldAmSet, ok := configToAlertmanagers[hash]; ok { + ams.ams = oldAmSet.ams + ams.droppedAms = oldAmSet.droppedAms + } + amSets[k] = ams } @@ -319,7 +342,7 @@ func (n *Manager) Run(tsets <-chan map[string][]*targetgroup.Group) { }() wg.Wait() - level.Info(n.logger).Log("msg", "Notification manager stopped") + n.logger.Info("Notification manager stopped") } // sendLoop continuously consumes the notifications queue and sends alerts to @@ -376,20 +399,20 @@ func (n *Manager) sendOneBatch() { func (n *Manager) drainQueue() { if !n.opts.DrainOnShutdown { if n.queueLen() > 0 { - level.Warn(n.logger).Log("msg", "Draining remaining notifications on shutdown is disabled, and some notifications have been dropped", "count", n.queueLen()) + n.logger.Warn("Draining remaining notifications on shutdown is disabled, and some notifications have been dropped", "count", n.queueLen()) n.metrics.dropped.Add(float64(n.queueLen())) } return } - level.Info(n.logger).Log("msg", "Draining any remaining notifications...") + n.logger.Info("Draining any remaining notifications...") for n.queueLen() > 0 { n.sendOneBatch() } - level.Info(n.logger).Log("msg", "Remaining notifications drained") + n.logger.Info("Remaining notifications drained") } func (n *Manager) reload(tgs map[string][]*targetgroup.Group) { @@ -399,7 +422,7 @@ func (n *Manager) reload(tgs map[string][]*targetgroup.Group) { for id, tgroup := range tgs { am, ok := n.alertmanagers[id] if !ok { - level.Error(n.logger).Log("msg", "couldn't sync alert manager set", "err", fmt.Sprintf("invalid id:%v", id)) + n.logger.Error("couldn't sync alert manager set", "err", fmt.Sprintf("invalid id:%v", id)) continue } am.sync(tgroup) @@ -422,7 +445,7 @@ func (n *Manager) Send(alerts ...*Alert) { if d := len(alerts) - n.opts.QueueCapacity; d > 0 { alerts = alerts[d:] - level.Warn(n.logger).Log("msg", "Alert batch larger than queue capacity, dropping alerts", "num_dropped", d) + n.logger.Warn("Alert batch larger than queue capacity, dropping alerts", "num_dropped", d) n.metrics.dropped.Add(float64(d)) } @@ -431,7 +454,7 @@ func (n *Manager) Send(alerts ...*Alert) { if d := (len(n.queue) + len(alerts)) - n.opts.QueueCapacity; d > 0 { n.queue = n.queue[d:] - level.Warn(n.logger).Log("msg", "Alert notification queue full, dropping alerts", "num_dropped", d) + n.logger.Warn("Alert notification queue full, dropping alerts", "num_dropped", d) n.metrics.dropped.Add(float64(d)) } n.queue = append(n.queue, alerts...) @@ -519,10 +542,10 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { begin := time.Now() - // v1Payload and v2Payload represent 'alerts' marshaled for Alertmanager API - // v1 or v2. Marshaling happens below. Reference here is for caching between + // cachedPayload represents 'alerts' marshaled for Alertmanager API v2. + // Marshaling happens below. Reference here is for caching between // for loop iterations. 
- var v1Payload, v2Payload []byte + var cachedPayload []byte n.mtx.RLock() amSets := n.alertmanagers @@ -553,42 +576,29 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { continue } // We can't use the cached values from previous iteration. - v1Payload, v2Payload = nil, nil + cachedPayload = nil } switch ams.cfg.APIVersion { - case config.AlertmanagerAPIVersionV1: - { - if v1Payload == nil { - v1Payload, err = json.Marshal(amAlerts) - if err != nil { - level.Error(n.logger).Log("msg", "Encoding alerts for Alertmanager API v1 failed", "err", err) - ams.mtx.RUnlock() - return false - } - } - - payload = v1Payload - } case config.AlertmanagerAPIVersionV2: { - if v2Payload == nil { + if cachedPayload == nil { openAPIAlerts := alertsToOpenAPIAlerts(amAlerts) - v2Payload, err = json.Marshal(openAPIAlerts) + cachedPayload, err = json.Marshal(openAPIAlerts) if err != nil { - level.Error(n.logger).Log("msg", "Encoding alerts for Alertmanager API v2 failed", "err", err) + n.logger.Error("Encoding alerts for Alertmanager API v2 failed", "err", err) ams.mtx.RUnlock() return false } } - payload = v2Payload + payload = cachedPayload } default: { - level.Error(n.logger).Log( - "msg", fmt.Sprintf("Invalid Alertmanager API version '%v', expected one of '%v'", ams.cfg.APIVersion, config.SupportedAlertmanagerAPIVersions), + n.logger.Error( + fmt.Sprintf("Invalid Alertmanager API version '%v', expected one of '%v'", ams.cfg.APIVersion, config.SupportedAlertmanagerAPIVersions), "err", err, ) ams.mtx.RUnlock() @@ -598,7 +608,7 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { if len(ams.cfg.AlertRelabelConfigs) > 0 { // We can't use the cached values on the next iteration. - v1Payload, v2Payload = nil, nil + cachedPayload = nil } for _, am := range ams.ams { @@ -609,13 +619,13 @@ func (n *Manager) sendAll(alerts ...*Alert) bool { go func(ctx context.Context, client *http.Client, url string, payload []byte, count int) { if err := n.sendOne(ctx, client, url, payload); err != nil { - level.Error(n.logger).Log("alertmanager", url, "count", count, "msg", "Error sending alert", "err", err) - n.metrics.errors.WithLabelValues(url).Inc() + n.logger.Error("Error sending alerts", "alertmanager", url, "count", count, "err", err) + n.metrics.errors.WithLabelValues(url).Add(float64(count)) } else { numSuccess.Inc() } n.metrics.latency.WithLabelValues(url).Observe(time.Since(begin).Seconds()) - n.metrics.sent.WithLabelValues(url).Add(float64(len(amAlerts))) + n.metrics.sent.WithLabelValues(url).Add(float64(count)) wg.Done() }(ctx, ams.client, am.url().String(), payload, len(amAlerts)) @@ -689,7 +699,7 @@ func (n *Manager) sendOne(ctx context.Context, c *http.Client, url string, b []b // // Stop is safe to call multiple times. 
func (n *Manager) Stop() { - level.Info(n.logger).Log("msg", "Stopping notification manager...") + n.logger.Info("Stopping notification manager...") n.stopOnce.Do(func() { close(n.stopRequested) @@ -724,10 +734,10 @@ type alertmanagerSet struct { mtx sync.RWMutex ams []alertmanager droppedAms []alertmanager - logger log.Logger + logger *slog.Logger } -func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger log.Logger, metrics *alertMetrics) (*alertmanagerSet, error) { +func newAlertmanagerSet(cfg *config.AlertmanagerConfig, logger *slog.Logger, metrics *alertMetrics) (*alertmanagerSet, error) { client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "alertmanager") if err != nil { return nil, err @@ -761,7 +771,7 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { for _, tg := range tgs { ams, droppedAms, err := AlertmanagerFromGroup(tg, s.cfg) if err != nil { - level.Error(s.logger).Log("msg", "Creating discovered Alertmanagers failed", "err", err) + s.logger.Error("Creating discovered Alertmanagers failed", "err", err) continue } allAms = append(allAms, ams...) @@ -770,6 +780,7 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { s.mtx.Lock() defer s.mtx.Unlock() + previousAms := s.ams // Set new Alertmanagers and deduplicate them along their unique URL. s.ams = []alertmanager{} s.droppedAms = []alertmanager{} @@ -789,6 +800,26 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) { seen[us] = struct{}{} s.ams = append(s.ams, am) } + // Now remove counters for any removed Alertmanagers. + for _, am := range previousAms { + us := am.url().String() + if _, ok := seen[us]; ok { + continue + } + s.metrics.latency.DeleteLabelValues(us) + s.metrics.sent.DeleteLabelValues(us) + s.metrics.errors.DeleteLabelValues(us) + seen[us] = struct{}{} + } +} + +func (s *alertmanagerSet) configHash() (string, error) { + b, err := yaml.Marshal(s.cfg) + if err != nil { + return "", err + } + hash := md5.Sum(b) + return hex.EncodeToString(hash[:]), nil } func postPath(pre string, v config.AlertmanagerAPIVersion) string { diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index cf922a537c..97b0274f29 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -26,11 +26,11 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/alertmanager/api/v2/models" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/atomic" "gopkg.in/yaml.v2" @@ -38,6 +38,7 @@ import ( "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/config" + _ "github.com/prometheus/prometheus/discovery/file" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" @@ -49,27 +50,27 @@ func TestPostPath(t *testing.T) { }{ { in: "", - out: "/api/v1/alerts", + out: "/api/v2/alerts", }, { in: "/", - out: "/api/v1/alerts", + out: "/api/v2/alerts", }, { in: "/prefix", - out: "/prefix/api/v1/alerts", + out: "/prefix/api/v2/alerts", }, { in: "/prefix//", - out: "/prefix/api/v1/alerts", + out: "/prefix/api/v2/alerts", }, { in: "prefix//", - out: "/prefix/api/v1/alerts", + out: "/prefix/api/v2/alerts", }, } for _, c := range cases { - require.Equal(t, c.out, postPath(c.in, config.AlertmanagerAPIVersionV1)) + require.Equal(t, c.out, postPath(c.in, 
config.AlertmanagerAPIVersionV2)) } } @@ -743,7 +744,7 @@ func TestHangingNotifier(t *testing.T) { // Initialize the discovery manager // This is relevant as the updates aren't sent continually in real life, but only each updatert. - // The old implementation of TestHangingNotifier didn't take that into acount. + // The old implementation of TestHangingNotifier didn't take that into account. ctx, cancel := context.WithCancel(context.Background()) defer cancel() reg := prometheus.NewRegistry() @@ -751,7 +752,7 @@ func TestHangingNotifier(t *testing.T) { require.NoError(t, err) sdManager := discovery.NewManager( ctx, - log.NewNopLogger(), + promslog.NewNopLogger(), reg, sdMetrics, discovery.Name("sd-manager"), @@ -1017,3 +1018,107 @@ func TestStop_DrainingEnabled(t *testing.T) { require.Equal(t, int64(2), alertsReceived.Load()) } + +func TestApplyConfig(t *testing.T) { + targetURL := "alertmanager:9093" + targetGroup := &targetgroup.Group{ + Targets: []model.LabelSet{ + { + "__address__": model.LabelValue(targetURL), + }, + }, + } + alertmanagerURL := fmt.Sprintf("http://%s/api/v2/alerts", targetURL) + + n := NewManager(&Options{}, nil) + cfg := &config.Config{} + s := ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json +` + // 1. Ensure known alertmanagers are not dropped during ApplyConfig. + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 1) + + // First, apply the config and reload. + require.NoError(t, n.ApplyConfig(cfg)) + tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup}} + n.reload(tgs) + require.Len(t, n.Alertmanagers(), 1) + require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String()) + + // Reapply the config. + require.NoError(t, n.ApplyConfig(cfg)) + // Ensure the known alertmanagers are not dropped. + require.Len(t, n.Alertmanagers(), 1) + require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String()) + + // 2. Ensure known alertmanagers are not dropped during ApplyConfig even when + // the config order changes. + s = ` +alerting: + alertmanagers: + - static_configs: + - file_sd_configs: + - files: + - foo.json +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2) + + require.NoError(t, n.ApplyConfig(cfg)) + require.Len(t, n.Alertmanagers(), 1) + // Ensure no unnecessary alertmanagers are injected. + require.Empty(t, n.alertmanagers["config-0"].ams) + // Ensure the config order is taken into account. + ams := n.alertmanagers["config-1"].ams + require.Len(t, ams, 1) + require.Equal(t, alertmanagerURL, ams[0].url().String()) + + // 3. Ensure known alertmanagers are reused for new config with identical AlertmanagerConfig. + s = ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json + - file_sd_configs: + - files: + - foo.json +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2) + + require.NoError(t, n.ApplyConfig(cfg)) + require.Len(t, n.Alertmanagers(), 2) + for cfgIdx := range 2 { + ams := n.alertmanagers[fmt.Sprintf("config-%d", cfgIdx)].ams + require.Len(t, ams, 1) + require.Equal(t, alertmanagerURL, ams[0].url().String()) + } + + // 4. Ensure known alertmanagers are reused only for identical AlertmanagerConfig. 
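+	// The two configurations below hash differently (path_prefix vs. relabel_configs),
+	// so nothing from the previous configuration is carried over.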
+ s = ` +alerting: + alertmanagers: + - file_sd_configs: + - files: + - foo.json + path_prefix: /bar + - file_sd_configs: + - files: + - foo.json + relabel_configs: + - source_labels: ['__address__'] + regex: 'doesntmatter:1234' + action: drop +` + require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg)) + require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2) + + require.NoError(t, n.ApplyConfig(cfg)) + require.Empty(t, n.Alertmanagers()) +} diff --git a/promql/bench_test.go b/promql/bench_test.go index 74e85b0548..943baceecb 100644 --- a/promql/bench_test.go +++ b/promql/bench_test.go @@ -21,6 +21,8 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" @@ -117,7 +119,7 @@ func rangeQueryCases() []benchCase { }, // Holt-Winters and long ranges. { - expr: "holt_winters(a_X[1d], 0.3, 0.3)", + expr: "double_exponential_smoothing(a_X[1d], 0.3, 0.3)", }, { expr: "changes(a_X[1d])", @@ -380,6 +382,126 @@ func BenchmarkNativeHistograms(b *testing.B) { } } +func BenchmarkInfoFunction(b *testing.B) { + // Initialize test storage and generate test series data. + testStorage := teststorage.New(b) + defer testStorage.Close() + + start := time.Unix(0, 0) + end := start.Add(2 * time.Hour) + step := 30 * time.Second + + // Generate time series data for the benchmark. + generateInfoFunctionTestSeries(b, testStorage, 100, 2000, 3600) + + // Define test cases with queries to benchmark. + cases := []struct { + name string + query string + }{ + { + name: "Joining info metrics with other metrics with group_left example 1", + query: "rate(http_server_request_duration_seconds_count[2m]) * on (job, instance) group_left (k8s_cluster_name) target_info{k8s_cluster_name=\"us-east\"}", + }, + { + name: "Joining info metrics with other metrics with info() example 1", + query: `info(rate(http_server_request_duration_seconds_count[2m]), {k8s_cluster_name="us-east"})`, + }, + { + name: "Joining info metrics with other metrics with group_left example 2", + query: "sum by (k8s_cluster_name, http_status_code) (rate(http_server_request_duration_seconds_count[2m]) * on (job, instance) group_left (k8s_cluster_name) target_info)", + }, + { + name: "Joining info metrics with other metrics with info() example 2", + query: `sum by (k8s_cluster_name, http_status_code) (info(rate(http_server_request_duration_seconds_count[2m]), {k8s_cluster_name=~".+"}))`, + }, + } + + // Benchmark each query type. + for _, tc := range cases { + // Initialize the PromQL engine once for all benchmarks. + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 50000000, + Timeout: 100 * time.Second, + EnableAtModifier: true, + EnableNegativeOffset: true, + } + engine := promql.NewEngine(opts) + b.Run(tc.name, func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StopTimer() // Stop the timer to exclude setup time. + qry, err := engine.NewRangeQuery(context.Background(), testStorage, nil, tc.query, start, end, step) + require.NoError(b, err) + b.StartTimer() + result := qry.Exec(context.Background()) + require.NoError(b, result.Err) + } + }) + } + + // Report allocations. + b.ReportAllocs() +} + +// Helper function to generate target_info and http_server_request_duration_seconds_count series for info function benchmarking. 
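+// Half of the target_info series carry k8s_cluster_name="us-east" and the rest
+// "eu-south"; counter samples grow by one per interval so rate() yields non-zero results.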
+func generateInfoFunctionTestSeries(tb testing.TB, stor *teststorage.TestStorage, infoSeriesNum, interval, numIntervals int) {
+	tb.Helper()
+
+	ctx := context.Background()
+	statusCodes := []string{"200", "400", "500"}
+
+	// Generate target_info metrics with instance and job labels, and k8s_cluster_name label.
+	// Generate http_server_request_duration_seconds_count metrics with instance and job labels, and http_status_code label.
+	// By convention, the target_info metric is a gauge.
+	metrics := make([]labels.Labels, 0, infoSeriesNum+len(statusCodes))
+	for i := 0; i < infoSeriesNum; i++ {
+		clusterName := "us-east"
+		if i >= infoSeriesNum/2 {
+			clusterName = "eu-south"
+		}
+		metrics = append(metrics, labels.FromStrings(
+			"__name__", "target_info",
+			"instance", "instance"+strconv.Itoa(i),
+			"job", "job"+strconv.Itoa(i),
+			"k8s_cluster_name", clusterName,
+		))
+	}
+
+	for _, statusCode := range statusCodes {
+		metrics = append(metrics, labels.FromStrings(
+			"__name__", "http_server_request_duration_seconds_count",
+			"instance", "instance0",
+			"job", "job0",
+			"http_status_code", statusCode,
+		))
+	}
+
+	// Append the generated metrics and samples to the storage.
+	refs := make([]storage.SeriesRef, len(metrics))
+
+	for i := 0; i < numIntervals; i++ {
+		a := stor.Appender(context.Background())
+		ts := int64(i * interval)
+		for j, metric := range metrics[:infoSeriesNum] {
+			ref, _ := a.Append(refs[j], metric, ts, 1)
+			refs[j] = ref
+		}
+
+		for j, metric := range metrics[infoSeriesNum:] {
+			ref, _ := a.Append(refs[j+infoSeriesNum], metric, ts, float64(i))
+			refs[j+infoSeriesNum] = ref
+		}
+
+		require.NoError(tb, a.Commit())
+	}
+
+	stor.DB.ForceHeadMMap() // Ensure we have at most one head chunk for every series.
+	stor.DB.Compact(ctx)
+}
+
 func generateNativeHistogramSeries(app storage.Appender, numSeries int) error {
 	commonLabels := []string{labels.MetricName, "native_histogram_series", "foo", "bar"}
 	series := make([][]*histogram.Histogram, numSeries)
diff --git a/promql/engine.go b/promql/engine.go
index 3edb30a973..13f8b06972 100644
--- a/promql/engine.go
+++ b/promql/engine.go
@@ -20,6 +20,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"log/slog"
 	"math"
 	"reflect"
 	"runtime"
@@ -30,10 +31,9 @@ import (
 	"sync"
 	"time"

-	"github.com/go-kit/log"
-	"github.com/go-kit/log/level"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/common/model"
+	"github.com/prometheus/common/promslog"
 	"go.opentelemetry.io/otel"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
@@ -43,6 +43,7 @@ import (
 	"github.com/prometheus/prometheus/model/timestamp"
 	"github.com/prometheus/prometheus/model/value"
 	"github.com/prometheus/prometheus/promql/parser"
+	"github.com/prometheus/prometheus/promql/parser/posrange"
 	"github.com/prometheus/prometheus/storage"
 	"github.com/prometheus/prometheus/tsdb/chunkenc"
 	"github.com/prometheus/prometheus/util/annotations"
@@ -125,7 +126,8 @@ type QueryEngine interface {
 // QueryLogger is an interface that can be used to log all the queries logged
 // by the engine.
 type QueryLogger interface {
-	Log(...interface{}) error
+	Log(context.Context, slog.Level, string, ...any)
+	With(args ...any)
 	Close() error
 }

@@ -288,7 +290,7 @@ type QueryTracker interface {

 // EngineOpts contains configuration options used when creating a new Engine.
type EngineOpts struct { - Logger log.Logger + Logger *slog.Logger Reg prometheus.Registerer MaxSamples int Timeout time.Duration @@ -326,7 +328,7 @@ type EngineOpts struct { // Engine handles the lifetime of queries from beginning to end. // It is connected to a querier. type Engine struct { - logger log.Logger + logger *slog.Logger metrics *engineMetrics timeout time.Duration maxSamplesPerQuery int @@ -344,7 +346,7 @@ type Engine struct { // NewEngine returns a new engine. func NewEngine(opts EngineOpts) *Engine { if opts.Logger == nil { - opts.Logger = log.NewNopLogger() + opts.Logger = promslog.NewNopLogger() } queryResultSummary := prometheus.NewSummaryVec(prometheus.SummaryOpts{ @@ -403,7 +405,7 @@ func NewEngine(opts EngineOpts) *Engine { if opts.LookbackDelta == 0 { opts.LookbackDelta = defaultLookbackDelta if l := opts.Logger; l != nil { - level.Debug(l).Log("msg", "Lookback delta is zero, setting to default value", "value", defaultLookbackDelta) + l.Debug("Lookback delta is zero, setting to default value", "value", defaultLookbackDelta) } } @@ -435,6 +437,10 @@ func NewEngine(opts EngineOpts) *Engine { // Close closes ng. func (ng *Engine) Close() error { + if ng == nil { + return nil + } + if ng.activeQueryTracker != nil { return ng.activeQueryTracker.Close() } @@ -451,7 +457,7 @@ func (ng *Engine) SetQueryLogger(l QueryLogger) { // not make reload fail; only log a warning. err := ng.queryLogger.Close() if err != nil { - level.Warn(ng.logger).Log("msg", "Error while closing the previous query log file", "err", err) + ng.logger.Warn("Error while closing the previous query log file", "err", err) } } @@ -641,10 +647,10 @@ func (ng *Engine) exec(ctx context.Context, q *query) (v parser.Value, ws annota f = append(f, k, v) } } - if err := l.Log(f...); err != nil { - ng.metrics.queryLogFailures.Inc() - level.Error(ng.logger).Log("msg", "can't log query", "err", err) - } + l.Log(context.Background(), slog.LevelInfo, "promql query logged", f...) + // TODO: @tjhop -- do we still need this metric/error log if logger doesn't return errors? 
+			// ng.metrics.queryLogFailures.Inc()
+			// ng.logger.Error("can't log query", "err", err)
 		}
 		ng.queryLoggerLock.RUnlock()
 	}()
@@ -724,17 +730,17 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval
 			startTimestamp:           start,
 			endTimestamp:             start,
 			interval:                 1,
-			ctx:                      ctxInnerEval,
 			maxSamples:               ng.maxSamplesPerQuery,
 			logger:                   ng.logger,
 			lookbackDelta:            s.LookbackDelta,
 			samplesStats:             query.sampleStats,
 			noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn,
 			enableDelayedNameRemoval: ng.enableDelayedNameRemoval,
+			querier:                  querier,
 		}
 		query.sampleStats.InitStepTracking(start, start, 1)

-		val, warnings, err := evaluator.Eval(s.Expr)
+		val, warnings, err := evaluator.Eval(ctxInnerEval, s.Expr)

 		evalSpanTimer.Finish()

@@ -783,16 +789,16 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval
 		startTimestamp:           timeMilliseconds(s.Start),
 		endTimestamp:             timeMilliseconds(s.End),
 		interval:                 durationMilliseconds(s.Interval),
-		ctx:                      ctxInnerEval,
 		maxSamples:               ng.maxSamplesPerQuery,
 		logger:                   ng.logger,
 		lookbackDelta:            s.LookbackDelta,
 		samplesStats:             query.sampleStats,
 		noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn,
 		enableDelayedNameRemoval: ng.enableDelayedNameRemoval,
+		querier:                  querier,
 	}
 	query.sampleStats.InitStepTracking(evaluator.startTimestamp, evaluator.endTimestamp, evaluator.interval)

-	val, warnings, err := evaluator.Eval(s.Expr)
+	val, warnings, err := evaluator.Eval(ctxInnerEval, s.Expr)

 	evalSpanTimer.Finish()

@@ -907,11 +913,17 @@ func getTimeRangesForSelector(s *parser.EvalStmt, n *parser.VectorSelector, path
 	}

 	if evalRange == 0 {
-		start -= durationMilliseconds(s.LookbackDelta)
+		// Reduce the start by one fewer ms than the lookback delta
+		// because we want to exclude samples that are precisely the
+		// lookback delta before the eval time.
+		start -= durationMilliseconds(s.LookbackDelta) - 1
 	} else {
-		// For all matrix queries we want to ensure that we have (end-start) + range selected
-		// this way we have `range` data before the start time
-		start -= durationMilliseconds(evalRange)
+		// For all matrix queries we want to ensure that we have
+		// (end-start) + range selected; this way we have `range` data
+		// before the start time. We subtract one from the range to
+		// exclude samples positioned directly at the lower boundary of
+		// the range.
+		start -= durationMilliseconds(evalRange) - 1
 	}

 	offsetMilliseconds := durationMilliseconds(n.OriginalOffset)
@@ -996,6 +1008,8 @@ func extractGroupsFromPath(p []parser.Node) (bool, []string) {
 	return false, nil
 }

+// checkAndExpandSeriesSet expands expr's UnexpandedSeriesSet into expr's Series.
+// If the Series field is already non-nil, it's a no-op.
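+// The expansion is also recorded on the active trace span via "expand start" and
+// "expand end" events carrying the selector and the resulting series count.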
func checkAndExpandSeriesSet(ctx context.Context, expr parser.Expr) (annotations.Annotations, error) { switch e := expr.(type) { case *parser.MatrixSelector: @@ -1004,6 +1018,8 @@ func checkAndExpandSeriesSet(ctx context.Context, expr parser.Expr) (annotations if e.Series != nil { return nil, nil } + span := trace.SpanFromContext(ctx) + span.AddEvent("expand start", trace.WithAttributes(attribute.String("selector", e.String()))) series, ws, err := expandSeriesSet(ctx, e.UnexpandedSeriesSet) if e.SkipHistogramBuckets { for i := range series { @@ -1011,6 +1027,7 @@ func checkAndExpandSeriesSet(ctx context.Context, expr parser.Expr) (annotations } } e.Series = series + span.AddEvent("expand end", trace.WithAttributes(attribute.Int("num_series", len(series)))) return ws, err } return nil, nil @@ -1040,19 +1057,18 @@ func (e errWithWarnings) Error() string { return e.err.Error() } // querier and reports errors. On timeout or cancellation of its context it // terminates. type evaluator struct { - ctx context.Context - startTimestamp int64 // Start time in milliseconds. endTimestamp int64 // End time in milliseconds. interval int64 // Interval in milliseconds. maxSamples int currentSamples int - logger log.Logger + logger *slog.Logger lookbackDelta time.Duration samplesStats *stats.QuerySamples noStepSubqueryIntervalFn func(rangeMillis int64) int64 enableDelayedNameRemoval bool + querier storage.Querier } // errorf causes a panic with the input formatted into an error. @@ -1078,7 +1094,7 @@ func (ev *evaluator) recover(expr parser.Expr, ws *annotations.Annotations, errp buf := make([]byte, 64<<10) buf = buf[:runtime.Stack(buf, false)] - level.Error(ev.logger).Log("msg", "runtime panic during query evaluation", "expr", expr.String(), "err", e, "stacktrace", string(buf)) + ev.logger.Error("runtime panic during query evaluation", "expr", expr.String(), "err", e, "stacktrace", string(buf)) *errp = fmt.Errorf("unexpected error: %w", err) case errWithWarnings: *errp = err.err @@ -1090,10 +1106,10 @@ func (ev *evaluator) recover(expr parser.Expr, ws *annotations.Annotations, errp } } -func (ev *evaluator) Eval(expr parser.Expr) (v parser.Value, ws annotations.Annotations, err error) { +func (ev *evaluator) Eval(ctx context.Context, expr parser.Expr) (v parser.Value, ws annotations.Annotations, err error) { defer ev.recover(expr, &ws, &err) - v, ws = ev.eval(expr) + v, ws = ev.eval(ctx, expr) if ev.enableDelayedNameRemoval { ev.cleanupMetricLabels(v) } @@ -1144,7 +1160,7 @@ func (enh *EvalNodeHelper) resetBuilder(lbls labels.Labels) { // function call results. // The prepSeries function (if provided) can be used to prepare the helper // for each series, then passed to each call funcCall. 
-func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper), funcCall func([]parser.Value, [][]EvalSeriesHelper, *EvalNodeHelper) (Vector, annotations.Annotations), exprs ...parser.Expr) (Matrix, annotations.Annotations) { +func (ev *evaluator) rangeEval(ctx context.Context, prepSeries func(labels.Labels, *EvalSeriesHelper), funcCall func([]parser.Value, [][]EvalSeriesHelper, *EvalNodeHelper) (Vector, annotations.Annotations), exprs ...parser.Expr) (Matrix, annotations.Annotations) { numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1 matrixes := make([]Matrix, len(exprs)) origMatrixes := make([]Matrix, len(exprs)) @@ -1155,7 +1171,7 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper) // Functions will take string arguments from the expressions, not the values. if e != nil && e.Type() != parser.ValueTypeString { // ev.currentSamples will be updated to the correct value within the ev.eval call. - val, ws := ev.eval(e) + val, ws := ev.eval(ctx, e) warnings.Merge(ws) matrixes[i] = val.(Matrix) @@ -1207,45 +1223,24 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper) } for ts := ev.startTimestamp; ts <= ev.endTimestamp; ts += ev.interval { - if err := contextDone(ev.ctx, "expression evaluation"); err != nil { + if err := contextDone(ctx, "expression evaluation"); err != nil { ev.error(err) } // Reset number of samples in memory after each timestamp. ev.currentSamples = tempNumSamples // Gather input vectors for this timestamp. for i := range exprs { - vectors[i] = vectors[i][:0] - + var bh []EvalSeriesHelper + var sh []EvalSeriesHelper if prepSeries != nil { - bufHelpers[i] = bufHelpers[i][:0] - } - - for si, series := range matrixes[i] { - switch { - case len(series.Floats) > 0 && series.Floats[0].T == ts: - vectors[i] = append(vectors[i], Sample{Metric: series.Metric, F: series.Floats[0].F, T: ts, DropName: series.DropName}) - // Move input vectors forward so we don't have to re-scan the same - // past points at the next step. - matrixes[i][si].Floats = series.Floats[1:] - case len(series.Histograms) > 0 && series.Histograms[0].T == ts: - vectors[i] = append(vectors[i], Sample{Metric: series.Metric, H: series.Histograms[0].H, T: ts, DropName: series.DropName}) - matrixes[i][si].Histograms = series.Histograms[1:] - default: - continue - } - if prepSeries != nil { - bufHelpers[i] = append(bufHelpers[i], seriesHelpers[i][si]) - } - // Don't add histogram size here because we only - // copy the pointer above, not the whole - // histogram. - ev.currentSamples++ - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } + bh = bufHelpers[i][:0] + sh = seriesHelpers[i] } + vectors[i], bh = ev.gatherVector(ts, matrixes[i], vectors[i], bh, sh) args[i] = vectors[i] - ev.samplesStats.UpdatePeak(ev.currentSamples) + if prepSeries != nil { + bufHelpers[i] = bh + } } // Make the function call. @@ -1317,7 +1312,7 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper) return mat, warnings } -func (ev *evaluator) rangeEvalAgg(aggExpr *parser.AggregateExpr, sortedGrouping []string, inputMatrix Matrix, param float64) (Matrix, annotations.Annotations) { +func (ev *evaluator) rangeEvalAgg(ctx context.Context, aggExpr *parser.AggregateExpr, sortedGrouping []string, inputMatrix Matrix, param float64) (Matrix, annotations.Annotations) { // Keep a copy of the original point slice so that it can be returned to the pool. 
 	origMatrix := slices.Clone(inputMatrix)
 	defer func() {
@@ -1397,7 +1392,7 @@ func (ev *evaluator) rangeEvalAgg(aggExpr *parser.AggregateExpr, sortedGrouping
 	}

 	for ts := ev.startTimestamp; ts <= ev.endTimestamp; ts += ev.interval {
-		if err := contextDone(ev.ctx, "expression evaluation"); err != nil {
+		if err := contextDone(ctx, "expression evaluation"); err != nil {
 			ev.error(err)
 		}
 		// Reset number of samples in memory after each timestamp.
@@ -1446,14 +1441,87 @@ func (ev *evaluator) rangeEvalAgg(aggExpr *parser.AggregateExpr, sortedGrouping
 	return result, warnings
 }

+// evalSeries generates a Matrix between ev.startTimestamp and ev.endTimestamp (inclusive),
+// with points spaced ev.interval apart, from the given series at the given offset.
+// For every storage.Series iterator in series, the method iterates in ev.interval-sized steps
+// from ev.startTimestamp until and including ev.endTimestamp,
+// collecting every corresponding sample (obtained via ev.vectorSelectorSingle) into a Series.
+// All of the generated Series are collected into a Matrix, which is returned.
+func (ev *evaluator) evalSeries(ctx context.Context, series []storage.Series, offset time.Duration, recordOrigT bool) Matrix {
+	numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1
+
+	mat := make(Matrix, 0, len(series))
+	var prevSS *Series
+	it := storage.NewMemoizedEmptyIterator(durationMilliseconds(ev.lookbackDelta))
+	var chkIter chunkenc.Iterator
+	for _, s := range series {
+		if err := contextDone(ctx, "expression evaluation"); err != nil {
+			ev.error(err)
+		}
+
+		chkIter = s.Iterator(chkIter)
+		it.Reset(chkIter)
+		ss := Series{
+			Metric: s.Labels(),
+		}
+
+		for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval {
+			step++
+			origT, f, h, ok := ev.vectorSelectorSingle(it, offset, ts)
+			if !ok {
+				continue
+			}
+
+			if h == nil {
+				ev.currentSamples++
+				ev.samplesStats.IncrementSamplesAtStep(step, 1)
+				if ev.currentSamples > ev.maxSamples {
+					ev.error(ErrTooManySamples(env))
+				}
+				if ss.Floats == nil {
+					ss.Floats = reuseOrGetFPointSlices(prevSS, numSteps)
+				}
+				if recordOrigT {
+					// This is an info metric, where we want to track the original sample timestamp.
+					// Info metric values should be 1 by convention, therefore we can re-use this
+					// space in the sample.
+					f = float64(origT)
+				}
+				ss.Floats = append(ss.Floats, FPoint{F: f, T: ts})
+			} else {
+				if recordOrigT {
+					ev.error(fmt.Errorf("this should be an info metric, with float samples: %s", ss.Metric))
+				}

+				point := HPoint{H: h, T: ts}
+				histSize := point.size()
+				ev.currentSamples += histSize
+				ev.samplesStats.IncrementSamplesAtStep(step, int64(histSize))
+				if ev.currentSamples > ev.maxSamples {
+					ev.error(ErrTooManySamples(env))
+				}
+				if ss.Histograms == nil {
+					ss.Histograms = reuseOrGetHPointSlices(prevSS, numSteps)
+				}
+				ss.Histograms = append(ss.Histograms, point)
+			}
+		}
+
+		if len(ss.Floats)+len(ss.Histograms) > 0 {
+			mat = append(mat, ss)
+			prevSS = &mat[len(mat)-1]
+		}
+	}
+	ev.samplesStats.UpdatePeak(ev.currentSamples)
+	return mat
+}
+
 // evalSubquery evaluates given SubqueryExpr and returns an equivalent
 // evaluated MatrixSelector in its place. Note that the Name and LabelMatchers are not set.
-func (ev *evaluator) evalSubquery(subq *parser.SubqueryExpr) (*parser.MatrixSelector, int, annotations.Annotations) { +func (ev *evaluator) evalSubquery(ctx context.Context, subq *parser.SubqueryExpr) (*parser.MatrixSelector, int, annotations.Annotations) { samplesStats := ev.samplesStats // Avoid double counting samples when running a subquery, those samples will be counted in later stage. ev.samplesStats = ev.samplesStats.NewChild() - val, ws := ev.eval(subq) - // But do incorporate the peak from the subquery + val, ws := ev.eval(ctx, subq) + // But do incorporate the peak from the subquery. samplesStats.UpdatePeakFromSubquery(ev.samplesStats) ev.samplesStats = samplesStats mat := val.(Matrix) @@ -1479,18 +1547,20 @@ func (ev *evaluator) evalSubquery(subq *parser.SubqueryExpr) (*parser.MatrixSele } // eval evaluates the given expression as the given AST expression node requires. -func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotations) { +func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value, annotations.Annotations) { // This is the top-level evaluation method. // Thus, we check for timeout/cancellation here. - if err := contextDone(ev.ctx, "expression evaluation"); err != nil { + if err := contextDone(ctx, "expression evaluation"); err != nil { ev.error(err) } numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1 // Create a new span to help investigate inner evaluation performances. - ctxWithSpan, span := otel.Tracer("").Start(ev.ctx, stats.InnerEvalTime.SpanOperation()+" eval "+reflect.TypeOf(expr).String()) - ev.ctx = ctxWithSpan + ctx, span := otel.Tracer("").Start(ctx, stats.InnerEvalTime.SpanOperation()+" eval "+reflect.TypeOf(expr).String()) defer span.End() + if ss, ok := expr.(interface{ ShortString() string }); ok { + span.SetAttributes(attribute.String("operation", ss.ShortString())) + } switch e := expr.(type) { case *parser.AggregateExpr: @@ -1511,7 +1581,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio sortedGrouping = append(sortedGrouping, valueLabel.Val) slices.Sort(sortedGrouping) } - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return ev.aggregationCountValues(e, sortedGrouping, valueLabel.Val, v[0].(Vector), enh) }, e.Expr) } @@ -1521,16 +1591,16 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio // param is the number k for topk/bottomk, or q for quantile. var fParam float64 if param != nil { - val, ws := ev.eval(param) + val, ws := ev.eval(ctx, param) warnings.Merge(ws) fParam = val.(Matrix)[0].Floats[0].F } // Now fetch the data to be aggregated. 
- val, ws := ev.eval(e.Expr) + val, ws := ev.eval(ctx, e.Expr) warnings.Merge(ws) inputMatrix := val.(Matrix) - result, ws := ev.rangeEvalAgg(e, sortedGrouping, inputMatrix, fParam) + result, ws := ev.rangeEvalAgg(ctx, e, sortedGrouping, inputMatrix, fParam) warnings.Merge(ws) ev.currentSamples = originalNumSamples + result.TotalSamples() ev.samplesStats.UpdatePeak(ev.currentSamples) @@ -1548,7 +1618,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio unwrapParenExpr(&arg) vs, ok := arg.(*parser.VectorSelector) if ok { - return ev.rangeEvalTimestampFunctionOverVectorSelector(vs, call, e) + return ev.rangeEvalTimestampFunctionOverVectorSelector(ctx, vs, call, e) } } @@ -1572,7 +1642,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio matrixArgIndex = i matrixArg = true // Replacing parser.SubqueryExpr with parser.MatrixSelector. - val, totalSamples, ws := ev.evalSubquery(subq) + val, totalSamples, ws := ev.evalSubquery(ctx, subq) e.Args[i] = val warnings.Merge(ws) defer func() { @@ -1587,14 +1657,16 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio // Special handling for functions that work on series not samples. switch e.Func.Name { case "label_replace": - return ev.evalLabelReplace(e.Args) + return ev.evalLabelReplace(ctx, e.Args) case "label_join": - return ev.evalLabelJoin(e.Args) + return ev.evalLabelJoin(ctx, e.Args) + case "info": + return ev.evalInfo(ctx, e.Args) } if !matrixArg { // Does not have a matrix argument. - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { vec, annos := call(v, e.Args, enh) return vec, warnings.Merge(annos) }, e.Args...) 
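Worth calling out for reviewers: throughout engine.go the context is no longer stashed on the evaluator struct but passed explicitly down every eval/rangeEval call and checked once per output step. A minimal, self-contained sketch of that pattern, with illustrative names (evalRange is not a Prometheus API):

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// evalRange mirrors the pattern in the diff: ctx is a parameter rather than a
// struct field, and cancellation is checked once per output step.
func evalRange(ctx context.Context, start, end, interval int64, step func(ts int64) float64) ([]float64, error) {
	var out []float64
	for ts := start; ts <= end; ts += interval {
		if err := ctx.Err(); err != nil {
			return nil, fmt.Errorf("expression evaluation: %w", err)
		}
		out = append(out, step(ts))
	}
	return out, nil
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond)
	defer cancel()

	// A long evaluation is abandoned as soon as the deadline passes.
	_, err := evalRange(ctx, 0, 1_000_000_000, 1, func(ts int64) float64 { return float64(ts) })
	fmt.Println(errors.Is(err, context.DeadlineExceeded))
}
```

Passing ctx explicitly also lets subqueries and step-invariant sub-evaluators share one cancellation scope without copying mutable state between evaluator instances.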
@@ -1606,7 +1678,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio otherInArgs := make([]Vector, len(e.Args)) for i, e := range e.Args { if i != matrixArgIndex { - val, ws := ev.eval(e) + val, ws := ev.eval(ctx, e) otherArgs[i] = val.(Matrix) otherInArgs[i] = Vector{Sample{}} inArgs[i] = otherInArgs[i] @@ -1620,7 +1692,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio sel := arg.(*parser.MatrixSelector) selVS := sel.VectorSelector.(*parser.VectorSelector) - ws, err := checkAndExpandSeriesSet(ev.ctx, sel) + ws, err := checkAndExpandSeriesSet(ctx, sel) warnings.Merge(ws) if err != nil { ev.error(errWithWarnings{fmt.Errorf("expanding series: %w", err), warnings}) @@ -1650,7 +1722,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio dropName := e.Func.Name != "last_over_time" for i, s := range selVS.Series { - if err := contextDone(ev.ctx, "expression evaluation"); err != nil { + if err := contextDone(ctx, "expression evaluation"); err != nil { ev.error(err) } ev.currentSamples -= len(floats) + totalHPointSize(histograms) @@ -1731,9 +1803,8 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio ev.samplesStats.UpdatePeak(ev.currentSamples) if e.Func.Name == "rate" || e.Func.Name == "increase" { - samples := inMatrix[0] - metricName := samples.Metric.Get(labels.MetricName) - if metricName != "" && len(samples.Floats) > 0 && + metricName := inMatrix[0].Metric.Get(labels.MetricName) + if metricName != "" && len(ss.Floats) > 0 && !strings.HasSuffix(metricName, "_total") && !strings.HasSuffix(metricName, "_sum") && !strings.HasSuffix(metricName, "_count") && @@ -1796,10 +1867,10 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio return mat, warnings case *parser.ParenExpr: - return ev.eval(e.Expr) + return ev.eval(ctx, e.Expr) case *parser.UnaryExpr: - val, ws := ev.eval(e.Expr) + val, ws := ev.eval(ctx, e.Expr) mat := val.(Matrix) if e.Op == parser.SUB { for i := range mat { @@ -1810,6 +1881,9 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio for j := range mat[i].Floats { mat[i].Floats[j].F = -mat[i].Floats[j].F } + for j := range mat[i].Histograms { + mat[i].Histograms[j].H = mat[i].Histograms[j].H.Copy().Mul(-1) + } } if !ev.enableDelayedNameRemoval && mat.ContainsSameLabelset() { ev.errorf("vector cannot contain metrics with the same labelset") @@ -1820,7 +1894,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio case *parser.BinaryExpr: switch lt, rt := e.LHS.Type(), e.RHS.Type(); { case lt == parser.ValueTypeScalar && rt == parser.ValueTypeScalar: - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { val := scalarBinop(e.Op, v[0].(Vector)[0].F, v[1].(Vector)[0].F) return append(enh.Out, Sample{F: val}), nil }, e.LHS, e.RHS) @@ -1833,114 +1907,66 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio } switch e.Op { case parser.LAND: - return ev.rangeEval(initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return 
ev.VectorAnd(v[0].(Vector), v[1].(Vector), e.VectorMatching, sh[0], sh[1], enh), nil }, e.LHS, e.RHS) case parser.LOR: - return ev.rangeEval(initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return ev.VectorOr(v[0].(Vector), v[1].(Vector), e.VectorMatching, sh[0], sh[1], enh), nil }, e.LHS, e.RHS) case parser.LUNLESS: - return ev.rangeEval(initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return ev.VectorUnless(v[0].(Vector), v[1].(Vector), e.VectorMatching, sh[0], sh[1], enh), nil }, e.LHS, e.RHS) default: - return ev.rangeEval(initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - vec, err := ev.VectorBinop(e.Op, v[0].(Vector), v[1].(Vector), e.VectorMatching, e.ReturnBool, sh[0], sh[1], enh) + return ev.rangeEval(ctx, initSignatures, func(v []parser.Value, sh [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + vec, err := ev.VectorBinop(e.Op, v[0].(Vector), v[1].(Vector), e.VectorMatching, e.ReturnBool, sh[0], sh[1], enh, e.PositionRange()) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) } case lt == parser.ValueTypeVector && rt == parser.ValueTypeScalar: - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - vec, err := ev.VectorscalarBinop(e.Op, v[0].(Vector), Scalar{V: v[1].(Vector)[0].F}, false, e.ReturnBool, enh) + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + vec, err := ev.VectorscalarBinop(e.Op, v[0].(Vector), Scalar{V: v[1].(Vector)[0].F}, false, e.ReturnBool, enh, e.PositionRange()) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) case lt == parser.ValueTypeScalar && rt == parser.ValueTypeVector: - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - vec, err := ev.VectorscalarBinop(e.Op, v[1].(Vector), Scalar{V: v[0].(Vector)[0].F}, true, e.ReturnBool, enh) + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + vec, err := ev.VectorscalarBinop(e.Op, v[1].(Vector), Scalar{V: v[0].(Vector)[0].F}, true, e.ReturnBool, enh, e.PositionRange()) return vec, handleVectorBinopError(err, e) }, e.LHS, e.RHS) } case *parser.NumberLiteral: - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + span.SetAttributes(attribute.Float64("value", e.Val)) + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return append(enh.Out, Sample{F: e.Val, Metric: labels.EmptyLabels()}), nil }) case *parser.StringLiteral: + span.SetAttributes(attribute.String("value", e.Val)) return String{V: e.Val, T: ev.startTimestamp}, nil case *parser.VectorSelector: - ws, err := checkAndExpandSeriesSet(ev.ctx, e) + ws, err := checkAndExpandSeriesSet(ctx, e) if err != nil { 
ev.error(errWithWarnings{fmt.Errorf("expanding series: %w", err), ws}) } - mat := make(Matrix, 0, len(e.Series)) - var prevSS *Series - it := storage.NewMemoizedEmptyIterator(durationMilliseconds(ev.lookbackDelta)) - var chkIter chunkenc.Iterator - for i, s := range e.Series { - if err := contextDone(ev.ctx, "expression evaluation"); err != nil { - ev.error(err) - } - chkIter = s.Iterator(chkIter) - it.Reset(chkIter) - ss := Series{ - Metric: e.Series[i].Labels(), - } - - for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval { - step++ - _, f, h, ok := ev.vectorSelectorSingle(it, e, ts) - if ok { - if h == nil { - ev.currentSamples++ - ev.samplesStats.IncrementSamplesAtStep(step, 1) - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } - if ss.Floats == nil { - ss.Floats = reuseOrGetFPointSlices(prevSS, numSteps) - } - ss.Floats = append(ss.Floats, FPoint{F: f, T: ts}) - } else { - point := HPoint{H: h, T: ts} - histSize := point.size() - ev.currentSamples += histSize - ev.samplesStats.IncrementSamplesAtStep(step, int64(histSize)) - if ev.currentSamples > ev.maxSamples { - ev.error(ErrTooManySamples(env)) - } - if ss.Histograms == nil { - ss.Histograms = reuseOrGetHPointSlices(prevSS, numSteps) - } - ss.Histograms = append(ss.Histograms, point) - } - } - } - - if len(ss.Floats)+len(ss.Histograms) > 0 { - mat = append(mat, ss) - prevSS = &mat[len(mat)-1] - } - } - ev.samplesStats.UpdatePeak(ev.currentSamples) + mat := ev.evalSeries(ctx, e.Series, e.Offset, false) return mat, ws case *parser.MatrixSelector: if ev.startTimestamp != ev.endTimestamp { panic(errors.New("cannot do range evaluation of matrix selector")) } - return ev.matrixSelector(e) + return ev.matrixSelector(ctx, e) case *parser.SubqueryExpr: offsetMillis := durationMilliseconds(e.Offset) rangeMillis := durationMilliseconds(e.Range) newEv := &evaluator{ endTimestamp: ev.endTimestamp - offsetMillis, - ctx: ev.ctx, currentSamples: ev.currentSamples, maxSamples: ev.maxSamples, logger: ev.logger, @@ -1948,6 +1974,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio samplesStats: ev.samplesStats.NewChild(), noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ev.enableDelayedNameRemoval, + querier: ev.querier, } if e.Step != 0 { @@ -1959,7 +1986,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio // Start with the first timestamp after (ev.startTimestamp - offset - range) // that is aligned with the step (multiple of 'newEv.interval'). 
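+	// Note the <= below: an aligned start that lands exactly on
+	// (ev.startTimestamp - offsetMillis - rangeMillis) is advanced one interval,
+	// keeping the subquery range left-open, consistent with the lookback changes above.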
newEv.startTimestamp = newEv.interval * ((ev.startTimestamp - offsetMillis - rangeMillis) / newEv.interval) - if newEv.startTimestamp < (ev.startTimestamp - offsetMillis - rangeMillis) { + if newEv.startTimestamp <= (ev.startTimestamp - offsetMillis - rangeMillis) { newEv.startTimestamp += newEv.interval } @@ -1970,7 +1997,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio setOffsetForAtModifier(newEv.startTimestamp, e.Expr) } - res, ws := newEv.eval(e.Expr) + res, ws := newEv.eval(ctx, e.Expr) ev.currentSamples = newEv.currentSamples ev.samplesStats.UpdatePeakFromSubquery(newEv.samplesStats) ev.samplesStats.IncrementSamplesAtTimestamp(ev.endTimestamp, newEv.samplesStats.TotalSamples) @@ -1978,14 +2005,13 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio case *parser.StepInvariantExpr: switch ce := e.Expr.(type) { case *parser.StringLiteral, *parser.NumberLiteral: - return ev.eval(ce) + return ev.eval(ctx, ce) } newEv := &evaluator{ startTimestamp: ev.startTimestamp, endTimestamp: ev.startTimestamp, // Always a single evaluation. interval: ev.interval, - ctx: ev.ctx, currentSamples: ev.currentSamples, maxSamples: ev.maxSamples, logger: ev.logger, @@ -1993,8 +2019,9 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio samplesStats: ev.samplesStats.NewChild(), noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn, enableDelayedNameRemoval: ev.enableDelayedNameRemoval, + querier: ev.querier, } - res, ws := newEv.eval(e.Expr) + res, ws := newEv.eval(ctx, e.Expr) ev.currentSamples = newEv.currentSamples ev.samplesStats.UpdatePeakFromSubquery(newEv.samplesStats) for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval { @@ -2017,7 +2044,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio } for i := range mat { if len(mat[i].Floats)+len(mat[i].Histograms) != 1 { - panic(fmt.Errorf("unexpected number of samples")) + panic(errors.New("unexpected number of samples")) } for ts := ev.startTimestamp + ev.interval; ts <= ev.endTimestamp; ts += ev.interval { if len(mat[i].Floats) > 0 { @@ -2070,8 +2097,8 @@ func reuseOrGetFPointSlices(prevSS *Series, numSteps int) (r []FPoint) { return getFPointSlice(numSteps) } -func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(vs *parser.VectorSelector, call FunctionCall, e *parser.Call) (parser.Value, annotations.Annotations) { - ws, err := checkAndExpandSeriesSet(ev.ctx, vs) +func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(ctx context.Context, vs *parser.VectorSelector, call FunctionCall, e *parser.Call) (parser.Value, annotations.Annotations) { + ws, err := checkAndExpandSeriesSet(ctx, vs) if err != nil { ev.error(errWithWarnings{fmt.Errorf("expanding series: %w", err), ws}) } @@ -2079,10 +2106,10 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(vs *parser.Vec seriesIterators := make([]*storage.MemoizedSeriesIterator, len(vs.Series)) for i, s := range vs.Series { it := s.Iterator(nil) - seriesIterators[i] = storage.NewMemoizedIterator(it, durationMilliseconds(ev.lookbackDelta)) + seriesIterators[i] = storage.NewMemoizedIterator(it, durationMilliseconds(ev.lookbackDelta)-1) } - return ev.rangeEval(nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + return ev.rangeEval(ctx, nil, func(v []parser.Value, _ [][]EvalSeriesHelper, enh *EvalNodeHelper) (Vector, annotations.Annotations) { if vs.Timestamp != nil 
{ // This is a special case for "timestamp()" when the @ modifier is used, to ensure that // we return a point for each time step in this case. @@ -2093,7 +2120,7 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(vs *parser.Vec vec := make(Vector, 0, len(vs.Series)) for i, s := range vs.Series { it := seriesIterators[i] - t, _, _, ok := ev.vectorSelectorSingle(it, vs, enh.Ts) + t, _, _, ok := ev.vectorSelectorSingle(it, vs.Offset, enh.Ts) if !ok { continue } @@ -2117,10 +2144,10 @@ func (ev *evaluator) rangeEvalTimestampFunctionOverVectorSelector(vs *parser.Vec } // vectorSelectorSingle evaluates an instant vector for the iterator of one time series. -func (ev *evaluator) vectorSelectorSingle(it *storage.MemoizedSeriesIterator, node *parser.VectorSelector, ts int64) ( +func (ev *evaluator) vectorSelectorSingle(it *storage.MemoizedSeriesIterator, offset time.Duration, ts int64) ( int64, float64, *histogram.FloatHistogram, bool, ) { - refTime := ts - durationMilliseconds(node.Offset) + refTime := ts - durationMilliseconds(offset) var t int64 var v float64 var h *histogram.FloatHistogram @@ -2141,7 +2168,7 @@ func (ev *evaluator) vectorSelectorSingle(it *storage.MemoizedSeriesIterator, no if valueType == chunkenc.ValNone || t > refTime { var ok bool t, v, h, ok = it.PeekPrev() - if !ok || t < refTime-durationMilliseconds(ev.lookbackDelta) { + if !ok || t <= refTime-durationMilliseconds(ev.lookbackDelta) { return 0, 0, nil, false } } @@ -2218,7 +2245,7 @@ func putMatrixSelectorHPointSlice(p []HPoint) { } // matrixSelector evaluates a *parser.MatrixSelector expression. -func (ev *evaluator) matrixSelector(node *parser.MatrixSelector) (Matrix, annotations.Annotations) { +func (ev *evaluator) matrixSelector(ctx context.Context, node *parser.MatrixSelector) (Matrix, annotations.Annotations) { var ( vs = node.VectorSelector.(*parser.VectorSelector) @@ -2229,7 +2256,7 @@ func (ev *evaluator) matrixSelector(node *parser.MatrixSelector) (Matrix, annota it = storage.NewBuffer(durationMilliseconds(node.Range)) ) - ws, err := checkAndExpandSeriesSet(ev.ctx, node) + ws, err := checkAndExpandSeriesSet(ctx, node) if err != nil { ev.error(errWithWarnings{fmt.Errorf("expanding series: %w", err), ws}) } @@ -2237,7 +2264,7 @@ func (ev *evaluator) matrixSelector(node *parser.MatrixSelector) (Matrix, annota var chkIter chunkenc.Iterator series := vs.Series for i, s := range series { - if err := contextDone(ev.ctx, "expression evaluation"); err != nil { + if err := contextDone(ctx, "expression evaluation"); err != nil { ev.error(err) } chkIter = s.Iterator(chkIter) @@ -2275,20 +2302,20 @@ func (ev *evaluator) matrixIterSlice( mintFloats, mintHistograms := mint, mint // First floats... - if len(floats) > 0 && floats[len(floats)-1].T >= mint { + if len(floats) > 0 && floats[len(floats)-1].T > mint { // There is an overlap between previous and current ranges, retain common // points. In most such cases: // (a) the overlap is significantly larger than the eval step; and/or // (b) the number of samples is relatively small. // so a linear search will be as fast as a binary search. var drop int - for drop = 0; floats[drop].T < mint; drop++ { + for drop = 0; floats[drop].T <= mint; drop++ { } ev.currentSamples -= drop copy(floats, floats[drop:]) floats = floats[:len(floats)-drop] // Only append points with timestamps after the last timestamp we have. 
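+	// Under the left-open convention (t > mint), the last retained timestamp
+	// itself, rather than T+1, is the correct lower bound here.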
- mintFloats = floats[len(floats)-1].T + 1 + mintFloats = floats[len(floats)-1].T } else { ev.currentSamples -= len(floats) if floats != nil { @@ -2297,14 +2324,14 @@ func (ev *evaluator) matrixIterSlice( } // ...then the same for histograms. TODO(beorn7): Use generics? - if len(histograms) > 0 && histograms[len(histograms)-1].T >= mint { + if len(histograms) > 0 && histograms[len(histograms)-1].T > mint { // There is an overlap between previous and current ranges, retain common // points. In most such cases: // (a) the overlap is significantly larger than the eval step; and/or // (b) the number of samples is relatively small. // so a linear search will be as fast as a binary search. var drop int - for drop = 0; histograms[drop].T < mint; drop++ { + for drop = 0; histograms[drop].T <= mint; drop++ { } // Rotate the buffer around the drop index so that points before mint can be // reused to store new histograms. @@ -2315,7 +2342,7 @@ func (ev *evaluator) matrixIterSlice( histograms = histograms[:len(histograms)-drop] ev.currentSamples -= totalHPointSize(histograms) // Only append points with timestamps after the last timestamp we have. - mintHistograms = histograms[len(histograms)-1].T + 1 + mintHistograms = histograms[len(histograms)-1].T } else { ev.currentSamples -= totalHPointSize(histograms) if histograms != nil { @@ -2339,7 +2366,7 @@ loop: case chunkenc.ValFloatHistogram, chunkenc.ValHistogram: t := buf.AtT() // Values in the buffer are guaranteed to be smaller than maxt. - if t >= mintHistograms { + if t > mintHistograms { if histograms == nil { histograms = getMatrixSelectorHPoints() } @@ -2365,7 +2392,7 @@ loop: continue loop } // Values in the buffer are guaranteed to be smaller than maxt. - if t >= mintFloats { + if t > mintFloats { ev.currentSamples++ if ev.currentSamples > ev.maxSamples { ev.error(ErrTooManySamples(env)) @@ -2500,7 +2527,7 @@ func (ev *evaluator) VectorUnless(lhs, rhs Vector, matching *parser.VectorMatchi } // VectorBinop evaluates a binary operation between two Vectors, excluding set operators. -func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *parser.VectorMatching, returnBool bool, lhsh, rhsh []EvalSeriesHelper, enh *EvalNodeHelper) (Vector, error) { +func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *parser.VectorMatching, returnBool bool, lhsh, rhsh []EvalSeriesHelper, enh *EvalNodeHelper, pos posrange.PositionRange) (Vector, error) { if matching.Card == parser.CardManyToMany { panic("many-to-many only allowed for set operators") } @@ -2574,12 +2601,14 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching * fl, fr = fr, fl hl, hr = hr, hl } - floatValue, histogramValue, keep, err := vectorElemBinop(op, fl, fr, hl, hr) + floatValue, histogramValue, keep, err := vectorElemBinop(op, fl, fr, hl, hr, pos) if err != nil { lastErr = err + continue } switch { case returnBool: + histogramValue = nil if keep { floatValue = 1.0 } else { @@ -2683,7 +2712,7 @@ func resultMetric(lhs, rhs labels.Labels, op parser.ItemType, matching *parser.V } // VectorscalarBinop evaluates a binary operation between a Vector and a Scalar. 
-func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scalar, swap, returnBool bool, enh *EvalNodeHelper) (Vector, error) { +func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scalar, swap, returnBool bool, enh *EvalNodeHelper, pos posrange.PositionRange) (Vector, error) { var lastErr error for _, lhsSample := range lhs { lf, rf := lhsSample.F, rhs.V @@ -2695,9 +2724,10 @@ func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scala lf, rf = rf, lf lh, rh = rh, lh } - float, histogram, keep, err := vectorElemBinop(op, lf, rf, lh, rh) + float, histogram, keep, err := vectorElemBinop(op, lf, rf, lh, rh, pos) if err != nil { lastErr = err + continue } // Catch cases where the scalar is the LHS in a scalar-vector comparison operation. // We want to always keep the vector element value as the output value, even if it's on the RHS. @@ -2762,57 +2792,85 @@ func scalarBinop(op parser.ItemType, lhs, rhs float64) float64 { } // vectorElemBinop evaluates a binary operation between two Vector elements. -func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram.FloatHistogram) (float64, *histogram.FloatHistogram, bool, error) { - switch op { - case parser.ADD: - if hlhs != nil && hrhs != nil { - res, err := hlhs.Copy().Add(hrhs) - if err != nil { - return 0, nil, false, err +func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram.FloatHistogram, pos posrange.PositionRange) (float64, *histogram.FloatHistogram, bool, error) { + opName := parser.ItemTypeStr[op] + switch { + case hlhs == nil && hrhs == nil: + { + switch op { + case parser.ADD: + return lhs + rhs, nil, true, nil + case parser.SUB: + return lhs - rhs, nil, true, nil + case parser.MUL: + return lhs * rhs, nil, true, nil + case parser.DIV: + return lhs / rhs, nil, true, nil + case parser.POW: + return math.Pow(lhs, rhs), nil, true, nil + case parser.MOD: + return math.Mod(lhs, rhs), nil, true, nil + case parser.EQLC: + return lhs, nil, lhs == rhs, nil + case parser.NEQ: + return lhs, nil, lhs != rhs, nil + case parser.GTR: + return lhs, nil, lhs > rhs, nil + case parser.LSS: + return lhs, nil, lhs < rhs, nil + case parser.GTE: + return lhs, nil, lhs >= rhs, nil + case parser.LTE: + return lhs, nil, lhs <= rhs, nil + case parser.ATAN2: + return math.Atan2(lhs, rhs), nil, true, nil } - return 0, res.Compact(0), true, nil } - return lhs + rhs, nil, true, nil - case parser.SUB: - if hlhs != nil && hrhs != nil { - res, err := hlhs.Copy().Sub(hrhs) - if err != nil { - return 0, nil, false, err + case hlhs == nil && hrhs != nil: + { + switch op { + case parser.MUL: + return 0, hrhs.Copy().Mul(lhs).Compact(0), true, nil + case parser.ADD, parser.SUB, parser.DIV, parser.POW, parser.MOD, parser.EQLC, parser.NEQ, parser.GTR, parser.LSS, parser.GTE, parser.LTE, parser.ATAN2: + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("float", opName, "histogram", pos) } - return 0, res.Compact(0), true, nil } - return lhs - rhs, nil, true, nil - case parser.MUL: - if hlhs != nil && hrhs == nil { - return 0, hlhs.Copy().Mul(rhs), true, nil + case hlhs != nil && hrhs == nil: + { + switch op { + case parser.MUL: + return 0, hlhs.Copy().Mul(rhs).Compact(0), true, nil + case parser.DIV: + return 0, hlhs.Copy().Div(rhs).Compact(0), true, nil + case parser.ADD, parser.SUB, parser.POW, parser.MOD, parser.EQLC, parser.NEQ, parser.GTR, parser.LSS, parser.GTE, parser.LTE, parser.ATAN2: + return 0, nil, false, 
annotations.NewIncompatibleTypesInBinOpInfo("histogram", opName, "float", pos) + } } - if hlhs == nil && hrhs != nil { - return 0, hrhs.Copy().Mul(lhs), true, nil + case hlhs != nil && hrhs != nil: + { + switch op { + case parser.ADD: + res, err := hlhs.Copy().Add(hrhs) + if err != nil { + return 0, nil, false, err + } + return 0, res.Compact(0), true, nil + case parser.SUB: + res, err := hlhs.Copy().Sub(hrhs) + if err != nil { + return 0, nil, false, err + } + return 0, res.Compact(0), true, nil + case parser.EQLC: + // This operation expects that both histograms are compacted. + return 0, hlhs, hlhs.Equals(hrhs), nil + case parser.NEQ: + // This operation expects that both histograms are compacted. + return 0, hlhs, !hlhs.Equals(hrhs), nil + case parser.MUL, parser.DIV, parser.POW, parser.MOD, parser.GTR, parser.LSS, parser.GTE, parser.LTE, parser.ATAN2: + return 0, nil, false, annotations.NewIncompatibleTypesInBinOpInfo("histogram", opName, "histogram", pos) + } } - return lhs * rhs, nil, true, nil - case parser.DIV: - if hlhs != nil && hrhs == nil { - return 0, hlhs.Copy().Div(rhs), true, nil - } - return lhs / rhs, nil, true, nil - case parser.POW: - return math.Pow(lhs, rhs), nil, true, nil - case parser.MOD: - return math.Mod(lhs, rhs), nil, true, nil - case parser.EQLC: - return lhs, nil, lhs == rhs, nil - case parser.NEQ: - return lhs, nil, lhs != rhs, nil - case parser.GTR: - return lhs, nil, lhs > rhs, nil - case parser.LSS: - return lhs, nil, lhs < rhs, nil - case parser.GTE: - return lhs, nil, lhs >= rhs, nil - case parser.LTE: - return lhs, nil, lhs <= rhs, nil - case parser.ATAN2: - return math.Atan2(lhs, rhs), nil, true, nil } panic(fmt.Errorf("operator %q not allowed for operations between Vectors", op)) } @@ -2870,12 +2928,38 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix group.hasHistogram = true } case parser.STDVAR, parser.STDDEV: - group.floatValue = 0 + switch { + case h != nil: + // Ignore histograms for STDVAR and STDDEV. + group.seen = false + if op == parser.STDVAR { + annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("stdvar", e.Expr.PositionRange())) + } else { + annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("stddev", e.Expr.PositionRange())) + } + case math.IsNaN(f), math.IsInf(f, 0): + group.floatValue = math.NaN() + default: + group.floatValue = 0 + } case parser.QUANTILE: + if h != nil { + group.seen = false + annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("quantile", e.Expr.PositionRange())) + } group.heap = make(vectorByValueHeap, 1) group.heap[0] = Sample{F: f} case parser.GROUP: group.floatValue = 1 + case parser.MIN, parser.MAX: + if h != nil { + group.seen = false + if op == parser.MIN { + annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("min", e.Expr.PositionRange())) + } else { + annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("max", e.Expr.PositionRange())) + } + } } continue } @@ -2974,11 +3058,19 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix // Do nothing. Required to avoid the panic in `default:` below. 
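// Editor's aside (summary comment, not part of the upstream diff): the
// recurring pattern in this switch is that aggregations with no defined
// native-histogram semantics (min, max, stddev, stdvar, quantile, and
// topk/bottomk further down) now skip histogram samples and record a
// NewHistogramIgnoredInAggregationInfo annotation. A query like
// `max(mixed_series)` over mixed float/histogram data therefore returns the
// float maximum plus an info annotation instead of silently dropping the
// histograms without a trace.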
case parser.MAX: + if h != nil { + annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("max", e.Expr.PositionRange())) + continue + } if group.floatValue < f || math.IsNaN(group.floatValue) { group.floatValue = f } case parser.MIN: + if h != nil { + annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("min", e.Expr.PositionRange())) + continue + } if group.floatValue > f || math.IsNaN(group.floatValue) { group.floatValue = f } @@ -2992,9 +3084,19 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix delta := f - group.floatMean group.floatMean += delta / group.groupCount group.floatValue += delta * (f - group.floatMean) + } else { + if op == parser.STDVAR { + annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("stdvar", e.Expr.PositionRange())) + } else { + annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("stddev", e.Expr.PositionRange())) + } } case parser.QUANTILE: + if h != nil { + annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("quantile", e.Expr.PositionRange())) + continue + } group.heap = append(group.heap, Sample{F: f}) default: @@ -3082,18 +3184,19 @@ func (ev *evaluator) aggregationK(e *parser.AggregateExpr, k int, r float64, inp seriesLoop: for si := range inputMatrix { - f, _, ok := ev.nextValues(enh.Ts, &inputMatrix[si]) + f, h, ok := ev.nextValues(enh.Ts, &inputMatrix[si]) if !ok { continue } - s = Sample{Metric: inputMatrix[si].Metric, F: f, DropName: inputMatrix[si].DropName} + s = Sample{Metric: inputMatrix[si].Metric, F: f, H: h, DropName: inputMatrix[si].DropName} group := &groups[seriesToResult[si]] // Initialize this group if it's the first time we've seen it. if !group.seen { // LIMIT_RATIO is a special case, as we may not add this very sample to the heap, // while we also don't know the final size of it. - if op == parser.LIMIT_RATIO { + switch op { + case parser.LIMIT_RATIO: *group = groupedAggregation{ seen: true, heap: make(vectorByValueHeap, 0), @@ -3101,12 +3204,34 @@ seriesLoop: if ratiosampler.AddRatioSample(r, &s) { heap.Push(&group.heap, &s) } - } else { + case parser.LIMITK: *group = groupedAggregation{ seen: true, heap: make(vectorByValueHeap, 1, k), } group.heap[0] = s + case parser.TOPK: + *group = groupedAggregation{ + seen: true, + heap: make(vectorByValueHeap, 0, k), + } + if s.H != nil { + group.seen = false + annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("topk", e.PosRange)) + } else { + heap.Push(&group.heap, &s) + } + case parser.BOTTOMK: + *group = groupedAggregation{ + seen: true, + heap: make(vectorByValueHeap, 0, k), + } + if s.H != nil { + group.seen = false + annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("bottomk", e.PosRange)) + } else { + heap.Push(&group.heap, &s) + } } continue } @@ -3115,6 +3240,9 @@ seriesLoop: case parser.TOPK: // We build a heap of up to k elements, with the smallest element at heap[0]. switch { + case s.H != nil: + // Ignore histogram sample and add info annotation. + annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("topk", e.PosRange)) case len(group.heap) < k: heap.Push(&group.heap, &s) case group.heap[0].F < s.F || (math.IsNaN(group.heap[0].F) && !math.IsNaN(s.F)): @@ -3128,6 +3256,9 @@ seriesLoop: case parser.BOTTOMK: // We build a heap of up to k elements, with the biggest element at heap[0]. switch { + case s.H != nil: + // Ignore histogram sample and add info annotation. 
+ annos.Add(annotations.NewHistogramIgnoredInAggregationInfo("bottomk", e.PosRange)) case len(group.heap) < k: heap.Push((*vectorByReverseValueHeap)(&group.heap), &s) case group.heap[0].F > s.F || (math.IsNaN(group.heap[0].F) && !math.IsNaN(s.F)): @@ -3170,10 +3301,14 @@ seriesLoop: mat = make(Matrix, 0, len(groups)) } - add := func(lbls labels.Labels, f float64, dropName bool) { + add := func(lbls labels.Labels, f float64, h *histogram.FloatHistogram, dropName bool) { // If this could be an instant query, add directly to the matrix so the result is in consistent order. if ev.endTimestamp == ev.startTimestamp { - mat = append(mat, Series{Metric: lbls, Floats: []FPoint{{T: enh.Ts, F: f}}, DropName: dropName}) + if h != nil { + mat = append(mat, Series{Metric: lbls, Histograms: []HPoint{{T: enh.Ts, H: h}}, DropName: dropName}) + } else { + mat = append(mat, Series{Metric: lbls, Floats: []FPoint{{T: enh.Ts, F: f}}, DropName: dropName}) + } } else { // Otherwise the results are added into seriess elements. hash := lbls.Hash() @@ -3181,7 +3316,7 @@ seriesLoop: if !ok { ss = Series{Metric: lbls, DropName: dropName} } - addToSeries(&ss, enh.Ts, f, nil, numSteps) + addToSeries(&ss, enh.Ts, f, h, numSteps) seriess[hash] = ss } } @@ -3196,7 +3331,7 @@ seriesLoop: sort.Sort(sort.Reverse(aggr.heap)) } for _, v := range aggr.heap { - add(v.Metric, v.F, v.DropName) + add(v.Metric, v.F, v.H, v.DropName) } case parser.BOTTOMK: @@ -3205,12 +3340,12 @@ seriesLoop: sort.Sort(sort.Reverse((*vectorByReverseValueHeap)(&aggr.heap))) } for _, v := range aggr.heap { - add(v.Metric, v.F, v.DropName) + add(v.Metric, v.F, v.H, v.DropName) } case parser.LIMITK, parser.LIMIT_RATIO: for _, v := range aggr.heap { - add(v.Metric, v.F, v.DropName) + add(v.Metric, v.F, v.H, v.DropName) } } } @@ -3230,7 +3365,11 @@ func (ev *evaluator) aggregationCountValues(e *parser.AggregateExpr, grouping [] var buf []byte for _, s := range vec { enh.resetBuilder(s.Metric) - enh.lb.Set(valueLabel, strconv.FormatFloat(s.F, 'f', -1, 64)) + if s.H == nil { + enh.lb.Set(valueLabel, strconv.FormatFloat(s.F, 'f', -1, 64)) + } else { + enh.lb.Set(valueLabel, s.H.String()) + } metric := enh.lb.Labels() // Considering the count_values() @@ -3331,6 +3470,9 @@ func handleVectorBinopError(err error, e *parser.BinaryExpr) annotations.Annotat } metricName := "" pos := e.PositionRange() + if errors.Is(err, annotations.PromQLInfo) || errors.Is(err, annotations.PromQLWarning) { + return annotations.New().Add(err) + } if errors.Is(err, histogram.ErrHistogramsIncompatibleSchema) { return annotations.New().Add(annotations.NewMixedExponentialCustomHistogramsWarning(metricName, pos)) } else if errors.Is(err, histogram.ErrHistogramsIncompatibleBounds) { @@ -3339,7 +3481,7 @@ func handleVectorBinopError(err error, e *parser.BinaryExpr) annotations.Annotat return nil } -// groupingKey builds and returns the grouping key for the given metric and +// generateGroupingKey builds and returns the grouping key for the given metric and // grouping labels. 
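// Editor's aside (hedged sketch, not part of the upstream diff): conceptually,
// the grouping key is a single hash over the values of the grouping labels
// (or, with `without`, over all the other labels), so every series that agrees
// on those labels lands in the same aggregation group. For
// `sum by (job) (metric)` the call shape is roughly:
//
//	key, buf = generateGroupingKey(s.Metric, []string{"job"}, false, buf)
//
// The buffer is returned so callers can reuse the scratch space across series
// and avoid per-series allocations.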
func generateGroupingKey(metric labels.Labels, grouping []string, without bool, buf []byte) (uint64, []byte) { if without { @@ -3564,7 +3706,7 @@ func detectHistogramStatsDecoding(expr parser.Expr) { if n, ok := node.(*parser.BinaryExpr); ok { detectHistogramStatsDecoding(n.LHS) detectHistogramStatsDecoding(n.RHS) - return fmt.Errorf("stop") + return errors.New("stop") } n, ok := (node).(*parser.VectorSelector) @@ -3586,7 +3728,7 @@ func detectHistogramStatsDecoding(expr parser.Expr) { break } } - return fmt.Errorf("stop") + return errors.New("stop") }) } @@ -3657,3 +3799,41 @@ func newHistogramStatsSeries(series storage.Series) *histogramStatsSeries { func (s histogramStatsSeries) Iterator(it chunkenc.Iterator) chunkenc.Iterator { return NewHistogramStatsIterator(s.Series.Iterator(it)) } + +// gatherVector gathers a Vector for ts from the series in input. +// output is used as a buffer. +// If bufHelpers and seriesHelpers are provided, seriesHelpers[i] is appended to bufHelpers for every input index i. +// The gathered Vector and bufHelper are returned. +func (ev *evaluator) gatherVector(ts int64, input Matrix, output Vector, bufHelpers, seriesHelpers []EvalSeriesHelper) (Vector, []EvalSeriesHelper) { + output = output[:0] + for i, series := range input { + switch { + case len(series.Floats) > 0 && series.Floats[0].T == ts: + s := series.Floats[0] + output = append(output, Sample{Metric: series.Metric, F: s.F, T: ts, DropName: series.DropName}) + // Move input vectors forward so we don't have to re-scan the same + // past points at the next step. + input[i].Floats = series.Floats[1:] + case len(series.Histograms) > 0 && series.Histograms[0].T == ts: + s := series.Histograms[0] + output = append(output, Sample{Metric: series.Metric, H: s.H, T: ts, DropName: series.DropName}) + input[i].Histograms = series.Histograms[1:] + default: + continue + } + if len(seriesHelpers) > 0 { + bufHelpers = append(bufHelpers, seriesHelpers[i]) + } + + // Don't add histogram size here because we only + // copy the pointer above, not the whole + // histogram. + ev.currentSamples++ + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + } + ev.samplesStats.UpdatePeak(ev.currentSamples) + + return output, bufHelpers +} diff --git a/promql/engine_internal_test.go b/promql/engine_internal_test.go index cb501b2fdf..0962c218c7 100644 --- a/promql/engine_internal_test.go +++ b/promql/engine_internal_test.go @@ -14,22 +14,21 @@ package promql import ( + "bytes" "errors" "testing" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/util/annotations" ) func TestRecoverEvaluatorRuntime(t *testing.T) { - var output []interface{} - logger := log.Logger(log.LoggerFunc(func(keyvals ...interface{}) error { - output = append(output, keyvals...) 
- return nil - })) + var output bytes.Buffer + logger := promslog.New(&promslog.Config{Writer: &output}) ev := &evaluator{logger: logger} expr, _ := parser.ParseExpr("sum(up)") @@ -38,7 +37,7 @@ func TestRecoverEvaluatorRuntime(t *testing.T) { defer func() { require.EqualError(t, err, "unexpected error: runtime error: index out of range [123] with length 0") - require.Contains(t, output, "sum(up)") + require.Contains(t, output.String(), "sum(up)") }() defer ev.recover(expr, nil, &err) @@ -48,7 +47,7 @@ func TestRecoverEvaluatorRuntime(t *testing.T) { } func TestRecoverEvaluatorError(t *testing.T) { - ev := &evaluator{logger: log.NewNopLogger()} + ev := &evaluator{logger: promslog.NewNopLogger()} var err error e := errors.New("custom error") @@ -62,7 +61,7 @@ func TestRecoverEvaluatorError(t *testing.T) { } func TestRecoverEvaluatorErrorWithWarnings(t *testing.T) { - ev := &evaluator{logger: log.NewNopLogger()} + ev := &evaluator{logger: promslog.NewNopLogger()} var err error var ws annotations.Annotations diff --git a/promql/engine_test.go b/promql/engine_test.go index 947c0e1ed8..8b8dc3909c 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -17,8 +17,10 @@ import ( "context" "errors" "fmt" + "log/slog" + "math" "sort" - "strconv" + "strings" "sync" "testing" "time" @@ -28,14 +30,15 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/util/annotations" "github.com/prometheus/prometheus/util/stats" - "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" ) @@ -321,271 +324,271 @@ func TestSelectHintsSetCorrectly(t *testing.T) { { query: "foo", start: 10000, expected: []*storage.SelectHints{ - {Start: 5000, End: 10000}, + {Start: 5001, End: 10000}, }, }, { query: "foo @ 15", start: 10000, expected: []*storage.SelectHints{ - {Start: 10000, End: 15000}, + {Start: 10001, End: 15000}, }, }, { query: "foo @ 1", start: 10000, expected: []*storage.SelectHints{ - {Start: -4000, End: 1000}, + {Start: -3999, End: 1000}, }, }, { query: "foo[2m]", start: 200000, expected: []*storage.SelectHints{ - {Start: 80000, End: 200000, Range: 120000}, + {Start: 80001, End: 200000, Range: 120000}, }, }, { query: "foo[2m] @ 180", start: 200000, expected: []*storage.SelectHints{ - {Start: 60000, End: 180000, Range: 120000}, + {Start: 60001, End: 180000, Range: 120000}, }, }, { query: "foo[2m] @ 300", start: 200000, expected: []*storage.SelectHints{ - {Start: 180000, End: 300000, Range: 120000}, + {Start: 180001, End: 300000, Range: 120000}, }, }, { query: "foo[2m] @ 60", start: 200000, expected: []*storage.SelectHints{ - {Start: -60000, End: 60000, Range: 120000}, + {Start: -59999, End: 60000, Range: 120000}, }, }, { query: "foo[2m] offset 2m", start: 300000, expected: []*storage.SelectHints{ - {Start: 60000, End: 180000, Range: 120000}, + {Start: 60001, End: 180000, Range: 120000}, }, }, { query: "foo[2m] @ 200 offset 2m", start: 300000, expected: []*storage.SelectHints{ - {Start: -40000, End: 80000, Range: 120000}, + {Start: -39999, End: 80000, Range: 120000}, }, }, { query: 
"foo[2m:1s]", start: 300000, expected: []*storage.SelectHints{ - {Start: 175000, End: 300000, Step: 1000}, + {Start: 175001, End: 300000, Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s])", start: 300000, expected: []*storage.SelectHints{ - {Start: 175000, End: 300000, Func: "count_over_time", Step: 1000}, + {Start: 175001, End: 300000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] @ 300)", start: 200000, expected: []*storage.SelectHints{ - {Start: 175000, End: 300000, Func: "count_over_time", Step: 1000}, + {Start: 175001, End: 300000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] @ 200)", start: 200000, expected: []*storage.SelectHints{ - {Start: 75000, End: 200000, Func: "count_over_time", Step: 1000}, + {Start: 75001, End: 200000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] @ 100)", start: 200000, expected: []*storage.SelectHints{ - {Start: -25000, End: 100000, Func: "count_over_time", Step: 1000}, + {Start: -24999, End: 100000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] offset 10s)", start: 300000, expected: []*storage.SelectHints{ - {Start: 165000, End: 290000, Func: "count_over_time", Step: 1000}, + {Start: 165001, End: 290000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time((foo offset 10s)[2m:1s] offset 10s)", start: 300000, expected: []*storage.SelectHints{ - {Start: 155000, End: 280000, Func: "count_over_time", Step: 1000}, + {Start: 155001, End: 280000, Func: "count_over_time", Step: 1000}, }, }, { // When the @ is on the vector selector, the enclosing subquery parameters // don't affect the hint ranges. query: "count_over_time((foo @ 200 offset 10s)[2m:1s] offset 10s)", start: 300000, expected: []*storage.SelectHints{ - {Start: 185000, End: 190000, Func: "count_over_time", Step: 1000}, + {Start: 185001, End: 190000, Func: "count_over_time", Step: 1000}, }, }, { // When the @ is on the vector selector, the enclosing subquery parameters // don't affect the hint ranges. 
query: "count_over_time((foo @ 200 offset 10s)[2m:1s] @ 100 offset 10s)", start: 300000, expected: []*storage.SelectHints{ - {Start: 185000, End: 190000, Func: "count_over_time", Step: 1000}, + {Start: 185001, End: 190000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time((foo offset 10s)[2m:1s] @ 100 offset 10s)", start: 300000, expected: []*storage.SelectHints{ - {Start: -45000, End: 80000, Func: "count_over_time", Step: 1000}, + {Start: -44999, End: 80000, Func: "count_over_time", Step: 1000}, }, }, { query: "foo", start: 10000, end: 20000, expected: []*storage.SelectHints{ - {Start: 5000, End: 20000, Step: 1000}, + {Start: 5001, End: 20000, Step: 1000}, }, }, { query: "foo @ 15", start: 10000, end: 20000, expected: []*storage.SelectHints{ - {Start: 10000, End: 15000, Step: 1000}, + {Start: 10001, End: 15000, Step: 1000}, }, }, { query: "foo @ 1", start: 10000, end: 20000, expected: []*storage.SelectHints{ - {Start: -4000, End: 1000, Step: 1000}, + {Start: -3999, End: 1000, Step: 1000}, }, }, { query: "rate(foo[2m] @ 180)", start: 200000, end: 500000, expected: []*storage.SelectHints{ - {Start: 60000, End: 180000, Range: 120000, Func: "rate", Step: 1000}, + {Start: 60001, End: 180000, Range: 120000, Func: "rate", Step: 1000}, }, }, { query: "rate(foo[2m] @ 300)", start: 200000, end: 500000, expected: []*storage.SelectHints{ - {Start: 180000, End: 300000, Range: 120000, Func: "rate", Step: 1000}, + {Start: 180001, End: 300000, Range: 120000, Func: "rate", Step: 1000}, }, }, { query: "rate(foo[2m] @ 60)", start: 200000, end: 500000, expected: []*storage.SelectHints{ - {Start: -60000, End: 60000, Range: 120000, Func: "rate", Step: 1000}, + {Start: -59999, End: 60000, Range: 120000, Func: "rate", Step: 1000}, }, }, { query: "rate(foo[2m])", start: 200000, end: 500000, expected: []*storage.SelectHints{ - {Start: 80000, End: 500000, Range: 120000, Func: "rate", Step: 1000}, + {Start: 80001, End: 500000, Range: 120000, Func: "rate", Step: 1000}, }, }, { query: "rate(foo[2m] offset 2m)", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: 60000, End: 380000, Range: 120000, Func: "rate", Step: 1000}, + {Start: 60001, End: 380000, Range: 120000, Func: "rate", Step: 1000}, }, }, { query: "rate(foo[2m:1s])", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: 175000, End: 500000, Func: "rate", Step: 1000}, + {Start: 175001, End: 500000, Func: "rate", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s])", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: 175000, End: 500000, Func: "count_over_time", Step: 1000}, + {Start: 175001, End: 500000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] offset 10s)", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: 165000, End: 490000, Func: "count_over_time", Step: 1000}, + {Start: 165001, End: 490000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] @ 300)", start: 200000, end: 500000, expected: []*storage.SelectHints{ - {Start: 175000, End: 300000, Func: "count_over_time", Step: 1000}, + {Start: 175001, End: 300000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] @ 200)", start: 200000, end: 500000, expected: []*storage.SelectHints{ - {Start: 75000, End: 200000, Func: "count_over_time", Step: 1000}, + {Start: 75001, End: 200000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time(foo[2m:1s] @ 100)", start: 200000, end: 500000, expected: 
[]*storage.SelectHints{ - {Start: -25000, End: 100000, Func: "count_over_time", Step: 1000}, + {Start: -24999, End: 100000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time((foo offset 10s)[2m:1s] offset 10s)", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: 155000, End: 480000, Func: "count_over_time", Step: 1000}, + {Start: 155001, End: 480000, Func: "count_over_time", Step: 1000}, }, }, { // When the @ is on the vector selector, the enclosing subquery parameters // don't affect the hint ranges. query: "count_over_time((foo @ 200 offset 10s)[2m:1s] offset 10s)", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: 185000, End: 190000, Func: "count_over_time", Step: 1000}, + {Start: 185001, End: 190000, Func: "count_over_time", Step: 1000}, }, }, { // When the @ is on the vector selector, the enclosing subquery parameters // don't affect the hint ranges. query: "count_over_time((foo @ 200 offset 10s)[2m:1s] @ 100 offset 10s)", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: 185000, End: 190000, Func: "count_over_time", Step: 1000}, + {Start: 185001, End: 190000, Func: "count_over_time", Step: 1000}, }, }, { query: "count_over_time((foo offset 10s)[2m:1s] @ 100 offset 10s)", start: 300000, end: 500000, expected: []*storage.SelectHints{ - {Start: -45000, End: 80000, Func: "count_over_time", Step: 1000}, + {Start: -44999, End: 80000, Func: "count_over_time", Step: 1000}, }, }, { query: "sum by (dim1) (foo)", start: 10000, expected: []*storage.SelectHints{ - {Start: 5000, End: 10000, Func: "sum", By: true, Grouping: []string{"dim1"}}, + {Start: 5001, End: 10000, Func: "sum", By: true, Grouping: []string{"dim1"}}, }, }, { query: "sum without (dim1) (foo)", start: 10000, expected: []*storage.SelectHints{ - {Start: 5000, End: 10000, Func: "sum", Grouping: []string{"dim1"}}, + {Start: 5001, End: 10000, Func: "sum", Grouping: []string{"dim1"}}, }, }, { query: "sum by (dim1) (avg_over_time(foo[1s]))", start: 10000, expected: []*storage.SelectHints{ - {Start: 9000, End: 10000, Func: "avg_over_time", Range: 1000}, + {Start: 9001, End: 10000, Func: "avg_over_time", Range: 1000}, }, }, { query: "sum by (dim1) (max by (dim2) (foo))", start: 10000, expected: []*storage.SelectHints{ - {Start: 5000, End: 10000, Func: "max", By: true, Grouping: []string{"dim2"}}, + {Start: 5001, End: 10000, Func: "max", By: true, Grouping: []string{"dim2"}}, }, }, { query: "(max by (dim1) (foo))[5s:1s]", start: 10000, expected: []*storage.SelectHints{ - {Start: 0, End: 10000, Func: "max", By: true, Grouping: []string{"dim1"}, Step: 1000}, + {Start: 1, End: 10000, Func: "max", By: true, Grouping: []string{"dim1"}, Step: 1000}, }, }, { query: "(sum(http_requests{group=~\"p.*\"})+max(http_requests{group=~\"c.*\"}))[20s:5s]", start: 120000, expected: []*storage.SelectHints{ - {Start: 95000, End: 120000, Func: "sum", By: true, Step: 5000}, - {Start: 95000, End: 120000, Func: "max", By: true, Step: 5000}, + {Start: 95001, End: 120000, Func: "sum", By: true, Step: 5000}, + {Start: 95001, End: 120000, Func: "max", By: true, Step: 5000}, }, }, { query: "foo @ 50 + bar @ 250 + baz @ 900", start: 100000, end: 500000, expected: []*storage.SelectHints{ - {Start: 45000, End: 50000, Step: 1000}, - {Start: 245000, End: 250000, Step: 1000}, - {Start: 895000, End: 900000, Step: 1000}, + {Start: 45001, End: 50000, Step: 1000}, + {Start: 245001, End: 250000, Step: 1000}, + {Start: 895001, End: 900000, Step: 1000}, }, }, { query: "foo @ 50 + bar + baz @ 
900", start: 100000, end: 500000, expected: []*storage.SelectHints{ - {Start: 45000, End: 50000, Step: 1000}, - {Start: 95000, End: 500000, Step: 1000}, - {Start: 895000, End: 900000, Step: 1000}, + {Start: 45001, End: 50000, Step: 1000}, + {Start: 95001, End: 500000, Step: 1000}, + {Start: 895001, End: 900000, Step: 1000}, }, }, { query: "rate(foo[2s] @ 50) + bar @ 250 + baz @ 900", start: 100000, end: 500000, expected: []*storage.SelectHints{ - {Start: 48000, End: 50000, Step: 1000, Func: "rate", Range: 2000}, - {Start: 245000, End: 250000, Step: 1000}, - {Start: 895000, End: 900000, Step: 1000}, + {Start: 48001, End: 50000, Step: 1000, Func: "rate", Range: 2000}, + {Start: 245001, End: 250000, Step: 1000}, + {Start: 895001, End: 900000, Step: 1000}, }, }, { query: "rate(foo[2s:1s] @ 50) + bar + baz", start: 100000, end: 500000, expected: []*storage.SelectHints{ - {Start: 43000, End: 50000, Step: 1000, Func: "rate"}, - {Start: 95000, End: 500000, Step: 1000}, - {Start: 95000, End: 500000, Step: 1000}, + {Start: 43001, End: 50000, Step: 1000, Func: "rate"}, + {Start: 95001, End: 500000, Step: 1000}, + {Start: 95001, End: 500000, Step: 1000}, }, }, { query: "rate(foo[2s:1s] @ 50) + bar + rate(baz[2m:1s] @ 900 offset 2m) ", start: 100000, end: 500000, expected: []*storage.SelectHints{ - {Start: 43000, End: 50000, Step: 1000, Func: "rate"}, - {Start: 95000, End: 500000, Step: 1000}, - {Start: 655000, End: 780000, Step: 1000, Func: "rate"}, + {Start: 43001, End: 50000, Step: 1000, Func: "rate"}, + {Start: 95001, End: 500000, Step: 1000}, + {Start: 655001, End: 780000, Step: 1000, Func: "rate"}, }, }, { // Hints are based on the inner most subquery timestamp. query: `sum_over_time(sum_over_time(metric{job="1"}[100s])[100s:25s] @ 50)[3s:1s] @ 3000`, start: 100000, expected: []*storage.SelectHints{ - {Start: -150000, End: 50000, Range: 100000, Func: "sum_over_time", Step: 25000}, + {Start: -149999, End: 50000, Range: 100000, Func: "sum_over_time", Step: 25000}, }, }, { // Hints are based on the inner most subquery timestamp. query: `sum_over_time(sum_over_time(metric{job="1"}[100s])[100s:25s] @ 3000)[3s:1s] @ 50`, expected: []*storage.SelectHints{ - {Start: 2800000, End: 3000000, Range: 100000, Func: "sum_over_time", Step: 25000}, + {Start: 2800001, End: 3000000, Range: 100000, Func: "sum_over_time", Step: 25000}, }, }, } { @@ -935,22 +938,20 @@ load 10s }, }, { - Query: "max_over_time(metricWith1SampleEvery10Seconds[59s])[20s:5s]", + Query: "max_over_time(metricWith1SampleEvery10Seconds[60s])[20s:5s]", Start: time.Unix(201, 0), PeakSamples: 10, - TotalSamples: 24, // (1 sample / 10 seconds * 60 seconds) * 20/5 (using 59s so we always return 6 samples - // as if we run a query on 00 looking back 60 seconds we will return 7 samples; - // see next test). + TotalSamples: 24, // (1 sample / 10 seconds * 60 seconds) * 4 TotalSamplesPerStep: stats.TotalSamplesPerStep{ 201000: 24, }, }, { - Query: "max_over_time(metricWith1SampleEvery10Seconds[60s])[20s:5s]", + Query: "max_over_time(metricWith1SampleEvery10Seconds[61s])[20s:5s]", Start: time.Unix(201, 0), PeakSamples: 11, TotalSamples: 26, // (1 sample / 10 seconds * 60 seconds) * 4 + 2 as - // max_over_time(metricWith1SampleEvery10Seconds[60s]) @ 190 and 200 will return 7 samples. + // max_over_time(metricWith1SampleEvery10Seconds[61s]) @ 190 and 200 will return 7 samples. 
TotalSamplesPerStep: stats.TotalSamplesPerStep{ 201000: 26, }, @@ -959,10 +960,9 @@ load 10s Query: "max_over_time(metricWith1HistogramEvery10Seconds[60s])[20s:5s]", Start: time.Unix(201, 0), PeakSamples: 78, - TotalSamples: 338, // (1 histogram (size 13 HPoint) / 10 seconds * 60 seconds) * 4 + 2 * 13 as - // max_over_time(metricWith1SampleEvery10Seconds[60s]) @ 190 and 200 will return 7 samples. + TotalSamples: 312, // (1 histogram (size 13) / 10 seconds * 60 seconds) * 4 TotalSamplesPerStep: stats.TotalSamplesPerStep{ - 201000: 338, + 201000: 312, }, }, { @@ -1427,23 +1427,23 @@ load 10s }, { // The peak samples in memory is during the first evaluation: - // - Subquery takes 22 samples, 11 for each bigmetric, - // - Result is calculated per series where the series samples is buffered, hence 11 more here. + // - Subquery takes 20 samples, 10 for each bigmetric. + // - Result is calculated per series where the series samples is buffered, hence 10 more here. // - The result of two series is added before the last series buffer is discarded, so 2 more here. - // Hence at peak it is 22 (subquery) + 11 (buffer of a series) + 2 (result from 2 series). + // Hence at peak it is 20 (subquery) + 10 (buffer of a series) + 2 (result from 2 series). // The subquery samples and the buffer is discarded before duplicating. Query: `rate(bigmetric[10s:1s] @ 10)`, - MaxSamples: 35, + MaxSamples: 32, Start: time.Unix(0, 0), End: time.Unix(10, 0), Interval: 5 * time.Second, }, { // Here the reasoning is same as above. But LHS and RHS are done one after another. - // So while one of them takes 35 samples at peak, we need to hold the 2 sample + // So while one of them takes 32 samples at peak, we need to hold the 2 sample // result of the other till then. Query: `rate(bigmetric[10s:1s] @ 10) + rate(bigmetric[10s:1s] @ 30)`, - MaxSamples: 37, + MaxSamples: 34, Start: time.Unix(0, 0), End: time.Unix(10, 0), Interval: 5 * time.Second, @@ -1451,28 +1451,28 @@ load 10s { // Sample as above but with only 1 part as step invariant. // Here the peak is caused by the non-step invariant part as it touches more time range. - // Hence at peak it is 2*21 (subquery from 0s to 20s) - // + 11 (buffer of a series per evaluation) + // Hence at peak it is 2*20 (subquery from 0s to 20s) + // + 10 (buffer of a series per evaluation) // + 6 (result from 2 series at 3 eval times). Query: `rate(bigmetric[10s:1s]) + rate(bigmetric[10s:1s] @ 30)`, - MaxSamples: 59, + MaxSamples: 56, Start: time.Unix(10, 0), End: time.Unix(20, 0), Interval: 5 * time.Second, }, { // Nested subquery. - // We saw that innermost rate takes 35 samples which is still the peak + // We saw that innermost rate takes 32 samples which is still the peak // since the other two subqueries just duplicate the result. - Query: `rate(rate(bigmetric[10s:1s] @ 10)[100s:25s] @ 1000)[100s:20s] @ 2000`, - MaxSamples: 35, + Query: `rate(rate(bigmetric[10s:1s] @ 10)[100s:25s] @ 1000)[100s:20s] @ 2000`, + MaxSamples: 32, Start: time.Unix(10, 0), }, { // Nested subquery. - // Now the outmost subquery produces more samples than inner most rate. + // Now the outermost subquery produces more samples than innermost rate.
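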
Query: `rate(rate(bigmetric[10s:1s] @ 10)[100s:25s] @ 1000)[17s:1s] @ 2000`, - MaxSamples: 36, + MaxSamples: 34, Start: time.Unix(10, 0), }, } @@ -1579,11 +1579,11 @@ load 1ms start: 10, result: promql.Matrix{ promql.Series{ - Floats: []promql.FPoint{{F: 28, T: 280000}, {F: 29, T: 290000}, {F: 30, T: 300000}}, + Floats: []promql.FPoint{{F: 29, T: 290000}, {F: 30, T: 300000}}, Metric: lbls1, }, promql.Series{ - Floats: []promql.FPoint{{F: 56, T: 280000}, {F: 58, T: 290000}, {F: 60, T: 300000}}, + Floats: []promql.FPoint{{F: 58, T: 290000}, {F: 60, T: 300000}}, Metric: lbls2, }, }, @@ -1592,7 +1592,7 @@ load 1ms start: 100, result: promql.Matrix{ promql.Series{ - Floats: []promql.FPoint{{F: 3, T: -2000}, {F: 2, T: -1000}, {F: 1, T: 0}}, + Floats: []promql.FPoint{{F: 2, T: -1000}, {F: 1, T: 0}}, Metric: lblsneg, }, }, @@ -1601,7 +1601,7 @@ load 1ms start: 100, result: promql.Matrix{ promql.Series{ - Floats: []promql.FPoint{{F: 504, T: -503000}, {F: 503, T: -502000}, {F: 502, T: -501000}, {F: 501, T: -500000}}, + Floats: []promql.FPoint{{F: 503, T: -502000}, {F: 502, T: -501000}, {F: 501, T: -500000}}, Metric: lblsneg, }, }, @@ -1610,13 +1610,26 @@ load 1ms start: 100, result: promql.Matrix{ promql.Series{ - Floats: []promql.FPoint{{F: 2342, T: 2342}, {F: 2343, T: 2343}, {F: 2344, T: 2344}, {F: 2345, T: 2345}}, + Floats: []promql.FPoint{{F: 2343, T: 2343}, {F: 2344, T: 2344}, {F: 2345, T: 2345}}, Metric: lblsms, }, }, }, { query: "metric[100s:25s] @ 300", start: 100, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 22, T: 225000}, {F: 25, T: 250000}, {F: 27, T: 275000}, {F: 30, T: 300000}}, + Metric: lbls1, + }, + promql.Series{ + Floats: []promql.FPoint{{F: 44, T: 225000}, {F: 50, T: 250000}, {F: 54, T: 275000}, {F: 60, T: 300000}}, + Metric: lbls2, + }, + }, + }, { + query: "metric[100s1ms:25s] @ 300", // Add 1ms to the range to see the legacy behavior of the previous test. + start: 100, result: promql.Matrix{ promql.Series{ Floats: []promql.FPoint{{F: 20, T: 200000}, {F: 22, T: 225000}, {F: 25, T: 250000}, {F: 27, T: 275000}, {F: 30, T: 300000}}, @@ -1630,6 +1643,15 @@ load 1ms }, { query: "metric_neg[50s:25s] @ 0", start: 100, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 26, T: -25000}, {F: 1, T: 0}}, + Metric: lblsneg, + }, + }, + }, { + query: "metric_neg[50s1ms:25s] @ 0", // Add 1ms to the range to see the legacy behavior of the previous test. + start: 100, result: promql.Matrix{ promql.Series{ Floats: []promql.FPoint{{F: 51, T: -50000}, {F: 26, T: -25000}, {F: 1, T: 0}}, @@ -1639,6 +1661,15 @@ load 1ms }, { query: "metric_neg[50s:25s] @ -100", start: 100, + result: promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 126, T: -125000}, {F: 101, T: -100000}}, + Metric: lblsneg, + }, + }, + }, { + query: "metric_neg[50s1ms:25s] @ -100", // Add 1ms to the range to see the legacy behavior of the previous test. 
+ start: 100, result: promql.Matrix{ promql.Series{ Floats: []promql.FPoint{{F: 151, T: -150000}, {F: 126, T: -125000}, {F: 101, T: -100000}}, @@ -1646,7 +1677,7 @@ load 1ms }, }, }, { - query: `metric_ms[100ms:25ms] @ 2.345`, + query: `metric_ms[101ms:25ms] @ 2.345`, start: 100, result: promql.Matrix{ promql.Series{ @@ -1831,7 +1862,7 @@ func TestSubquerySelector(t *testing.T) { nil, promql.Matrix{ promql.Series{ - Floats: []promql.FPoint{{F: 2, T: 10000}, {F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}, {F: 2, T: 30000}}, + Floats: []promql.FPoint{{F: 2, T: 15000}, {F: 2, T: 20000}, {F: 2, T: 25000}, {F: 2, T: 30000}}, Metric: labels.FromStrings("__name__", "metric"), }, }, @@ -1878,6 +1909,20 @@ func TestSubquerySelector(t *testing.T) { cases: []caseType{ { // Normal selector. Query: `http_requests{group=~"pro.*",instance="0"}[30s:10s]`, + Result: promql.Result{ + nil, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 10000, T: 10000000}, {F: 100, T: 10010000}, {F: 130, T: 10020000}}, + Metric: labels.FromStrings("__name__", "http_requests", "job", "api-server", "instance", "0", "group", "production"), + }, + }, + nil, + }, + Start: time.Unix(10020, 0), + }, + { // Normal selector. Add 1ms to the range to see the legacy behavior of the previous test. + Query: `http_requests{group=~"pro.*",instance="0"}[30s1ms:10s]`, Result: promql.Result{ nil, promql.Matrix{ @@ -1920,6 +1965,36 @@ func TestSubquerySelector(t *testing.T) { }, { Query: `rate(http_requests[1m])[15s:5s]`, + Result: promql.Result{ + nil, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 3, T: 7990000}, {F: 3, T: 7995000}, {F: 3, T: 8000000}}, + Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "canary"), + DropName: true, + }, + promql.Series{ + Floats: []promql.FPoint{{F: 4, T: 7990000}, {F: 4, T: 7995000}, {F: 4, T: 8000000}}, + Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "canary"), + DropName: true, + }, + promql.Series{ + Floats: []promql.FPoint{{F: 1, T: 7990000}, {F: 1, T: 7995000}, {F: 1, T: 8000000}}, + Metric: labels.FromStrings("job", "api-server", "instance", "0", "group", "production"), + DropName: true, + }, + promql.Series{ + Floats: []promql.FPoint{{F: 2, T: 7990000}, {F: 2, T: 7995000}, {F: 2, T: 8000000}}, + Metric: labels.FromStrings("job", "api-server", "instance", "1", "group", "production"), + DropName: true, + }, + }, + nil, + }, + Start: time.Unix(8000, 0), + }, + { + Query: `rate(http_requests[1m])[15s1ms:5s]`, // Add 1ms to the range to see the legacy behavior of the previous test. Result: promql.Result{ nil, promql.Matrix{ @@ -1950,6 +2025,35 @@ func TestSubquerySelector(t *testing.T) { }, { Query: `sum(http_requests{group=~"pro.*"})[30s:10s]`, + Result: promql.Result{ + nil, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 300, T: 100000}, {F: 330, T: 110000}, {F: 360, T: 120000}}, + Metric: labels.EmptyLabels(), + }, + }, + nil, + }, + Start: time.Unix(120, 0), + }, + { + Query: `sum(http_requests{group=~"pro.*"})[30s:10s]`, + Result: promql.Result{ + nil, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 300, T: 100000}, {F: 330, T: 110000}, {F: 360, T: 120000}}, + Metric: labels.EmptyLabels(), + }, + }, + nil, + }, + Start: time.Unix(121, 0), // 1s later doesn't change the result. + }, + { + // Add 1ms to the range to see the legacy behavior of the previous test. 
+ Query: `sum(http_requests{group=~"pro.*"})[30s1ms:10s]`, Result: promql.Result{ nil, promql.Matrix{ @@ -1964,6 +2068,20 @@ func TestSubquerySelector(t *testing.T) { }, { Query: `sum(http_requests)[40s:10s]`, + Result: promql.Result{ + nil, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 900, T: 90000}, {F: 1000, T: 100000}, {F: 1100, T: 110000}, {F: 1200, T: 120000}}, + Metric: labels.EmptyLabels(), + }, + }, + nil, + }, + Start: time.Unix(120, 0), + }, + { + Query: `sum(http_requests)[40s1ms:10s]`, // Add 1ms to the range to see the legacy behavior of the previous test. Result: promql.Result{ nil, promql.Matrix{ @@ -1978,6 +2096,21 @@ func TestSubquerySelector(t *testing.T) { }, { Query: `(sum(http_requests{group=~"p.*"})+sum(http_requests{group=~"c.*"}))[20s:5s]`, + Result: promql.Result{ + nil, + promql.Matrix{ + promql.Series{ + Floats: []promql.FPoint{{F: 1000, T: 105000}, {F: 1100, T: 110000}, {F: 1100, T: 115000}, {F: 1200, T: 120000}}, + Metric: labels.EmptyLabels(), + }, + }, + nil, + }, + Start: time.Unix(120, 0), + }, + { + // Add 1ms to the range to see the legacy behavior of the previous test. + Query: `(sum(http_requests{group=~"p.*"})+sum(http_requests{group=~"c.*"}))[20s1ms:5s]`, Result: promql.Result{ nil, promql.Matrix{ @@ -2017,23 +2150,37 @@ func TestSubquerySelector(t *testing.T) { type FakeQueryLogger struct { closed bool logs []interface{} + attrs []any } func NewFakeQueryLogger() *FakeQueryLogger { return &FakeQueryLogger{ closed: false, logs: make([]interface{}, 0), + attrs: make([]any, 0), } } +// It implements the promql.QueryLogger interface. func (f *FakeQueryLogger) Close() error { f.closed = true return nil } -func (f *FakeQueryLogger) Log(l ...interface{}) error { - f.logs = append(f.logs, l...) - return nil +// It implements the promql.QueryLogger interface. +func (f *FakeQueryLogger) Log(ctx context.Context, level slog.Level, msg string, args ...any) { + // Test usage only really cares about existence of keyvals passed in + // via args, just append in the log message before handling the + // provided args and any embedded kvs added via `.With()` on f.attrs. + log := append([]any{msg}, args...) + log = append(log, f.attrs...) + f.attrs = f.attrs[:0] + f.logs = append(f.logs, log...) +} + +// It implements the promql.QueryLogger interface. +func (f *FakeQueryLogger) With(args ...any) { + f.attrs = append(f.attrs, args...) 
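	// Editor's note (added comment): attrs buffered here are appended to the
	// output of the next Log call above and cleared there, which approximates
	// slog.Logger.With closely enough for these tests to assert on the
	// key/value pairs.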
} func TestQueryLogger_basic(t *testing.T) { @@ -2061,9 +2208,8 @@ func TestQueryLogger_basic(t *testing.T) { f1 := NewFakeQueryLogger() engine.SetQueryLogger(f1) queryExec() - for i, field := range []interface{}{"params", map[string]interface{}{"query": "test statement"}} { - require.Equal(t, field, f1.logs[i]) - } + require.Contains(t, f1.logs, `params`) + require.Contains(t, f1.logs, map[string]interface{}{"query": "test statement"}) l := len(f1.logs) queryExec() @@ -2109,11 +2255,8 @@ func TestQueryLogger_fields(t *testing.T) { res := query.Exec(ctx) require.NoError(t, res.Err) - expected := []string{"foo", "bar"} - for i, field := range expected { - v := f1.logs[len(f1.logs)-len(expected)+i].(string) - require.Equal(t, field, v) - } + require.Contains(t, f1.logs, `foo`) + require.Contains(t, f1.logs, `bar`) } func TestQueryLogger_error(t *testing.T) { @@ -2139,9 +2282,10 @@ func TestQueryLogger_error(t *testing.T) { res := query.Exec(ctx) require.Error(t, res.Err, "query should have failed") - for i, field := range []interface{}{"params", map[string]interface{}{"query": "test statement"}, "error", testErr} { - require.Equal(t, f1.logs[i], field) - } + require.Contains(t, f1.logs, `params`) + require.Contains(t, f1.logs, map[string]interface{}{"query": "test statement"}) + require.Contains(t, f1.logs, `error`) + require.Contains(t, f1.logs, testErr) } func TestPreprocessAndWrapWithStepInvariantExpr(t *testing.T) { @@ -3019,6 +3163,29 @@ func TestEngineOptsValidation(t *testing.T) { } } +func TestEngine_Close(t *testing.T) { + t.Run("nil engine", func(t *testing.T) { + var ng *promql.Engine + require.NoError(t, ng.Close()) + }) + + t.Run("non-nil engine", func(t *testing.T) { + ng := promql.NewEngine(promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 0, + Timeout: 100 * time.Second, + NoStepSubqueryIntervalFn: nil, + EnableAtModifier: true, + EnableNegativeOffset: true, + EnablePerStepStats: false, + LookbackDelta: 0, + EnableDelayedNameRemoval: true, + }) + require.NoError(t, ng.Close()) + }) +} + func TestInstantQueryWithRangeVectorSelector(t *testing.T) { engine := newTestEngine(t) @@ -3037,7 +3204,7 @@ func TestInstantQueryWithRangeVectorSelector(t *testing.T) { ts time.Time }{ "matches series with points in range": { - expr: "some_metric[1m]", + expr: "some_metric[2m]", ts: baseT.Add(2 * time.Minute), expected: promql.Matrix{ { @@ -3073,7 +3240,6 @@ func TestInstantQueryWithRangeVectorSelector(t *testing.T) { { Metric: labels.FromStrings("__name__", "some_metric_with_stale_marker"), Floats: []promql.FPoint{ - {T: timestamp.FromTime(baseT), F: 0}, {T: timestamp.FromTime(baseT.Add(time.Minute)), F: 1}, {T: timestamp.FromTime(baseT.Add(3 * time.Minute)), F: 3}, }, @@ -3095,241 +3261,6 @@ func TestInstantQueryWithRangeVectorSelector(t *testing.T) { } } -func TestNativeHistogram_SubOperator(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. 
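// Editor's aside: the TestNativeHistogram_SubOperator removal below drops a
// hand-built-storage test of the `-` operator on native histograms; per the
// TODO above, this coverage is presumably superseded by script-based tests in
// the promqltest framework rather than being ported as Go code.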
- cases := []struct { - histograms []histogram.Histogram - expected histogram.FloatHistogram - }{ - { - histograms: []histogram.Histogram{ - { - Schema: 0, - Count: 41, - Sum: 2345.6, - ZeroThreshold: 0.001, - ZeroCount: 5, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 4}, - {Offset: 0, Length: 0}, - {Offset: 0, Length: 3}, - }, - PositiveBuckets: []int64{1, 2, -2, 1, -1, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 1, Length: 4}, - {Offset: 2, Length: 0}, - {Offset: 2, Length: 3}, - }, - NegativeBuckets: []int64{1, 3, -2, 5, -2, 0, -3}, - }, - { - Schema: 0, - Count: 11, - Sum: 1234.5, - ZeroThreshold: 0.001, - ZeroCount: 3, - PositiveSpans: []histogram.Span{ - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, -1}, - NegativeSpans: []histogram.Span{ - {Offset: 2, Length: 2}, - }, - NegativeBuckets: []int64{3, -1}, - }, - }, - expected: histogram.FloatHistogram{ - Schema: 0, - Count: 30, - Sum: 1111.1, - ZeroThreshold: 0.001, - ZeroCount: 2, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 4}, - }, - PositiveBuckets: []float64{1, 1, 2, 1, 1, 1}, - NegativeSpans: []histogram.Span{ - {Offset: 1, Length: 2}, - {Offset: 1, Length: 1}, - {Offset: 4, Length: 3}, - }, - NegativeBuckets: []float64{1, 1, 7, 5, 5, 2}, - }, - }, - { - histograms: []histogram.Histogram{ - { - Schema: 0, - Count: 41, - Sum: 2345.6, - ZeroThreshold: 0.001, - ZeroCount: 5, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 4}, - {Offset: 0, Length: 0}, - {Offset: 0, Length: 3}, - }, - PositiveBuckets: []int64{1, 2, -2, 1, -1, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 1, Length: 4}, - {Offset: 2, Length: 0}, - {Offset: 2, Length: 3}, - }, - NegativeBuckets: []int64{1, 3, -2, 5, -2, 0, -3}, - }, - { - Schema: 1, - Count: 11, - Sum: 1234.5, - ZeroThreshold: 0.001, - ZeroCount: 3, - PositiveSpans: []histogram.Span{ - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, -1}, - NegativeSpans: []histogram.Span{ - {Offset: 2, Length: 2}, - }, - NegativeBuckets: []int64{3, -1}, - }, - }, - expected: histogram.FloatHistogram{ - Schema: 0, - Count: 30, - Sum: 1111.1, - ZeroThreshold: 0.001, - ZeroCount: 2, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 1}, - {Offset: 1, Length: 5}, - }, - PositiveBuckets: []float64{1, 1, 2, 1, 1, 1}, - NegativeSpans: []histogram.Span{ - {Offset: 1, Length: 4}, - {Offset: 4, Length: 3}, - }, - NegativeBuckets: []float64{-2, 2, 2, 7, 5, 5, 2}, - }, - }, - { - histograms: []histogram.Histogram{ - { - Schema: 1, - Count: 11, - Sum: 1234.5, - ZeroThreshold: 0.001, - ZeroCount: 3, - PositiveSpans: []histogram.Span{ - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, -1}, - NegativeSpans: []histogram.Span{ - {Offset: 2, Length: 2}, - }, - NegativeBuckets: []int64{3, -1}, - }, - { - Schema: 0, - Count: 41, - Sum: 2345.6, - ZeroThreshold: 0.001, - ZeroCount: 5, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 4}, - {Offset: 0, Length: 0}, - {Offset: 0, Length: 3}, - }, - PositiveBuckets: []int64{1, 2, -2, 1, -1, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 1, Length: 4}, - {Offset: 2, Length: 0}, - {Offset: 2, Length: 3}, - }, - NegativeBuckets: []int64{1, 3, -2, 5, -2, 0, -3}, - }, - }, - expected: histogram.FloatHistogram{ - Schema: 0, - Count: -30, - Sum: -1111.1, - ZeroThreshold: 0.001, - ZeroCount: -2, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 1}, - {Offset: 1, Length: 5}, - }, - PositiveBuckets: []float64{-1, -1, -2, -1, -1, -1}, - NegativeSpans: []histogram.Span{ - 
{Offset: 1, Length: 4}, - {Offset: 4, Length: 3}, - }, - NegativeBuckets: []float64{2, -2, -2, -7, -5, -5, -2}, - }, - }, - } - - idx0 := int64(0) - for _, c := range cases { - for _, floatHisto := range []bool{true, false} { - t.Run(fmt.Sprintf("floatHistogram=%t %d", floatHisto, idx0), func(t *testing.T) { - engine := newTestEngine(t) - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - - ts := idx0 * int64(10*time.Minute/time.Millisecond) - app := storage.Appender(context.Background()) - for idx1, h := range c.histograms { - lbls := labels.FromStrings("__name__", seriesName, "idx", strconv.Itoa(idx1)) - // Since we mutate h later, we need to create a copy here. - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, h.Copy().ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, h.Copy(), nil) - } - require.NoError(t, err) - } - require.NoError(t, app.Commit()) - - queryAndCheck := func(queryString string, exp promql.Vector) { - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - if len(vector) == len(exp) { - for i, e := range exp { - got := vector[i].H - if got != e.H { - // Error messages are better if we compare structs, not pointers. - require.Equal(t, *e.H, *got) - } - } - } - - testutil.RequireEqual(t, exp, vector) - } - - // - operator. - queryString := fmt.Sprintf(`%s{idx="0"}`, seriesName) - for idx := 1; idx < len(c.histograms); idx++ { - queryString += fmt.Sprintf(` - ignoring(idx) %s{idx="%d"}`, seriesName, idx) - } - queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expected, Metric: labels.EmptyLabels()}}) - }) - } - idx0++ - } -} - func TestQueryLookbackDelta(t *testing.T) { var ( load = `load 5m @@ -3347,43 +3278,43 @@ metric 0 1 2 }{ { name: "default lookback delta", - ts: lastDatapointTs.Add(defaultLookbackDelta), + ts: lastDatapointTs.Add(defaultLookbackDelta - time.Millisecond), expectSamples: true, }, { name: "outside default lookback delta", - ts: lastDatapointTs.Add(defaultLookbackDelta + time.Millisecond), + ts: lastDatapointTs.Add(defaultLookbackDelta), expectSamples: false, }, { name: "custom engine lookback delta", - ts: lastDatapointTs.Add(10 * time.Minute), + ts: lastDatapointTs.Add(10*time.Minute - time.Millisecond), engineLookback: 10 * time.Minute, expectSamples: true, }, { name: "outside custom engine lookback delta", - ts: lastDatapointTs.Add(10*time.Minute + time.Millisecond), + ts: lastDatapointTs.Add(10 * time.Minute), engineLookback: 10 * time.Minute, expectSamples: false, }, { name: "custom query lookback delta", - ts: lastDatapointTs.Add(20 * time.Minute), + ts: lastDatapointTs.Add(20*time.Minute - time.Millisecond), engineLookback: 10 * time.Minute, queryLookback: 20 * time.Minute, expectSamples: true, }, { name: "outside custom query lookback delta", - ts: lastDatapointTs.Add(20*time.Minute + time.Millisecond), + ts: lastDatapointTs.Add(20 * time.Minute), engineLookback: 10 * time.Minute, queryLookback: 20 * time.Minute, expectSamples: false, }, { name: "negative custom query lookback delta", - ts: lastDatapointTs.Add(20 * time.Minute), + ts: lastDatapointTs.Add(20*time.Minute - time.Millisecond), engineLookback: -10 * time.Minute, queryLookback: 20 * time.Minute, expectSamples: true, @@ -3450,18 +3381,18 @@ histogram {{sum:4 
count:4 buckets:[2 2]}} {{sum:6 count:6 buckets:[3 3]}} {{sum: } } - qry, err := engine.NewRangeQuery(context.Background(), storage, nil, "increase(histogram[60s])", time.Unix(0, 0), time.Unix(0, 0).Add(1*time.Minute), time.Minute) + qry, err := engine.NewRangeQuery(context.Background(), storage, nil, "increase(histogram[90s])", time.Unix(0, 0), time.Unix(0, 0).Add(60*time.Second), time.Minute) require.NoError(t, err) verify(t, qry, []histogram.FloatHistogram{ { - Count: 2, - Sum: 2, // Increase from 4 to 6 is 2. + Count: 3, + Sum: 3, // Increase from 4 to 6 is 2. Interpolation adds 1. PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}}, // Two buckets changed between the first and second histogram. - PositiveBuckets: []float64{1, 1}, // Increase from 2 to 3 is 1 in both buckets. + PositiveBuckets: []float64{1.5, 1.5}, // Increase from 2 to 3 is 1 in both buckets. Interpolation adds 0.5. }, }) - qry, err = engine.NewInstantQuery(context.Background(), storage, nil, "histogram[60s]", time.Unix(0, 0).Add(2*time.Minute)) + qry, err = engine.NewInstantQuery(context.Background(), storage, nil, "histogram[61s]", time.Unix(0, 0).Add(2*time.Minute)) require.NoError(t, err) verify(t, qry, []histogram.FloatHistogram{ { @@ -3478,3 +3409,249 @@ histogram {{sum:4 count:4 buckets:[2 2]}} {{sum:6 count:6 buckets:[3 3]}} {{sum: }, }) } + +func TestRateAnnotations(t *testing.T) { + testCases := map[string]struct { + data string + expr string + expectedWarningAnnotations []string + expectedInfoAnnotations []string + }{ + "info annotation when two samples are selected": { + data: ` + series 1 2 + `, + expr: "rate(series[1m1s])", + expectedWarningAnnotations: []string{}, + expectedInfoAnnotations: []string{ + `PromQL info: metric might not be a counter, name does not end in _total/_sum/_count/_bucket: "series" (1:6)`, + }, + }, + "no info annotations when no samples": { + data: ` + series + `, + expr: "rate(series[1m1s])", + expectedWarningAnnotations: []string{}, + expectedInfoAnnotations: []string{}, + }, + "no info annotations when selecting one sample": { + data: ` + series 1 2 + `, + expr: "rate(series[10s])", + expectedWarningAnnotations: []string{}, + expectedInfoAnnotations: []string{}, + }, + "no info annotations when no samples due to mixed data types": { + data: ` + series{label="a"} 1 {{schema:1 sum:15 count:10 buckets:[1 2 3]}} + `, + expr: "rate(series[1m1s])", + expectedWarningAnnotations: []string{ + `PromQL warning: encountered a mix of histograms and floats for metric name "series" (1:6)`, + }, + expectedInfoAnnotations: []string{}, + }, + "no info annotations when selecting two native histograms": { + data: ` + series{label="a"} {{schema:1 sum:10 count:5 buckets:[1 2 3]}} {{schema:1 sum:15 count:10 buckets:[1 2 3]}} + `, + expr: "rate(series[1m1s])", + expectedWarningAnnotations: []string{}, + expectedInfoAnnotations: []string{}, + }, + } + for name, testCase := range testCases { + t.Run(name, func(t *testing.T) { + store := promqltest.LoadedStorage(t, "load 1m\n"+strings.TrimSpace(testCase.data)) + t.Cleanup(func() { _ = store.Close() }) + + engine := newTestEngine(t) + query, err := engine.NewInstantQuery(context.Background(), store, nil, testCase.expr, timestamp.Time(0).Add(1*time.Minute)) + require.NoError(t, err) + t.Cleanup(query.Close) + + res := query.Exec(context.Background()) + require.NoError(t, res.Err) + + warnings, infos := res.Warnings.AsStrings(testCase.expr, 0, 0) + testutil.RequireEqual(t, testCase.expectedWarningAnnotations, warnings) + testutil.RequireEqual(t, 
testCase.expectedInfoAnnotations, infos) + }) + } +} + +func TestHistogramRateWithFloatStaleness(t *testing.T) { + // Make a chunk with two normal histograms of the same value. + h1 := histogram.Histogram{ + Schema: 2, + Count: 10, + Sum: 100, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{100}, + } + + c1 := chunkenc.NewHistogramChunk() + app, err := c1.Appender() + require.NoError(t, err) + var ( + newc chunkenc.Chunk + recoded bool + ) + + newc, recoded, app, err = app.AppendHistogram(nil, 0, h1.Copy(), false) + require.NoError(t, err) + require.False(t, recoded) + require.Nil(t, newc) + + newc, recoded, _, err = app.AppendHistogram(nil, 10, h1.Copy(), false) + require.NoError(t, err) + require.False(t, recoded) + require.Nil(t, newc) + + // Make a chunk with a single float stale marker. + c2 := chunkenc.NewXORChunk() + app, err = c2.Appender() + require.NoError(t, err) + + app.Append(20, math.Float64frombits(value.StaleNaN)) + + // Make a chunk with two normal histograms that have zero value. + h2 := histogram.Histogram{ + Schema: 2, + } + + c3 := chunkenc.NewHistogramChunk() + app, err = c3.Appender() + require.NoError(t, err) + + newc, recoded, app, err = app.AppendHistogram(nil, 30, h2.Copy(), false) + require.NoError(t, err) + require.False(t, recoded) + require.Nil(t, newc) + + newc, recoded, _, err = app.AppendHistogram(nil, 40, h2.Copy(), false) + require.NoError(t, err) + require.False(t, recoded) + require.Nil(t, newc) + + querier := storage.MockQuerier{ + SelectMockFunction: func(_ bool, _ *storage.SelectHints, _ ...*labels.Matcher) storage.SeriesSet { + return &singleSeriesSet{ + series: mockSeries{chunks: []chunkenc.Chunk{c1, c2, c3}, labelSet: []string{"__name__", "foo"}}, + } + }, + } + + queriable := storage.MockQueryable{MockQuerier: &querier} + + engine := promqltest.NewTestEngine(t, false, 0, promqltest.DefaultMaxSamplesPerQuery) + + q, err := engine.NewInstantQuery(context.Background(), &queriable, nil, "rate(foo[40s])", timestamp.Time(45)) + require.NoError(t, err) + defer q.Close() + + res := q.Exec(context.Background()) + require.NoError(t, res.Err) + + vec, err := res.Vector() + require.NoError(t, err) + + // Single sample result. + require.Len(t, vec, 1) + // The result is a histogram. + require.NotNil(t, vec[0].H) + // The result should be zero as the histogram has not increased, so the rate is zero. + require.Equal(t, 0.0, vec[0].H.Count) + require.Equal(t, 0.0, vec[0].H.Sum) +} + +type singleSeriesSet struct { + series storage.Series + consumed bool +} + +func (s *singleSeriesSet) Next() bool { c := s.consumed; s.consumed = true; return !c } +func (s singleSeriesSet) At() storage.Series { return s.series } +func (s singleSeriesSet) Err() error { return nil } +func (s singleSeriesSet) Warnings() annotations.Annotations { return nil } + +type mockSeries struct { + chunks []chunkenc.Chunk + labelSet []string +} + +func (s mockSeries) Labels() labels.Labels { + return labels.FromStrings(s.labelSet...) 
+} + +func (s mockSeries) Iterator(it chunkenc.Iterator) chunkenc.Iterator { + iterables := []chunkenc.Iterator{} + for _, c := range s.chunks { + iterables = append(iterables, c.Iterator(nil)) + } + return storage.ChainSampleIteratorFromIterators(it, iterables) +} + +func TestEvaluationWithDelayedNameRemovalDisabled(t *testing.T) { + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + EnableAtModifier: true, + MaxSamples: 10000, + Timeout: 10 * time.Second, + EnableDelayedNameRemoval: false, + } + engine := promqltest.NewTestEngineWithOpts(t, opts) + + promqltest.RunTest(t, ` +load 5m + metric_total{env="1"} 0 60 120 + another_metric{env="1"} 60 120 180 + +# Does not drop __name__ for vector selector +eval instant at 10m metric_total{env="1"} + metric_total{env="1"} 120 + +# Drops __name__ for unary operators +eval instant at 10m -metric_total + {env="1"} -120 + +# Drops __name__ for binary operators +eval instant at 10m metric_total + another_metric + {env="1"} 300 + +# Does not drop __name__ for binary comparison operators +eval instant at 10m metric_total <= another_metric + metric_total{env="1"} 120 + +# Drops __name__ for binary comparison operators with "bool" modifier +eval instant at 10m metric_total <= bool another_metric + {env="1"} 1 + +# Drops __name__ for vector-scalar operations +eval instant at 10m metric_total * 2 + {env="1"} 240 + +# Drops __name__ for instant-vector functions +eval instant at 10m clamp(metric_total, 0, 100) + {env="1"} 100 + +# Drops __name__ for round function +eval instant at 10m round(metric_total) + {env="1"} 120 + +# Drops __name__ for range-vector functions +eval instant at 10m rate(metric_total{env="1"}[10m]) + {env="1"} 0.2 + +# Does not drop __name__ for last_over_time function +eval instant at 10m last_over_time(metric_total{env="1"}[10m]) + metric_total{env="1"} 120 + +# Drops name for other _over_time functions +eval instant at 10m max_over_time(metric_total{env="1"}[10m]) + {env="1"} 120 +`, engine) +} diff --git a/promql/functions.go b/promql/functions.go index 9fa7fbe190..1c2d1ca823 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -14,6 +14,7 @@ package promql import ( + "context" "errors" "fmt" "math" @@ -130,10 +131,18 @@ func extrapolatedRate(vals []parser.Value, args parser.Expressions, enh *EvalNod sampledInterval := float64(lastT-firstT) / 1000 averageDurationBetweenSamples := sampledInterval / float64(numSamplesMinusOne) - // If the first/last samples are close to the boundaries of the range, - // extrapolate the result. This is as we expect that another sample - // will exist given the spacing between samples we've seen thus far, - // with an allowance for noise. + // If samples are close enough to the (lower or upper) boundary of the + // range, we extrapolate the rate all the way to the boundary in + // question. "Close enough" is defined as "up to 10% more than the + // average duration between samples within the range", see + // extrapolationThreshold below. Essentially, we are assuming a more or + // less regular spacing between samples, and if we don't see a sample + // where we would expect one, we assume the series does not cover the + // whole range, but starts and/or ends within the range. We still + // extrapolate the rate in this case, but not all the way to the + // boundary, but only by half of the average duration between samples + // (which is our guess for where the series actually starts or ends). 
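+	// For example, with samples spaced roughly 10s apart,
+	// averageDurationBetweenSamples is 10s and the threshold below works
+	// out to 11s: a first sample within 11s of the range's start lets us
+	// extrapolate all the way to the start, while a first sample arriving
+	// later than that only extrapolates by 5s (half the average spacing),
+	// assuming the series actually begins around there. The same logic
+	// applies symmetrically at the end of the range.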
+ extrapolationThreshold := averageDurationBetweenSamples * 1.1 extrapolateToInterval := sampledInterval @@ -341,7 +350,7 @@ func calcTrendValue(i int, tf, s0, s1, b float64) float64 { // data. A lower smoothing factor increases the influence of historical data. The trend factor (0 < tf < 1) affects // how trends in historical data will affect the current data. A higher trend factor increases the influence. // of trends. Algorithm taken from https://en.wikipedia.org/wiki/Exponential_smoothing titled: "Double exponential smoothing". -func funcHoltWinters(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { +func funcDoubleExponentialSmoothing(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { samples := vals[0].(Matrix)[0] // The smoothing factor argument. @@ -406,22 +415,12 @@ func funcSortDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel // === sort_by_label(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - // First, sort by the full label set. This ensures a consistent ordering in case sorting by the - // labels provided as arguments is not conclusive. + lbls := stringSliceFromArgs(args[1:]) slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - return labels.Compare(a.Metric, b.Metric) - }) - - labels := stringSliceFromArgs(args[1:]) - // Next, sort by the labels provided as arguments. - slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - // Iterate over each given label. - for _, label := range labels { + for _, label := range lbls { lv1 := a.Metric.Get(label) lv2 := b.Metric.Get(label) - // If we encounter multiple samples with the same label values, the sorting which was - // performed in the first step will act as a "tie breaker". if lv1 == lv2 { continue } @@ -433,7 +432,8 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode return +1 } - return 0 + // If all labels provided as arguments were equal, sort by the full label set. This ensures a consistent ordering. + return labels.Compare(a.Metric, b.Metric) }) return vals[0].(Vector), nil @@ -441,22 +441,12 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode // === sort_by_label_desc(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) === func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - // First, sort by the full label set. This ensures a consistent ordering in case sorting by the - // labels provided as arguments is not conclusive. + lbls := stringSliceFromArgs(args[1:]) slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - return labels.Compare(b.Metric, a.Metric) - }) - - labels := stringSliceFromArgs(args[1:]) - // Next, sort by the labels provided as arguments. - slices.SortFunc(vals[0].(Vector), func(a, b Sample) int { - // Iterate over each given label. - for _, label := range labels { + for _, label := range lbls { lv1 := a.Metric.Get(label) lv2 := b.Metric.Get(label) - // If we encounter multiple samples with the same label values, the sorting which was - // performed in the first step will act as a "tie breaker". 
if lv1 == lv2 { continue } @@ -468,21 +458,22 @@ func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *Eval return -1 } - return 0 + // If all labels provided as arguments were equal, sort by the full label set. This ensures a consistent ordering. + return -labels.Compare(a.Metric, b.Metric) }) return vals[0].(Vector), nil } -// === clamp(Vector parser.ValueTypeVector, min, max Scalar) (Vector, Annotations) === -func funcClamp(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - vec := vals[0].(Vector) - minVal := vals[1].(Vector)[0].F - maxVal := vals[2].(Vector)[0].F +func clamp(vec Vector, minVal, maxVal float64, enh *EvalNodeHelper) (Vector, annotations.Annotations) { if maxVal < minVal { return enh.Out, nil } for _, el := range vec { + if el.H != nil { + // Process only float samples. + continue + } if !enh.enableDelayedNameRemoval { el.Metric = el.Metric.DropMetricName() } @@ -495,38 +486,26 @@ func funcClamp(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper return enh.Out, nil } +// === clamp(Vector parser.ValueTypeVector, min, max Scalar) (Vector, Annotations) === +func funcClamp(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { + vec := vals[0].(Vector) + minVal := vals[1].(Vector)[0].F + maxVal := vals[2].(Vector)[0].F + return clamp(vec, minVal, maxVal, enh) +} + // === clamp_max(Vector parser.ValueTypeVector, max Scalar) (Vector, Annotations) === func funcClampMax(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { vec := vals[0].(Vector) maxVal := vals[1].(Vector)[0].F - for _, el := range vec { - if !enh.enableDelayedNameRemoval { - el.Metric = el.Metric.DropMetricName() - } - enh.Out = append(enh.Out, Sample{ - Metric: el.Metric, - F: math.Min(maxVal, el.F), - DropName: true, - }) - } - return enh.Out, nil + return clamp(vec, math.Inf(-1), maxVal, enh) } // === clamp_min(Vector parser.ValueTypeVector, min Scalar) (Vector, Annotations) === func funcClampMin(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { vec := vals[0].(Vector) minVal := vals[1].(Vector)[0].F - for _, el := range vec { - if !enh.enableDelayedNameRemoval { - el.Metric = el.Metric.DropMetricName() - } - enh.Out = append(enh.Out, Sample{ - Metric: el.Metric, - F: math.Max(minVal, el.F), - DropName: true, - }) - } - return enh.Out, nil + return clamp(vec, minVal, math.Inf(+1), enh) } // === round(Vector parser.ValueTypeVector, toNearest=1 Scalar) (Vector, Annotations) === @@ -542,7 +521,14 @@ func funcRound(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper toNearestInverse := 1.0 / toNearest for _, el := range vec { + if el.H != nil { + // Process only float samples. 
+ continue + } f := math.Floor(el.F*toNearestInverse+0.5) / toNearestInverse + if !enh.enableDelayedNameRemoval { + el.Metric = el.Metric.DropMetricName() + } enh.Out = append(enh.Out, Sample{ Metric: el.Metric, F: f, @@ -1314,7 +1300,7 @@ func funcHistogramFraction(vals []parser.Value, args parser.Expressions, enh *Ev } enh.Out = append(enh.Out, Sample{ Metric: sample.Metric, - F: histogramFraction(lower, upper, sample.H), + F: HistogramFraction(lower, upper, sample.H), DropName: true, }) } @@ -1366,7 +1352,7 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev mb = &metricWithBuckets{sample.Metric, nil} enh.signatureToMetricWithBuckets[string(enh.lblBuf)] = mb } - mb.buckets = append(mb.buckets, bucket{upperBound, sample.F}) + mb.buckets = append(mb.buckets, Bucket{upperBound, sample.F}) } // Now deal with the histograms. @@ -1388,14 +1374,14 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev } enh.Out = append(enh.Out, Sample{ Metric: sample.Metric, - F: histogramQuantile(q, sample.H), + F: HistogramQuantile(q, sample.H), DropName: true, }) } for _, mb := range enh.signatureToMetricWithBuckets { if len(mb.buckets) > 0 { - res, forcedMonotonicity, _ := bucketQuantile(q, mb.buckets) + res, forcedMonotonicity, _ := BucketQuantile(q, mb.buckets) enh.Out = append(enh.Out, Sample{ Metric: mb.metric, F: res, @@ -1443,27 +1429,60 @@ func funcResets(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelpe // === changes(Matrix parser.ValueTypeMatrix) (Vector, Annotations) === func funcChanges(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { floats := vals[0].(Matrix)[0].Floats + histograms := vals[0].(Matrix)[0].Histograms changes := 0 - - if len(floats) == 0 { - // TODO(beorn7): Only histogram values, still need to add support. + if len(floats) == 0 && len(histograms) == 0 { return enh.Out, nil } - prev := floats[0].F - for _, sample := range floats[1:] { - current := sample.F - if current != prev && !(math.IsNaN(current) && math.IsNaN(prev)) { - changes++ + var prevSample, curSample Sample + for iFloat, iHistogram := 0, 0; iFloat < len(floats) || iHistogram < len(histograms); { + switch { + // Process a float sample if no histogram sample remains or its timestamp is earlier. + // Process a histogram sample if no float sample remains or its timestamp is earlier. + case iHistogram >= len(histograms) || iFloat < len(floats) && floats[iFloat].T < histograms[iHistogram].T: + { + curSample.F = floats[iFloat].F + curSample.H = nil + iFloat++ + } + case iFloat >= len(floats) || iHistogram < len(histograms) && floats[iFloat].T > histograms[iHistogram].T: + { + curSample.H = histograms[iHistogram].H + iHistogram++ + } } - prev = current + // Skip the comparison for the first sample, just initialize prevSample. + if iFloat+iHistogram == 1 { + prevSample = curSample + continue + } + switch { + case prevSample.H == nil && curSample.H == nil: + { + if curSample.F != prevSample.F && !(math.IsNaN(curSample.F) && math.IsNaN(prevSample.F)) { + changes++ + } + } + case prevSample.H != nil && curSample.H == nil, prevSample.H == nil && curSample.H != nil: + { + changes++ + } + case prevSample.H != nil && curSample.H != nil: + { + if !curSample.H.Equals(prevSample.H) { + changes++ + } + } + } + prevSample = curSample } return append(enh.Out, Sample{F: float64(changes)}), nil } // label_replace function operates only on series; does not look at timestamps or values. 
-func (ev *evaluator) evalLabelReplace(args parser.Expressions) (parser.Value, annotations.Annotations) { +func (ev *evaluator) evalLabelReplace(ctx context.Context, args parser.Expressions) (parser.Value, annotations.Annotations) { var ( dst = stringFromArg(args[1]) repl = stringFromArg(args[2]) @@ -1471,7 +1490,7 @@ func (ev *evaluator) evalLabelReplace(args parser.Expressions) (parser.Value, an regexStr = stringFromArg(args[4]) ) - regex, err := regexp.Compile("^(?:" + regexStr + ")$") + regex, err := regexp.Compile("^(?s:" + regexStr + ")$") if err != nil { panic(fmt.Errorf("invalid regular expression in label_replace(): %s", regexStr)) } @@ -1479,7 +1498,7 @@ func (ev *evaluator) evalLabelReplace(args parser.Expressions) (parser.Value, an panic(fmt.Errorf("invalid destination label name in label_replace(): %s", dst)) } - val, ws := ev.eval(args[0]) + val, ws := ev.eval(ctx, args[0]) matrix := val.(Matrix) lb := labels.NewBuilder(labels.EmptyLabels()) @@ -1505,11 +1524,6 @@ func (ev *evaluator) evalLabelReplace(args parser.Expressions) (parser.Value, an return matrix, ws } -// === label_replace(Vector parser.ValueTypeVector, dst_label, replacement, src_labelname, regex parser.ValueTypeString) (Vector, Annotations) === -func funcLabelReplace(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - panic("funcLabelReplace wrong implementation called") -} - // === Vector(s Scalar) (Vector, Annotations) === func funcVector(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { return append(enh.Out, @@ -1520,7 +1534,7 @@ func funcVector(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelpe } // label_join function operates only on series; does not look at timestamps or values. -func (ev *evaluator) evalLabelJoin(args parser.Expressions) (parser.Value, annotations.Annotations) { +func (ev *evaluator) evalLabelJoin(ctx context.Context, args parser.Expressions) (parser.Value, annotations.Annotations) { var ( dst = stringFromArg(args[1]) sep = stringFromArg(args[2]) @@ -1537,7 +1551,7 @@ func (ev *evaluator) evalLabelJoin(args parser.Expressions) (parser.Value, annot panic(fmt.Errorf("invalid destination label name in label_join(): %s", dst)) } - val, ws := ev.eval(args[0]) + val, ws := ev.eval(ctx, args[0]) matrix := val.(Matrix) srcVals := make([]string, len(srcLabels)) lb := labels.NewBuilder(labels.EmptyLabels()) @@ -1561,11 +1575,6 @@ func (ev *evaluator) evalLabelJoin(args parser.Expressions) (parser.Value, annot return matrix, ws } -// === label_join(vector model.ValVector, dest_labelname, separator, src_labelname...) (Vector, Annotations) === -func funcLabelJoin(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) { - panic("funcLabelReplace wrong implementation called") -} - // Common code for date related functions. func dateWrapper(vals []parser.Value, enh *EvalNodeHelper, f func(time.Time) float64) Vector { if len(vals) == 0 { @@ -1577,6 +1586,10 @@ func dateWrapper(vals []parser.Value, enh *EvalNodeHelper, f func(time.Time) flo } for _, el := range vals[0].(Vector) { + if el.H != nil { + // Ignore histogram sample. + continue + } t := time.Unix(int64(el.F), 0).UTC() if !enh.enableDelayedNameRemoval { el.Metric = el.Metric.DropMetricName() @@ -1648,82 +1661,83 @@ func funcYear(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) // FunctionCalls is a list of all functions supported by PromQL, including their types. 
var FunctionCalls = map[string]FunctionCall{ - "abs": funcAbs, - "absent": funcAbsent, - "absent_over_time": funcAbsentOverTime, - "acos": funcAcos, - "acosh": funcAcosh, - "asin": funcAsin, - "asinh": funcAsinh, - "atan": funcAtan, - "atanh": funcAtanh, - "avg_over_time": funcAvgOverTime, - "ceil": funcCeil, - "changes": funcChanges, - "clamp": funcClamp, - "clamp_max": funcClampMax, - "clamp_min": funcClampMin, - "cos": funcCos, - "cosh": funcCosh, - "count_over_time": funcCountOverTime, - "days_in_month": funcDaysInMonth, - "day_of_month": funcDayOfMonth, - "day_of_week": funcDayOfWeek, - "day_of_year": funcDayOfYear, - "deg": funcDeg, - "delta": funcDelta, - "deriv": funcDeriv, - "exp": funcExp, - "floor": funcFloor, - "histogram_avg": funcHistogramAvg, - "histogram_count": funcHistogramCount, - "histogram_fraction": funcHistogramFraction, - "histogram_quantile": funcHistogramQuantile, - "histogram_sum": funcHistogramSum, - "histogram_stddev": funcHistogramStdDev, - "histogram_stdvar": funcHistogramStdVar, - "holt_winters": funcHoltWinters, - "hour": funcHour, - "idelta": funcIdelta, - "increase": funcIncrease, - "irate": funcIrate, - "label_replace": funcLabelReplace, - "label_join": funcLabelJoin, - "ln": funcLn, - "log10": funcLog10, - "log2": funcLog2, - "last_over_time": funcLastOverTime, - "mad_over_time": funcMadOverTime, - "max_over_time": funcMaxOverTime, - "min_over_time": funcMinOverTime, - "minute": funcMinute, - "month": funcMonth, - "pi": funcPi, - "predict_linear": funcPredictLinear, - "present_over_time": funcPresentOverTime, - "quantile_over_time": funcQuantileOverTime, - "rad": funcRad, - "rate": funcRate, - "resets": funcResets, - "round": funcRound, - "scalar": funcScalar, - "sgn": funcSgn, - "sin": funcSin, - "sinh": funcSinh, - "sort": funcSort, - "sort_desc": funcSortDesc, - "sort_by_label": funcSortByLabel, - "sort_by_label_desc": funcSortByLabelDesc, - "sqrt": funcSqrt, - "stddev_over_time": funcStddevOverTime, - "stdvar_over_time": funcStdvarOverTime, - "sum_over_time": funcSumOverTime, - "tan": funcTan, - "tanh": funcTanh, - "time": funcTime, - "timestamp": funcTimestamp, - "vector": funcVector, - "year": funcYear, + "abs": funcAbs, + "absent": funcAbsent, + "absent_over_time": funcAbsentOverTime, + "acos": funcAcos, + "acosh": funcAcosh, + "asin": funcAsin, + "asinh": funcAsinh, + "atan": funcAtan, + "atanh": funcAtanh, + "avg_over_time": funcAvgOverTime, + "ceil": funcCeil, + "changes": funcChanges, + "clamp": funcClamp, + "clamp_max": funcClampMax, + "clamp_min": funcClampMin, + "cos": funcCos, + "cosh": funcCosh, + "count_over_time": funcCountOverTime, + "days_in_month": funcDaysInMonth, + "day_of_month": funcDayOfMonth, + "day_of_week": funcDayOfWeek, + "day_of_year": funcDayOfYear, + "deg": funcDeg, + "delta": funcDelta, + "deriv": funcDeriv, + "exp": funcExp, + "floor": funcFloor, + "histogram_avg": funcHistogramAvg, + "histogram_count": funcHistogramCount, + "histogram_fraction": funcHistogramFraction, + "histogram_quantile": funcHistogramQuantile, + "histogram_sum": funcHistogramSum, + "histogram_stddev": funcHistogramStdDev, + "histogram_stdvar": funcHistogramStdVar, + "double_exponential_smoothing": funcDoubleExponentialSmoothing, + "hour": funcHour, + "idelta": funcIdelta, + "increase": funcIncrease, + "info": nil, + "irate": funcIrate, + "label_replace": nil, // evalLabelReplace not called via this map. + "label_join": nil, // evalLabelJoin not called via this map. 
+ "ln": funcLn, + "log10": funcLog10, + "log2": funcLog2, + "last_over_time": funcLastOverTime, + "mad_over_time": funcMadOverTime, + "max_over_time": funcMaxOverTime, + "min_over_time": funcMinOverTime, + "minute": funcMinute, + "month": funcMonth, + "pi": funcPi, + "predict_linear": funcPredictLinear, + "present_over_time": funcPresentOverTime, + "quantile_over_time": funcQuantileOverTime, + "rad": funcRad, + "rate": funcRate, + "resets": funcResets, + "round": funcRound, + "scalar": funcScalar, + "sgn": funcSgn, + "sin": funcSin, + "sinh": funcSinh, + "sort": funcSort, + "sort_desc": funcSortDesc, + "sort_by_label": funcSortByLabel, + "sort_by_label_desc": funcSortByLabelDesc, + "sqrt": funcSqrt, + "stddev_over_time": funcStddevOverTime, + "stdvar_over_time": funcStdvarOverTime, + "sum_over_time": funcSumOverTime, + "tan": funcTan, + "tanh": funcTanh, + "time": funcTime, + "timestamp": funcTimestamp, + "vector": funcVector, + "year": funcYear, } // AtModifierUnsafeFunctions are the functions whose result diff --git a/promql/fuzz.go b/promql/fuzz.go index 5f08e6a72c..759055fb0d 100644 --- a/promql/fuzz.go +++ b/promql/fuzz.go @@ -61,17 +61,13 @@ const ( var symbolTable = labels.NewSymbolTable() func fuzzParseMetricWithContentType(in []byte, contentType string) int { - p, warning := textparse.New(in, contentType, false, symbolTable) - if warning != nil { + p, warning := textparse.New(in, contentType, "", false, false, symbolTable) + if p == nil || warning != nil { // An invalid content type is being passed, which should not happen // in this context. panic(warning) } - if contentType == "application/openmetrics-text" { - p = textparse.NewOpenMetricsParser(in, symbolTable) - } - var err error for { _, err = p.Next() @@ -95,7 +91,7 @@ func fuzzParseMetricWithContentType(in []byte, contentType string) int { // Note that this is not the parser for the text-based exposition-format; that // lives in github.com/prometheus/client_golang/text. 
func FuzzParseMetric(in []byte) int { - return fuzzParseMetricWithContentType(in, "") + return fuzzParseMetricWithContentType(in, "text/plain") } func FuzzParseOpenMetric(in []byte) int { diff --git a/promql/fuzz_test.go b/promql/fuzz_test.go index 1f0bbaa662..4a26798ded 100644 --- a/promql/fuzz_test.go +++ b/promql/fuzz_test.go @@ -29,7 +29,7 @@ func TestfuzzParseMetricWithContentTypePanicOnInvalid(t *testing.T) { } else { err, ok := p.(error) require.True(t, ok) - require.Contains(t, err.Error(), "duplicate parameter name") + require.ErrorContains(t, err, "duplicate parameter name") } }() diff --git a/promql/histogram_stats_iterator_test.go b/promql/histogram_stats_iterator_test.go index 7a2953d3e2..ea0e8b469f 100644 --- a/promql/histogram_stats_iterator_test.go +++ b/promql/histogram_stats_iterator_test.go @@ -14,7 +14,6 @@ package promql import ( - "fmt" "math" "testing" @@ -108,7 +107,7 @@ func TestHistogramStatsDecoding(t *testing.T) { decodedStats = append(decodedStats, h) } for i := 0; i < len(tc.histograms); i++ { - require.Equal(t, tc.expectedHints[i], decodedStats[i].CounterResetHint, fmt.Sprintf("mismatch in counter reset hint for histogram %d", i)) + require.Equalf(t, tc.expectedHints[i], decodedStats[i].CounterResetHint, "mismatch in counter reset hint for histogram %d", i) h := tc.histograms[i] if value.IsStaleNaN(h.Sum) { require.True(t, value.IsStaleNaN(decodedStats[i].Sum)) diff --git a/promql/info.go b/promql/info.go new file mode 100644 index 0000000000..3fe9a2ce99 --- /dev/null +++ b/promql/info.go @@ -0,0 +1,454 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql + +import ( + "context" + "errors" + "fmt" + "slices" + "strings" + + "github.com/grafana/regexp" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql/parser" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/annotations" +) + +const targetInfo = "target_info" + +// identifyingLabels are the labels we consider as identifying for info metrics. +// Currently hard coded, so we don't need knowledge of individual info metrics. +var identifyingLabels = []string{"instance", "job"} + +// evalInfo implements the info PromQL function. +func (ev *evaluator) evalInfo(ctx context.Context, args parser.Expressions) (parser.Value, annotations.Annotations) { + val, annots := ev.eval(ctx, args[0]) + mat := val.(Matrix) + // Map from data label name to matchers. + dataLabelMatchers := map[string][]*labels.Matcher{} + var infoNameMatchers []*labels.Matcher + if len(args) > 1 { + // TODO: Introduce a dedicated LabelSelector type. 
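+		// Note: the second argument is parsed as a vector selector purely
+		// to reuse the label matcher syntax. Only its matchers are used:
+		// __name__ matchers select which info metrics to consider, while
+		// the rest constrain the data labels. The selector itself is never
+		// evaluated as a regular vector selector.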
+		labelSelector := args[1].(*parser.VectorSelector)
+		for _, m := range labelSelector.LabelMatchers {
+			dataLabelMatchers[m.Name] = append(dataLabelMatchers[m.Name], m)
+			if m.Name == labels.MetricName {
+				infoNameMatchers = append(infoNameMatchers, m)
+			}
+		}
+	} else {
+		infoNameMatchers = []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, targetInfo)}
+	}
+
+	// Don't try to enrich info series.
+	ignoreSeries := map[int]struct{}{}
+loop:
+	for i, s := range mat {
+		name := s.Metric.Get(labels.MetricName)
+		for _, m := range infoNameMatchers {
+			if m.Matches(name) {
+				ignoreSeries[i] = struct{}{}
+				continue loop
+			}
+		}
+	}
+
+	selectHints := ev.infoSelectHints(args[0])
+	infoSeries, ws, err := ev.fetchInfoSeries(ctx, mat, ignoreSeries, dataLabelMatchers, selectHints)
+	if err != nil {
+		ev.error(err)
+	}
+	annots.Merge(ws)
+
+	res, ws := ev.combineWithInfoSeries(ctx, mat, infoSeries, ignoreSeries, dataLabelMatchers)
+	annots.Merge(ws)
+	return res, annots
+}
+
+// infoSelectHints calculates the storage.SelectHints for selecting info series, given expr (first argument to info call).
+func (ev *evaluator) infoSelectHints(expr parser.Expr) storage.SelectHints {
+	var nodeTimestamp *int64
+	var offset int64
+	parser.Inspect(expr, func(node parser.Node, path []parser.Node) error {
+		switch n := node.(type) {
+		case *parser.VectorSelector:
+			if n.Timestamp != nil {
+				nodeTimestamp = n.Timestamp
+			}
+			offset = durationMilliseconds(n.OriginalOffset)
+			return errors.New("end traversal")
+		default:
+			return nil
+		}
+	})
+
+	start := ev.startTimestamp
+	end := ev.endTimestamp
+	if nodeTimestamp != nil {
+		// The timestamp on the selector overrides everything.
+		start = *nodeTimestamp
+		end = *nodeTimestamp
+	}
+	// Reduce the start by one fewer ms than the lookback delta
+	// because we want to exclude samples that are precisely the
+	// lookback delta before the eval time.
+	start -= durationMilliseconds(ev.lookbackDelta) - 1
+	start -= offset
+	end -= offset
+
+	return storage.SelectHints{
+		Start: start,
+		End:   end,
+		Step:  ev.interval,
+		Func:  "info",
+	}
+}
+
+// fetchInfoSeries fetches info series given matching identifying labels in mat.
+// Series in ignoreSeries are not fetched.
+// dataLabelMatchers may be mutated.
+func (ev *evaluator) fetchInfoSeries(ctx context.Context, mat Matrix, ignoreSeries map[int]struct{}, dataLabelMatchers map[string][]*labels.Matcher, selectHints storage.SelectHints) (Matrix, annotations.Annotations, error) {
+	// A map of values for all identifying labels we are interested in.
+	idLblValues := map[string]map[string]struct{}{}
+	for i, s := range mat {
+		if _, exists := ignoreSeries[i]; exists {
+			continue
+		}
+
+		// Register relevant values per identifying label for this series.
+		for _, l := range identifyingLabels {
+			val := s.Metric.Get(l)
+			if val == "" {
+				continue
+			}
+
+			if idLblValues[l] == nil {
+				idLblValues[l] = map[string]struct{}{}
+			}
+			idLblValues[l][val] = struct{}{}
+		}
+	}
+	if len(idLblValues) == 0 {
+		return nil, nil, nil
+	}
+
+	// Generate regexps for every interesting value per identifying label.
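+	// For example, if the base series carry instance values "host-1" and
+	// "host-2", the loop below produces the equivalent of the matcher
+	// instance=~"host-1|host-2" (values escaped via regexp.QuoteMeta), so
+	// that only info series belonging to those instances are selected.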
+ var sb strings.Builder + idLblRegexps := make(map[string]string, len(idLblValues)) + for name, vals := range idLblValues { + sb.Reset() + i := 0 + for v := range vals { + if i > 0 { + sb.WriteRune('|') + } + sb.WriteString(regexp.QuoteMeta(v)) + i++ + } + idLblRegexps[name] = sb.String() + } + + var infoLabelMatchers []*labels.Matcher + for name, re := range idLblRegexps { + infoLabelMatchers = append(infoLabelMatchers, labels.MustNewMatcher(labels.MatchRegexp, name, re)) + } + var nameMatcher *labels.Matcher + for name, ms := range dataLabelMatchers { + for i, m := range ms { + if m.Name == labels.MetricName { + nameMatcher = m + ms = slices.Delete(ms, i, i+1) + } + infoLabelMatchers = append(infoLabelMatchers, m) + } + if len(ms) > 0 { + dataLabelMatchers[name] = ms + } else { + delete(dataLabelMatchers, name) + } + } + if nameMatcher == nil { + // Default to using the target_info metric. + infoLabelMatchers = append([]*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, targetInfo)}, infoLabelMatchers...) + } + + infoIt := ev.querier.Select(ctx, false, &selectHints, infoLabelMatchers...) + infoSeries, ws, err := expandSeriesSet(ctx, infoIt) + if err != nil { + return nil, ws, err + } + + infoMat := ev.evalSeries(ctx, infoSeries, 0, true) + return infoMat, ws, nil +} + +// combineWithInfoSeries combines mat with select data labels from infoMat. +func (ev *evaluator) combineWithInfoSeries(ctx context.Context, mat, infoMat Matrix, ignoreSeries map[int]struct{}, dataLabelMatchers map[string][]*labels.Matcher) (Matrix, annotations.Annotations) { + buf := make([]byte, 0, 1024) + lb := labels.NewScratchBuilder(0) + sigFunction := func(name string) func(labels.Labels) string { + return func(lset labels.Labels) string { + lb.Reset() + lb.Add(labels.MetricName, name) + lset.MatchLabels(true, identifyingLabels...).Range(func(l labels.Label) { + lb.Add(l.Name, l.Value) + }) + lb.Sort() + return string(lb.Labels().Bytes(buf)) + } + } + + infoMetrics := map[string]struct{}{} + for _, is := range infoMat { + lblMap := is.Metric.Map() + infoMetrics[lblMap[labels.MetricName]] = struct{}{} + } + sigfs := make(map[string]func(labels.Labels) string, len(infoMetrics)) + for name := range infoMetrics { + sigfs[name] = sigFunction(name) + } + + // Keep a copy of the original point slices so they can be returned to the pool. + origMatrices := []Matrix{ + make(Matrix, len(mat)), + make(Matrix, len(infoMat)), + } + copy(origMatrices[0], mat) + copy(origMatrices[1], infoMat) + + numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1 + originalNumSamples := ev.currentSamples + + // Create an output vector that is as big as the input matrix with + // the most time series. + biggestLen := max(len(mat), len(infoMat)) + baseVector := make(Vector, 0, len(mat)) + infoVector := make(Vector, 0, len(infoMat)) + enh := &EvalNodeHelper{ + Out: make(Vector, 0, biggestLen), + } + type seriesAndTimestamp struct { + Series + ts int64 + } + seriess := make(map[uint64]seriesAndTimestamp, biggestLen) // Output series by series hash. + tempNumSamples := ev.currentSamples + + // For every base series, compute signature per info metric. 
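+	// A signature consists of the info metric name plus the base series'
+	// identifying labels. For example, a base series with instance="a" and
+	// job="1" is assigned, for target_info, the signature of
+	// {__name__="target_info", instance="a", job="1"}; an info series with
+	// the same identifying labels yields the same signature, which is how
+	// base and info series are matched up below.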
+ baseSigs := make([]map[string]string, 0, len(mat)) + for _, s := range mat { + sigs := make(map[string]string, len(infoMetrics)) + for infoName := range infoMetrics { + sigs[infoName] = sigfs[infoName](s.Metric) + } + baseSigs = append(baseSigs, sigs) + } + + infoSigs := make([]string, 0, len(infoMat)) + for _, s := range infoMat { + name := s.Metric.Map()[labels.MetricName] + infoSigs = append(infoSigs, sigfs[name](s.Metric)) + } + + var warnings annotations.Annotations + for ts := ev.startTimestamp; ts <= ev.endTimestamp; ts += ev.interval { + if err := contextDone(ctx, "expression evaluation"); err != nil { + ev.error(err) + } + + // Reset number of samples in memory after each timestamp. + ev.currentSamples = tempNumSamples + // Gather input vectors for this timestamp. + baseVector, _ = ev.gatherVector(ts, mat, baseVector, nil, nil) + infoVector, _ = ev.gatherVector(ts, infoMat, infoVector, nil, nil) + + enh.Ts = ts + result, err := ev.combineWithInfoVector(baseVector, infoVector, ignoreSeries, baseSigs, infoSigs, enh, dataLabelMatchers) + if err != nil { + ev.error(err) + } + enh.Out = result[:0] // Reuse result vector. + + vecNumSamples := result.TotalSamples() + ev.currentSamples += vecNumSamples + // When we reset currentSamples to tempNumSamples during the next iteration of the loop it also + // needs to include the samples from the result here, as they're still in memory. + tempNumSamples += vecNumSamples + ev.samplesStats.UpdatePeak(ev.currentSamples) + if ev.currentSamples > ev.maxSamples { + ev.error(ErrTooManySamples(env)) + } + + // Add samples in result vector to output series. + for _, sample := range result { + h := sample.Metric.Hash() + ss, exists := seriess[h] + if exists { + if ss.ts == ts { // If we've seen this output series before at this timestamp, it's a duplicate. + ev.errorf("vector cannot contain metrics with the same labelset") + } + ss.ts = ts + } else { + ss = seriesAndTimestamp{Series{Metric: sample.Metric}, ts} + } + addToSeries(&ss.Series, enh.Ts, sample.F, sample.H, numSteps) + seriess[h] = ss + } + } + + // Reuse the original point slices. + for _, m := range origMatrices { + for _, s := range m { + putFPointSlice(s.Floats) + putHPointSlice(s.Histograms) + } + } + // Assemble the output matrix. By the time we get here we know we don't have too many samples. + numSamples := 0 + output := make(Matrix, 0, len(seriess)) + for _, ss := range seriess { + numSamples += len(ss.Floats) + totalHPointSize(ss.Histograms) + output = append(output, ss.Series) + } + ev.currentSamples = originalNumSamples + numSamples + ev.samplesStats.UpdatePeak(ev.currentSamples) + return output, warnings +} + +// combineWithInfoVector combines base and info Vectors. +// Base series in ignoreSeries are not combined. +func (ev *evaluator) combineWithInfoVector(base, info Vector, ignoreSeries map[int]struct{}, baseSigs []map[string]string, infoSigs []string, enh *EvalNodeHelper, dataLabelMatchers map[string][]*labels.Matcher) (Vector, error) { + if len(base) == 0 { + return nil, nil // Short-circuit: nothing is going to match. + } + + // All samples from the info Vector hashed by the matching label/values. + if enh.rightSigs == nil { + enh.rightSigs = make(map[string]Sample, len(enh.Out)) + } else { + clear(enh.rightSigs) + } + + for i, s := range info { + if s.H != nil { + ev.error(errors.New("info sample should be float")) + } + // We encode original info sample timestamps via the float value. 
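+		// That is, s.F holds the info sample's original timestamp (in
+		// milliseconds) rather than a metric value, so the comparisons
+		// below pick the most recently updated info series whenever two
+		// series share a signature.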
+ origT := int64(s.F) + + sig := infoSigs[i] + if existing, exists := enh.rightSigs[sig]; exists { + // We encode original info sample timestamps via the float value. + existingOrigT := int64(existing.F) + switch { + case existingOrigT > origT: + // Keep the other info sample, since it's newer. + case existingOrigT < origT: + // Keep this info sample, since it's newer. + enh.rightSigs[sig] = s + default: + // The two info samples have the same timestamp - conflict. + name := s.Metric.Map()[labels.MetricName] + ev.errorf("found duplicate series for info metric %s", name) + } + } else { + enh.rightSigs[sig] = s + } + } + + for i, bs := range base { + if _, exists := ignoreSeries[i]; exists { + // This series should not be enriched with info metric data labels. + enh.Out = append(enh.Out, Sample{ + Metric: bs.Metric, + F: bs.F, + H: bs.H, + }) + continue + } + + baseLabels := bs.Metric.Map() + enh.resetBuilder(labels.Labels{}) + + // For every info metric name, try to find an info series with the same signature. + seenInfoMetrics := map[string]struct{}{} + for infoName, sig := range baseSigs[i] { + is, exists := enh.rightSigs[sig] + if !exists { + continue + } + if _, exists := seenInfoMetrics[infoName]; exists { + continue + } + + err := is.Metric.Validate(func(l labels.Label) error { + if l.Name == labels.MetricName { + return nil + } + if _, exists := dataLabelMatchers[l.Name]; len(dataLabelMatchers) > 0 && !exists { + // Not among the specified data label matchers. + return nil + } + + if v := enh.lb.Get(l.Name); v != "" && v != l.Value { + return fmt.Errorf("conflicting label: %s", l.Name) + } + if _, exists := baseLabels[l.Name]; exists { + // Skip labels already on the base metric. + return nil + } + + enh.lb.Set(l.Name, l.Value) + return nil + }) + if err != nil { + return nil, err + } + seenInfoMetrics[infoName] = struct{}{} + } + + infoLbls := enh.lb.Labels() + if infoLbls.Len() == 0 { + // If there's at least one data label matcher not matching the empty string, + // we have to ignore this series as there are no matching info series. + allMatchersMatchEmpty := true + for _, ms := range dataLabelMatchers { + for _, m := range ms { + if !m.Matches("") { + allMatchersMatchEmpty = false + break + } + } + } + if !allMatchersMatchEmpty { + continue + } + } + + enh.resetBuilder(bs.Metric) + infoLbls.Range(func(l labels.Label) { + enh.lb.Set(l.Name, l.Value) + }) + + enh.Out = append(enh.Out, Sample{ + Metric: enh.lb.Labels(), + F: bs.F, + H: bs.H, + }) + } + return enh.Out, nil +} diff --git a/promql/info_test.go b/promql/info_test.go new file mode 100644 index 0000000000..2e7a67172f --- /dev/null +++ b/promql/info_test.go @@ -0,0 +1,140 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql_test + +import ( + "testing" + + "github.com/prometheus/prometheus/promql/promqltest" +) + +// The "info" function is experimental. This is why we write those tests here for now instead of promqltest/testdata/info.test. 
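+// As a representative example: info(metric) takes every sample of metric and,
+// matching on the identifying labels instance and job, adds the data labels of
+// the corresponding target_info series to it. A second argument restricts which
+// info metrics and data labels are considered, e.g.
+// info(metric, {__name__="build_info"}).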
+func TestInfo(t *testing.T) {
+	engine := promqltest.NewTestEngine(t, false, 0, promqltest.DefaultMaxSamplesPerQuery)
+	promqltest.RunTest(t, `
+load 5m
+  metric{instance="a", job="1", label="value"} 0 1 2
+  metric_not_matching_target_info{instance="a", job="2", label="value"} 0 1 2
+  metric_with_overlapping_label{instance="a", job="1", label="value", data="base"} 0 1 2
+  target_info{instance="a", job="1", data="info", another_data="another info"} 1 1 1
+  build_info{instance="a", job="1", build_data="build"} 1 1 1
+
+# Include one info metric data label.
+eval range from 0m to 10m step 5m info(metric, {data=~".+"})
+  metric{data="info", instance="a", job="1", label="value"} 0 1 2
+
+# Include all info metric data labels.
+eval range from 0m to 10m step 5m info(metric)
+  metric{data="info", instance="a", job="1", label="value", another_data="another info"} 0 1 2
+
+# Try including all info metric data labels, but with non-matching identifying labels.
+eval range from 0m to 10m step 5m info(metric_not_matching_target_info)
+  metric_not_matching_target_info{instance="a", job="2", label="value"} 0 1 2
+
+# Try including a certain info metric data label with a non-matching matcher not accepting empty labels.
+# The metric is ignored, due to there being a data label matcher not matching empty labels,
+# and no matching info series.
+eval range from 0m to 10m step 5m info(metric, {non_existent=~".+"})
+
+# Include a certain info metric data label together with a non-matching matcher accepting empty labels.
+# Since the non_existent matcher matches empty labels, it's simply ignored when there's no match.
+# XXX: This case has to include a matcher not matching empty labels, due to the PromQL limitation
+# that vector selectors have to contain at least one matcher not accepting empty labels.
+# We might need a construct other than a vector selector to get around this limitation.
+eval range from 0m to 10m step 5m info(metric, {data=~".+", non_existent=~".*"})
+  metric{data="info", instance="a", job="1", label="value"} 0 1 2
+
+# Info series data labels overlapping with those of base series are ignored.
+eval range from 0m to 10m step 5m info(metric_with_overlapping_label)
+  metric_with_overlapping_label{data="base", instance="a", job="1", label="value", another_data="another info"} 0 1 2
+
+# Include data labels from target_info specifically.
+eval range from 0m to 10m step 5m info(metric, {__name__="target_info"})
+  metric{data="info", instance="a", job="1", label="value", another_data="another info"} 0 1 2
+
+# Try to include all data labels from a non-existent info metric.
+eval range from 0m to 10m step 5m info(metric, {__name__="non_existent"})
+  metric{instance="a", job="1", label="value"} 0 1 2
+
+# Try to include a certain data label from a non-existent info metric.
+eval range from 0m to 10m step 5m info(metric, {__name__="non_existent", data=~".+"})
+
+# Include data labels from build_info.
+eval range from 0m to 10m step 5m info(metric, {__name__="build_info"})
+  metric{instance="a", job="1", label="value", build_data="build"} 0 1 2
+
+# Include data labels from build_info and target_info.
+eval range from 0m to 10m step 5m info(metric, {__name__=~".+_info"})
+  metric{instance="a", job="1", label="value", build_data="build", data="info", another_data="another info"} 0 1 2
+
+# Info metrics themselves are ignored when it comes to enriching with info metric data labels.
+eval range from 0m to 10m step 5m info(build_info, {__name__=~".+_info", build_data=~".+"})
+  build_info{instance="a", job="1", build_data="build"} 1 1 1
+
+clear
+
+# Overlapping target_info series.
+load 5m
+  metric{instance="a", job="1", label="value"} 0 1 2
+  target_info{instance="a", job="1", data="info", another_data="another info"} 1 1 _
+  target_info{instance="a", job="1", data="updated info", another_data="another info"} _ _ 1
+
+# Conflicting info series are resolved by picking the latest sample.
+eval range from 0m to 10m step 5m info(metric)
+  metric{data="info", instance="a", job="1", label="value", another_data="another info"} 0 1 _
+  metric{data="updated info", instance="a", job="1", label="value", another_data="another info"} _ _ 2
+
+clear
+
+# Non-overlapping target_info series.
+load 5m
+  metric{instance="a", job="1", label="value"} 0 1 2
+  target_info{instance="a", job="1", data="info"} 1 1 stale
+  target_info{instance="a", job="1", data="updated info"} _ _ 1
+
+# Include info metric data labels from a metric whose data labels change over time.
+eval range from 0m to 10m step 5m info(metric)
+  metric{data="info", instance="a", job="1", label="value"} 0 1 _
+  metric{data="updated info", instance="a", job="1", label="value"} _ _ 2
+
+clear
+
+# Info series selector matches a histogram series; info metrics must be of float type.
+load 5m
+  metric{instance="a", job="1", label="value"} 0 1 2
+  histogram{instance="a", job="1"} {{schema:1 sum:3 count:22 buckets:[5 10 7]}}
+
+eval_fail range from 0m to 10m step 5m info(metric, {__name__="histogram"})
+
+clear
+
+# Series with skipped scrape.
+load 1m
+  metric{instance="a", job="1", label="value"} 0 _ 2 3 4
+  target_info{instance="a", job="1", data="info"} 1 _ 1 1 1
+
+# Lookback also works for the info series.
+eval range from 1m to 4m step 1m info(metric)
+  metric{data="info", instance="a", job="1", label="value"} 0 2 3 4
+
+# The @ operator also works with info.
+# Note that we pick a timestamp that is missing a sample; lookback should pick the previous sample.
+eval range from 1m to 4m step 1m info(metric @ 60)
+  metric{data="info", instance="a", job="1", label="value"} 0 0 0 0
+
+# The offset operator also works with info.
+eval range from 1m to 4m step 1m info(metric offset 1m)
+  metric{data="info", instance="a", job="1", label="value"} 0 0 2 3
+`, engine)
+}
diff --git a/promql/parser/ast.go b/promql/parser/ast.go
index 162d7817ab..132ef3f0d2 100644
--- a/promql/parser/ast.go
+++ b/promql/parser/ast.go
@@ -208,6 +208,10 @@ type VectorSelector struct {
 	UnexpandedSeriesSet storage.SeriesSet
 	Series              []storage.Series
 
+	// BypassEmptyMatcherCheck is true when the VectorSelector isn't required to have at least one matcher matching the empty string.
+	// This is the case when VectorSelector is used to represent the info function's second argument.
+ BypassEmptyMatcherCheck bool + PosRange posrange.PositionRange } diff --git a/promql/parser/functions.go b/promql/parser/functions.go index 99b41321fe..aa65aca275 100644 --- a/promql/parser/functions.go +++ b/promql/parser/functions.go @@ -202,10 +202,11 @@ var Functions = map[string]*Function{ ArgTypes: []ValueType{ValueTypeScalar, ValueTypeVector}, ReturnType: ValueTypeVector, }, - "holt_winters": { - Name: "holt_winters", - ArgTypes: []ValueType{ValueTypeMatrix, ValueTypeScalar, ValueTypeScalar}, - ReturnType: ValueTypeVector, + "double_exponential_smoothing": { + Name: "double_exponential_smoothing", + ArgTypes: []ValueType{ValueTypeMatrix, ValueTypeScalar, ValueTypeScalar}, + ReturnType: ValueTypeVector, + Experimental: true, }, "hour": { Name: "hour", @@ -223,6 +224,13 @@ var Functions = map[string]*Function{ ArgTypes: []ValueType{ValueTypeMatrix}, ReturnType: ValueTypeVector, }, + "info": { + Name: "info", + ArgTypes: []ValueType{ValueTypeVector, ValueTypeVector}, + ReturnType: ValueTypeVector, + Experimental: true, + Variadic: 1, + }, "irate": { Name: "irate", ArgTypes: []ValueType{ValueTypeMatrix}, diff --git a/promql/parser/generated_parser.y b/promql/parser/generated_parser.y index da24be0c44..c321a1e973 100644 --- a/promql/parser/generated_parser.y +++ b/promql/parser/generated_parser.y @@ -667,10 +667,16 @@ label_set_list : label_set_list COMMA label_set_item label_set_item : IDENTIFIER EQL STRING { $$ = labels.Label{Name: $1.Val, Value: yylex.(*parser).unquoteString($3.Val) } } + | string_identifier EQL STRING + { $$ = labels.Label{Name: $1.Val, Value: yylex.(*parser).unquoteString($3.Val) } } | IDENTIFIER EQL error { yylex.(*parser).unexpected("label set", "string"); $$ = labels.Label{}} + | string_identifier EQL error + { yylex.(*parser).unexpected("label set", "string"); $$ = labels.Label{}} | IDENTIFIER error { yylex.(*parser).unexpected("label set", "\"=\""); $$ = labels.Label{}} + | string_identifier error + { yylex.(*parser).unexpected("label set", "\"=\""); $$ = labels.Label{}} | error { yylex.(*parser).unexpected("label set", "identifier or \"}\""); $$ = labels.Label{} } ; @@ -818,12 +824,12 @@ histogram_desc_item $$ = yylex.(*parser).newMap() $$["sum"] = $3 } - | COUNT_DESC COLON number + | COUNT_DESC COLON signed_or_unsigned_number { $$ = yylex.(*parser).newMap() $$["count"] = $3 } - | ZERO_BUCKET_DESC COLON number + | ZERO_BUCKET_DESC COLON signed_or_unsigned_number { $$ = yylex.(*parser).newMap() $$["z_bucket"] = $3 @@ -875,11 +881,11 @@ bucket_set : LEFT_BRACKET bucket_set_list SPACE RIGHT_BRACKET } ; -bucket_set_list : bucket_set_list SPACE number +bucket_set_list : bucket_set_list SPACE signed_or_unsigned_number { $$ = append($1, $3) } - | number + | signed_or_unsigned_number { $$ = []float64{$1} } diff --git a/promql/parser/generated_parser.y.go b/promql/parser/generated_parser.y.go index 22231f73e2..8979410ceb 100644 --- a/promql/parser/generated_parser.y.go +++ b/promql/parser/generated_parser.y.go @@ -251,293 +251,295 @@ var yyExca = [...]int16{ 1, -1, -2, 0, -1, 37, - 1, 138, - 10, 138, - 24, 138, + 1, 141, + 10, 141, + 24, 141, -2, 0, -1, 61, - 2, 181, - 15, 181, - 79, 181, - 85, 181, - -2, 102, - -1, 62, - 2, 182, - 15, 182, - 79, 182, - 85, 182, - -2, 103, - -1, 63, - 2, 183, - 15, 183, - 79, 183, - 85, 183, - -2, 105, - -1, 64, 2, 184, 15, 184, 79, 184, 85, 184, - -2, 106, - -1, 65, + -2, 102, + -1, 62, 2, 185, 15, 185, 79, 185, 85, 185, - -2, 107, - -1, 66, + -2, 103, + -1, 63, 2, 186, 15, 186, 79, 186, 85, 186, - -2, 112, - -1, 67, + -2, 105, + 
-1, 64, 2, 187, 15, 187, 79, 187, 85, 187, - -2, 114, - -1, 68, + -2, 106, + -1, 65, 2, 188, 15, 188, 79, 188, 85, 188, - -2, 116, - -1, 69, + -2, 107, + -1, 66, 2, 189, 15, 189, 79, 189, 85, 189, - -2, 117, - -1, 70, + -2, 112, + -1, 67, 2, 190, 15, 190, 79, 190, 85, 190, - -2, 118, - -1, 71, + -2, 114, + -1, 68, 2, 191, 15, 191, 79, 191, 85, 191, - -2, 119, - -1, 72, + -2, 116, + -1, 69, 2, 192, 15, 192, 79, 192, 85, 192, - -2, 120, - -1, 73, + -2, 117, + -1, 70, 2, 193, 15, 193, 79, 193, 85, 193, - -2, 124, - -1, 74, + -2, 118, + -1, 71, 2, 194, 15, 194, 79, 194, 85, 194, + -2, 119, + -1, 72, + 2, 195, + 15, 195, + 79, 195, + 85, 195, + -2, 120, + -1, 73, + 2, 196, + 15, 196, + 79, 196, + 85, 196, + -2, 124, + -1, 74, + 2, 197, + 15, 197, + 79, 197, + 85, 197, -2, 125, - -1, 200, - 9, 243, - 12, 243, - 13, 243, - 18, 243, - 19, 243, - 25, 243, - 41, 243, - 47, 243, - 48, 243, - 51, 243, - 57, 243, - 62, 243, - 63, 243, - 64, 243, - 65, 243, - 66, 243, - 67, 243, - 68, 243, - 69, 243, - 70, 243, - 71, 243, - 72, 243, - 73, 243, - 74, 243, - 75, 243, - 79, 243, - 83, 243, - 85, 243, - 88, 243, - 89, 243, + -1, 205, + 9, 246, + 12, 246, + 13, 246, + 18, 246, + 19, 246, + 25, 246, + 41, 246, + 47, 246, + 48, 246, + 51, 246, + 57, 246, + 62, 246, + 63, 246, + 64, 246, + 65, 246, + 66, 246, + 67, 246, + 68, 246, + 69, 246, + 70, 246, + 71, 246, + 72, 246, + 73, 246, + 74, 246, + 75, 246, + 79, 246, + 83, 246, + 85, 246, + 88, 246, + 89, 246, -2, 0, - -1, 201, - 9, 243, - 12, 243, - 13, 243, - 18, 243, - 19, 243, - 25, 243, - 41, 243, - 47, 243, - 48, 243, - 51, 243, - 57, 243, - 62, 243, - 63, 243, - 64, 243, - 65, 243, - 66, 243, - 67, 243, - 68, 243, - 69, 243, - 70, 243, - 71, 243, - 72, 243, - 73, 243, - 74, 243, - 75, 243, - 79, 243, - 83, 243, - 85, 243, - 88, 243, - 89, 243, + -1, 206, + 9, 246, + 12, 246, + 13, 246, + 18, 246, + 19, 246, + 25, 246, + 41, 246, + 47, 246, + 48, 246, + 51, 246, + 57, 246, + 62, 246, + 63, 246, + 64, 246, + 65, 246, + 66, 246, + 67, 246, + 68, 246, + 69, 246, + 70, 246, + 71, 246, + 72, 246, + 73, 246, + 74, 246, + 75, 246, + 79, 246, + 83, 246, + 85, 246, + 88, 246, + 89, 246, -2, 0, } const yyPrivate = 57344 -const yyLast = 799 +const yyLast = 804 var yyAct = [...]int16{ - 155, 334, 332, 276, 339, 152, 226, 39, 192, 44, - 291, 290, 156, 118, 82, 178, 229, 107, 106, 346, - 347, 348, 349, 109, 108, 198, 239, 199, 133, 110, - 105, 60, 245, 121, 6, 329, 325, 111, 328, 228, - 200, 201, 160, 119, 304, 267, 293, 128, 260, 160, - 151, 261, 159, 302, 358, 311, 122, 55, 89, 159, - 196, 241, 242, 259, 113, 243, 114, 54, 98, 99, - 302, 112, 101, 256, 104, 88, 230, 232, 234, 235, - 236, 244, 246, 249, 250, 251, 252, 253, 257, 258, - 160, 115, 231, 233, 237, 238, 240, 247, 248, 103, - 159, 109, 254, 255, 324, 150, 357, 110, 333, 218, - 111, 340, 310, 149, 77, 163, 7, 105, 35, 173, - 167, 170, 161, 323, 165, 356, 166, 309, 355, 194, - 2, 3, 4, 5, 308, 322, 184, 197, 162, 186, - 321, 195, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 229, 129, 101, - 217, 104, 219, 220, 190, 266, 270, 239, 160, 121, - 268, 193, 264, 245, 55, 196, 154, 225, 159, 119, - 228, 271, 188, 160, 54, 161, 103, 117, 265, 84, - 262, 299, 122, 159, 320, 263, 298, 272, 10, 83, - 161, 162, 241, 242, 269, 187, 243, 185, 79, 288, - 289, 297, 319, 292, 256, 161, 162, 230, 232, 234, - 235, 236, 244, 246, 249, 250, 251, 252, 253, 257, - 258, 162, 294, 231, 233, 237, 238, 240, 247, 248, - 318, 317, 316, 254, 255, 180, 315, 134, 135, 136, - 137, 138, 139, 140, 141, 142, 143, 
144, 145, 146, - 147, 148, 157, 158, 169, 105, 314, 296, 300, 301, - 303, 223, 305, 313, 55, 222, 179, 168, 180, 84, - 306, 307, 177, 125, 54, 182, 295, 176, 124, 83, - 221, 312, 87, 89, 8, 181, 183, 81, 37, 86, - 175, 123, 36, 98, 99, 326, 327, 101, 102, 104, - 88, 127, 331, 126, 50, 336, 337, 338, 182, 335, - 78, 1, 342, 341, 344, 343, 49, 48, 181, 183, - 350, 351, 47, 55, 103, 352, 53, 77, 164, 56, - 46, 354, 22, 54, 59, 55, 172, 9, 9, 57, - 132, 45, 43, 130, 171, 54, 359, 42, 131, 41, - 40, 51, 191, 353, 273, 75, 85, 189, 224, 80, - 345, 18, 19, 120, 153, 20, 58, 227, 52, 116, - 0, 76, 0, 0, 0, 0, 61, 62, 63, 64, - 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 0, 0, 0, 13, 0, 0, 0, 24, 0, 30, - 0, 0, 31, 32, 55, 38, 0, 53, 77, 0, - 56, 275, 0, 22, 54, 0, 0, 0, 274, 0, - 57, 0, 278, 279, 277, 284, 286, 283, 285, 280, - 281, 282, 287, 0, 0, 0, 75, 0, 0, 0, - 0, 0, 18, 19, 0, 0, 20, 0, 0, 0, - 0, 0, 76, 0, 0, 0, 0, 61, 62, 63, - 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, - 74, 0, 0, 0, 13, 0, 0, 0, 24, 0, - 30, 0, 55, 31, 32, 53, 77, 0, 56, 330, - 0, 22, 54, 0, 0, 0, 0, 0, 57, 0, - 278, 279, 277, 284, 286, 283, 285, 280, 281, 282, - 287, 0, 0, 0, 75, 0, 0, 0, 0, 0, - 18, 19, 0, 0, 20, 0, 0, 0, 17, 77, - 76, 0, 0, 0, 22, 61, 62, 63, 64, 65, - 66, 67, 68, 69, 70, 71, 72, 73, 74, 0, - 0, 0, 13, 0, 0, 0, 24, 0, 30, 0, - 0, 31, 32, 18, 19, 0, 0, 20, 0, 0, - 0, 17, 35, 0, 0, 0, 0, 22, 11, 12, - 14, 15, 16, 21, 23, 25, 26, 27, 28, 29, - 33, 34, 0, 0, 0, 13, 0, 0, 0, 24, - 0, 30, 0, 0, 31, 32, 18, 19, 0, 0, - 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 11, 12, 14, 15, 16, 21, 23, 25, 26, - 27, 28, 29, 33, 34, 105, 0, 0, 13, 0, - 0, 0, 24, 174, 30, 0, 0, 31, 32, 0, - 0, 0, 0, 0, 105, 0, 0, 0, 0, 0, - 0, 0, 87, 89, 90, 0, 91, 92, 93, 94, - 95, 96, 97, 98, 99, 100, 0, 101, 102, 104, - 88, 87, 89, 90, 0, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 0, 101, 102, 104, 88, - 105, 0, 0, 0, 103, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 105, - 0, 0, 0, 103, 0, 0, 0, 87, 89, 90, - 0, 91, 92, 93, 0, 95, 96, 97, 98, 99, - 100, 0, 101, 102, 104, 88, 87, 89, 90, 0, - 91, 92, 0, 0, 95, 96, 0, 98, 99, 100, - 0, 101, 102, 104, 88, 0, 0, 0, 0, 103, + 155, 339, 337, 158, 344, 231, 39, 197, 281, 44, + 296, 295, 84, 120, 82, 181, 109, 108, 351, 352, + 353, 354, 107, 111, 203, 136, 204, 159, 154, 112, + 205, 206, 234, 6, 271, 55, 163, 163, 107, 334, + 333, 307, 244, 275, 309, 54, 162, 162, 250, 363, + 91, 272, 330, 131, 362, 233, 60, 270, 276, 110, + 100, 101, 298, 115, 103, 116, 106, 90, 164, 164, + 114, 265, 113, 361, 277, 307, 360, 246, 247, 338, + 103, 248, 106, 153, 165, 165, 264, 316, 201, 261, + 122, 105, 235, 237, 239, 240, 241, 249, 251, 254, + 255, 256, 257, 258, 262, 263, 273, 105, 236, 238, + 242, 243, 245, 252, 253, 152, 117, 166, 259, 260, + 176, 164, 170, 173, 163, 168, 223, 169, 172, 2, + 3, 4, 5, 107, 162, 199, 111, 165, 187, 202, + 189, 171, 112, 269, 207, 208, 209, 210, 211, 212, + 213, 214, 215, 216, 217, 218, 219, 220, 221, 200, + 89, 91, 113, 222, 123, 193, 268, 329, 224, 225, + 183, 100, 101, 191, 121, 103, 104, 106, 90, 7, + 85, 234, 266, 182, 55, 183, 328, 86, 192, 123, + 83, 244, 122, 267, 54, 132, 190, 250, 188, 121, + 345, 230, 105, 86, 233, 77, 35, 119, 304, 10, + 185, 327, 86, 303, 293, 294, 157, 315, 297, 79, + 184, 186, 326, 163, 274, 185, 246, 247, 302, 325, + 248, 324, 314, 162, 323, 184, 186, 299, 261, 313, + 322, 235, 237, 239, 240, 241, 249, 251, 254, 255, + 256, 257, 258, 262, 263, 164, 321, 236, 238, 242, + 243, 245, 252, 253, 180, 126, 320, 259, 260, 179, + 125, 165, 305, 319, 306, 308, 318, 
310, 317, 130, + 88, 129, 178, 124, 311, 312, 137, 138, 139, 140, + 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, + 151, 195, 160, 161, 50, 163, 36, 167, 198, 331, + 78, 332, 201, 228, 55, 162, 85, 227, 1, 340, + 341, 342, 336, 49, 54, 343, 83, 347, 346, 349, + 348, 48, 226, 47, 81, 355, 356, 164, 55, 86, + 357, 53, 77, 301, 56, 8, 359, 22, 54, 37, + 55, 175, 46, 165, 57, 128, 135, 127, 45, 43, + 54, 364, 300, 59, 133, 174, 9, 9, 42, 134, + 75, 41, 40, 51, 196, 358, 18, 19, 278, 87, + 20, 194, 229, 80, 350, 156, 76, 58, 232, 52, + 118, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 0, 0, 0, 13, 0, + 0, 0, 24, 0, 30, 0, 0, 31, 32, 55, + 38, 0, 53, 77, 0, 56, 280, 0, 22, 54, + 0, 0, 0, 279, 0, 57, 0, 283, 284, 282, + 289, 291, 288, 290, 285, 286, 287, 292, 0, 0, + 0, 75, 0, 0, 0, 0, 0, 18, 19, 0, + 0, 20, 0, 0, 0, 0, 0, 76, 0, 0, + 0, 0, 61, 62, 63, 64, 65, 66, 67, 68, + 69, 70, 71, 72, 73, 74, 0, 0, 0, 13, + 0, 0, 0, 24, 0, 30, 0, 55, 31, 32, + 53, 77, 0, 56, 335, 0, 22, 54, 0, 0, + 0, 0, 0, 57, 0, 283, 284, 282, 289, 291, + 288, 290, 285, 286, 287, 292, 0, 0, 0, 75, + 0, 0, 0, 0, 0, 18, 19, 0, 0, 20, + 0, 0, 0, 17, 77, 76, 0, 0, 0, 22, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 0, 0, 0, 13, 0, 0, + 0, 24, 0, 30, 0, 0, 31, 32, 18, 19, + 0, 0, 20, 0, 0, 0, 17, 35, 0, 0, + 0, 0, 22, 11, 12, 14, 15, 16, 21, 23, + 25, 26, 27, 28, 29, 33, 34, 0, 0, 0, + 13, 0, 0, 0, 24, 0, 30, 0, 0, 31, + 32, 18, 19, 0, 0, 20, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 11, 12, 14, 15, + 16, 21, 23, 25, 26, 27, 28, 29, 33, 34, + 107, 0, 0, 13, 0, 0, 0, 24, 177, 30, + 0, 0, 31, 32, 0, 0, 0, 0, 0, 107, + 0, 0, 0, 0, 0, 0, 0, 89, 91, 92, + 0, 93, 94, 95, 96, 97, 98, 99, 100, 101, + 102, 0, 103, 104, 106, 90, 89, 91, 92, 0, + 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, + 0, 103, 104, 106, 90, 107, 0, 0, 0, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 103, + 0, 0, 0, 0, 107, 0, 0, 0, 105, 0, + 0, 0, 89, 91, 92, 0, 93, 94, 95, 0, + 97, 98, 99, 100, 101, 102, 0, 103, 104, 106, + 90, 89, 91, 92, 0, 93, 94, 0, 0, 97, + 98, 0, 100, 101, 102, 0, 103, 104, 106, 90, + 0, 0, 0, 0, 105, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 105, } var yyPact = [...]int16{ - 32, 106, 569, 569, 405, 526, -1000, -1000, -1000, 105, + 31, 169, 574, 574, 410, 531, -1000, -1000, -1000, 193, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, -1000, -1000, -1000, 277, -1000, 297, -1000, 650, + -1000, -1000, -1000, -1000, -1000, 314, -1000, 278, -1000, 655, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, 22, 95, -1000, -1000, 483, -1000, 483, 101, + -1000, -1000, 57, 147, -1000, -1000, 488, -1000, 488, 192, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, -1000, -1000, -1000, -1000, -1000, 167, -1000, -1000, - 281, -1000, -1000, 309, -1000, 23, -1000, -50, -50, -50, - -50, -50, -50, -50, -50, -50, -50, -50, -50, -50, - -50, -50, -50, 48, 174, 336, 95, -56, -1000, 262, - 262, 324, -1000, 631, 103, -1000, 280, -1000, -1000, 274, - 241, -1000, -1000, -1000, 187, -1000, 180, -1000, 159, 483, - -1000, -57, -40, -1000, 483, 483, 483, 483, 483, 483, - 483, 483, 483, 483, 483, 483, 483, 483, 483, -1000, - 165, -1000, -1000, 94, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, 40, 40, 269, -1000, -1000, -1000, -1000, 155, -1000, - -1000, 41, -1000, 650, -1000, -1000, 31, -1000, 170, -1000, - -1000, -1000, -1000, -1000, 163, 
-1000, -1000, -1000, -1000, -1000, - 19, 144, 140, -1000, -1000, -1000, 404, 16, 262, 262, - 262, 262, 103, 103, 251, 251, 251, 715, 696, 251, - 251, 715, 103, 103, 251, 103, 16, -1000, 24, -1000, - -1000, -1000, 265, -1000, 189, -1000, -1000, -1000, -1000, -1000, + -1000, -1000, -1000, -1000, -1000, -1000, -1000, 187, -1000, -1000, + 263, -1000, -1000, 353, 277, -1000, -1000, 29, -1000, -53, + -53, -53, -53, -53, -53, -53, -53, -53, -53, -53, + -53, -53, -53, -53, -53, 26, 214, 305, 147, -56, + -1000, 126, 126, 329, -1000, 636, 24, -1000, 262, -1000, + -1000, 181, 166, -1000, -1000, 178, -1000, 171, -1000, 163, + -1000, 296, 488, -1000, -58, -50, -1000, 488, 488, 488, + 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, + 488, 488, -1000, 175, -1000, -1000, 111, -1000, -1000, -1000, + -1000, -1000, -1000, -1000, 115, 115, 311, -1000, -1000, -1000, + -1000, 179, -1000, -1000, 64, -1000, 655, -1000, -1000, 162, + -1000, 141, -1000, -1000, -1000, -1000, -1000, 32, -1000, -1000, + -1000, -1000, -1000, -1000, -1000, 25, 80, 17, -1000, -1000, + -1000, 409, 8, 126, 126, 126, 126, 24, 24, 119, + 119, 119, 720, 701, 119, 119, 720, 24, 24, 119, + 24, 8, -1000, 40, -1000, -1000, -1000, 341, -1000, 206, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - 483, -1000, -1000, -1000, -1000, -1000, -1000, 34, 34, 18, - 34, 44, 44, 110, 38, -1000, -1000, 285, 267, 260, - 240, 236, 235, 234, 206, 188, 134, 129, -1000, -1000, - -1000, -1000, -1000, -1000, 102, -1000, -1000, -1000, 14, -1000, - 650, -1000, -1000, -1000, 34, -1000, 12, 9, 482, -1000, - -1000, -1000, 51, 81, 40, 40, 40, 97, 97, 51, - 97, 51, -73, -1000, -1000, -1000, -1000, -1000, 34, 34, - -1000, -1000, -1000, 34, -1000, -1000, -1000, -1000, -1000, -1000, - 40, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, - -1000, -1000, -1000, 104, -1000, 33, -1000, -1000, -1000, -1000, + -1000, -1000, -1000, -1000, -1000, 488, -1000, -1000, -1000, -1000, + -1000, -1000, 56, 56, 18, 56, 72, 72, 215, 70, + -1000, -1000, 272, 270, 267, 260, 250, 234, 228, 225, + 223, 216, 205, -1000, -1000, -1000, -1000, -1000, -1000, 165, + -1000, -1000, -1000, 30, -1000, 655, -1000, -1000, -1000, 56, + -1000, 14, 13, 487, -1000, -1000, -1000, 22, 27, 27, + 27, 115, 186, 186, 22, 186, 22, -74, -1000, -1000, + -1000, -1000, -1000, 56, 56, -1000, -1000, -1000, 56, -1000, + -1000, -1000, -1000, -1000, -1000, 27, -1000, -1000, -1000, -1000, + -1000, -1000, -1000, -1000, -1000, -1000, -1000, -1000, 52, -1000, + 28, -1000, -1000, -1000, -1000, } var yyPgo = [...]int16{ - 0, 379, 13, 378, 6, 15, 377, 344, 376, 374, - 373, 370, 198, 294, 369, 14, 368, 10, 11, 367, - 366, 8, 364, 3, 4, 363, 2, 1, 0, 362, - 12, 5, 361, 360, 18, 158, 359, 358, 7, 357, - 354, 17, 353, 31, 352, 9, 351, 350, 340, 332, - 327, 326, 314, 321, 302, + 0, 390, 13, 389, 5, 15, 388, 363, 387, 385, + 12, 384, 209, 345, 383, 14, 382, 10, 11, 381, + 379, 7, 378, 8, 4, 375, 2, 1, 3, 374, + 27, 0, 373, 372, 17, 195, 371, 369, 6, 368, + 365, 16, 364, 56, 359, 9, 358, 356, 352, 333, + 331, 323, 304, 318, 306, } var yyR1 = [...]int8{ @@ -554,18 +556,18 @@ var yyR1 = [...]int8{ 13, 13, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, - 14, 14, 14, 15, 15, 15, 15, 54, 20, 20, - 20, 20, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 29, 29, 29, 21, 21, 21, 21, 22, 22, - 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 24, 
24, 25, 25, 25, 11, 11, 11, - 11, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, + 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, + 54, 20, 20, 20, 20, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 29, 29, 29, 21, 21, 21, + 21, 22, 22, 22, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 24, 24, 25, 25, 25, + 11, 11, 11, 11, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 8, 8, 5, 5, 5, 5, - 45, 45, 28, 28, 30, 30, 31, 31, 27, 26, - 26, 49, 10, 18, 18, + 6, 6, 6, 6, 6, 6, 6, 8, 8, 5, + 5, 5, 5, 45, 45, 28, 28, 30, 30, 31, + 31, 27, 26, 26, 49, 10, 18, 18, } var yyR2 = [...]int8{ @@ -582,18 +584,18 @@ var yyR2 = [...]int8{ 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4, 2, 0, - 3, 1, 2, 3, 3, 2, 1, 2, 0, 3, - 2, 1, 1, 3, 1, 3, 4, 1, 3, 5, - 5, 1, 1, 1, 4, 3, 3, 2, 3, 1, - 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 4, 3, 3, 1, 2, 1, 1, 1, + 3, 1, 2, 3, 3, 3, 3, 2, 2, 1, + 2, 0, 3, 2, 1, 1, 3, 1, 3, 4, + 1, 3, 5, 5, 1, 1, 1, 4, 3, 3, + 2, 3, 1, 2, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 4, 3, 3, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, - 1, 1, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, + 1, 1, 2, 1, 1, 1, 0, 1, } var yyChk = [...]int16{ @@ -605,34 +607,35 @@ var yyChk = [...]int16{ -52, -32, -3, 12, 19, 9, 15, 25, -8, -7, -43, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 41, 57, 13, -52, -12, - -14, 20, -15, 12, 2, -20, 2, 41, 59, 42, - 43, 45, 46, 47, 48, 49, 50, 51, 52, 53, - 54, 56, 57, 83, 58, 14, -34, -41, 2, 79, - 85, 15, -41, -38, -38, -43, -1, 20, -2, 12, - -10, 2, 25, 20, 7, 2, 4, 2, 24, -35, - -42, -37, -47, 78, -35, -35, -35, -35, -35, -35, - -35, -35, -35, -35, -35, -35, -35, -35, -35, -45, - 57, 2, -31, -9, 2, -28, -30, 88, 89, 19, - 9, 41, 57, -45, 2, -41, -34, -17, 15, 2, - -17, -40, 22, -38, 22, 20, 7, 2, -5, 2, - 4, 54, 44, 55, -5, 20, -15, 25, 2, -19, - 5, -29, -21, 12, -28, -30, 16, -38, 82, 84, - 80, 81, -38, -38, -38, -38, -38, -38, -38, -38, - -38, -38, -38, -38, -38, -38, -38, -45, 15, -28, - -28, 21, 6, 2, -16, 22, -4, -6, 25, 2, - 62, 78, 63, 79, 64, 65, 66, 80, 81, 12, - 82, 47, 48, 51, 67, 18, 68, 83, 84, 69, - 70, 71, 72, 73, 88, 89, 59, 74, 75, 22, - 7, 20, -2, 25, 2, 25, 2, 26, 26, -30, - 26, 41, 57, -22, 24, 17, -23, 30, 28, 29, - 35, 36, 37, 33, 31, 34, 32, 38, -17, -17, - -18, -17, -18, 22, -45, 21, 2, 22, 7, 2, - -38, -27, 19, -27, 26, -27, -21, -21, 24, 17, - 2, 17, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 21, 2, 22, -4, -27, 26, 26, - 17, -23, -26, 57, -27, -31, -28, -28, -28, -24, - 14, -24, -26, -24, -26, -11, 92, 93, 94, 95, - -27, -27, -27, -25, -28, 24, 21, 2, 21, -28, + -14, 20, -15, 12, -10, 2, 25, -20, 2, 41, + 59, 42, 43, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 56, 57, 83, 58, 14, -34, -41, + 2, 79, 85, 15, -41, -38, -38, -43, -1, 20, + -2, 12, -10, 2, 20, 7, 2, 4, 2, 4, + 2, 24, -35, -42, -37, -47, 78, -35, -35, -35, + -35, -35, -35, -35, -35, -35, -35, -35, -35, -35, + -35, -35, -45, 57, 2, -31, -9, 2, -28, -30, + 88, 89, 19, 9, 41, 57, -45, 2, -41, -34, + -17, 15, 2, -17, -40, 22, -38, 22, 20, 7, + 2, -5, 2, 4, 54, 44, 55, -5, 20, -15, + 25, 2, 25, 2, -19, 5, -29, -21, 12, -28, + -30, 16, -38, 82, 84, 80, 81, -38, -38, -38, + -38, -38, -38, -38, -38, -38, -38, -38, -38, -38, + -38, -38, -45, 15, -28, -28, 21, 6, 2, -16, + 22, -4, -6, 25, 2, 62, 78, 63, 79, 64, + 65, 66, 80, 81, 12, 82, 47, 48, 51, 67, 
+ 18, 68, 83, 84, 69, 70, 71, 72, 73, 88, + 89, 59, 74, 75, 22, 7, 20, -2, 25, 2, + 25, 2, 26, 26, -30, 26, 41, 57, -22, 24, + 17, -23, 30, 28, 29, 35, 36, 37, 33, 31, + 34, 32, 38, -17, -17, -18, -17, -18, 22, -45, + 21, 2, 22, 7, 2, -38, -27, 19, -27, 26, + -27, -21, -21, 24, 17, 2, 17, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 21, 2, + 22, -4, -27, 26, 26, 17, -23, -26, 57, -27, + -31, -31, -31, -28, -24, 14, -24, -26, -24, -26, + -11, 92, 93, 94, 95, -27, -27, -27, -25, -31, + 24, 21, 2, 21, -31, } var yyDef = [...]int16{ @@ -641,37 +644,38 @@ var yyDef = [...]int16{ 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 0, 2, -2, 3, 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, - 18, 19, 0, 108, 230, 231, 0, 241, 0, 85, + 18, 19, 0, 108, 233, 234, 0, 244, 0, 85, 86, -2, -2, -2, -2, -2, -2, -2, -2, -2, - -2, -2, -2, -2, -2, 224, 225, 0, 5, 100, - 0, 128, 131, 0, 136, 137, 141, 43, 43, 43, + -2, -2, -2, -2, -2, 227, 228, 0, 5, 100, + 0, 128, 131, 0, 0, 139, 245, 140, 144, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, - 43, 43, 43, 0, 0, 0, 0, 22, 23, 0, - 0, 0, 61, 0, 83, 84, 0, 89, 91, 0, - 95, 99, 242, 126, 0, 132, 0, 135, 140, 0, - 42, 47, 48, 44, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, - 0, 70, 71, 0, 73, 236, 237, 74, 75, 232, - 233, 0, 0, 0, 82, 20, 21, 24, 0, 54, - 25, 0, 63, 65, 67, 87, 0, 92, 0, 98, - 226, 227, 228, 229, 0, 127, 130, 133, 134, 139, - 142, 144, 147, 151, 152, 153, 0, 26, 0, 0, - -2, -2, 27, 28, 29, 30, 31, 32, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 69, 0, 234, - 235, 76, 0, 81, 0, 53, 56, 58, 59, 60, - 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, - 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, - 215, 216, 217, 218, 219, 220, 221, 222, 223, 62, - 66, 88, 90, 93, 97, 94, 96, 0, 0, 0, - 0, 0, 0, 0, 0, 157, 159, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 45, 46, - 49, 244, 50, 72, 0, 78, 80, 51, 0, 57, - 64, 143, 238, 145, 0, 148, 0, 0, 0, 155, - 160, 156, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 77, 79, 52, 55, 146, 0, 0, - 154, 158, 161, 0, 240, 162, 163, 164, 165, 166, - 0, 167, 168, 169, 170, 171, 177, 178, 179, 180, - 149, 150, 239, 0, 175, 0, 173, 176, 172, 174, + 43, 43, 43, 43, 43, 0, 0, 0, 0, 22, + 23, 0, 0, 0, 61, 0, 83, 84, 0, 89, + 91, 0, 95, 99, 126, 0, 132, 0, 137, 0, + 138, 143, 0, 42, 47, 48, 44, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 68, 0, 70, 71, 0, 73, 239, 240, + 74, 75, 235, 236, 0, 0, 0, 82, 20, 21, + 24, 0, 54, 25, 0, 63, 65, 67, 87, 0, + 92, 0, 98, 229, 230, 231, 232, 0, 127, 130, + 133, 135, 134, 136, 142, 145, 147, 150, 154, 155, + 156, 0, 26, 0, 0, -2, -2, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 69, 0, 237, 238, 76, 0, 81, 0, + 53, 56, 58, 59, 60, 198, 199, 200, 201, 202, + 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, + 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, + 223, 224, 225, 226, 62, 66, 88, 90, 93, 97, + 94, 96, 0, 0, 0, 0, 0, 0, 0, 0, + 160, 162, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 45, 46, 49, 247, 50, 72, 0, + 78, 80, 51, 0, 57, 64, 146, 241, 148, 0, + 151, 0, 0, 0, 158, 163, 159, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 77, 79, + 52, 55, 149, 0, 0, 157, 161, 164, 0, 243, + 165, 166, 167, 168, 169, 0, 170, 171, 172, 173, + 174, 180, 181, 182, 183, 152, 153, 242, 0, 178, + 0, 176, 179, 175, 177, } var yyTok1 = [...]int8{ @@ -1614,24 +1618,41 @@ yydefault: yyVAL.label = labels.Label{Name: yyDollar[1].item.Val, Value: yylex.(*parser).unquoteString(yyDollar[3].item.Val)} } case 134: + yyDollar = yyS[yypt-3 : yypt+1] + { + yyVAL.label = labels.Label{Name: yyDollar[1].item.Val, Value: 
yylex.(*parser).unquoteString(yyDollar[3].item.Val)} + } + case 135: yyDollar = yyS[yypt-3 : yypt+1] { yylex.(*parser).unexpected("label set", "string") yyVAL.label = labels.Label{} } - case 135: + case 136: + yyDollar = yyS[yypt-3 : yypt+1] + { + yylex.(*parser).unexpected("label set", "string") + yyVAL.label = labels.Label{} + } + case 137: yyDollar = yyS[yypt-2 : yypt+1] { yylex.(*parser).unexpected("label set", "\"=\"") yyVAL.label = labels.Label{} } - case 136: + case 138: + yyDollar = yyS[yypt-2 : yypt+1] + { + yylex.(*parser).unexpected("label set", "\"=\"") + yyVAL.label = labels.Label{} + } + case 139: yyDollar = yyS[yypt-1 : yypt+1] { yylex.(*parser).unexpected("label set", "identifier or \"}\"") yyVAL.label = labels.Label{} } - case 137: + case 140: yyDollar = yyS[yypt-2 : yypt+1] { yylex.(*parser).generatedParserResult = &seriesDescription{ @@ -1639,33 +1660,33 @@ yydefault: values: yyDollar[2].series, } } - case 138: + case 141: yyDollar = yyS[yypt-0 : yypt+1] { yyVAL.series = []SequenceValue{} } - case 139: + case 142: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.series = append(yyDollar[1].series, yyDollar[3].series...) } - case 140: + case 143: yyDollar = yyS[yypt-2 : yypt+1] { yyVAL.series = yyDollar[1].series } - case 141: + case 144: yyDollar = yyS[yypt-1 : yypt+1] { yylex.(*parser).unexpected("series values", "") yyVAL.series = nil } - case 142: + case 145: yyDollar = yyS[yypt-1 : yypt+1] { yyVAL.series = []SequenceValue{{Omitted: true}} } - case 143: + case 146: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.series = []SequenceValue{} @@ -1673,12 +1694,12 @@ yydefault: yyVAL.series = append(yyVAL.series, SequenceValue{Omitted: true}) } } - case 144: + case 147: yyDollar = yyS[yypt-1 : yypt+1] { yyVAL.series = []SequenceValue{{Value: yyDollar[1].float}} } - case 145: + case 148: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.series = []SequenceValue{} @@ -1687,7 +1708,7 @@ yydefault: yyVAL.series = append(yyVAL.series, SequenceValue{Value: yyDollar[1].float}) } } - case 146: + case 149: yyDollar = yyS[yypt-4 : yypt+1] { yyVAL.series = []SequenceValue{} @@ -1697,12 +1718,12 @@ yydefault: yyDollar[1].float += yyDollar[2].float } } - case 147: + case 150: yyDollar = yyS[yypt-1 : yypt+1] { yyVAL.series = []SequenceValue{{Histogram: yyDollar[1].histogram}} } - case 148: + case 151: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.series = []SequenceValue{} @@ -1712,7 +1733,7 @@ yydefault: //$1 += $2 } } - case 149: + case 152: yyDollar = yyS[yypt-5 : yypt+1] { val, err := yylex.(*parser).histogramsIncreaseSeries(yyDollar[1].histogram, yyDollar[3].histogram, yyDollar[5].uint) @@ -1721,7 +1742,7 @@ yydefault: } yyVAL.series = val } - case 150: + case 153: yyDollar = yyS[yypt-5 : yypt+1] { val, err := yylex.(*parser).histogramsDecreaseSeries(yyDollar[1].histogram, yyDollar[3].histogram, yyDollar[5].uint) @@ -1730,7 +1751,7 @@ yydefault: } yyVAL.series = val } - case 151: + case 154: yyDollar = yyS[yypt-1 : yypt+1] { if yyDollar[1].item.Val != "stale" { @@ -1738,130 +1759,130 @@ yydefault: } yyVAL.float = math.Float64frombits(value.StaleNaN) } - case 154: + case 157: yyDollar = yyS[yypt-4 : yypt+1] { yyVAL.histogram = yylex.(*parser).buildHistogramFromMap(&yyDollar[2].descriptors) } - case 155: + case 158: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.histogram = yylex.(*parser).buildHistogramFromMap(&yyDollar[2].descriptors) } - case 156: - yyDollar = yyS[yypt-3 : yypt+1] - { - m := yylex.(*parser).newMap() - yyVAL.histogram = yylex.(*parser).buildHistogramFromMap(&m) - } - case 157: - yyDollar = yyS[yypt-2 : 
yypt+1] - { - m := yylex.(*parser).newMap() - yyVAL.histogram = yylex.(*parser).buildHistogramFromMap(&m) - } - case 158: - yyDollar = yyS[yypt-3 : yypt+1] - { - yyVAL.descriptors = *(yylex.(*parser).mergeMaps(&yyDollar[1].descriptors, &yyDollar[3].descriptors)) - } case 159: - yyDollar = yyS[yypt-1 : yypt+1] + yyDollar = yyS[yypt-3 : yypt+1] { - yyVAL.descriptors = yyDollar[1].descriptors + m := yylex.(*parser).newMap() + yyVAL.histogram = yylex.(*parser).buildHistogramFromMap(&m) } case 160: yyDollar = yyS[yypt-2 : yypt+1] { - yylex.(*parser).unexpected("histogram description", "histogram description key, e.g. buckets:[5 10 7]") + m := yylex.(*parser).newMap() + yyVAL.histogram = yylex.(*parser).buildHistogramFromMap(&m) } case 161: yyDollar = yyS[yypt-3 : yypt+1] { - yyVAL.descriptors = yylex.(*parser).newMap() - yyVAL.descriptors["schema"] = yyDollar[3].int + yyVAL.descriptors = *(yylex.(*parser).mergeMaps(&yyDollar[1].descriptors, &yyDollar[3].descriptors)) } case 162: - yyDollar = yyS[yypt-3 : yypt+1] + yyDollar = yyS[yypt-1 : yypt+1] { - yyVAL.descriptors = yylex.(*parser).newMap() - yyVAL.descriptors["sum"] = yyDollar[3].float + yyVAL.descriptors = yyDollar[1].descriptors } case 163: - yyDollar = yyS[yypt-3 : yypt+1] + yyDollar = yyS[yypt-2 : yypt+1] { - yyVAL.descriptors = yylex.(*parser).newMap() - yyVAL.descriptors["count"] = yyDollar[3].float + yylex.(*parser).unexpected("histogram description", "histogram description key, e.g. buckets:[5 10 7]") } case 164: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.descriptors = yylex.(*parser).newMap() - yyVAL.descriptors["z_bucket"] = yyDollar[3].float + yyVAL.descriptors["schema"] = yyDollar[3].int } case 165: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.descriptors = yylex.(*parser).newMap() - yyVAL.descriptors["z_bucket_w"] = yyDollar[3].float + yyVAL.descriptors["sum"] = yyDollar[3].float } case 166: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.descriptors = yylex.(*parser).newMap() - yyVAL.descriptors["custom_values"] = yyDollar[3].bucket_set + yyVAL.descriptors["count"] = yyDollar[3].float } case 167: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.descriptors = yylex.(*parser).newMap() - yyVAL.descriptors["buckets"] = yyDollar[3].bucket_set + yyVAL.descriptors["z_bucket"] = yyDollar[3].float } case 168: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.descriptors = yylex.(*parser).newMap() - yyVAL.descriptors["offset"] = yyDollar[3].int + yyVAL.descriptors["z_bucket_w"] = yyDollar[3].float } case 169: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.descriptors = yylex.(*parser).newMap() - yyVAL.descriptors["n_buckets"] = yyDollar[3].bucket_set + yyVAL.descriptors["custom_values"] = yyDollar[3].bucket_set } case 170: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.descriptors = yylex.(*parser).newMap() - yyVAL.descriptors["n_offset"] = yyDollar[3].int + yyVAL.descriptors["buckets"] = yyDollar[3].bucket_set } case 171: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.descriptors = yylex.(*parser).newMap() - yyVAL.descriptors["counter_reset_hint"] = yyDollar[3].item + yyVAL.descriptors["offset"] = yyDollar[3].int } case 172: - yyDollar = yyS[yypt-4 : yypt+1] + yyDollar = yyS[yypt-3 : yypt+1] { - yyVAL.bucket_set = yyDollar[2].bucket_set + yyVAL.descriptors = yylex.(*parser).newMap() + yyVAL.descriptors["n_buckets"] = yyDollar[3].bucket_set } case 173: yyDollar = yyS[yypt-3 : yypt+1] { - yyVAL.bucket_set = yyDollar[2].bucket_set + yyVAL.descriptors = yylex.(*parser).newMap() + yyVAL.descriptors["n_offset"] = yyDollar[3].int } case 174: yyDollar = yyS[yypt-3 : yypt+1] { - yyVAL.bucket_set = 
append(yyDollar[1].bucket_set, yyDollar[3].float) + yyVAL.descriptors = yylex.(*parser).newMap() + yyVAL.descriptors["counter_reset_hint"] = yyDollar[3].item } case 175: + yyDollar = yyS[yypt-4 : yypt+1] + { + yyVAL.bucket_set = yyDollar[2].bucket_set + } + case 176: + yyDollar = yyS[yypt-3 : yypt+1] + { + yyVAL.bucket_set = yyDollar[2].bucket_set + } + case 177: + yyDollar = yyS[yypt-3 : yypt+1] + { + yyVAL.bucket_set = append(yyDollar[1].bucket_set, yyDollar[3].float) + } + case 178: yyDollar = yyS[yypt-1 : yypt+1] { yyVAL.bucket_set = []float64{yyDollar[1].float} } - case 230: + case 233: yyDollar = yyS[yypt-1 : yypt+1] { yyVAL.node = &NumberLiteral{ @@ -1869,7 +1890,7 @@ yydefault: PosRange: yyDollar[1].item.PositionRange(), } } - case 231: + case 234: yyDollar = yyS[yypt-1 : yypt+1] { var err error @@ -1883,12 +1904,12 @@ yydefault: PosRange: yyDollar[1].item.PositionRange(), } } - case 232: + case 235: yyDollar = yyS[yypt-1 : yypt+1] { yyVAL.float = yylex.(*parser).number(yyDollar[1].item.Val) } - case 233: + case 236: yyDollar = yyS[yypt-1 : yypt+1] { var err error @@ -1899,17 +1920,17 @@ yydefault: } yyVAL.float = dur.Seconds() } - case 234: + case 237: yyDollar = yyS[yypt-2 : yypt+1] { yyVAL.float = yyDollar[2].float } - case 235: + case 238: yyDollar = yyS[yypt-2 : yypt+1] { yyVAL.float = -yyDollar[2].float } - case 238: + case 241: yyDollar = yyS[yypt-1 : yypt+1] { var err error @@ -1918,17 +1939,17 @@ yydefault: yylex.(*parser).addParseErrf(yyDollar[1].item.PositionRange(), "invalid repetition in series values: %s", err) } } - case 239: + case 242: yyDollar = yyS[yypt-2 : yypt+1] { yyVAL.int = -int64(yyDollar[2].uint) } - case 240: + case 243: yyDollar = yyS[yypt-1 : yypt+1] { yyVAL.int = int64(yyDollar[1].uint) } - case 241: + case 244: yyDollar = yyS[yypt-1 : yypt+1] { yyVAL.node = &StringLiteral{ @@ -1936,7 +1957,7 @@ yydefault: PosRange: yyDollar[1].item.PositionRange(), } } - case 242: + case 245: yyDollar = yyS[yypt-1 : yypt+1] { yyVAL.item = Item{ @@ -1945,7 +1966,7 @@ yydefault: Val: yylex.(*parser).unquoteString(yyDollar[1].item.Val), } } - case 243: + case 246: yyDollar = yyS[yypt-0 : yypt+1] { yyVAL.strings = nil diff --git a/promql/parser/lex.go b/promql/parser/lex.go index d031e83307..7210d51b7b 100644 --- a/promql/parser/lex.go +++ b/promql/parser/lex.go @@ -68,6 +68,12 @@ func (i ItemType) IsAggregatorWithParam() bool { return i == TOPK || i == BOTTOMK || i == COUNT_VALUES || i == QUANTILE || i == LIMITK || i == LIMIT_RATIO } +// IsExperimentalAggregator defines the experimental aggregation functions that are controlled +// with EnableExperimentalFunctions. +func (i ItemType) IsExperimentalAggregator() bool { + return i == LIMITK || i == LIMIT_RATIO +} + // IsKeyword returns true if the Item corresponds to a keyword. // Returns false otherwise. 
func (i ItemType) IsKeyword() bool { return i > keywordsStart && i < keywordsEnd } @@ -610,6 +616,9 @@ func lexBuckets(l *Lexer) stateFn { case isSpace(r): l.emit(SPACE) return lexSpace + case r == '-': + l.emit(SUB) + return lexNumber case isDigit(r): l.backup() return lexNumber diff --git a/promql/parser/parse.go b/promql/parser/parse.go index ae558dccc0..3113d18d57 100644 --- a/promql/parser/parse.go +++ b/promql/parser/parse.go @@ -447,8 +447,8 @@ func (p *parser) newAggregateExpr(op Item, modifier, args Node) (ret *AggregateE desiredArgs := 1 if ret.Op.IsAggregatorWithParam() { - if !EnableExperimentalFunctions && (ret.Op == LIMITK || ret.Op == LIMIT_RATIO) { - p.addParseErrf(ret.PositionRange(), "limitk() and limit_ratio() are experimental and must be enabled with --enable-feature=promql-experimental-functions") + if !EnableExperimentalFunctions && ret.Op.IsExperimentalAggregator() { + p.addParseErrf(ret.PositionRange(), "%s() is experimental and must be enabled with --enable-feature=promql-experimental-functions", ret.Op) return } desiredArgs = 2 @@ -784,6 +784,19 @@ func (p *parser) checkAST(node Node) (typ ValueType) { } } + if n.Func.Name == "info" && len(n.Args) > 1 { + // Check the type is correct first + if n.Args[1].Type() != ValueTypeVector { + p.addParseErrf(node.PositionRange(), "expected type %s in %s, got %s", DocumentedType(ValueTypeVector), fmt.Sprintf("call to function %q", n.Func.Name), DocumentedType(n.Args[1].Type())) + } + // Check the vector selector in the input doesn't contain a metric name + if n.Args[1].(*VectorSelector).Name != "" { + p.addParseErrf(n.Args[1].PositionRange(), "expected label selectors only, got vector selector instead") + } + // Set Vector Selector flag to bypass empty matcher check + n.Args[1].(*VectorSelector).BypassEmptyMatcherCheck = true + } + for i, arg := range n.Args { if i >= len(n.Func.ArgTypes) { if n.Func.Variadic == 0 { @@ -830,17 +843,19 @@ func (p *parser) checkAST(node Node) (typ ValueType) { // metric name is a non-empty matcher. break } - // A Vector selector must contain at least one non-empty matcher to prevent - // implicit selection of all metrics (e.g. by a typo). - notEmpty := false - for _, lm := range n.LabelMatchers { - if lm != nil && !lm.Matches("") { - notEmpty = true - break + if !n.BypassEmptyMatcherCheck { + // A Vector selector must contain at least one non-empty matcher to prevent + // implicit selection of all metrics (e.g. by a typo). 
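+				// For example, `{}` and `{foo=~".*"}` both fail this check,
+				// since a matcher that matches the empty string counts as
+				// empty. The info() handling above sets BypassEmptyMatcherCheck
+				// on its second argument, whose label-only selector is allowed
+				// to be that broad.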
+ notEmpty := false + for _, lm := range n.LabelMatchers { + if lm != nil && !lm.Matches("") { + notEmpty = true + break + } + } + if !notEmpty { + p.addParseErrf(n.PositionRange(), "vector selector must contain at least one non-empty matcher") } - } - if !notEmpty { - p.addParseErrf(n.PositionRange(), "vector selector must contain at least one non-empty matcher") } case *NumberLiteral, *StringLiteral: diff --git a/promql/parser/parse_test.go b/promql/parser/parse_test.go index 37748323ce..0e5e2f638b 100644 --- a/promql/parser/parse_test.go +++ b/promql/parser/parse_test.go @@ -3872,6 +3872,81 @@ var testExpr = []struct { }, }, }, + { + input: `info(rate(http_request_counter_total{}[5m]))`, + expected: &Call{ + Func: MustGetFunction("info"), + Args: Expressions{ + &Call{ + Func: MustGetFunction("rate"), + PosRange: posrange.PositionRange{ + Start: 5, + End: 43, + }, + Args: Expressions{ + &MatrixSelector{ + VectorSelector: &VectorSelector{ + Name: "http_request_counter_total", + OriginalOffset: 0, + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "http_request_counter_total"), + }, + PosRange: posrange.PositionRange{ + Start: 10, + End: 38, + }, + }, + EndPos: 42, + Range: 5 * time.Minute, + }, + }, + }, + }, + PosRange: posrange.PositionRange{ + Start: 0, + End: 44, + }, + }, + }, + { + input: `info(rate(http_request_counter_total{}[5m]), target_info{foo="bar"})`, + fail: true, + errMsg: `1:46: parse error: expected label selectors only, got vector selector instead`, + }, + { + input: `info(http_request_counter_total{namespace="zzz"}, {foo="bar", bar="baz"})`, + expected: &Call{ + Func: MustGetFunction("info"), + Args: Expressions{ + &VectorSelector{ + Name: "http_request_counter_total", + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, "namespace", "zzz"), + MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "http_request_counter_total"), + }, + PosRange: posrange.PositionRange{ + Start: 5, + End: 48, + }, + }, + &VectorSelector{ + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, "foo", "bar"), + MustLabelMatcher(labels.MatchEqual, "bar", "baz"), + }, + PosRange: posrange.PositionRange{ + Start: 50, + End: 72, + }, + BypassEmptyMatcherCheck: true, + }, + }, + PosRange: posrange.PositionRange{ + Start: 0, + End: 73, + }, + }, + }, } func makeInt64Pointer(val int64) *int64 { @@ -3889,6 +3964,12 @@ func readable(s string) string { } func TestParseExpressions(t *testing.T) { + // Enable experimental functions testing. 
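+	// Several testExpr cases above (e.g. the info() calls) only parse with
+	// experimental functions enabled; the t.Cleanup below restores the
+	// default so other tests in the package are unaffected.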
+ EnableExperimentalFunctions = true + t.Cleanup(func() { + EnableExperimentalFunctions = false + }) + model.NameValidationScheme = model.UTF8Validation for _, test := range testExpr { t.Run(readable(test.input), func(t *testing.T) { @@ -3925,8 +4006,7 @@ func TestParseExpressions(t *testing.T) { require.Equal(t, expected, expr, "error on input '%s'", test.input) } else { - require.Error(t, err) - require.Contains(t, err.Error(), test.errMsg, "unexpected error on input '%s', expected '%s', got '%s'", test.input, test.errMsg, err.Error()) + require.ErrorContains(t, err, test.errMsg, "unexpected error on input '%s', expected '%s', got '%s'", test.input, test.errMsg, err.Error()) var errorList ParseErrors ok := errors.As(err, &errorList) @@ -4084,17 +4164,17 @@ func TestParseHistogramSeries(t *testing.T) { }, { name: "all properties used", - input: `{} {{schema:1 sum:-0.3 count:3.1 z_bucket:7.1 z_bucket_w:0.05 buckets:[5.1 10 7] offset:-3 n_buckets:[4.1 5] n_offset:-5 counter_reset_hint:gauge}}`, + input: `{} {{schema:1 sum:0.3 count:3.1 z_bucket:7.1 z_bucket_w:0.05 buckets:[5.1 10 7] offset:3 n_buckets:[4.1 5] n_offset:5 counter_reset_hint:gauge}}`, expected: []histogram.FloatHistogram{{ Schema: 1, - Sum: -0.3, + Sum: 0.3, Count: 3.1, ZeroCount: 7.1, ZeroThreshold: 0.05, PositiveBuckets: []float64{5.1, 10, 7}, - PositiveSpans: []histogram.Span{{Offset: -3, Length: 3}}, + PositiveSpans: []histogram.Span{{Offset: 3, Length: 3}}, NegativeBuckets: []float64{4.1, 5}, - NegativeSpans: []histogram.Span{{Offset: -5, Length: 2}}, + NegativeSpans: []histogram.Span{{Offset: 5, Length: 2}}, CounterResetHint: histogram.GaugeType, }}, }, @@ -4114,6 +4194,22 @@ func TestParseHistogramSeries(t *testing.T) { CounterResetHint: histogram.GaugeType, }}, }, + { + name: "all properties used, with negative values where supported", + input: `{} {{schema:1 sum:-0.3 count:-3.1 z_bucket:-7.1 z_bucket_w:0.05 buckets:[-5.1 -10 -7] offset:-3 n_buckets:[-4.1 -5] n_offset:-5 counter_reset_hint:gauge}}`, + expected: []histogram.FloatHistogram{{ + Schema: 1, + Sum: -0.3, + Count: -3.1, + ZeroCount: -7.1, + ZeroThreshold: 0.05, + PositiveBuckets: []float64{-5.1, -10, -7}, + PositiveSpans: []histogram.Span{{Offset: -3, Length: 3}}, + NegativeBuckets: []float64{-4.1, -5}, + NegativeSpans: []histogram.Span{{Offset: -5, Length: 2}}, + CounterResetHint: histogram.GaugeType, + }}, + }, { name: "static series", input: `{} {{buckets:[5 10 7] schema:1}}x2`, @@ -4385,6 +4481,22 @@ func TestHistogramTestExpression(t *testing.T) { }, expected: `{{offset:-3 buckets:[5.1 0 0 0 0 10 7] n_offset:-1 n_buckets:[4.1 5 0 0 7 8 9]}}`, }, + { + name: "known counter reset hint", + input: histogram.FloatHistogram{ + Schema: 1, + Sum: -0.3, + Count: 3.1, + ZeroCount: 7.1, + ZeroThreshold: 0.05, + PositiveBuckets: []float64{5.1, 10, 7}, + PositiveSpans: []histogram.Span{{Offset: -3, Length: 3}}, + NegativeBuckets: []float64{4.1, 5}, + NegativeSpans: []histogram.Span{{Offset: -5, Length: 2}}, + CounterResetHint: histogram.CounterReset, + }, + expected: `{{schema:1 count:3.1 sum:-0.3 z_bucket:7.1 z_bucket_w:0.05 counter_reset_hint:reset offset:-3 buckets:[5.1 10 7] n_offset:-5 n_buckets:[4.1 5]}}`, + }, } { t.Run(test.name, func(t *testing.T) { expression := test.input.TestExpression() @@ -4436,7 +4548,7 @@ func TestRecoverParserError(t *testing.T) { e := errors.New("custom error") defer func() { - require.Equal(t, e.Error(), err.Error()) + require.EqualError(t, err, e.Error()) }() defer p.recover(&err) diff --git a/promql/parser/printer.go 
b/promql/parser/printer.go index 69f5f082d6..63b1950827 100644 --- a/promql/parser/printer.go +++ b/promql/parser/printer.go @@ -72,6 +72,11 @@ func (node *AggregateExpr) String() string { return aggrString } +func (node *AggregateExpr) ShortString() string { + aggrString := node.getAggOpStr() + return aggrString +} + func (node *AggregateExpr) getAggOpStr() string { aggrString := node.Op.String() @@ -95,14 +100,20 @@ func joinLabels(ss []string) string { return strings.Join(ss, ", ") } -func (node *BinaryExpr) String() string { - returnBool := "" +func (node *BinaryExpr) returnBool() string { if node.ReturnBool { - returnBool = " bool" + return " bool" } + return "" +} +func (node *BinaryExpr) String() string { matching := node.getMatchingStr() - return fmt.Sprintf("%s %s%s%s %s", node.LHS, node.Op, returnBool, matching, node.RHS) + return fmt.Sprintf("%s %s%s%s %s", node.LHS, node.Op, node.returnBool(), matching, node.RHS) +} + +func (node *BinaryExpr) ShortString() string { + return fmt.Sprintf("%s%s%s", node.Op, node.returnBool(), node.getMatchingStr()) } func (node *BinaryExpr) getMatchingStr() string { @@ -130,9 +141,13 @@ func (node *Call) String() string { return fmt.Sprintf("%s(%s)", node.Func.Name, node.Args) } -func (node *MatrixSelector) String() string { +func (node *Call) ShortString() string { + return node.Func.Name +} + +func (node *MatrixSelector) atOffset() (string, string) { // Copy the Vector selector before changing the offset - vecSelector := *node.VectorSelector.(*VectorSelector) + vecSelector := node.VectorSelector.(*VectorSelector) offset := "" switch { case vecSelector.OriginalOffset > time.Duration(0): @@ -149,7 +164,13 @@ func (node *MatrixSelector) String() string { case vecSelector.StartOrEnd == END: at = " @ end()" } + return at, offset +} +func (node *MatrixSelector) String() string { + at, offset := node.atOffset() + // Copy the Vector selector before changing the offset + vecSelector := *node.VectorSelector.(*VectorSelector) // Do not print the @ and offset twice. offsetVal, atVal, preproc := vecSelector.OriginalOffset, vecSelector.Timestamp, vecSelector.StartOrEnd vecSelector.OriginalOffset = 0 @@ -163,10 +184,19 @@ func (node *MatrixSelector) String() string { return str } +func (node *MatrixSelector) ShortString() string { + at, offset := node.atOffset() + return fmt.Sprintf("[%s]%s%s", model.Duration(node.Range), at, offset) +} + func (node *SubqueryExpr) String() string { return fmt.Sprintf("%s%s", node.Expr.String(), node.getSubqueryTimeSuffix()) } +func (node *SubqueryExpr) ShortString() string { + return node.getSubqueryTimeSuffix() +} + // getSubqueryTimeSuffix returns the '[:] @ offset ' suffix of the subquery. 
func (node *SubqueryExpr) getSubqueryTimeSuffix() string { step := "" @@ -208,6 +238,10 @@ func (node *UnaryExpr) String() string { return fmt.Sprintf("%s%s", node.Op, node.Expr) } +func (node *UnaryExpr) ShortString() string { + return node.Op.String() +} + func (node *VectorSelector) String() string { var labelStrings []string if len(node.LabelMatchers) > 1 { diff --git a/promql/promqltest/test.go b/promql/promqltest/test.go index ff709e4426..f208b4f313 100644 --- a/promql/promqltest/test.go +++ b/promql/promqltest/test.go @@ -39,6 +39,7 @@ import ( "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/util/almost" + "github.com/prometheus/prometheus/util/convertnhcb" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" ) @@ -46,8 +47,8 @@ import ( var ( patSpace = regexp.MustCompile("[\t ]+") patLoad = regexp.MustCompile(`^load(?:_(with_nhcb))?\s+(.+?)$`) - patEvalInstant = regexp.MustCompile(`^eval(?:_(fail|warn|ordered))?\s+instant\s+(?:at\s+(.+?))?\s+(.+)$`) - patEvalRange = regexp.MustCompile(`^eval(?:_(fail|warn))?\s+range\s+from\s+(.+)\s+to\s+(.+)\s+step\s+(.+?)\s+(.+)$`) + patEvalInstant = regexp.MustCompile(`^eval(?:_(fail|warn|ordered|info))?\s+instant\s+(?:at\s+(.+?))?\s+(.+)$`) + patEvalRange = regexp.MustCompile(`^eval(?:_(fail|warn|info))?\s+range\s+from\s+(.+)\s+to\s+(.+)\s+step\s+(.+?)\s+(.+)$`) ) const ( @@ -65,7 +66,7 @@ var testStartTime = time.Unix(0, 0).UTC() // LoadedStorage returns storage with generated data using the provided load statements. // Non-load statements will cause test errors. func LoadedStorage(t testutil.T, input string) *teststorage.TestStorage { - test, err := newTest(t, input) + test, err := newTest(t, input, false) require.NoError(t, err) for _, cmd := range test.cmds { @@ -124,11 +125,17 @@ func RunBuiltinTests(t TBRun, engine promql.QueryEngine) { // RunTest parses and runs the test against the provided engine. func RunTest(t testutil.T, input string, engine promql.QueryEngine) { - require.NoError(t, runTest(t, input, engine)) + require.NoError(t, runTest(t, input, engine, false)) } -func runTest(t testutil.T, input string, engine promql.QueryEngine) error { - test, err := newTest(t, input) +// testTest allows tests to be run in "test-the-test" mode (true for +// testingMode). This is a special mode for testing test code execution itself. +func testTest(t testutil.T, input string, engine promql.QueryEngine) error { + return runTest(t, input, engine, true) +} + +func runTest(t testutil.T, input string, engine promql.QueryEngine, testingMode bool) error { + test, err := newTest(t, input, testingMode) // Why do this before checking err? newTest() can create the test storage and then return an error, // and we want to make sure to clean that up to avoid leaking goroutines. @@ -163,6 +170,8 @@ func runTest(t testutil.T, input string, engine promql.QueryEngine) error { // against a test storage. type test struct { testutil.T + // testingMode distinguishes between normal execution and test-execution mode. + testingMode bool cmds []testCommand @@ -173,10 +182,11 @@ type test struct { } // newTest returns an initialized empty Test. 
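+// The testingMode argument is stored on the test and later decides whether
+// execEval runs each eval command inline or wraps it in a t.Run subtest.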
-func newTest(t testutil.T, input string) (*test, error) { +func newTest(t testutil.T, input string, testingMode bool) (*test, error) { test := &test{ - T: t, - cmds: []testCommand{}, + T: t, + cmds: []testCommand{}, + testingMode: testingMode, } err := test.parse(input) test.clear() @@ -321,6 +331,8 @@ func (t *test) parseEval(lines []string, i int) (int, *evalCmd, error) { cmd.fail = true case "warn": cmd.warn = true + case "info": + cmd.info = true } for j := 1; i+1 < len(lines); j++ { @@ -477,58 +489,32 @@ func (cmd *loadCmd) append(a storage.Appender) error { return nil } -func getHistogramMetricBase(m labels.Labels, suffix string) (labels.Labels, uint64) { - mName := m.Get(labels.MetricName) - baseM := labels.NewBuilder(m). - Set(labels.MetricName, strings.TrimSuffix(mName, suffix)). - Del(labels.BucketLabel). - Labels() - hash := baseM.Hash() - return baseM, hash -} - type tempHistogramWrapper struct { metric labels.Labels - upperBounds []float64 - histogramByTs map[int64]tempHistogram + histogramByTs map[int64]convertnhcb.TempHistogram } func newTempHistogramWrapper() tempHistogramWrapper { return tempHistogramWrapper{ - upperBounds: []float64{}, - histogramByTs: map[int64]tempHistogram{}, + histogramByTs: map[int64]convertnhcb.TempHistogram{}, } } -type tempHistogram struct { - bucketCounts map[float64]float64 - count float64 - sum float64 -} - -func newTempHistogram() tempHistogram { - return tempHistogram{ - bucketCounts: map[float64]float64{}, - } -} - -func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogramWrapper func(*tempHistogramWrapper), updateHistogram func(*tempHistogram, float64)) { - m2, m2hash := getHistogramMetricBase(m, suffix) +func processClassicHistogramSeries(m labels.Labels, name string, histogramMap map[uint64]tempHistogramWrapper, smpls []promql.Sample, updateHistogram func(*convertnhcb.TempHistogram, float64)) { + m2 := convertnhcb.GetHistogramMetricBase(m, name) + m2hash := m2.Hash() histogramWrapper, exists := histogramMap[m2hash] if !exists { histogramWrapper = newTempHistogramWrapper() } histogramWrapper.metric = m2 - if updateHistogramWrapper != nil { - updateHistogramWrapper(&histogramWrapper) - } for _, s := range smpls { if s.H != nil { continue } histogram, exists := histogramWrapper.histogramByTs[s.T] if !exists { - histogram = newTempHistogram() + histogram = convertnhcb.NewTempHistogram() } updateHistogram(&histogram, s.F) histogramWrapper.histogramByTs[s.T] = histogram @@ -536,34 +522,6 @@ func processClassicHistogramSeries(m labels.Labels, suffix string, histogramMap histogramMap[m2hash] = histogramWrapper } -func processUpperBoundsAndCreateBaseHistogram(upperBounds0 []float64) ([]float64, *histogram.FloatHistogram) { - sort.Float64s(upperBounds0) - upperBounds := make([]float64, 0, len(upperBounds0)) - prevLE := math.Inf(-1) - for _, le := range upperBounds0 { - if le != prevLE { // deduplicate - upperBounds = append(upperBounds, le) - prevLE = le - } - } - var customBounds []float64 - if upperBounds[len(upperBounds)-1] == math.Inf(1) { - customBounds = upperBounds[:len(upperBounds)-1] - } else { - customBounds = upperBounds - } - return upperBounds, &histogram.FloatHistogram{ - Count: 0, - Sum: 0, - Schema: histogram.CustomBucketsSchema, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: uint32(len(upperBounds))}, - }, - PositiveBuckets: make([]float64, len(upperBounds)), - CustomValues: customBounds, - } -} - // If classic histograms are 
defined, convert them into native histograms with custom // bounds and append the defined time series to the storage. func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { @@ -574,24 +532,26 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { for hash, smpls := range cmd.defs { m := cmd.metrics[hash] mName := m.Get(labels.MetricName) - switch { - case strings.HasSuffix(mName, "_bucket") && m.Has(labels.BucketLabel): + suffixType, name := convertnhcb.GetHistogramMetricBaseName(mName) + switch suffixType { + case convertnhcb.SuffixBucket: + if !m.Has(labels.BucketLabel) { + panic(fmt.Sprintf("expected bucket label in metric %s", m)) + } le, err := strconv.ParseFloat(m.Get(labels.BucketLabel), 64) if err != nil || math.IsNaN(le) { continue } - processClassicHistogramSeries(m, "_bucket", histogramMap, smpls, func(histogramWrapper *tempHistogramWrapper) { - histogramWrapper.upperBounds = append(histogramWrapper.upperBounds, le) - }, func(histogram *tempHistogram, f float64) { - histogram.bucketCounts[le] = f + processClassicHistogramSeries(m, name, histogramMap, smpls, func(histogram *convertnhcb.TempHistogram, f float64) { + _ = histogram.SetBucketCount(le, f) }) - case strings.HasSuffix(mName, "_count"): - processClassicHistogramSeries(m, "_count", histogramMap, smpls, nil, func(histogram *tempHistogram, f float64) { - histogram.count = f + case convertnhcb.SuffixCount: + processClassicHistogramSeries(m, name, histogramMap, smpls, func(histogram *convertnhcb.TempHistogram, f float64) { + _ = histogram.SetCount(f) }) - case strings.HasSuffix(mName, "_sum"): - processClassicHistogramSeries(m, "_sum", histogramMap, smpls, nil, func(histogram *tempHistogram, f float64) { - histogram.sum = f + case convertnhcb.SuffixSum: + processClassicHistogramSeries(m, name, histogramMap, smpls, func(histogram *convertnhcb.TempHistogram, f float64) { + _ = histogram.SetSum(f) }) } } @@ -599,30 +559,22 @@ func (cmd *loadCmd) appendCustomHistogram(a storage.Appender) error { // Convert the collated classic histogram data into native histograms // with custom bounds and append them to the storage. 
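+	// Each convertnhcb.TempHistogram is converted below; when Convert yields
+	// only an integer histogram, it is validated and converted to a float
+	// histogram before being appended as a sample.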
for _, histogramWrapper := range histogramMap { - upperBounds, fhBase := processUpperBoundsAndCreateBaseHistogram(histogramWrapper.upperBounds) samples := make([]promql.Sample, 0, len(histogramWrapper.histogramByTs)) for t, histogram := range histogramWrapper.histogramByTs { - fh := fhBase.Copy() - var prevCount, total float64 - for i, le := range upperBounds { - currCount, exists := histogram.bucketCounts[le] - if !exists { - currCount = 0 - } - count := currCount - prevCount - fh.PositiveBuckets[i] = count - total += count - prevCount = currCount - } - fh.Sum = histogram.sum - if histogram.count != 0 { - total = histogram.count - } - fh.Count = total - s := promql.Sample{T: t, H: fh.Compact(0)} - if err := s.H.Validate(); err != nil { + h, fh, err := histogram.Convert() + if err != nil { return err } + if fh == nil { + if err := h.Validate(); err != nil { + return err + } + fh = h.ToFloat(nil) + } + if err := fh.Validate(); err != nil { + return err + } + s := promql.Sample{T: t, H: fh} samples = append(samples, s) } sort.Slice(samples, func(i, j int) bool { return samples[i].T < samples[j].T }) @@ -657,10 +609,10 @@ type evalCmd struct { step time.Duration line int - isRange bool // if false, instant query - fail, warn, ordered bool - expectedFailMessage string - expectedFailRegexp *regexp.Regexp + isRange bool // if false, instant query + fail, warn, ordered, info bool + expectedFailMessage string + expectedFailRegexp *regexp.Regexp metrics map[uint64]labels.Labels expectScalar bool @@ -727,7 +679,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { switch val := result.(type) { case promql.Matrix: if ev.ordered { - return fmt.Errorf("expected ordered result, but query returned a matrix") + return errors.New("expected ordered result, but query returned a matrix") } if ev.expectScalar { @@ -790,7 +742,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error { } if !compareNativeHistogram(expected.H.Compact(0), actual.H.Compact(0)) { - return fmt.Errorf("expected histogram value at index %v (t=%v) for %s to be %v, but got %v (result has %s)", i, actual.T, ev.metrics[hash], expected.H, actual.H, formatSeriesResult(s)) + return fmt.Errorf("expected histogram value at index %v (t=%v) for %s to be %v, but got %v (result has %s)", i, actual.T, ev.metrics[hash], expected.H.TestExpression(), actual.H.TestExpression(), formatSeriesResult(s)) } } } @@ -1006,7 +958,13 @@ func formatSeriesResult(s promql.Series) string { histogramPlural = "" } - return fmt.Sprintf("%v float point%s %v and %v histogram point%s %v", len(s.Floats), floatPlural, s.Floats, len(s.Histograms), histogramPlural, s.Histograms) + histograms := make([]string, 0, len(s.Histograms)) + + for _, p := range s.Histograms { + histograms = append(histograms, fmt.Sprintf("%v @[%v]", p.H.TestExpression(), p.T)) + } + + return fmt.Sprintf("%v float point%s %v and %v histogram point%s %v", len(s.Floats), floatPlural, s.Floats, len(s.Histograms), histogramPlural, histograms) } // HistogramTestExpression returns TestExpression() for the given histogram or "" if the histogram is nil. 
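Usage sketch for the new `info` eval modifier wired up above (metric name and
values are illustrative only):

    load 5m
        some_histogram {{schema:0 sum:1 count:1}}x2

    # max() skips the histogram: the result is empty, and the script also
    # asserts that an info annotation was emitted.
    eval_info instant at 5m max(some_histogram)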
@@ -1129,11 +1087,25 @@ func (t *test) exec(tc testCommand, engine promql.QueryEngine) error { } func (t *test) execEval(cmd *evalCmd, engine promql.QueryEngine) error { - if cmd.isRange { - return t.execRangeEval(cmd, engine) + do := func() error { + if cmd.isRange { + return t.execRangeEval(cmd, engine) + } + + return t.execInstantEval(cmd, engine) } - return t.execInstantEval(cmd, engine) + if t.testingMode { + return do() + } + + if tt, ok := t.T.(*testing.T); ok { + tt.Run(fmt.Sprintf("line %d/%s", cmd.line, cmd.expr), func(t *testing.T) { + require.NoError(t, do()) + }) + return nil + } + return errors.New("t.T is not testing.T") } func (t *test) execRangeEval(cmd *evalCmd, engine promql.QueryEngine) error { @@ -1152,12 +1124,16 @@ func (t *test) execRangeEval(cmd *evalCmd, engine promql.QueryEngine) error { if res.Err == nil && cmd.fail { return fmt.Errorf("expected error evaluating query %q (line %d) but got none", cmd.expr, cmd.line) } - countWarnings, _ := res.Warnings.CountWarningsAndInfo() - if !cmd.warn && countWarnings > 0 { + countWarnings, countInfo := res.Warnings.CountWarningsAndInfo() + switch { + case !cmd.warn && countWarnings > 0: return fmt.Errorf("unexpected warnings evaluating query %q (line %d): %v", cmd.expr, cmd.line, res.Warnings) - } - if cmd.warn && countWarnings == 0 { + case cmd.warn && countWarnings == 0: return fmt.Errorf("expected warnings evaluating query %q (line %d) but got none", cmd.expr, cmd.line) + case !cmd.info && countInfo > 0: + return fmt.Errorf("unexpected info annotations evaluating query %q (line %d): %v", cmd.expr, cmd.line, res.Warnings) + case cmd.info && countInfo == 0: + return fmt.Errorf("expected info annotations evaluating query %q (line %d) but got none", cmd.expr, cmd.line) } defer q.Close() @@ -1202,12 +1178,16 @@ func (t *test) runInstantQuery(iq atModifierTestCase, cmd *evalCmd, engine promq if res.Err == nil && cmd.fail { return fmt.Errorf("expected error evaluating query %q (line %d) but got none", iq.expr, cmd.line) } - countWarnings, _ := res.Warnings.CountWarningsAndInfo() - if !cmd.warn && countWarnings > 0 { + countWarnings, countInfo := res.Warnings.CountWarningsAndInfo() + switch { + case !cmd.warn && countWarnings > 0: return fmt.Errorf("unexpected warnings evaluating query %q (line %d): %v", iq.expr, cmd.line, res.Warnings) - } - if cmd.warn && countWarnings == 0 { + case cmd.warn && countWarnings == 0: return fmt.Errorf("expected warnings evaluating query %q (line %d) but got none", iq.expr, cmd.line) + case !cmd.info && countInfo > 0: + return fmt.Errorf("unexpected info annotations evaluating query %q (line %d): %v", iq.expr, cmd.line, res.Warnings) + case cmd.info && countInfo == 0: + return fmt.Errorf("expected info annotations evaluating query %q (line %d) but got none", iq.expr, cmd.line) } err = cmd.compareResult(res.Value) if err != nil { diff --git a/promql/promqltest/test_test.go b/promql/promqltest/test_test.go index 5aff71fb14..d0d09271e9 100644 --- a/promql/promqltest/test_test.go +++ b/promql/promqltest/test_test.go @@ -237,7 +237,7 @@ eval instant at 5m sum by (group) (http_requests) load 5m testmetric {{}} -eval instant at 5m testmetric +eval instant at 0m testmetric `, expectedError: `error in eval testmetric (line 5): unexpected metric {__name__="testmetric"} in result, has value {count:0, sum:0}`, }, @@ -381,7 +381,7 @@ load 5m eval range from 0 to 10m step 5m testmetric testmetric {{schema:-1 sum:4 count:1 buckets:[1] offset:1}} {{schema:-1 sum:7 count:1 buckets:[1] offset:1}} {{schema:-1 sum:8 
count:1 buckets:[1] offset:1}} `, - expectedError: `error in eval testmetric (line 5): expected histogram value at index 1 (t=300000) for {__name__="testmetric"} to be {count:1, sum:7, (1,4]:1}, but got {count:1, sum:5, (1,4]:1} (result has 0 float points [] and 3 histogram points [{count:1, sum:4, (1,4]:1} @[0] {count:1, sum:5, (1,4]:1} @[300000] {count:1, sum:6, (1,4]:1} @[600000]])`, + expectedError: `error in eval testmetric (line 5): expected histogram value at index 1 (t=300000) for {__name__="testmetric"} to be {{schema:-1 count:1 sum:7 offset:1 buckets:[1]}}, but got {{schema:-1 count:1 sum:5 counter_reset_hint:not_reset offset:1 buckets:[1]}} (result has 0 float points [] and 3 histogram points [{{schema:-1 count:1 sum:4 offset:1 buckets:[1]}} @[0] {{schema:-1 count:1 sum:5 counter_reset_hint:not_reset offset:1 buckets:[1]}} @[300000] {{schema:-1 count:1 sum:6 counter_reset_hint:not_reset offset:1 buckets:[1]}} @[600000]])`, }, "range query with too many points for query time range": { input: testData + ` @@ -532,7 +532,7 @@ load 5m eval range from 0 to 5m step 5m testmetric testmetric 2 3 `, - expectedError: `error in eval testmetric (line 5): expected 2 float points and 0 histogram points for {__name__="testmetric"}, but got 0 float points [] and 2 histogram points [{count:0, sum:0} @[0] {count:0, sum:0} @[300000]]`, + expectedError: `error in eval testmetric (line 5): expected 2 float points and 0 histogram points for {__name__="testmetric"}, but got 0 float points [] and 2 histogram points [{{}} @[0] {{counter_reset_hint:not_reset}} @[300000]]`, }, "range query with expected mixed results": { input: ` @@ -552,7 +552,7 @@ load 5m eval range from 0 to 5m step 5m testmetric testmetric {{}} 3 `, - expectedError: `error in eval testmetric (line 5): expected float value at index 0 for {__name__="testmetric"} to have timestamp 300000, but it had timestamp 0 (result has 1 float point [3 @[0]] and 1 histogram point [{count:0, sum:0} @[300000]])`, + expectedError: `error in eval testmetric (line 5): expected float value at index 0 for {__name__="testmetric"} to have timestamp 300000, but it had timestamp 0 (result has 1 float point [3 @[0]] and 1 histogram point [{{}} @[300000]])`, }, "instant query with expected scalar result": { input: ` @@ -595,7 +595,7 @@ eval range from 0 to 5m step 5m testmetric for name, testCase := range testCases { t.Run(name, func(t *testing.T) { - err := runTest(t, testCase.input, NewTestEngine(t, false, 0, DefaultMaxSamplesPerQuery)) + err := testTest(t, testCase.input, NewTestEngine(t, false, 0, DefaultMaxSamplesPerQuery)) if testCase.expectedError == "" { require.NoError(t, err) diff --git a/promql/promqltest/testdata/aggregators.test b/promql/promqltest/testdata/aggregators.test index 68d2e735b3..7479d2c3d3 100644 --- a/promql/promqltest/testdata/aggregators.test +++ b/promql/promqltest/testdata/aggregators.test @@ -229,13 +229,28 @@ load 5m http_requests{job="api-server", instance="0", group="canary"} NaN http_requests{job="api-server", instance="1", group="canary"} 3 http_requests{job="api-server", instance="2", group="canary"} 4 + http_requests_histogram{job="api-server", instance="3", group="canary"} {{schema:2 count:4 sum:10 buckets:[1 0 0 0 1 0 0 1 1]}} eval instant at 0m max(http_requests) {} 4 +# The histogram is ignored here so the result doesn't change but it has an info annotation now. +eval_info instant at 0m max({job="api-server"}) + {} 4 + +# The histogram is ignored here so there is no result but it has an info annotation now. 
+eval_info instant at 0m max(http_requests_histogram) + eval instant at 0m min(http_requests) {} 1 +# The histogram is ignored here so the result doesn't change but it has an info annotation now. +eval_info instant at 0m min({job="api-server"}) + {} 1 + +# The histogram is ignored here so there is no result but it has an info annotation now. +eval_info instant at 0m min(http_requests_histogram) + eval instant at 0m max by (group) (http_requests) {group="production"} 2 {group="canary"} 4 @@ -250,13 +265,15 @@ clear load 5m http_requests{job="api-server", instance="0", group="production"} 0+10x10 http_requests{job="api-server", instance="1", group="production"} 0+20x10 - http_requests{job="api-server", instance="2", group="production"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN + http_requests{job="api-server", instance="2", group="production"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN http_requests{job="api-server", instance="0", group="canary"} 0+30x10 http_requests{job="api-server", instance="1", group="canary"} 0+40x10 http_requests{job="app-server", instance="0", group="production"} 0+50x10 http_requests{job="app-server", instance="1", group="production"} 0+60x10 http_requests{job="app-server", instance="0", group="canary"} 0+70x10 http_requests{job="app-server", instance="1", group="canary"} 0+80x10 + http_requests_histogram{job="app-server", instance="2", group="canary"} {{schema:0 sum:10 count:10}}x11 + http_requests_histogram{job="api-server", instance="3", group="production"} {{schema:0 sum:20 count:20}}x11 foo 3+0x10 eval_ordered instant at 50m topk(3, http_requests) @@ -323,6 +340,47 @@ eval_ordered instant at 50m topk(scalar(foo), http_requests) http_requests{group="canary", instance="0", job="app-server"} 700 http_requests{group="production", instance="1", job="app-server"} 600 +# Tests for histogram: should ignore histograms. 
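+# topk and bottomk rank by float value only: histogram samples are skipped
+# with an info annotation, so a histogram-only input yields an empty result.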
+eval_info instant at 50m topk(100, http_requests_histogram) + #empty + +eval_info range from 0 to 50m step 5m topk(100, http_requests_histogram) + #empty + +eval_info instant at 50m topk(1, {__name__=~"http_requests(_histogram)?"}) + {__name__="http_requests", group="canary", instance="1", job="app-server"} 800 + +eval_info instant at 50m count(topk(1000, {__name__=~"http_requests(_histogram)?"})) + {} 9 + +eval_info range from 0 to 50m step 5m count(topk(1000, {__name__=~"http_requests(_histogram)?"})) + {} 9x10 + +eval_info instant at 50m topk by (instance) (1, {__name__=~"http_requests(_histogram)?"}) + {__name__="http_requests", group="canary", instance="0", job="app-server"} 700 + {__name__="http_requests", group="canary", instance="1", job="app-server"} 800 + {__name__="http_requests", group="production", instance="2", job="api-server"} NaN + +eval_info instant at 50m bottomk(100, http_requests_histogram) + #empty + +eval_info range from 0 to 50m step 5m bottomk(100, http_requests_histogram) + #empty + +eval_info instant at 50m bottomk(1, {__name__=~"http_requests(_histogram)?"}) + {__name__="http_requests", group="production", instance="0", job="api-server"} 100 + +eval_info instant at 50m count(bottomk(1000, {__name__=~"http_requests(_histogram)?"})) + {} 9 + +eval_info range from 0 to 50m step 5m count(bottomk(1000, {__name__=~"http_requests(_histogram)?"})) + {} 9x10 + +eval_info instant at 50m bottomk by (instance) (1, {__name__=~"http_requests(_histogram)?"}) + {__name__="http_requests", group="production", instance="0", job="api-server"} 100 + {__name__="http_requests", group="production", instance="1", job="api-server"} 200 + {__name__="http_requests", group="production", instance="2", job="api-server"} NaN + clear # Tests for count_values. @@ -336,37 +394,41 @@ load 5m version{job="app-server", instance="1", group="production"} 6 version{job="app-server", instance="0", group="canary"} 7 version{job="app-server", instance="1", group="canary"} 7 + version{job="app-server", instance="2", group="canary"} {{schema:0 sum:10 count:20 z_bucket_w:0.001 z_bucket:2 buckets:[1 2] n_buckets:[1 2]}} + version{job="app-server", instance="3", group="canary"} {{schema:0 sum:10 count:20 z_bucket_w:0.001 z_bucket:2 buckets:[1 2] n_buckets:[1 2]}} -eval instant at 5m count_values("version", version) +eval instant at 1m count_values("version", version) {version="6"} 5 {version="7"} 2 {version="8"} 2 + {version="{count:20, sum:10, [-2,-1):2, [-1,-0.5):1, [-0.001,0.001]:2, (0.5,1]:1, (1,2]:2}"} 2 +eval instant at 1m count_values(((("version"))), version) + {version="6"} 5 + {version="7"} 2 + {version="8"} 2 + {version="{count:20, sum:10, [-2,-1):2, [-1,-0.5):1, [-0.001,0.001]:2, (0.5,1]:1, (1,2]:2}"} 2 -eval instant at 5m count_values(((("version"))), version) - {version="6"} 5 - {version="7"} 2 - {version="8"} 2 - - -eval instant at 5m count_values without (instance)("version", version) +eval instant at 1m count_values without (instance)("version", version) {job="api-server", group="production", version="6"} 3 {job="api-server", group="canary", version="8"} 2 {job="app-server", group="production", version="6"} 2 {job="app-server", group="canary", version="7"} 2 + {job="app-server", group="canary", version="{count:20, sum:10, [-2,-1):2, [-1,-0.5):1, [-0.001,0.001]:2, (0.5,1]:1, (1,2]:2}"} 2 # Overwrite label with output. Don't do this. 
-eval instant at 5m count_values without (instance)("job", version) +eval instant at 1m count_values without (instance)("job", version) {job="6", group="production"} 5 {job="8", group="canary"} 2 {job="7", group="canary"} 2 + {job="{count:20, sum:10, [-2,-1):2, [-1,-0.5):1, [-0.001,0.001]:2, (0.5,1]:1, (1,2]:2}", group="canary"} 2 # Overwrite label with output. Don't do this. -eval instant at 5m count_values by (job, group)("job", version) +eval instant at 1m count_values by (job, group)("job", version) {job="6", group="production"} 5 {job="8", group="canary"} 2 {job="7", group="canary"} 2 - + {job="{count:20, sum:10, [-2,-1):2, [-1,-0.5):1, [-0.001,0.001]:2, (0.5,1]:1, (1,2]:2}", group="canary"} 2 # Tests for quantile. clear @@ -380,6 +442,7 @@ load 10s data{test="uneven samples",point="a"} 0 data{test="uneven samples",point="b"} 1 data{test="uneven samples",point="c"} 4 + data_histogram{test="histogram sample", point="c"} {{schema:2 count:4 sum:10 buckets:[1 0 0 0 1 0 0 1 1]}} foo .8 eval instant at 1m quantile without(point)(0.8, data) @@ -387,6 +450,15 @@ eval instant at 1m quantile without(point)(0.8, data) {test="three samples"} 1.6 {test="uneven samples"} 2.8 +# The histogram is ignored here so the result doesn't change but it has an info annotation now. +eval_info instant at 1m quantile without(point)(0.8, {__name__=~"data(_histogram)?"}) + {test="two samples"} 0.8 + {test="three samples"} 1.6 + {test="uneven samples"} 2.8 + +# The histogram is ignored here so there is no result but it has an info annotation now. +eval_info instant at 1m quantile(0.8, data_histogram) + # Bug #5276. eval instant at 1m quantile without(point)(scalar(foo), data) {test="two samples"} 0.8 @@ -416,12 +488,14 @@ load 10s data{test="uneven samples",point="a"} 0 data{test="uneven samples",point="b"} 1 data{test="uneven samples",point="c"} 4 + data{test="histogram sample",point="c"} {{schema:0 sum:0 count:0}} foo .8 eval instant at 1m group without(point)(data) {test="two samples"} 1 {test="three samples"} 1 {test="uneven samples"} 1 + {test="histogram sample"} 1 eval instant at 1m group(foo) {} 1 @@ -469,6 +543,7 @@ load 10s data{test="bigzero",point="b"} -9.988465674311579e+307 data{test="bigzero",point="c"} 9.988465674311579e+307 data{test="bigzero",point="d"} 9.988465674311579e+307 + data{test="value is nan"} NaN eval instant at 1m avg(data{test="ten"}) {} 10 @@ -503,6 +578,10 @@ eval instant at 1m avg(data{test="-big"}) eval instant at 1m avg(data{test="bigzero"}) {} 0 +# If NaN is in the mix, the result is NaN. +eval instant at 1m avg(data) + {} NaN + # Test summing and averaging extreme values. clear @@ -572,3 +651,166 @@ clear # #eval instant at 1m count(topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without()) == topk(1,max(up) without())) # {} 1 + +clear + +# Test stddev produces consistent results regardless the order the data is loaded in. +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}} + +# The histogram is ignored here so the result doesn't change but it has an info annotation now. +eval_info instant at 0m stddev(series) + {} 0.5 + +eval_info instant at 0m stdvar(series) + {} 0.25 + +# The histogram is ignored here so there is no result but it has an info annotation now. 
+eval_info instant at 0m stddev({label="c"}) + +eval_info instant at 0m stdvar({label="c"}) + +eval_info instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + +eval_info instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + +clear + +load 5m + series{label="a"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}} + series{label="b"} 1 + series{label="c"} 2 + +eval_info instant at 0m stddev(series) + {} 0.5 + +eval_info instant at 0m stdvar(series) + {} 0.25 + +eval_info instant at 0m stddev by (label) (series) + {label="b"} 0 + {label="c"} 0 + +eval_info instant at 0m stdvar by (label) (series) + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} NaN + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +clear + +load 5m + series{label="a"} NaN + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series NaN + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +clear + +load 5m + series{label="a"} 1 + series{label="b"} 2 + series{label="c"} inf + +eval instant at 0m stddev (series) + {} NaN + +eval instant at 0m stdvar (series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +eval instant at 0m stdvar by (label) (series) + {label="a"} 0 + {label="b"} 0 + {label="c"} NaN + +clear + +load 5m + series{label="a"} inf + series{label="b"} 1 + series{label="c"} 2 + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN + +eval instant at 0m stddev by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +eval instant at 0m stdvar by (label) (series) + {label="a"} NaN + {label="b"} 0 + {label="c"} 0 + +clear + +load 5m + series inf + +eval instant at 0m stddev(series) + {} NaN + +eval instant at 0m stdvar(series) + {} NaN diff --git a/promql/promqltest/testdata/at_modifier.test b/promql/promqltest/testdata/at_modifier.test index 35f90ee671..1ad301bdb7 100644 --- a/promql/promqltest/testdata/at_modifier.test +++ b/promql/promqltest/testdata/at_modifier.test @@ -90,7 +90,8 @@ eval instant at 25s sum_over_time(metric{job="1"}[100] offset 50s @ 100) eval instant at 25s metric{job="1"} @ 50 + metric{job="1"} @ 100 {job="1"} 15 -eval instant at 25s rate(metric{job="1"}[100s] @ 100) + label_replace(rate(metric{job="2"}[123s] @ 200), "job", "1", "", "") +# Note that this triggers an info annotation because we are rate'ing a metric that does not end in `_total`. 
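+# (The annotation is informational only; the rate value itself is computed as usual.)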
+eval_info instant at 25s rate(metric{job="1"}[100s] @ 100) + label_replace(rate(metric{job="2"}[123s] @ 200), "job", "1", "", "") {job="1"} 0.3 eval instant at 25s sum_over_time(metric{job="1"}[100s] @ 100) + label_replace(sum_over_time(metric{job="2"}[100s] @ 100), "job", "1", "", "") @@ -121,45 +122,43 @@ eval instant at 25s sum_over_time(metric{job="1"}[100:1] offset 20 @ 100) # Since vector selector has timestamp, the result value does not depend on the timestamp of subqueries. # Inner most sum=1+2+...+10=55. -# With [100s:25s] subquery, it's 55*5. +# With [100s:25s] subquery, it's 55*4. eval instant at 100s sum_over_time(sum_over_time(metric{job="1"}[100s] @ 100)[100s:25s] @ 50) - {job="1"} 275 + {job="1"} 220 # Nested subqueries with different timestamps on both. # Since vector selector has timestamp, the result value does not depend on the timestamp of subqueries. -# Sum of innermost subquery is 275 as above. The outer subquery repeats it 4 times. +# Sum of innermost subquery is 220 as above. The outer subquery repeats it 3 times. eval instant at 0s sum_over_time(sum_over_time(sum_over_time(metric{job="1"}[100s] @ 100)[100s:25s] @ 50)[3s:1s] @ 3000) - {job="1"} 1100 + {job="1"} 660 # Testing the inner subquery timestamp since vector selector does not have @. # Inner sum for subquery [100s:25s] @ 50 are -# at -50 nothing, at -25 nothing, at 0=0, at 25=2, at 50=4+5=9. -# This sum of 11 is repeated 4 times by outer subquery. +# at -50 nothing, at -25 nothing, at 0=0, at 25=2, at 50=5. +# This sum of 7 is repeated 3 times by outer subquery. eval instant at 0s sum_over_time(sum_over_time(sum_over_time(metric{job="1"}[10s])[100s:25s] @ 50)[3s:1s] @ 200) - {job="1"} 44 + {job="1"} 21 # Inner sum for subquery [100s:25s] @ 200 are -# at 100=9+10, at 125=12, at 150=14+15, at 175=17, at 200=19+20. -# This sum of 116 is repeated 4 times by outer subquery. +# at 125=12, at 150=15, at 175=17, at 200=20. +# This sum of 64 is repeated 3 times by outer subquery. eval instant at 0s sum_over_time(sum_over_time(sum_over_time(metric{job="1"}[10s])[100s:25s] @ 200)[3s:1s] @ 50) - {job="1"} 464 + {job="1"} 192 # Nested subqueries with timestamp only on outer subquery. # Outer most subquery: -# at 900=783 -# inner subquery: at 870=87+86+85, at 880=88+87+86, at 890=89+88+87 -# at 925=537 -# inner subquery: at 895=89+88, at 905=90+89, at 915=90+91 -# at 950=828 -# inner subquery: at 920=92+91+90, at 930=93+92+91, at 940=94+93+92 -# at 975=567 -# inner subquery: at 945=94+93, at 955=95+94, at 965=96+95 -# at 1000=873 -# inner subquery: at 970=97+96+95, at 980=98+97+96, at 990=99+98+97 +# at 925=360 +# inner subquery: at 905=90+89, at 915=91+90 +# at 950=372 +# inner subquery: at 930=93+92, at 940=94+93 +# at 975=380 +# inner subquery: at 955=95+94, at 965=96+95 +# at 1000=392 +# inner subquery: at 980=98+97, at 990=99+98 eval instant at 0s sum_over_time(sum_over_time(sum_over_time(metric{job="1"}[20s])[20s:10s] offset 10s)[100s:25s] @ 1000) - {job="1"} 3588 + {job="1"} 1504 # minute is counted on the value of the sample. eval instant at 10s minute(metric @ 1500) @@ -182,32 +181,32 @@ eval instant at 15m timestamp(timestamp(metric{job="1"} @ 10)) # minute is counted on the value of the sample. eval instant at 0s sum_over_time(minute(metric @ 1500)[100s:10s]) - {job="1"} 22 - {job="2"} 55 + {job="1"} 20 + {job="2"} 50 # If nothing passed, minute() takes eval time. # Here the eval time is determined by the subquery. # [50m:1m] at 6000, i.e. 100m, is 50m to 100m. -# sum=50+51+52+...+59+0+1+2+...+40. 
+# sum=51+52+...+59+0+1+2+...+40.
eval instant at 0s sum_over_time(minute()[50m:1m] @ 6000)
+  {} 1315
+
+# sum=46+47+...+59+0+1+2+...+35.
+eval instant at 0s sum_over_time(minute()[50m:1m] @ 6000 offset 5m)
  {} 1365

-# sum=45+46+47+...+59+0+1+2+...+35.
-eval instant at 0s sum_over_time(minute()[50m:1m] @ 6000 offset 5m)
-  {} 1410
-
# time() is the eval time which is determined by subquery here.
-# 2900+2901+...+3000 = (3000*3001 - 2899*2900)/2.
+# 2901+...+3000 = (3000*3001 - 2900*2901)/2.
eval instant at 0s sum_over_time(vector(time())[100s:1s] @ 3000)
-  {} 297950
+  {} 295050

-# 2300+2301+...+2400 = (2400*2401 - 2299*2300)/2.
+# 2301+...+2400 = (2400*2401 - 2300*2301)/2.
eval instant at 0s sum_over_time(vector(time())[100s:1s] @ 3000 offset 600s)
-  {} 237350
+  {} 235050

# timestamp() takes the time of the sample and not the evaluation time.
eval instant at 0s sum_over_time(timestamp(metric{job="1"} @ 10)[100s:10s] @ 3000)
-  {job="1"} 110
+  {job="1"} 100

# The result of inner timestamp() will have the timestamp as the
# eval time, hence entire expression is not step invariant and depends on eval time.
diff --git a/promql/promqltest/testdata/functions.test b/promql/promqltest/testdata/functions.test
index 6e2b3630bc..a44a9fc67e 100644
--- a/promql/promqltest/testdata/functions.test
+++ b/promql/promqltest/testdata/functions.test
@@ -2,15 +2,20 @@
load 5m
  http_requests{path="/foo"} 1 2 3 0 1 0 0 1 2 0
  http_requests{path="/bar"} 1 2 3 4 5 1 2 3 4 5
-  http_requests{path="/biz"} 0 0 0 0 0 1 1 1 1 1
+  http_requests{path="/biz"} 0 0 0 0 0 1 1 1 1 1
+  http_requests_histogram{path="/foo"} {{schema:0 sum:1 count:1}}x9
+  http_requests_histogram{path="/bar"} 0 0 0 0 0 0 0 0 {{schema:0 sum:1 count:1}} {{schema:0 sum:1 count:1}}
+  http_requests_histogram{path="/biz"} 0 1 0 2 0 3 0 {{schema:0 sum:1 count:1}} {{schema:0 sum:2 count:2}} {{schema:0 sum:1 count:1}}

# Tests for resets().
eval instant at 50m resets(http_requests[5m])
+
+eval instant at 50m resets(http_requests[10m])
  {path="/foo"} 0
  {path="/bar"} 0
  {path="/biz"} 0

-eval instant at 50m resets(http_requests[300])
+eval instant at 50m resets(http_requests[600])
  {path="/foo"} 0
  {path="/bar"} 0
  {path="/biz"} 0
@@ -21,6 +26,11 @@ eval instant at 50m resets(http_requests[20m])
  {path="/biz"} 0

eval instant at 50m resets(http_requests[30m])
+  {path="/foo"} 1
+  {path="/bar"} 0
+  {path="/biz"} 0
+
+eval instant at 50m resets(http_requests[32m])
  {path="/foo"} 2
  {path="/bar"} 1
  {path="/biz"} 0
@@ -34,39 +44,56 @@ eval instant at 50m resets(nonexistent_metric[50m])

# Tests for changes().
eval instant at 50m changes(http_requests[5m])
+
+eval instant at 50m changes(http_requests[6m])
  {path="/foo"} 0
  {path="/bar"} 0
  {path="/biz"} 0

eval instant at 50m changes(http_requests[20m])
-  {path="/foo"} 3
-  {path="/bar"} 3
+  {path="/foo"} 2
+  {path="/bar"} 2
  {path="/biz"} 0

eval instant at 50m changes(http_requests[30m])
-  {path="/foo"} 4
-  {path="/bar"} 5
-  {path="/biz"} 1
+  {path="/foo"} 3
+  {path="/bar"} 4
+  {path="/biz"} 0

eval instant at 50m changes(http_requests[50m])
-  {path="/foo"} 8
-  {path="/bar"} 9
+  {path="/foo"} 7
+  {path="/bar"} 8
  {path="/biz"} 1

eval instant at 50m changes((http_requests[50m]))
-  {path="/foo"} 8
-  {path="/bar"} 9
+  {path="/foo"} 7
+  {path="/bar"} 8
  {path="/biz"} 1

eval instant at 50m changes(nonexistent_metric[50m])

+# Test for mix of floats and histograms.
+# Because of bug #14172 we are not able to test more complex cases like below:
+# 0 1 2 {{schema:0 sum:1 count:1}} 3 {{schema:0 sum:2 count:2}} 4 {{schema:0 sum:3 count:3}}.
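+# For the 60m window below: /biz alternates at every sample (floats and histograms alike), giving 9 changes, while /bar changes only once, from float 0 to the first histogram.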
+eval instant at 50m changes(http_requests_histogram[5m]) + +eval instant at 50m changes(http_requests_histogram[6m]) + {path="/foo"} 0 + {path="/bar"} 0 + {path="/biz"} 0 + +eval instant at 50m changes(http_requests_histogram[60m]) + {path="/foo"} 0 + {path="/bar"} 1 + {path="/biz"} 9 + clear load 5m x{a="b"} NaN NaN NaN x{a="c"} 0 NaN 0 -eval instant at 15m changes(x[15m]) +eval instant at 15m changes(x[20m]) {a="b"} 0 {a="c"} 2 @@ -74,15 +101,15 @@ clear # Tests for increase(). load 5m - http_requests{path="/foo"} 0+10x10 - http_requests{path="/bar"} 0+10x5 0+10x5 - http_requests{path="/dings"} 10+10x10 - http_requests{path="/bumms"} 1+10x10 + http_requests_total{path="/foo"} 0+10x10 + http_requests_total{path="/bar"} 0+18x5 0+18x5 + http_requests_total{path="/dings"} 10+10x10 + http_requests_total{path="/bumms"} 1+10x10 # Tests for increase(). -eval instant at 50m increase(http_requests[50m]) +eval instant at 50m increase(http_requests_total[50m]) {path="/foo"} 100 - {path="/bar"} 90 + {path="/bar"} 160 {path="/dings"} 100 {path="/bumms"} 100 @@ -93,9 +120,9 @@ eval instant at 50m increase(http_requests[50m]) # chosen. However, "bumms" has value 1 at t=0 and would reach 0 at # t=-30s. Here the extrapolation to t=-2m30s would reach a negative # value, and therefore the extrapolation happens only by 30s. -eval instant at 50m increase(http_requests[100m]) +eval instant at 50m increase(http_requests_total[100m]) {path="/foo"} 100 - {path="/bar"} 90 + {path="/bar"} 162 {path="/dings"} 105 {path="/bumms"} 101 @@ -106,55 +133,57 @@ clear # So the sequence 3 2 (decreasing counter = reset) is interpreted the same as 3 0 1 2. # Prometheus assumes it missed the intermediate values 0 and 1. load 5m - http_requests{path="/foo"} 0 1 2 3 2 3 4 + http_requests_total{path="/foo"} 0 1 2 3 2 3 4 -eval instant at 30m increase(http_requests[30m]) +eval instant at 30m increase(http_requests_total[30m]) {path="/foo"} 7 clear # Tests for rate(). load 5m - testcounter_reset_middle 0+10x4 0+10x5 - testcounter_reset_end 0+10x9 0 10 + testcounter_reset_middle_total 0+27x4 0+27x5 + testcounter_reset_end_total 0+10x9 0 10 # Counter resets at in the middle of range are handled correctly by rate(). -eval instant at 50m rate(testcounter_reset_middle[50m]) - {} 0.03 +eval instant at 50m rate(testcounter_reset_middle_total[50m]) + {} 0.08 # Counter resets at end of range are ignored by rate(). -eval instant at 50m rate(testcounter_reset_end[5m]) +eval instant at 50m rate(testcounter_reset_end_total[5m]) + +eval instant at 50m rate(testcounter_reset_end_total[6m]) {} 0 clear load 5m - calculate_rate_offset{x="a"} 0+10x10 - calculate_rate_offset{x="b"} 0+20x10 - calculate_rate_window 0+80x10 + calculate_rate_offset_total{x="a"} 0+10x10 + calculate_rate_offset_total{x="b"} 0+20x10 + calculate_rate_window_total 0+80x10 # Rates should calculate per-second rates. 
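+# For example, calculate_rate_window_total below grows by 80 every 300s, so its per-second rate is 80/300 = 0.2666....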
-eval instant at 50m rate(calculate_rate_window[50m]) +eval instant at 50m rate(calculate_rate_window_total[50m]) {} 0.26666666666666666 -eval instant at 50m rate(calculate_rate_offset[10m] offset 5m) +eval instant at 50m rate(calculate_rate_offset_total[10m] offset 5m) {x="a"} 0.03333333333333333 {x="b"} 0.06666666666666667 clear load 4m - testcounter_zero_cutoff{start="0m"} 0+240x10 - testcounter_zero_cutoff{start="1m"} 60+240x10 - testcounter_zero_cutoff{start="2m"} 120+240x10 - testcounter_zero_cutoff{start="3m"} 180+240x10 - testcounter_zero_cutoff{start="4m"} 240+240x10 - testcounter_zero_cutoff{start="5m"} 300+240x10 + testcounter_zero_cutoff_total{start="0m"} 0+240x10 + testcounter_zero_cutoff_total{start="1m"} 60+240x10 + testcounter_zero_cutoff_total{start="2m"} 120+240x10 + testcounter_zero_cutoff_total{start="3m"} 180+240x10 + testcounter_zero_cutoff_total{start="4m"} 240+240x10 + testcounter_zero_cutoff_total{start="5m"} 300+240x10 # Zero cutoff for left-side extrapolation happens until we # reach half a sampling interval (2m). Beyond that, we only # extrapolate by half a sampling interval. -eval instant at 10m rate(testcounter_zero_cutoff[20m]) +eval instant at 10m rate(testcounter_zero_cutoff_total[20m]) {start="0m"} 0.5 {start="1m"} 0.55 {start="2m"} 0.6 @@ -163,7 +192,7 @@ eval instant at 10m rate(testcounter_zero_cutoff[20m]) {start="5m"} 0.6 # Normal half-interval cutoff for left-side extrapolation. -eval instant at 50m rate(testcounter_zero_cutoff[20m]) +eval instant at 50m rate(testcounter_zero_cutoff_total[20m]) {start="0m"} 0.6 {start="1m"} 0.6 {start="2m"} 0.6 @@ -175,15 +204,15 @@ clear # Tests for irate(). load 5m - http_requests{path="/foo"} 0+10x10 - http_requests{path="/bar"} 0+10x5 0+10x5 + http_requests_total{path="/foo"} 0+10x10 + http_requests_total{path="/bar"} 0+10x5 0+10x5 -eval instant at 50m irate(http_requests[50m]) +eval instant at 50m irate(http_requests_total[50m]) {path="/foo"} .03333333333333333333 {path="/bar"} .03333333333333333333 # Counter reset. -eval instant at 30m irate(http_requests[50m]) +eval instant at 30m irate(http_requests_total[50m]) {path="/foo"} .03333333333333333333 {path="/bar"} 0 @@ -213,18 +242,18 @@ clear # Tests for deriv() and predict_linear(). load 5m - testcounter_reset_middle 0+10x4 0+10x5 - http_requests{job="app-server", instance="1", group="canary"} 0+80x10 + testcounter_reset_middle_total 0+10x4 0+10x5 + http_requests_total{job="app-server", instance="1", group="canary"} 0+80x10 # deriv should return the same as rate in simple cases. -eval instant at 50m rate(http_requests{group="canary", instance="1", job="app-server"}[50m]) +eval instant at 50m rate(http_requests_total{group="canary", instance="1", job="app-server"}[50m]) {group="canary", instance="1", job="app-server"} 0.26666666666666666 -eval instant at 50m deriv(http_requests{group="canary", instance="1", job="app-server"}[50m]) +eval instant at 50m deriv(http_requests_total{group="canary", instance="1", job="app-server"}[50m]) {group="canary", instance="1", job="app-server"} 0.26666666666666666 # deriv should return correct result. -eval instant at 50m deriv(testcounter_reset_middle[100m]) +eval instant at 50m deriv(testcounter_reset_middle_total[100m]) {} 0.010606060606060607 # predict_linear should return correct result. 
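+# (predict_linear(v[d], t) fits a least-squares line to the samples in d and extrapolates it to t seconds after the evaluation time.)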
@@ -241,31 +270,31 @@ eval instant at 50m deriv(testcounter_reset_middle[100m]) # intercept at t=0: 6.818181818181818 # intercept at t=3000: 38.63636363636364 # intercept at t=3000+3600: 76.81818181818181 -eval instant at 50m predict_linear(testcounter_reset_middle[50m], 3600) - {} 76.81818181818181 +eval instant at 50m predict_linear(testcounter_reset_middle_total[50m], 3600) + {} 70 -eval instant at 50m predict_linear(testcounter_reset_middle[50m], 1h) - {} 76.81818181818181 +eval instant at 50m predict_linear(testcounter_reset_middle_total[50m], 1h) + {} 70 # intercept at t = 3000+3600 = 6600 -eval instant at 50m predict_linear(testcounter_reset_middle[50m] @ 3000, 3600) +eval instant at 50m predict_linear(testcounter_reset_middle_total[55m] @ 3000, 3600) {} 76.81818181818181 -eval instant at 50m predict_linear(testcounter_reset_middle[50m] @ 3000, 1h) +eval instant at 50m predict_linear(testcounter_reset_middle_total[55m] @ 3000, 1h) {} 76.81818181818181 # intercept at t = 600+3600 = 4200 -eval instant at 10m predict_linear(testcounter_reset_middle[50m] @ 3000, 3600) +eval instant at 10m predict_linear(testcounter_reset_middle_total[55m] @ 3000, 3600) {} 51.36363636363637 # intercept at t = 4200+3600 = 7800 -eval instant at 70m predict_linear(testcounter_reset_middle[50m] @ 3000, 3600) +eval instant at 70m predict_linear(testcounter_reset_middle_total[55m] @ 3000, 3600) {} 89.54545454545455 -# With http_requests, there is a sample value exactly at the end of +# With http_requests_total, there is a sample value exactly at the end of # the range, and it has exactly the predicted value, so predict_linear # can be emulated with deriv. -eval instant at 50m predict_linear(http_requests[50m], 3600) - (http_requests + deriv(http_requests[50m]) * 3600) +eval instant at 50m predict_linear(http_requests_total[50m], 3600) - (http_requests_total + deriv(http_requests_total[50m]) * 3600) {group="canary", instance="1", job="app-server"} 0 clear @@ -441,6 +470,21 @@ eval instant at 0m clamp(test_clamp, NaN, 0) eval instant at 0m clamp(test_clamp, 5, -5) +clear + +load 1m + mixed_metric {{schema:0 sum:5 count:4 buckets:[1 2 1]}} 1 2 3 {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:8 count:6 buckets:[1 4 1]}} + +# clamp ignores any histograms +eval range from 0 to 5m step 1m clamp(mixed_metric, 2, 5) + {} _ 2 2 3 + +eval range from 0 to 5m step 1m clamp_min(mixed_metric, 2) + {} _ 2 2 3 + +eval range from 0 to 5m step 1m clamp_max(mixed_metric, 2) + {} _ 1 2 2 + # Test cases for sgn. 
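+# (sgn(v) returns 1 where the sample value is positive, -1 where it is negative, and 0 where it is zero.)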
clear load 5m @@ -467,7 +511,7 @@ load 5m http_requests{job="api-server", instance="1", group="production"} 0+20x10 http_requests{job="api-server", instance="0", group="canary"} 0+30x10 http_requests{job="api-server", instance="1", group="canary"} 0+40x10 - http_requests{job="api-server", instance="2", group="canary"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN + http_requests{job="api-server", instance="2", group="canary"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN http_requests{job="app-server", instance="0", group="production"} 0+50x10 http_requests{job="app-server", instance="1", group="production"} 0+60x10 http_requests{job="app-server", instance="0", group="canary"} 0+70x10 @@ -502,7 +546,7 @@ load 5m http_requests{job="api-server", instance="1", group="production"} 0+20x10 http_requests{job="api-server", instance="0", group="canary"} 0+30x10 http_requests{job="api-server", instance="1", group="canary"} 0+40x10 - http_requests{job="api-server", instance="2", group="canary"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN + http_requests{job="api-server", instance="2", group="canary"} NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN http_requests{job="app-server", instance="0", group="production"} 0+50x10 http_requests{job="app-server", instance="1", group="production"} 0+60x10 http_requests{job="app-server", instance="0", group="canary"} 0+70x10 @@ -640,7 +684,7 @@ eval_ordered instant at 50m sort_by_label(node_uname_info, "release") node_uname_info{job="node_exporter", instance="4m5", release="1.11.3"} 100 node_uname_info{job="node_exporter", instance="4m1000", release="1.111.3"} 100 -# Tests for holt_winters +# Tests for double_exponential_smoothing clear # positive trends @@ -650,7 +694,7 @@ load 10s http_requests{job="api-server", instance="0", group="canary"} 0+30x1000 300+80x1000 http_requests{job="api-server", instance="1", group="canary"} 0+40x2000 -eval instant at 8000s holt_winters(http_requests[1m], 0.01, 0.1) +eval instant at 8000s double_exponential_smoothing(http_requests[1m], 0.01, 0.1) {job="api-server", instance="0", group="production"} 8000 {job="api-server", instance="1", group="production"} 16000 {job="api-server", instance="0", group="canary"} 24000 @@ -664,7 +708,7 @@ load 10s http_requests{job="api-server", instance="0", group="canary"} 0+30x1000 300-80x1000 http_requests{job="api-server", instance="1", group="canary"} 0-40x1000 0+40x1000 -eval instant at 8000s holt_winters(http_requests[1m], 0.01, 0.1) +eval instant at 8000s double_exponential_smoothing(http_requests[1m], 0.01, 0.1) {job="api-server", instance="0", group="production"} 0 {job="api-server", instance="1", group="production"} -16000 {job="api-server", instance="0", group="canary"} 24000 @@ -687,11 +731,12 @@ load 10s metric8 9.988465674311579e+307 9.988465674311579e+307 metric9 -9.988465674311579e+307 -9.988465674311579e+307 -9.988465674311579e+307 metric10 -9.988465674311579e+307 9.988465674311579e+307 + metric11 1 2 3 NaN NaN -eval instant at 1m avg_over_time(metric[1m]) +eval instant at 55s avg_over_time(metric[1m]) {} 3 -eval instant at 1m sum_over_time(metric[1m])/count_over_time(metric[1m]) +eval instant at 55s sum_over_time(metric[1m])/count_over_time(metric[1m]) {} 3 eval instant at 1m avg_over_time(metric2[1m]) @@ -758,8 +803,8 @@ eval instant at 1m avg_over_time(metric8[1m]) {} 9.988465674311579e+307 # This overflows float64. 
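+# (2 * 9.988465674311579e+307 is about 2.0e+308, beyond the float64 maximum of roughly 1.8e+308, hence +Inf.)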
-eval instant at 1m sum_over_time(metric8[1m])/count_over_time(metric8[1m]) - {} Inf +eval instant at 1m sum_over_time(metric8[2m])/count_over_time(metric8[2m]) + {} +Inf eval instant at 1m avg_over_time(metric9[1m]) {} -9.988465674311579e+307 @@ -768,21 +813,40 @@ eval instant at 1m avg_over_time(metric9[1m]) eval instant at 1m sum_over_time(metric9[1m])/count_over_time(metric9[1m]) {} -Inf -eval instant at 1m avg_over_time(metric10[1m]) +eval instant at 45s avg_over_time(metric10[1m]) {} 0 -eval instant at 1m sum_over_time(metric10[1m])/count_over_time(metric10[1m]) +eval instant at 1m avg_over_time(metric10[2m]) {} 0 +eval instant at 45s sum_over_time(metric10[1m])/count_over_time(metric10[1m]) + {} 0 + +eval instant at 1m sum_over_time(metric10[2m])/count_over_time(metric10[2m]) + {} 0 + +# NaN behavior. +eval instant at 20s avg_over_time(metric11[1m]) + {} 2 + +eval instant at 30s avg_over_time(metric11[1m]) + {} NaN + +eval instant at 1m avg_over_time(metric11[1m]) + {} NaN + +eval instant at 1m sum_over_time(metric11[1m])/count_over_time(metric11[1m]) + {} NaN + # Test if very big intermediate values cause loss of detail. clear load 10s metric 1 1e100 1 -1e100 -eval instant at 1m sum_over_time(metric[1m]) +eval instant at 1m sum_over_time(metric[2m]) {} 2 -eval instant at 1m avg_over_time(metric[1m]) +eval instant at 1m avg_over_time(metric[2m]) {} 0.5 # Tests for stddev_over_time and stdvar_over_time. @@ -790,13 +854,13 @@ clear load 10s metric 0 8 8 2 3 -eval instant at 1m stdvar_over_time(metric[1m]) +eval instant at 1m stdvar_over_time(metric[2m]) {} 10.56 -eval instant at 1m stddev_over_time(metric[1m]) +eval instant at 1m stddev_over_time(metric[2m]) {} 3.249615 -eval instant at 1m stddev_over_time((metric[1m])) +eval instant at 1m stddev_over_time((metric[2m])) {} 3.249615 # Tests for stddev_over_time and stdvar_over_time #4927. 
@@ -826,42 +890,42 @@ load 10s data{test="three samples"} 0 1 2 data{test="uneven samples"} 0 1 4 -eval instant at 1m quantile_over_time(0, data[1m]) +eval instant at 1m quantile_over_time(0, data[2m]) {test="two samples"} 0 {test="three samples"} 0 {test="uneven samples"} 0 -eval instant at 1m quantile_over_time(0.5, data[1m]) +eval instant at 1m quantile_over_time(0.5, data[2m]) {test="two samples"} 0.5 {test="three samples"} 1 {test="uneven samples"} 1 -eval instant at 1m quantile_over_time(0.75, data[1m]) +eval instant at 1m quantile_over_time(0.75, data[2m]) {test="two samples"} 0.75 {test="three samples"} 1.5 {test="uneven samples"} 2.5 -eval instant at 1m quantile_over_time(0.8, data[1m]) +eval instant at 1m quantile_over_time(0.8, data[2m]) {test="two samples"} 0.8 {test="three samples"} 1.6 {test="uneven samples"} 2.8 -eval instant at 1m quantile_over_time(1, data[1m]) +eval instant at 1m quantile_over_time(1, data[2m]) {test="two samples"} 1 {test="three samples"} 2 {test="uneven samples"} 4 -eval_warn instant at 1m quantile_over_time(-1, data[1m]) +eval_warn instant at 1m quantile_over_time(-1, data[2m]) {test="two samples"} -Inf {test="three samples"} -Inf {test="uneven samples"} -Inf -eval_warn instant at 1m quantile_over_time(2, data[1m]) +eval_warn instant at 1m quantile_over_time(2, data[2m]) {test="two samples"} +Inf {test="three samples"} +Inf {test="uneven samples"} +Inf -eval_warn instant at 1m (quantile_over_time(2, (data[1m]))) +eval_warn instant at 1m (quantile_over_time(2, (data[2m]))) {test="two samples"} +Inf {test="three samples"} +Inf {test="uneven samples"} +Inf @@ -869,6 +933,9 @@ eval_warn instant at 1m (quantile_over_time(2, (data[1m]))) clear # Test time-related functions. +load 5m + histogram_sample {{schema:0 sum:1 count:1}} + eval instant at 0m year() {} 1970 @@ -950,6 +1017,23 @@ eval instant at 0m days_in_month(vector(1454284800)) eval instant at 0m days_in_month(vector(1485907200)) {} 28 +# Test for histograms. +eval instant at 0m day_of_month(histogram_sample) + +eval instant at 0m day_of_week(histogram_sample) + +eval instant at 0m day_of_year(histogram_sample) + +eval instant at 0m days_in_month(histogram_sample) + +eval instant at 0m hour(histogram_sample) + +eval instant at 0m minute(histogram_sample) + +eval instant at 0m month(histogram_sample) + +eval instant at 0m year(histogram_sample) + clear # Test duplicate labelset in promql output. 
@@ -969,21 +1053,21 @@ load 10s data{type="some_nan3"} NaN 0 1 data{type="only_nan"} NaN NaN NaN -eval instant at 1m min_over_time(data[1m]) +eval instant at 1m min_over_time(data[2m]) {type="numbers"} 0 {type="some_nan"} 0 {type="some_nan2"} 1 {type="some_nan3"} 0 {type="only_nan"} NaN -eval instant at 1m max_over_time(data[1m]) +eval instant at 1m max_over_time(data[2m]) {type="numbers"} 3 {type="some_nan"} 2 {type="some_nan2"} 2 {type="some_nan3"} 1 {type="only_nan"} NaN -eval instant at 1m last_over_time(data[1m]) +eval instant at 1m last_over_time(data[2m]) data{type="numbers"} 3 data{type="some_nan"} NaN data{type="some_nan2"} 1 @@ -1041,48 +1125,54 @@ eval instant at 50m absent(rate(nonexistant[5m])) clear # Testdata for absent_over_time() -eval instant at 1m absent_over_time(http_requests[5m]) +eval instant at 1m absent_over_time(http_requests_total[5m]) {} 1 -eval instant at 1m absent_over_time(http_requests{handler="/foo"}[5m]) +eval instant at 1m absent_over_time(http_requests_total{handler="/foo"}[5m]) {handler="/foo"} 1 -eval instant at 1m absent_over_time(http_requests{handler!="/foo"}[5m]) +eval instant at 1m absent_over_time(http_requests_total{handler!="/foo"}[5m]) {} 1 -eval instant at 1m absent_over_time(http_requests{handler="/foo", handler="/bar", handler="/foobar"}[5m]) +eval instant at 1m absent_over_time(http_requests_total{handler="/foo", handler="/bar", handler="/foobar"}[5m]) {} 1 eval instant at 1m absent_over_time(rate(nonexistant[5m])[5m:]) {} 1 -eval instant at 1m absent_over_time(http_requests{handler="/foo", handler="/bar", instance="127.0.0.1"}[5m]) +eval instant at 1m absent_over_time(http_requests_total{handler="/foo", handler="/bar", instance="127.0.0.1"}[5m]) {instance="127.0.0.1"} 1 load 1m - http_requests{path="/foo",instance="127.0.0.1",job="httpd"} 1+1x10 - http_requests{path="/bar",instance="127.0.0.1",job="httpd"} 1+1x10 + http_requests_total{path="/foo",instance="127.0.0.1",job="httpd"} 1+1x10 + http_requests_total{path="/bar",instance="127.0.0.1",job="httpd"} 1+1x10 httpd_handshake_failures_total{instance="127.0.0.1",job="node"} 1+1x15 httpd_log_lines_total{instance="127.0.0.1",job="node"} 1 ssl_certificate_expiry_seconds{job="ingress"} NaN NaN NaN NaN NaN -eval instant at 5m absent_over_time(http_requests[5m]) +eval instant at 5m absent_over_time(http_requests_total[5m]) -eval instant at 5m absent_over_time(rate(http_requests[5m])[5m:1m]) +eval instant at 5m absent_over_time(rate(http_requests_total[5m])[5m:1m]) eval instant at 0m absent_over_time(httpd_log_lines_total[30s]) eval instant at 1m absent_over_time(httpd_log_lines_total[30s]) {} 1 -eval instant at 15m absent_over_time(http_requests[5m]) - -eval instant at 16m absent_over_time(http_requests[5m]) +eval instant at 15m absent_over_time(http_requests_total[5m]) {} 1 -eval instant at 16m absent_over_time(http_requests[6m]) +eval instant at 15m absent_over_time(http_requests_total[10m]) + +eval instant at 16m absent_over_time(http_requests_total[6m]) + {} 1 + +eval instant at 16m absent_over_time(http_requests_total[16m]) eval instant at 16m absent_over_time(httpd_handshake_failures_total[1m]) + {} 1 + +eval instant at 16m absent_over_time(httpd_handshake_failures_total[2m]) eval instant at 16m absent_over_time({instance="127.0.0.1"}[5m]) @@ -1105,30 +1195,30 @@ eval instant at 10m absent_over_time({job="ingress"}[4m]) clear # Testdata for present_over_time() -eval instant at 1m present_over_time(http_requests[5m]) +eval instant at 1m present_over_time(http_requests_total[5m]) -eval 
instant at 1m present_over_time(http_requests{handler="/foo"}[5m]) +eval instant at 1m present_over_time(http_requests_total{handler="/foo"}[5m]) -eval instant at 1m present_over_time(http_requests{handler!="/foo"}[5m]) +eval instant at 1m present_over_time(http_requests_total{handler!="/foo"}[5m]) -eval instant at 1m present_over_time(http_requests{handler="/foo", handler="/bar", handler="/foobar"}[5m]) +eval instant at 1m present_over_time(http_requests_total{handler="/foo", handler="/bar", handler="/foobar"}[5m]) eval instant at 1m present_over_time(rate(nonexistant[5m])[5m:]) -eval instant at 1m present_over_time(http_requests{handler="/foo", handler="/bar", instance="127.0.0.1"}[5m]) +eval instant at 1m present_over_time(http_requests_total{handler="/foo", handler="/bar", instance="127.0.0.1"}[5m]) load 1m - http_requests{path="/foo",instance="127.0.0.1",job="httpd"} 1+1x10 - http_requests{path="/bar",instance="127.0.0.1",job="httpd"} 1+1x10 + http_requests_total{path="/foo",instance="127.0.0.1",job="httpd"} 1+1x10 + http_requests_total{path="/bar",instance="127.0.0.1",job="httpd"} 1+1x10 httpd_handshake_failures_total{instance="127.0.0.1",job="node"} 1+1x15 httpd_log_lines_total{instance="127.0.0.1",job="node"} 1 ssl_certificate_expiry_seconds{job="ingress"} NaN NaN NaN NaN NaN -eval instant at 5m present_over_time(http_requests[5m]) +eval instant at 5m present_over_time(http_requests_total[5m]) {instance="127.0.0.1", job="httpd", path="/bar"} 1 {instance="127.0.0.1", job="httpd", path="/foo"} 1 -eval instant at 5m present_over_time(rate(http_requests[5m])[5m:1m]) +eval instant at 5m present_over_time(rate(http_requests_total[5m])[5m:1m]) {instance="127.0.0.1", job="httpd", path="/bar"} 1 {instance="127.0.0.1", job="httpd", path="/foo"} 1 @@ -1137,18 +1227,19 @@ eval instant at 0m present_over_time(httpd_log_lines_total[30s]) eval instant at 1m present_over_time(httpd_log_lines_total[30s]) -eval instant at 15m present_over_time(http_requests[5m]) +eval instant at 15m present_over_time(http_requests_total[5m]) + +eval instant at 15m present_over_time(http_requests_total[10m]) {instance="127.0.0.1", job="httpd", path="/bar"} 1 {instance="127.0.0.1", job="httpd", path="/foo"} 1 -eval instant at 16m present_over_time(http_requests[5m]) +eval instant at 16m present_over_time(http_requests_total[6m]) -eval instant at 16m present_over_time(http_requests[6m]) +eval instant at 16m present_over_time(http_requests_total[16m]) {instance="127.0.0.1", job="httpd", path="/bar"} 1 {instance="127.0.0.1", job="httpd", path="/foo"} 1 eval instant at 16m present_over_time(httpd_handshake_failures_total[1m]) - {instance="127.0.0.1", job="node"} 1 eval instant at 16m present_over_time({instance="127.0.0.1"}[5m]) {instance="127.0.0.1",job="node"} 1 @@ -1169,59 +1260,59 @@ load 5m exp_root_log{l="x"} 10 exp_root_log{l="y"} 20 -eval instant at 5m exp(exp_root_log) +eval instant at 1m exp(exp_root_log) {l="x"} 22026.465794806718 {l="y"} 485165195.4097903 -eval instant at 5m exp(exp_root_log - 10) +eval instant at 1m exp(exp_root_log - 10) {l="y"} 22026.465794806718 {l="x"} 1 -eval instant at 5m exp(exp_root_log - 20) +eval instant at 1m exp(exp_root_log - 20) {l="x"} 4.5399929762484854e-05 {l="y"} 1 -eval instant at 5m ln(exp_root_log) +eval instant at 1m ln(exp_root_log) {l="x"} 2.302585092994046 {l="y"} 2.995732273553991 -eval instant at 5m ln(exp_root_log - 10) +eval instant at 1m ln(exp_root_log - 10) {l="y"} 2.302585092994046 {l="x"} -Inf -eval instant at 5m ln(exp_root_log - 20) +eval instant at 1m 
ln(exp_root_log - 20) {l="y"} -Inf {l="x"} NaN -eval instant at 5m exp(ln(exp_root_log)) +eval instant at 1m exp(ln(exp_root_log)) {l="y"} 20 {l="x"} 10 -eval instant at 5m sqrt(exp_root_log) +eval instant at 1m sqrt(exp_root_log) {l="x"} 3.1622776601683795 {l="y"} 4.47213595499958 -eval instant at 5m log2(exp_root_log) +eval instant at 1m log2(exp_root_log) {l="x"} 3.3219280948873626 {l="y"} 4.321928094887363 -eval instant at 5m log2(exp_root_log - 10) +eval instant at 1m log2(exp_root_log - 10) {l="y"} 3.3219280948873626 {l="x"} -Inf -eval instant at 5m log2(exp_root_log - 20) +eval instant at 1m log2(exp_root_log - 20) {l="x"} NaN {l="y"} -Inf -eval instant at 5m log10(exp_root_log) +eval instant at 1m log10(exp_root_log) {l="x"} 1 {l="y"} 1.301029995663981 -eval instant at 5m log10(exp_root_log - 10) +eval instant at 1m log10(exp_root_log - 10) {l="y"} 1 {l="x"} -Inf -eval instant at 5m log10(exp_root_log - 20) +eval instant at 1m log10(exp_root_log - 20) {l="x"} NaN {l="y"} -Inf @@ -1234,3 +1325,12 @@ load 1m # We expect the value to be 0 for t=0s to t=59s (inclusive), then 60 for t=60s and t=61s. eval range from 0 to 61s step 1s timestamp(metric) {} 0x59 60 60 + +clear + +# Check round with mixed data types +load 1m + mixed_metric {{schema:0 sum:5 count:4 buckets:[1 2 1]}} 1 2 3 {{schema:0 sum:5 count:4 buckets:[1 2 1]}} {{schema:0 sum:8 count:6 buckets:[1 4 1]}} + +eval range from 0 to 5m step 1m round(mixed_metric) + {} _ 1 2 3 diff --git a/promql/promqltest/testdata/histograms.test b/promql/promqltest/testdata/histograms.test index 47cba79935..8ab23640af 100644 --- a/promql/promqltest/testdata/histograms.test +++ b/promql/promqltest/testdata/histograms.test @@ -108,8 +108,8 @@ eval instant at 50m histogram_stdvar(testhistogram3) eval instant at 50m histogram_fraction(0, 0.2, testhistogram3) {start="positive"} 0.6363636363636364 {start="negative"} 0 - -eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3[5m])) + +eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3[10m])) {start="positive"} 0.6363636363636364 {start="negative"} 0 @@ -118,8 +118,8 @@ eval instant at 50m histogram_fraction(0, 0.2, rate(testhistogram3[5m])) eval instant at 50m testhistogram3_bucket{le=".2"} / ignoring(le) testhistogram3_count {start="positive"} 0.6363636363636364 - -eval instant at 50m rate(testhistogram3_bucket{le=".2"}[5m]) / ignoring(le) rate(testhistogram3_count[5m]) + +eval instant at 50m rate(testhistogram3_bucket{le=".2"}[10m]) / ignoring(le) rate(testhistogram3_count[10m]) {start="positive"} 0.6363636363636364 # Test histogram_quantile, native and classic. @@ -241,28 +241,27 @@ eval instant at 50m histogram_quantile(0.8, testhistogram_bucket) {start="negative"} 0.3 # More realistic with rates. 
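+# (rate() over a native histogram yields a per-second rate histogram that histogram_quantile consumes directly, with no le label involved.)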
- -eval instant at 50m histogram_quantile(0.2, rate(testhistogram[5m])) +eval instant at 50m histogram_quantile(0.2, rate(testhistogram[10m])) {start="positive"} 0.048 {start="negative"} -0.2 -eval instant at 50m histogram_quantile(0.2, rate(testhistogram_bucket[5m])) +eval instant at 50m histogram_quantile(0.2, rate(testhistogram_bucket[10m])) {start="positive"} 0.048 {start="negative"} -0.2 -eval instant at 50m histogram_quantile(0.5, rate(testhistogram[5m])) +eval instant at 50m histogram_quantile(0.5, rate(testhistogram[10m])) {start="positive"} 0.15 {start="negative"} -0.15 -eval instant at 50m histogram_quantile(0.5, rate(testhistogram_bucket[5m])) +eval instant at 50m histogram_quantile(0.5, rate(testhistogram_bucket[10m])) {start="positive"} 0.15 {start="negative"} -0.15 -eval instant at 50m histogram_quantile(0.8, rate(testhistogram[5m])) +eval instant at 50m histogram_quantile(0.8, rate(testhistogram[10m])) {start="positive"} 0.72 {start="negative"} 0.3 -eval instant at 50m histogram_quantile(0.8, rate(testhistogram_bucket[5m])) +eval instant at 50m histogram_quantile(0.8, rate(testhistogram_bucket[10m])) {start="positive"} 0.72 {start="negative"} 0.3 @@ -307,115 +306,112 @@ eval instant at 47m histogram_quantile(5./6., rate(testhistogram2_bucket[15m])) # Aggregated histogram: Everything in one. Note how native histograms # don't require aggregation by le. -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[5m]))) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[10m]))) {} 0.075 -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[10m])) by (le)) {} 0.075 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[5m]))) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[10m]))) {} 0.1277777777777778 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[10m])) by (le)) {} 0.1277777777777778 # Aggregated histogram: Everything in one. Now with avg, which does not change anything. -eval instant at 50m histogram_quantile(0.3, avg(rate(request_duration_seconds[5m]))) +eval instant at 50m histogram_quantile(0.3, avg(rate(request_duration_seconds[10m]))) {} 0.075 -eval instant at 50m histogram_quantile(0.3, avg(rate(request_duration_seconds_bucket[5m])) by (le)) +eval instant at 50m histogram_quantile(0.3, avg(rate(request_duration_seconds_bucket[10m])) by (le)) {} 0.075 -eval instant at 50m histogram_quantile(0.5, avg(rate(request_duration_seconds[5m]))) +eval instant at 50m histogram_quantile(0.5, avg(rate(request_duration_seconds[10m]))) {} 0.12777777777777778 -eval instant at 50m histogram_quantile(0.5, avg(rate(request_duration_seconds_bucket[5m])) by (le)) +eval instant at 50m histogram_quantile(0.5, avg(rate(request_duration_seconds_bucket[10m])) by (le)) {} 0.12777777777777778 # Aggregated histogram: By instance. 
-eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[5m])) by (instance)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[10m])) by (instance)) {instance="ins1"} 0.075 {instance="ins2"} 0.075 -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, instance)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[10m])) by (le, instance)) {instance="ins1"} 0.075 {instance="ins2"} 0.075 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[5m])) by (instance)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[10m])) by (instance)) {instance="ins1"} 0.1333333333 {instance="ins2"} 0.125 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, instance)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[10m])) by (le, instance)) {instance="ins1"} 0.1333333333 {instance="ins2"} 0.125 # Aggregated histogram: By job. - -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[5m])) by (job)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[10m])) by (job)) {job="job1"} 0.1 {job="job2"} 0.0642857142857143 -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[10m])) by (le, job)) {job="job1"} 0.1 {job="job2"} 0.0642857142857143 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[5m])) by (job)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[10m])) by (job)) {job="job1"} 0.14 {job="job2"} 0.1125 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, job)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[10m])) by (le, job)) {job="job1"} 0.14 {job="job2"} 0.1125 # Aggregated histogram: By job and instance. 
- -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[5m])) by (job, instance)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds[10m])) by (job, instance)) {instance="ins1", job="job1"} 0.11 {instance="ins2", job="job1"} 0.09 {instance="ins1", job="job2"} 0.06 {instance="ins2", job="job2"} 0.0675 -eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[5m])) by (le, job, instance)) +eval instant at 50m histogram_quantile(0.3, sum(rate(request_duration_seconds_bucket[10m])) by (le, job, instance)) {instance="ins1", job="job1"} 0.11 {instance="ins2", job="job1"} 0.09 {instance="ins1", job="job2"} 0.06 {instance="ins2", job="job2"} 0.0675 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[5m])) by (job, instance)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds[10m])) by (job, instance)) {instance="ins1", job="job1"} 0.15 {instance="ins2", job="job1"} 0.1333333333333333 {instance="ins1", job="job2"} 0.1 {instance="ins2", job="job2"} 0.1166666666666667 -eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[5m])) by (le, job, instance)) +eval instant at 50m histogram_quantile(0.5, sum(rate(request_duration_seconds_bucket[10m])) by (le, job, instance)) {instance="ins1", job="job1"} 0.15 {instance="ins2", job="job1"} 0.1333333333333333 {instance="ins1", job="job2"} 0.1 {instance="ins2", job="job2"} 0.1166666666666667 # The unaggregated histogram for comparison. Same result as the previous one. - -eval instant at 50m histogram_quantile(0.3, rate(request_duration_seconds[5m])) +eval instant at 50m histogram_quantile(0.3, rate(request_duration_seconds[10m])) {instance="ins1", job="job1"} 0.11 {instance="ins2", job="job1"} 0.09 {instance="ins1", job="job2"} 0.06 {instance="ins2", job="job2"} 0.0675 -eval instant at 50m histogram_quantile(0.3, rate(request_duration_seconds_bucket[5m])) +eval instant at 50m histogram_quantile(0.3, rate(request_duration_seconds_bucket[10m])) {instance="ins1", job="job1"} 0.11 {instance="ins2", job="job1"} 0.09 {instance="ins1", job="job2"} 0.06 {instance="ins2", job="job2"} 0.0675 -eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds[5m])) +eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds[10m])) {instance="ins1", job="job1"} 0.15 {instance="ins2", job="job1"} 0.13333333333333333 {instance="ins1", job="job2"} 0.1 {instance="ins2", job="job2"} 0.11666666666666667 -eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds_bucket[5m])) +eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds_bucket[10m])) {instance="ins1", job="job1"} 0.15 {instance="ins2", job="job1"} 0.13333333333333333 {instance="ins1", job="job2"} 0.1 @@ -425,6 +421,25 @@ eval instant at 50m histogram_quantile(0.5, rate(request_duration_seconds_bucket eval instant at 50m sum(request_duration_seconds) {} {{schema:-53 count:250 custom_values:[0.1 0.2] buckets:[100 90 60]}} +eval instant at 50m sum(request_duration_seconds{job="job1",instance="ins1"} + ignoring(job,instance) request_duration_seconds{job="job1",instance="ins2"} + ignoring(job,instance) request_duration_seconds{job="job2",instance="ins1"} + ignoring(job,instance) request_duration_seconds{job="job2",instance="ins2"}) + {} {{schema:-53 count:250 custom_values:[0.1 0.2] buckets:[100 90 60]}} + +eval instant at 50m avg(request_duration_seconds) + {} {{schema:-53 count:62.5 custom_values:[0.1 
0.2] buckets:[25 22.5 15]}} + +# To verify the result above, calculate from classic histogram as well. +eval instant at 50m avg (request_duration_seconds_bucket{le="0.1"}) + {} 25 + +eval instant at 50m avg (request_duration_seconds_bucket{le="0.2"}) - avg (request_duration_seconds_bucket{le="0.1"}) + {} 22.5 + +eval instant at 50m avg (request_duration_seconds_bucket{le="+Inf"}) - avg (request_duration_seconds_bucket{le="0.2"}) + {} 15 + +eval instant at 50m count(request_duration_seconds) + {} 4 + # A histogram with nonmonotonic bucket counts. This may happen when recording # rule evaluation or federation races scrape ingestion, causing some buckets # counts to be derived from fewer samples. @@ -437,30 +452,30 @@ load 5m nonmonotonic_bucket{le="1000"} 0+9x10 nonmonotonic_bucket{le="+Inf"} 0+8x10 -# Nonmonotonic buckets -eval instant at 50m histogram_quantile(0.01, nonmonotonic_bucket) +# Nonmonotonic buckets, triggering an info annotation. +eval_info instant at 50m histogram_quantile(0.01, nonmonotonic_bucket) {} 0.0045 -eval instant at 50m histogram_quantile(0.5, nonmonotonic_bucket) +eval_info instant at 50m histogram_quantile(0.5, nonmonotonic_bucket) {} 8.5 -eval instant at 50m histogram_quantile(0.99, nonmonotonic_bucket) +eval_info instant at 50m histogram_quantile(0.99, nonmonotonic_bucket) {} 979.75 # Buckets with different representations of the same upper bound. -eval instant at 50m histogram_quantile(0.5, rate(mixed_bucket[5m])) +eval instant at 50m histogram_quantile(0.5, rate(mixed_bucket[10m])) {instance="ins1", job="job1"} 0.15 {instance="ins2", job="job1"} NaN -eval instant at 50m histogram_quantile(0.5, rate(mixed[5m])) +eval instant at 50m histogram_quantile(0.5, rate(mixed[10m])) {instance="ins1", job="job1"} 0.2 {instance="ins2", job="job1"} NaN -eval instant at 50m histogram_quantile(0.75, rate(mixed_bucket[5m])) +eval instant at 50m histogram_quantile(0.75, rate(mixed_bucket[10m])) {instance="ins1", job="job1"} 0.2 {instance="ins2", job="job1"} NaN -eval instant at 50m histogram_quantile(1, rate(mixed_bucket[5m])) +eval instant at 50m histogram_quantile(1, rate(mixed_bucket[10m])) {instance="ins1", job="job1"} 0.2 {instance="ins2", job="job1"} NaN @@ -469,7 +484,7 @@ load_with_nhcb 5m empty_bucket{le="0.2", job="job1", instance="ins1"} 0x10 empty_bucket{le="+Inf", job="job1", instance="ins1"} 0x10 -eval instant at 50m histogram_quantile(0.2, rate(empty_bucket[5m])) +eval instant at 50m histogram_quantile(0.2, rate(empty_bucket[10m])) {instance="ins1", job="job1"} NaN # Load a duplicate histogram with a different name to test failure scenario on multiple histograms with the same label set. @@ -508,3 +523,36 @@ eval instant at 5m histogram_quantile(1.0, sum by (le) (rate(const_histogram_buc eval instant at 5m histogram_quantile(1.0, sum(rate(const_histogram[5m]))) {} NaN + +load_with_nhcb 1m + histogram_over_time_bucket{le="0"} 0 1 3 9 + histogram_over_time_bucket{le="1"} 2 3 3 9 + histogram_over_time_bucket{le="2"} 3 8 5 10 + histogram_over_time_bucket{le="4"} 3 10 6 18 + +# Test custom buckets with sum_over_time, avg_over_time. 
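+# The four samples have total counts 3, 10, 6 and 18 (their le="4" values), so the summed histogram has count 37 and the averaged one 37/4 = 9.25.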
+eval instant at 3m sum_over_time(histogram_over_time[4m:1m]) + {} {{schema:-53 count:37 custom_values:[0 1 2 4] buckets:[13 4 9 11]}} + +eval instant at 3m avg_over_time(histogram_over_time[4m:1m]) + {} {{schema:-53 count:9.25 custom_values:[0 1 2 4] buckets:[3.25 1 2.25 2.75]}} + +# Test custom buckets with counter reset +load_with_nhcb 5m + histogram_with_reset_bucket{le="1"} 1 3 9 + histogram_with_reset_bucket{le="2"} 3 3 9 + histogram_with_reset_bucket{le="4"} 8 5 12 + histogram_with_reset_bucket{le="8"} 10 6 18 + histogram_with_reset_sum{} 36 16 61 + +eval instant at 10m increase(histogram_with_reset[15m]) + {} {{schema:-53 count:27 sum:91.5 custom_values:[1 2 4 8] counter_reset_hint:gauge buckets:[13.5 0 4.5 9]}} + +eval instant at 10m resets(histogram_with_reset[15m]) + {} 1 + +eval instant at 10m histogram_count(increase(histogram_with_reset[15m])) + {} 27 + +eval instant at 10m histogram_sum(increase(histogram_with_reset[15m])) + {} 91.5 diff --git a/promql/promqltest/testdata/limit.test b/promql/promqltest/testdata/limit.test index 0ab363f9ae..e6dd007af4 100644 --- a/promql/promqltest/testdata/limit.test +++ b/promql/promqltest/testdata/limit.test @@ -9,6 +9,8 @@ load 5m http_requests{job="api-server", instance="1", group="canary"} 0+40x10 http_requests{job="api-server", instance="2", group="canary"} 0+50x10 http_requests{job="api-server", instance="3", group="canary"} 0+60x10 + http_requests{job="api-server", instance="histogram_1", group="canary"} {{schema:0 sum:10 count:10}}x11 + http_requests{job="api-server", instance="histogram_2", group="canary"} {{schema:0 sum:20 count:20}}x11 eval instant at 50m count(limitk by (group) (0, http_requests)) # empty @@ -16,25 +18,56 @@ eval instant at 50m count(limitk by (group) (0, http_requests)) eval instant at 50m count(limitk by (group) (-1, http_requests)) # empty -# Exercise k==1 special case (as sample is added before the main series loop +# Exercise k==1 special case (as sample is added before the main series loop). eval instant at 50m count(limitk by (group) (1, http_requests) and http_requests) - {} 2 + {} 2 eval instant at 50m count(limitk by (group) (2, http_requests) and http_requests) - {} 4 + {} 4 eval instant at 50m count(limitk(100, http_requests) and http_requests) - {} 6 + {} 8 -# Exercise k==1 special case (as sample is added before the main series loop +# Exercise k==1 special case (as sample is added before the main series loop). eval instant at 50m count(limitk by (group) (1, http_requests) and http_requests) - {} 2 + {} 2 eval instant at 50m count(limitk by (group) (2, http_requests) and http_requests) - {} 4 + {} 4 eval instant at 50m count(limitk(100, http_requests) and http_requests) - {} 6 + {} 8 + +# Test for histograms. +# k==1: verify that histogram is included in the result. +eval instant at 50m limitk(1, http_requests{instance="histogram_1"}) + {__name__="http_requests", group="canary", instance="histogram_1", job="api-server"} {{count:10 sum:10}} + +eval range from 0 to 50m step 5m limitk(1, http_requests{instance="histogram_1"}) + {__name__="http_requests", group="canary", instance="histogram_1", job="api-server"} {{count:10 sum:10}}x10 + +# Histogram is included with mix of floats as well. 
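+# (limitk caps the number of returned series per group and does not distinguish float series from histogram series.)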
+eval instant at 50m limitk(8, http_requests{instance=~"(histogram_2|0)"})
+  {__name__="http_requests", group="canary", instance="histogram_2", job="api-server"} {{count:20 sum:20}}
+  {__name__="http_requests", group="production", instance="0", job="api-server"} 100
+  {__name__="http_requests", group="canary", instance="0", job="api-server"} 300
+
+eval range from 0 to 50m step 5m limitk(8, http_requests{instance=~"(histogram_2|0)"})
+  {__name__="http_requests", group="canary", instance="histogram_2", job="api-server"} {{count:20 sum:20}}x10
+  {__name__="http_requests", group="production", instance="0", job="api-server"} 0+10x10
+  {__name__="http_requests", group="canary", instance="0", job="api-server"} 0+30x10
+
+eval instant at 50m count(limitk(2, http_requests{instance=~"histogram_[0-9]"}))
+  {} 2
+
+eval range from 0 to 50m step 5m count(limitk(2, http_requests{instance=~"histogram_[0-9]"}))
+  {} 2+0x10
+
+eval instant at 50m count(limitk(1000, http_requests{instance=~"histogram_[0-9]"}))
+  {} 2
+
+eval range from 0 to 50m step 5m count(limitk(1000, http_requests{instance=~"histogram_[0-9]"}))
+  {} 2+0x10

# limit_ratio
eval range from 0 to 50m step 5m count(limit_ratio(0.0, http_requests))
@@ -42,9 +75,9 @@ eval range from 0 to 50m step 5m count(limit_ratio(0.0, http_requests))

# limitk(2, ...) should always return a 2-count subset of the timeseries (hence the AND'ing)
eval range from 0 to 50m step 5m count(limitk(2, http_requests) and http_requests)
-  {} 2+0x10
+  {} 2+0x10

-# Tests for limit_ratio
+# Tests for limit_ratio.
#
# NB: below 0.5 ratio will depend on some hashing "luck" (also there's no guarantee that
# an integer comes from: total number of series * ratio), as it depends on:
@@ -56,50 +89,50 @@
#
# See `AddRatioSample()` in promql/engine.go for more details.

-# Half~ish samples: verify we get "near" 3 (of 0.5 * 6)
-eval range from 0 to 50m step 5m count(limit_ratio(0.5, http_requests) and http_requests) <= bool (3+1)
-  {} 1+0x10
+# Half~ish samples: verify we get "near" 4 (of 0.5 * 8).
+eval range from 0 to 50m step 5m count(limit_ratio(0.5, http_requests) and http_requests) <= bool (4+1)
+  {} 1+0x10

-eval range from 0 to 50m step 5m count(limit_ratio(0.5, http_requests) and http_requests) >= bool (3-1)
-  {} 1+0x10
+eval range from 0 to 50m step 5m count(limit_ratio(0.5, http_requests) and http_requests) >= bool (4-1)
+  {} 1+0x10

-# All samples
+# All samples.
eval range from 0 to 50m step 5m count(limit_ratio(1.0, http_requests) and http_requests)
-  {} 6+0x10
+  {} 8+0x10

-# All samples
+# All samples.
eval range from 0 to 50m step 5m count(limit_ratio(-1.0, http_requests) and http_requests)
-  {} 6+0x10
+  {} 8+0x10

-# Capped to 1.0 -> all samples
+# Capped to 1.0 -> all samples.
eval_warn range from 0 to 50m step 5m count(limit_ratio(1.1, http_requests) and http_requests)
-  {} 6+0x10
+  {} 8+0x10

-# Capped to -1.0 -> all samples
+# Capped to -1.0 -> all samples.
eval_warn range from 0 to 50m step 5m count(limit_ratio(-1.1, http_requests) and http_requests)
-  {} 6+0x10
+  {} 8+0x10

-# Verify that limit_ratio(value) and limit_ratio(1.0-value) return the "complement" of each other
-# Complement below for [0.2, -0.8]
+# Verify that limit_ratio(value) and limit_ratio(1.0-value) return the "complement" of each other.
+# Complement below for [0.2, -0.8].
#
-# Complement 1of2: `or` should return all samples
+# Complement 1of2: `or` should return all samples.
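+# (Selection is a deterministic function of each labelset's hash, which is why ratios r and r-1.0 partition the input exactly.)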
eval range from 0 to 50m step 5m count(limit_ratio(0.2, http_requests) or limit_ratio(-0.8, http_requests)) - {} 6+0x10 + {} 8+0x10 -# Complement 2of2: `and` should return no samples +# Complement 2of2: `and` should return no samples. eval range from 0 to 50m step 5m count(limit_ratio(0.2, http_requests) and limit_ratio(-0.8, http_requests)) # empty -# Complement below for [0.5, -0.5] +# Complement below for [0.5, -0.5]. eval range from 0 to 50m step 5m count(limit_ratio(0.5, http_requests) or limit_ratio(-0.5, http_requests)) - {} 6+0x10 + {} 8+0x10 eval range from 0 to 50m step 5m count(limit_ratio(0.5, http_requests) and limit_ratio(-0.5, http_requests)) # empty -# Complement below for [0.8, -0.2] +# Complement below for [0.8, -0.2]. eval range from 0 to 50m step 5m count(limit_ratio(0.8, http_requests) or limit_ratio(-0.2, http_requests)) - {} 6+0x10 + {} 8+0x10 eval range from 0 to 50m step 5m count(limit_ratio(0.8, http_requests) and limit_ratio(-0.2, http_requests)) # empty @@ -107,13 +140,19 @@ eval range from 0 to 50m step 5m count(limit_ratio(0.8, http_requests) and limit # Complement below for [some_ratio, 1.0 - some_ratio], some_ratio derived from time(), # using a small prime number to avoid rounded ratio values, and a small set of them. eval range from 0 to 50m step 5m count(limit_ratio(time() % 17/17, http_requests) or limit_ratio(1.0 - (time() % 17/17), http_requests)) - {} 6+0x10 + {} 8+0x10 eval range from 0 to 50m step 5m count(limit_ratio(time() % 17/17, http_requests) and limit_ratio(1.0 - (time() % 17/17), http_requests)) # empty -# Poor man's normality check: ok (loaded samples follow a nice linearity over labels and time) -# The check giving: 1 (i.e. true) -eval range from 0 to 50m step 5m abs(avg(limit_ratio(0.5, http_requests)) - avg(limit_ratio(-0.5, http_requests))) <= bool stddev(http_requests) +# Poor man's normality check: ok (loaded samples follow a nice linearity over labels and time). +# The check giving: 1 (i.e. true). +eval range from 0 to 50m step 5m abs(avg(limit_ratio(0.5, http_requests{instance!~"histogram_[0-9]"})) - avg(limit_ratio(-0.5, http_requests{instance!~"histogram_[0-9]"}))) <= bool stddev(http_requests{instance!~"histogram_[0-9]"}) {} 1+0x10 +# All specified histograms are included for r=1. +eval instant at 50m limit_ratio(1, http_requests{instance="histogram_1"}) + {__name__="http_requests", group="canary", instance="histogram_1", job="api-server"} {{count:10 sum:10}} + +eval range from 0 to 50m step 5m limit_ratio(1, http_requests{instance="histogram_1"}) + {__name__="http_requests", group="canary", instance="histogram_1", job="api-server"} {{count:10 sum:10}}x10 diff --git a/promql/promqltest/testdata/name_label_dropping.test b/promql/promqltest/testdata/name_label_dropping.test index 1f1dac3602..9af45a7324 100644 --- a/promql/promqltest/testdata/name_label_dropping.test +++ b/promql/promqltest/testdata/name_label_dropping.test @@ -1,84 +1,88 @@ # Test for __name__ label drop. load 5m - metric{env="1"} 0 60 120 - another_metric{env="1"} 60 120 180 + metric_total{env="1"} 0 60 120 + another_metric_total{env="1"} 60 120 180 -# Does not drop __name__ for vector selector -eval instant at 15m metric{env="1"} - metric{env="1"} 120 +# Does not drop __name__ for vector selector. +eval instant at 10m metric_total{env="1"} + metric_total{env="1"} 120 -# Drops __name__ for unary operators -eval instant at 15m -metric +# Drops __name__ for unary operators. 
+eval instant at 10m -metric_total {env="1"} -120 -# Drops __name__ for binary operators -eval instant at 15m metric + another_metric +# Drops __name__ for binary operators. +eval instant at 10m metric_total + another_metric_total {env="1"} 300 -# Does not drop __name__ for binary comparison operators -eval instant at 15m metric <= another_metric - metric{env="1"} 120 +# Does not drop __name__ for binary comparison operators. +eval instant at 10m metric_total <= another_metric_total + metric_total{env="1"} 120 -# Drops __name__ for binary comparison operators with "bool" modifier -eval instant at 15m metric <= bool another_metric +# Drops __name__ for binary comparison operators with "bool" modifier. +eval instant at 10m metric_total <= bool another_metric_total {env="1"} 1 -# Drops __name__ for vector-scalar operations -eval instant at 15m metric * 2 +# Drops __name__ for vector-scalar operations. +eval instant at 10m metric_total * 2 {env="1"} 240 -# Drops __name__ for instant-vector functions -eval instant at 15m clamp(metric, 0, 100) +# Drops __name__ for instant-vector functions. +eval instant at 10m clamp(metric_total, 0, 100) {env="1"} 100 -# Drops __name__ for range-vector functions -eval instant at 15m rate(metric{env="1"}[10m]) - {env="1"} 0.2 - -# Does not drop __name__ for last_over_time function -eval instant at 15m last_over_time(metric{env="1"}[10m]) - metric{env="1"} 120 - -# Drops name for other _over_time functions -eval instant at 15m max_over_time(metric{env="1"}[10m]) +# Drops __name__ for round function. +eval instant at 10m round(metric_total) {env="1"} 120 -# Allows relabeling (to-be-dropped) __name__ via label_replace -eval instant at 15m label_replace(rate({env="1"}[10m]), "my_name", "rate_$1", "__name__", "(.+)") - {my_name="rate_metric", env="1"} 0.2 - {my_name="rate_another_metric", env="1"} 0.2 +# Drops __name__ for range-vector functions. +eval instant at 10m rate(metric_total{env="1"}[10m]) + {env="1"} 0.2 -# Allows preserving __name__ via label_replace -eval instant at 15m label_replace(rate({env="1"}[10m]), "__name__", "rate_$1", "__name__", "(.+)") - rate_metric{env="1"} 0.2 - rate_another_metric{env="1"} 0.2 +# Does not drop __name__ for last_over_time function. +eval instant at 10m last_over_time(metric_total{env="1"}[10m]) + metric_total{env="1"} 120 -# Allows relabeling (to-be-dropped) __name__ via label_join -eval instant at 15m label_join(rate({env="1"}[10m]), "my_name", "_", "__name__") - {my_name="metric", env="1"} 0.2 - {my_name="another_metric", env="1"} 0.2 +# Drops name for other _over_time functions. +eval instant at 10m max_over_time(metric_total{env="1"}[10m]) + {env="1"} 120 -# Allows preserving __name__ via label_join -eval instant at 15m label_join(rate({env="1"}[10m]), "__name__", "_", "__name__", "env") - metric_1{env="1"} 0.2 - another_metric_1{env="1"} 0.2 +# Allows relabeling (to-be-dropped) __name__ via label_replace. +eval instant at 10m label_replace(rate({env="1"}[10m]), "my_name", "rate_$1", "__name__", "(.+)") + {my_name="rate_metric_total", env="1"} 0.2 + {my_name="rate_another_metric_total", env="1"} 0.2 -# Does not drop metric names fro aggregation operators -eval instant at 15m sum by (__name__, env) (metric{env="1"}) - metric{env="1"} 120 +# Allows preserving __name__ via label_replace. 
+eval instant at 10m label_replace(rate({env="1"}[10m]), "__name__", "rate_$1", "__name__", "(.+)")
+  rate_metric_total{env="1"} 0.2
+  rate_another_metric_total{env="1"} 0.2

-# Allows relabeling (to-be-dropped) __name__ via label_join
+# Allows relabeling (to-be-dropped) __name__ via label_join.
+eval instant at 10m label_join(rate({env="1"}[10m]), "my_name", "_", "__name__")
+  {my_name="metric_total", env="1"} 0.2
+  {my_name="another_metric_total", env="1"} 0.2
+
+# Allows preserving __name__ via label_join.
+eval instant at 10m label_join(rate({env="1"}[10m]), "__name__", "_", "__name__", "env")
+  metric_total_1{env="1"} 0.2
+  another_metric_total_1{env="1"} 0.2
+
+# Does not drop metric names from aggregation operators.
+eval instant at 10m sum by (__name__, env) (metric_total{env="1"})
+  metric_total{env="1"} 120
+
+# Aggregation operators by __name__ lead to duplicate labelset errors (aggregation is partitioned by not yet removed __name__ label).
 # This is an accidental side effect of delayed __name__ label dropping
-eval_fail instant at 15m sum by (__name__) (rate({env="1"}[10m]))
+eval_fail instant at 10m sum by (__name__) (rate({env="1"}[10m]))

-# Aggregation operators aggregate metrics with same labelset and to-be-dropped names
+# Aggregation operators aggregate metrics with same labelset and to-be-dropped names.
 # This is an accidental side effect of delayed __name__ label dropping
-eval instant at 15m sum(rate({env="1"}[10m])) by (env)
+eval instant at 10m sum(rate({env="1"}[10m])) by (env)
   {env="1"} 0.4

-# Aggregationk operators propagate __name__ label dropping information
-eval instant at 15m topk(10, sum by (__name__, env) (metric{env="1"}))
-  metric{env="1"} 120
+# Aggregation operators propagate __name__ label dropping information.
+eval instant at 10m topk(10, sum by (__name__, env) (metric_total{env="1"}))
+  metric_total{env="1"} 120

-eval instant at 15m topk(10, sum by (__name__, env) (rate(metric{env="1"}[10m])))
+eval instant at 10m topk(10, sum by (__name__, env) (rate(metric_total{env="1"}[10m])))
   {env="1"} 0.2
diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test
index 71e102dcee..6be298cf7d 100644
--- a/promql/promqltest/testdata/native_histograms.test
+++ b/promql/promqltest/testdata/native_histograms.test
@@ -2,55 +2,58 @@
 load 5m
   empty_histogram {{}}

-eval instant at 5m empty_histogram
+eval instant at 1m empty_histogram
   {__name__="empty_histogram"} {{}}

-eval instant at 5m histogram_count(empty_histogram)
+eval instant at 1m histogram_count(empty_histogram)
   {} 0

-eval instant at 5m histogram_sum(empty_histogram)
+eval instant at 1m histogram_sum(empty_histogram)
   {} 0

-eval instant at 5m histogram_avg(empty_histogram)
+eval instant at 1m histogram_avg(empty_histogram)
   {} NaN

-eval instant at 5m histogram_fraction(-Inf, +Inf, empty_histogram)
+eval instant at 1m histogram_fraction(-Inf, +Inf, empty_histogram)
   {} NaN

-eval instant at 5m histogram_fraction(0, 8, empty_histogram)
+eval instant at 1m histogram_fraction(0, 8, empty_histogram)
   {} NaN

-
+clear

 # buckets:[1 2 1] means 1 observation in the 1st bucket, 2 observations in the 2nd and 1 observation in the 3rd (total 4).
 load 5m
   single_histogram {{schema:0 sum:5 count:4 buckets:[1 2 1]}}

 # histogram_count extracts the count property from the histogram.
-eval instant at 5m histogram_count(single_histogram) +eval instant at 1m histogram_count(single_histogram) {} 4 # histogram_sum extracts the sum property from the histogram. -eval instant at 5m histogram_sum(single_histogram) +eval instant at 1m histogram_sum(single_histogram) {} 5 # histogram_avg calculates the average from sum and count properties. -eval instant at 5m histogram_avg(single_histogram) +eval instant at 1m histogram_avg(single_histogram) {} 1.25 # We expect half of the values to fall in the range 1 < x <= 2. -eval instant at 5m histogram_fraction(1, 2, single_histogram) +eval instant at 1m histogram_fraction(1, 2, single_histogram) {} 0.5 # We expect all values to fall in the range 0 < x <= 8. -eval instant at 5m histogram_fraction(0, 8, single_histogram) +eval instant at 1m histogram_fraction(0, 8, single_histogram) {} 1 -# Median is 1.5 due to linear estimation of the midpoint of the middle bucket, whose values are within range 1 < x <= 2. -eval instant at 5m histogram_quantile(0.5, single_histogram) - {} 1.5 - +# Median is 1.414213562373095 (2**2**-1, or sqrt(2)) due to +# exponential interpolation, i.e. the "midpoint" within range 1 < x <= +# 2 is assumed where the bucket boundary would be if we increased the +# resolution of the histogram by one step. +eval instant at 1m histogram_quantile(0.5, single_histogram) + {} 1.414213562373095 +clear # Repeat the same histogram 10 times. load 5m @@ -68,8 +71,9 @@ eval instant at 5m histogram_avg(multi_histogram) eval instant at 5m histogram_fraction(1, 2, multi_histogram) {} 0.5 +# See explanation for exponential interpolation above. eval instant at 5m histogram_quantile(0.5, multi_histogram) - {} 1.5 + {} 1.414213562373095 # Each entry should look the same as the first. @@ -85,10 +89,11 @@ eval instant at 50m histogram_avg(multi_histogram) eval instant at 50m histogram_fraction(1, 2, multi_histogram) {} 0.5 +# See explanation for exponential interpolation above. eval instant at 50m histogram_quantile(0.5, multi_histogram) - {} 1.5 - + {} 1.414213562373095 +clear # Accumulate the histogram addition for 10 iterations, offset is a bucket position where offset:0 is always the bucket # with an upper limit of 1 and offset:1 is the bucket which follows to the right. Negative offsets represent bucket @@ -109,8 +114,9 @@ eval instant at 5m histogram_avg(incr_histogram) eval instant at 5m histogram_fraction(1, 2, incr_histogram) {} 0.6 +# See explanation for exponential interpolation above. eval instant at 5m histogram_quantile(0.5, incr_histogram) - {} 1.5 + {} 1.414213562373095 eval instant at 50m incr_histogram @@ -129,18 +135,20 @@ eval instant at 50m histogram_avg(incr_histogram) eval instant at 50m histogram_fraction(1, 2, incr_histogram) {} 0.8571428571428571 +# See explanation for exponential interpolation above. eval instant at 50m histogram_quantile(0.5, incr_histogram) - {} 1.5 + {} 1.414213562373095 # Per-second average rate of increase should be 1/(5*60) for count and buckets, then 2/(5*60) for sum. -eval instant at 50m rate(incr_histogram[5m]) - {} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}} +eval instant at 50m rate(incr_histogram[10m]) + {} {{count:0.0033333333333333335 sum:0.006666666666666667 offset:1 buckets:[0.0033333333333333335]}} # Calculate the 50th percentile of observations over the last 10m. +# See explanation for exponential interpolation above. 
eval instant at 50m histogram_quantile(0.5, rate(incr_histogram[10m])) - {} 1.5 - + {} 1.414213562373095 +clear # Schema represents the histogram resolution, different schema have compatible bucket boundaries, e.g.: # 0: 1 2 4 8 16 32 64 (higher resolution) @@ -166,77 +174,79 @@ eval instant at 5m histogram_avg(low_res_histogram) eval instant at 5m histogram_fraction(1, 4, low_res_histogram) {} 1 - +clear # z_bucket:1 means there is one observation in the zero bucket and z_bucket_w:0.5 means the zero bucket has the range # 0 < x <= 0.5. Sum and count are expected to represent all observations in the histogram, including those in the zero bucket. load 5m single_zero_histogram {{schema:0 z_bucket:1 z_bucket_w:0.5 sum:0.25 count:1}} -eval instant at 5m histogram_count(single_zero_histogram) +eval instant at 1m histogram_count(single_zero_histogram) {} 1 -eval instant at 5m histogram_sum(single_zero_histogram) +eval instant at 1m histogram_sum(single_zero_histogram) {} 0.25 -eval instant at 5m histogram_avg(single_zero_histogram) +eval instant at 1m histogram_avg(single_zero_histogram) {} 0.25 # When only the zero bucket is populated, or there are negative buckets, the distribution is assumed to be equally # distributed around zero; i.e. that there are an equal number of positive and negative observations. Therefore the # entire distribution must lie within the full range of the zero bucket, in this case: -0.5 < x <= +0.5. -eval instant at 5m histogram_fraction(-0.5, 0.5, single_zero_histogram) +eval instant at 1m histogram_fraction(-0.5, 0.5, single_zero_histogram) {} 1 # Half of the observations are estimated to be zero, as this is the midpoint between -0.5 and +0.5. -eval instant at 5m histogram_quantile(0.5, single_zero_histogram) +eval instant at 1m histogram_quantile(0.5, single_zero_histogram) {} 0 - +clear # Let's turn single_histogram upside-down. load 5m negative_histogram {{schema:0 sum:-5 count:4 n_buckets:[1 2 1]}} -eval instant at 5m histogram_count(negative_histogram) +eval instant at 1m histogram_count(negative_histogram) {} 4 -eval instant at 5m histogram_sum(negative_histogram) +eval instant at 1m histogram_sum(negative_histogram) {} -5 -eval instant at 5m histogram_avg(negative_histogram) +eval instant at 1m histogram_avg(negative_histogram) {} -1.25 # We expect half of the values to fall in the range -2 < x <= -1. -eval instant at 5m histogram_fraction(-2, -1, negative_histogram) +eval instant at 1m histogram_fraction(-2, -1, negative_histogram) {} 0.5 -eval instant at 5m histogram_quantile(0.5, negative_histogram) - {} -1.5 - +# Exponential interpolation works the same as for positive buckets, just mirrored. +eval instant at 1m histogram_quantile(0.5, negative_histogram) + {} -1.414213562373095 +clear # Two histogram samples. load 5m two_samples_histogram {{schema:0 sum:4 count:4 buckets:[1 2 1]}} {{schema:0 sum:-4 count:4 n_buckets:[1 2 1]}} # We expect to see the newest sample. 
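+# (The two samples sit 5m apart, so at t=5m the instant vector already picks
+# the second, all-negative histogram; that is why count stays 4 while sum,
+# avg, and the median quantile turn negative below.)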
-eval instant at 10m histogram_count(two_samples_histogram) +eval instant at 5m histogram_count(two_samples_histogram) {} 4 -eval instant at 10m histogram_sum(two_samples_histogram) +eval instant at 5m histogram_sum(two_samples_histogram) {} -4 -eval instant at 10m histogram_avg(two_samples_histogram) +eval instant at 5m histogram_avg(two_samples_histogram) {} -1 -eval instant at 10m histogram_fraction(-2, -1, two_samples_histogram) +eval instant at 5m histogram_fraction(-2, -1, two_samples_histogram) {} 0.5 -eval instant at 10m histogram_quantile(0.5, two_samples_histogram) - {} -1.5 - +# See explanation for exponential interpolation above. +eval instant at 5m histogram_quantile(0.5, two_samples_histogram) + {} -1.414213562373095 +clear # Add two histograms with negated data. load 5m @@ -259,6 +269,8 @@ eval instant at 5m histogram_fraction(0, 4, balanced_histogram) eval instant at 5m histogram_quantile(0.5, balanced_histogram) {} 0.5 +clear + # Add histogram to test sum(last_over_time) regression load 5m incr_sum_histogram{number="1"} {{schema:0 sum:0 count:0 buckets:[1]}}+{{schema:0 sum:1 count:1 buckets:[1]}}x10 @@ -270,6 +282,8 @@ eval instant at 50m histogram_sum(sum(incr_sum_histogram)) eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m]))) {} 30 +clear + # Apply rate function to histogram. load 15s histogram_rate {{schema:1 count:12 sum:18.4 z_bucket:2 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[1 2 0 1 1]}}+{{schema:1 count:9 sum:18.4 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 0 1 1] n_buckets:[1 1 0 1 1]}}x100 @@ -280,6 +294,8 @@ eval instant at 5m rate(histogram_rate[45s]) eval range from 5m to 5m30s step 30s rate(histogram_rate[45s]) {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}}x1 +clear + # Apply count and sum function to histogram. load 10m histogram_count_sum_2 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 @@ -290,6 +306,8 @@ eval instant at 10m histogram_count(histogram_count_sum_2) eval instant at 10m histogram_sum(histogram_count_sum_2) {} 100 +clear + # Apply stddev and stdvar function to histogram with {1, 2, 3, 4} (low res). load 10m histogram_stddev_stdvar_1 {{schema:2 count:4 sum:10 buckets:[1 0 0 0 1 0 0 1 1]}}x1 @@ -300,6 +318,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_1) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_1) {} 1.163807968526718 +clear + # Apply stddev and stdvar function to histogram with {1, 1, 1, 1} (high res). load 10m histogram_stddev_stdvar_2 {{schema:8 count:10 sum:10 buckets:[1 2 3 4]}}x1 @@ -310,6 +330,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_2) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_2) {} 2.3971123370139447e-05 +clear + # Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9}. 
load 10m histogram_stddev_stdvar_3 {{schema:3 count:7 sum:62 z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 @@ -320,6 +342,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_3) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_3) {} 1844.4651144196398 +clear + # Apply stddev and stdvar function to histogram with {-100000, -10000, -1000, -888, -888, -100, -50, -9, -8, -3}. load 10m histogram_stddev_stdvar_4 {{schema:0 count:10 sum:-112946 z_bucket:0 n_buckets:[0 0 1 1 1 0 1 1 0 0 3 0 0 0 1 0 0 1]}}x1 @@ -330,6 +354,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_4) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_4) {} 759352122.1939945 +clear + # Apply stddev and stdvar function to histogram with {-10x10}. load 10m histogram_stddev_stdvar_5 {{schema:0 count:10 sum:-100 z_bucket:0 n_buckets:[0 0 0 0 10]}}x1 @@ -340,6 +366,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_5) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_5) {} 1.725830020304794 +clear + # Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9, NaN}. load 10m histogram_stddev_stdvar_6 {{schema:3 count:7 sum:NaN z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 @@ -350,6 +378,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_6) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_6) {} NaN +clear + # Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9, Inf}. load 10m histogram_stddev_stdvar_7 {{schema:3 count:7 sum:Inf z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 @@ -360,6 +390,8 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_7) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_7) {} Inf +clear + # Apply quantile function to histogram with all positive buckets with zero bucket. load 10m histogram_quantile_1 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 buckets:[2 3 0 1 4]}}x1 @@ -370,20 +402,24 @@ eval_warn instant at 10m histogram_quantile(1.001, histogram_quantile_1) eval instant at 10m histogram_quantile(1, histogram_quantile_1) {} 16 +# The following quantiles are within a bucket. Exponential +# interpolation is applied (rather than linear, as it is done for +# classic histograms), leading to slightly different quantile values. eval instant at 10m histogram_quantile(0.99, histogram_quantile_1) - {} 15.759999999999998 + {} 15.67072476139083 eval instant at 10m histogram_quantile(0.9, histogram_quantile_1) - {} 13.600000000000001 + {} 12.99603834169977 eval instant at 10m histogram_quantile(0.6, histogram_quantile_1) - {} 4.799999999999997 + {} 4.594793419988138 eval instant at 10m histogram_quantile(0.5, histogram_quantile_1) - {} 1.6666666666666665 + {} 1.5874010519681994 +# Linear interpolation within the zero bucket after all. 
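+# (Worked example, derived from the loaded histogram: rank 0.1 * 12 = 1.2
+# observations falls among the 2 observations in the zero bucket [0, 0.001],
+# so linear interpolation yields 0.001 * 1.2 / 2 = 0.0006.)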
eval instant at 10m histogram_quantile(0.1, histogram_quantile_1) - {} 0.0006000000000000001 + {} 0.0006 eval instant at 10m histogram_quantile(0, histogram_quantile_1) {} 0 @@ -391,6 +427,8 @@ eval instant at 10m histogram_quantile(0, histogram_quantile_1) eval_warn instant at 10m histogram_quantile(-1, histogram_quantile_1) {} -Inf +clear + # Apply quantile function to histogram with all negative buckets with zero bucket. load 10m histogram_quantile_2 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 n_buckets:[2 3 0 1 4]}}x1 @@ -401,17 +439,20 @@ eval_warn instant at 10m histogram_quantile(1.001, histogram_quantile_2) eval instant at 10m histogram_quantile(1, histogram_quantile_2) {} 0 +# Again, the quantile values here are slightly different from what +# they would be with linear interpolation. Note that quantiles +# ending up in the zero bucket are linearly interpolated after all. eval instant at 10m histogram_quantile(0.99, histogram_quantile_2) - {} -6.000000000000048e-05 + {} -0.00006 eval instant at 10m histogram_quantile(0.9, histogram_quantile_2) - {} -0.0005999999999999996 + {} -0.0006 eval instant at 10m histogram_quantile(0.5, histogram_quantile_2) - {} -1.6666666666666667 + {} -1.5874010519681996 eval instant at 10m histogram_quantile(0.1, histogram_quantile_2) - {} -13.6 + {} -12.996038341699768 eval instant at 10m histogram_quantile(0, histogram_quantile_2) {} -16 @@ -419,7 +460,11 @@ eval instant at 10m histogram_quantile(0, histogram_quantile_2) eval_warn instant at 10m histogram_quantile(-1, histogram_quantile_2) {} -Inf -# Apply quantile function to histogram with both positive and negative buckets with zero bucket. +clear + +# Apply quantile function to histogram with both positive and negative +# buckets with zero bucket. +# First positive buckets with exponential interpolation. load 10m histogram_quantile_3 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 @@ -430,31 +475,34 @@ eval instant at 10m histogram_quantile(1, histogram_quantile_3) {} 16 eval instant at 10m histogram_quantile(0.99, histogram_quantile_3) - {} 15.519999999999996 + {} 15.34822590920423 eval instant at 10m histogram_quantile(0.9, histogram_quantile_3) - {} 11.200000000000003 + {} 10.556063286183155 eval instant at 10m histogram_quantile(0.7, histogram_quantile_3) - {} 1.2666666666666657 + {} 1.2030250360821164 +# Linear interpolation in the zero bucket, symmetrically centered around +# the zero point. eval instant at 10m histogram_quantile(0.55, histogram_quantile_3) - {} 0.0006000000000000005 + {} 0.0006 eval instant at 10m histogram_quantile(0.5, histogram_quantile_3) {} 0 eval instant at 10m histogram_quantile(0.45, histogram_quantile_3) - {} -0.0005999999999999996 + {} -0.0006 +# Finally negative buckets with mirrored exponential interpolation. eval instant at 10m histogram_quantile(0.3, histogram_quantile_3) - {} -1.266666666666667 + {} -1.2030250360821169 eval instant at 10m histogram_quantile(0.1, histogram_quantile_3) - {} -11.2 + {} -10.556063286183155 eval instant at 10m histogram_quantile(0.01, histogram_quantile_3) - {} -15.52 + {} -15.34822590920423 eval instant at 10m histogram_quantile(0, histogram_quantile_3) {} -16 @@ -462,6 +510,92 @@ eval instant at 10m histogram_quantile(0, histogram_quantile_3) eval_warn instant at 10m histogram_quantile(-1, histogram_quantile_3) {} -Inf +clear + +# Try different schemas. (The interpolation logic must not depend on the schema.) 
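+# (For schema n, bucket boundaries grow by a factor of 2^(2^-n): powers of 4
+# for schema -1, powers of 2 for schema 0, powers of sqrt(2) for schema +1.
+# With all 5 observations in the second bucket, the median is interpolated
+# to the geometric midpoint of that bucket, e.g. sqrt(1 * 4) = 2 for
+# schema -1 and sqrt(1 * 2) ~= 1.41421 for schema 0 below.)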
+clear +load 1m + var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 buckets:[0 5]}} + var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 buckets:[0 5]}} + var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 buckets:[0 5]}} + +eval instant at 1m histogram_quantile(0.5, var_res_histogram) + {schema="-1"} 2.0 + {schema="0"} 1.4142135623730951 + {schema="+1"} 1.189207 + +eval instant at 1m histogram_fraction(0, 2, var_res_histogram{schema="-1"}) + {schema="-1"} 0.5 + +eval instant at 1m histogram_fraction(0, 1.4142135623730951, var_res_histogram{schema="0"}) + {schema="0"} 0.5 + +eval instant at 1m histogram_fraction(0, 1.189207, var_res_histogram{schema="+1"}) + {schema="+1"} 0.5 + +# The same as above, but one bucket "further to the right". +clear +load 1m + var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 buckets:[0 0 5]}} + var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 buckets:[0 0 5]}} + var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 buckets:[0 0 5]}} + +eval instant at 1m histogram_quantile(0.5, var_res_histogram) + {schema="-1"} 8.0 + {schema="0"} 2.82842712474619 + {schema="+1"} 1.6817928305074292 + +eval instant at 1m histogram_fraction(0, 8, var_res_histogram{schema="-1"}) + {schema="-1"} 0.5 + +eval instant at 1m histogram_fraction(0, 2.82842712474619, var_res_histogram{schema="0"}) + {schema="0"} 0.5 + +eval instant at 1m histogram_fraction(0, 1.6817928305074292, var_res_histogram{schema="+1"}) + {schema="+1"} 0.5 + +# And everything again but for negative buckets. +clear +load 1m + var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 n_buckets:[0 5]}} + var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 n_buckets:[0 5]}} + var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 n_buckets:[0 5]}} + +eval instant at 1m histogram_quantile(0.5, var_res_histogram) + {schema="-1"} -2.0 + {schema="0"} -1.4142135623730951 + {schema="+1"} -1.189207 + +eval instant at 1m histogram_fraction(-2, 0, var_res_histogram{schema="-1"}) + {schema="-1"} 0.5 + +eval instant at 1m histogram_fraction(-1.4142135623730951, 0, var_res_histogram{schema="0"}) + {schema="0"} 0.5 + +eval instant at 1m histogram_fraction(-1.189207, 0, var_res_histogram{schema="+1"}) + {schema="+1"} 0.5 + +clear +load 1m + var_res_histogram{schema="-1"} {{schema:-1 sum:6 count:5 n_buckets:[0 0 5]}} + var_res_histogram{schema="0"} {{schema:0 sum:4 count:5 n_buckets:[0 0 5]}} + var_res_histogram{schema="+1"} {{schema:1 sum:4 count:5 n_buckets:[0 0 5]}} + +eval instant at 1m histogram_quantile(0.5, var_res_histogram) + {schema="-1"} -8.0 + {schema="0"} -2.82842712474619 + {schema="+1"} -1.6817928305074292 + +eval instant at 1m histogram_fraction(-8, 0, var_res_histogram{schema="-1"}) + {schema="-1"} 0.5 + +eval instant at 1m histogram_fraction(-2.82842712474619, 0, var_res_histogram{schema="0"}) + {schema="0"} 0.5 + +eval instant at 1m histogram_fraction(-1.6817928305074292, 0, var_res_histogram{schema="+1"}) + {schema="+1"} 0.5 + + # Apply fraction function to empty histogram. load 10m histogram_fraction_1 {{}}x1 @@ -469,6 +603,8 @@ load 10m eval instant at 10m histogram_fraction(3.1415, 42, histogram_fraction_1) {} NaN +clear + # Apply fraction function to histogram with positive and zero buckets. 
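+# (Illustrative arithmetic for the interpolated fractions below: within the
+# bucket (1, 2], the estimated share of observations above 1.5 is
+# 1 - log2(1.5) ~= 0.4150; that bucket holds 3 of the 12 observations, so
+# histogram_fraction(1.5, 2, ...) ~= 0.4150 * 3/12 ~= 0.10376.)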
 load 10m
   histogram_fraction_2 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 buckets:[2 3 0 1 4]}}x1
@@ -485,11 +621,18 @@ eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_2)
 eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_2)
   {} 0.16666666666666666

+# Note that this result and the one above add up to 1.
+eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_2)
+  {} 0.8333333333333334
+
+# We are in the zero bucket, resulting in linear interpolation.
 eval instant at 10m histogram_fraction(0, 0.0005, histogram_fraction_2)
   {} 0.08333333333333333

-eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_2)
-  {} 0.8333333333333334
+# Demonstrate that the inverse operation with histogram_quantile yields
+# the original value with the non-trivial result above.
+eval instant at 10m histogram_quantile(0.08333333333333333, histogram_fraction_2)
+  {} 0.0005

 eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_2)
   {} 0
@@ -497,17 +640,30 @@ eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_2)
 eval instant at 10m histogram_fraction(1, 2, histogram_fraction_2)
   {} 0.25

+# More non-trivial results with interpolation involved below, including
+# some round-trips via histogram_quantile to prove that the inverse
+# operation leads to the same results.
+
+eval instant at 10m histogram_fraction(0, 1.5, histogram_fraction_2)
+  {} 0.4795739585136224
+
 eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_2)
-  {} 0.125
+  {} 0.10375937481971091

 eval instant at 10m histogram_fraction(1, 8, histogram_fraction_2)
   {} 0.3333333333333333

+eval instant at 10m histogram_fraction(0, 6, histogram_fraction_2)
+  {} 0.6320802083934297
+
+eval instant at 10m histogram_quantile(0.6320802083934297, histogram_fraction_2)
+  {} 6
+
 eval instant at 10m histogram_fraction(1, 6, histogram_fraction_2)
-  {} 0.2916666666666667
+  {} 0.29874687506009634

 eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_2)
-  {} 0.16666666666666666
+  {} 0.15250624987980724

 eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_2)
   {} 0
@@ -570,6 +726,12 @@ eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_3)
 eval instant at 10m histogram_fraction(-0.0005, 0, histogram_fraction_3)
   {} 0.08333333333333333

+eval instant at 10m histogram_fraction(-inf, -0.0005, histogram_fraction_3)
+  {} 0.9166666666666666
+
+eval instant at 10m histogram_quantile(0.9166666666666666, histogram_fraction_3)
+  {} -0.0005
+
 eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_3)
   {} 0

@@ -595,16 +757,22 @@ eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_3)
   {} 0.25

 eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_3)
-  {} 0.125
+  {} 0.10375937481971091

 eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_3)
   {} 0.3333333333333333

+eval instant at 10m histogram_fraction(-inf, -6, histogram_fraction_3)
+  {} 0.36791979160657035
+
+eval instant at 10m histogram_quantile(0.36791979160657035, histogram_fraction_3)
+  {} -6
+
 eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_3)
-  {} 0.2916666666666667
+  {} 0.29874687506009634

 eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_3)
-  {} 0.16666666666666666
+  {} 0.15250624987980724

 eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_3)
   {} 0
@@ -633,6 +801,8 @@ eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_3)
 eval instant at 10m
histogram_fraction(-Inf, +Inf, histogram_fraction_3) {} 1 +clear + # Apply fraction function to histogram with both positive, negative and zero buckets. load 10m histogram_fraction_4 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 @@ -652,6 +822,18 @@ eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_4) eval instant at 10m histogram_fraction(-0.0005, 0.0005, histogram_fraction_4) {} 0.08333333333333333 +eval instant at 10m histogram_fraction(-inf, 0.0005, histogram_fraction_4) + {} 0.5416666666666666 + +eval instant at 10m histogram_quantile(0.5416666666666666, histogram_fraction_4) + {} 0.0005 + +eval instant at 10m histogram_fraction(-inf, -0.0005, histogram_fraction_4) + {} 0.4583333333333333 + +eval instant at 10m histogram_quantile(0.4583333333333333, histogram_fraction_4) + {} -0.0005 + eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_4) {} 0.4166666666666667 @@ -662,31 +844,31 @@ eval instant at 10m histogram_fraction(1, 2, histogram_fraction_4) {} 0.125 eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_4) - {} 0.0625 + {} 0.051879687409855414 eval instant at 10m histogram_fraction(1, 8, histogram_fraction_4) {} 0.16666666666666666 eval instant at 10m histogram_fraction(1, 6, histogram_fraction_4) - {} 0.14583333333333334 + {} 0.14937343753004825 eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_4) - {} 0.08333333333333333 + {} 0.07625312493990366 eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_4) {} 0.125 eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_4) - {} 0.0625 + {} 0.051879687409855456 eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_4) {} 0.16666666666666666 eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_4) - {} 0.14583333333333334 + {} 0.14937343753004817 eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_4) - {} 0.08333333333333333 + {} 0.07625312493990362 eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_4) {} 0 @@ -720,27 +902,39 @@ eval instant at 10m histogram_sum(scalar(histogram_fraction(-Inf, +Inf, sum(hist # Apply multiplication and division operator to histogram. 
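+# (Multiplying a native histogram by a scalar scales count, sum, the zero
+# bucket, and every regular bucket by that scalar; a negative scalar flips
+# all their signs, as the new *-1 and *-3 cases below show.)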
load 10m - histogram_mul_div {{schema:0 count:21 sum:33 z_bucket:3 z_bucket_w:0.001 buckets:[3 3 3] n_buckets:[3 3 3]}}x1 + histogram_mul_div {{schema:0 count:30 sum:33 z_bucket:3 z_bucket_w:0.001 buckets:[3 3 3] n_buckets:[6 6 6]}}x1 float_series_3 3+0x1 float_series_0 0+0x1 eval instant at 10m histogram_mul_div*3 - {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} + +eval instant at 10m histogram_mul_div*-1 + {} {{schema:0 count:-30 sum:-33 z_bucket:-3 z_bucket_w:0.001 buckets:[-3 -3 -3] n_buckets:[-6 -6 -6]}} + +eval instant at 10m -histogram_mul_div + {} {{schema:0 count:-30 sum:-33 z_bucket:-3 z_bucket_w:0.001 buckets:[-3 -3 -3] n_buckets:[-6 -6 -6]}} + +eval instant at 10m histogram_mul_div*-3 + {} {{schema:0 count:-90 sum:-99 z_bucket:-9 z_bucket_w:0.001 buckets:[-9 -9 -9] n_buckets:[-18 -18 -18]}} eval instant at 10m 3*histogram_mul_div - {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} eval instant at 10m histogram_mul_div*float_series_3 - {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} eval instant at 10m float_series_3*histogram_mul_div - {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}} + {} {{schema:0 count:90 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[18 18 18]}} eval instant at 10m histogram_mul_div/3 - {} {{schema:0 count:7 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[1 1 1]}} + {} {{schema:0 count:10 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[2 2 2]}} + +eval instant at 10m histogram_mul_div/-3 + {} {{schema:0 count:-10 sum:-11 z_bucket:-1 z_bucket_w:0.001 buckets:[-1 -1 -1] n_buckets:[-2 -2 -2]}} eval instant at 10m histogram_mul_div/float_series_3 - {} {{schema:0 count:7 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[1 1 1]}} + {} {{schema:0 count:10 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[2 2 2]}} eval instant at 10m histogram_mul_div*0 {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}} @@ -754,18 +948,40 @@ eval instant at 10m histogram_mul_div*float_series_0 eval instant at 10m float_series_0*histogram_mul_div {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}} -# TODO: (NeerajGartia21) remove all the histogram buckets in case of division with zero. 
See: https://github.com/prometheus/prometheus/issues/13934
 eval instant at 10m histogram_mul_div/0
-  {} {{schema:0 count:Inf sum:Inf z_bucket:Inf z_bucket_w:0.001 buckets:[Inf Inf Inf] n_buckets:[Inf Inf Inf]}}
+  {} {{schema:0 count:Inf sum:Inf z_bucket_w:0.001 z_bucket:Inf}}

 eval instant at 10m histogram_mul_div/float_series_0
-  {} {{schema:0 count:Inf sum:Inf z_bucket:Inf z_bucket_w:0.001 buckets:[Inf Inf Inf] n_buckets:[Inf Inf Inf]}}
+  {} {{schema:0 count:Inf sum:Inf z_bucket_w:0.001 z_bucket:Inf}}

 eval instant at 10m histogram_mul_div*0/0
-  {} {{schema:0 count:NaN sum:NaN z_bucket:NaN z_bucket_w:0.001 buckets:[NaN NaN NaN] n_buckets:[NaN NaN NaN]}}
+  {} {{schema:0 count:NaN sum:NaN z_bucket_w:0.001 z_bucket:NaN}}
+
+eval_info instant at 10m histogram_mul_div*histogram_mul_div
+
+eval_info instant at 10m histogram_mul_div/histogram_mul_div
+
+eval_info instant at 10m float_series_3/histogram_mul_div
+
+eval_info instant at 10m 0/histogram_mul_div

 clear

+# Apply binary operators to mixed histogram and float samples.
+# TODO:(NeerajGartia21) move these tests to their respective locations when tests from engine_test.go are moved here.
+
+load 10m
+  histogram_sample {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1
+  float_sample 0x1
+
+eval_info instant at 10m float_sample+histogram_sample
+
+eval_info instant at 10m histogram_sample+float_sample
+
+eval_info instant at 10m float_sample-histogram_sample
+
+eval_info instant at 10m histogram_sample-float_sample
+
 # Counter reset only noticeable in a single bucket.
 load 5m
   reset_in_bucket {{schema:0 count:4 sum:5 buckets:[1 2 1]}} {{schema:0 count:5 sum:6 buckets:[1 1 3]}} {{schema:0 count:6 sum:7 buckets:[1 2 3]}}
@@ -802,7 +1018,7 @@
 load 30s
   some_metric {{schema:0 sum:1 count:1 buckets:[1] counter_reset_hint:gauge}} {{schema:0 sum:2 count:2 buckets:[2] counter_reset_hint:gauge}} {{schema:0 sum:3 count:3 buckets:[3] counter_reset_hint:gauge}}

 # Test the case where we only have two points for rate
-eval_warn instant at 30s rate(some_metric[30s])
+eval_warn instant at 30s rate(some_metric[1m])
   {} {{count:0.03333333333333333 sum:0.03333333333333333 buckets:[0.03333333333333333]}}

 # Test the case where we have more than two points for rate
@@ -824,11 +1040,11 @@
 eval_warn instant at 1m30s rate(some_metric[1m])
 # Should produce no results.

 # Start with custom, end with exponential.
-eval_warn instant at 1m rate(some_metric[30s])
+eval_warn instant at 1m rate(some_metric[1m])
 # Should produce no results.

 # Start with exponential, end with custom.
-eval_warn instant at 30s rate(some_metric[30s])
+eval_warn instant at 30s rate(some_metric[1m])
 # Should produce no results.
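+# (Mixing custom and standard exponential bucket layouts within one range
+# cannot be rated, hence the warnings and empty results above. The selectors
+# are widened from 30s to 1m so that, with left-open range selectors, two
+# points 30s apart still fall into the window.)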
clear @@ -963,8 +1179,30 @@ clear load 1m histogram_sum_over_time {{schema:0 count:25 sum:1234.5 z_bucket:4 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[2 4 0 0 1 9]}} {{schema:0 count:41 sum:2345.6 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}} {{schema:0 count:41 sum:1111.1 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}} {{schema:1 count:0}} -eval instant at 3m sum_over_time(histogram_sum_over_time[3m:1m]) +eval instant at 3m sum_over_time(histogram_sum_over_time[4m:1m]) {} {{schema:0 count:107 sum:4691.2 z_bucket:14 z_bucket_w:0.001 buckets:[3 8 2 5 3 2 2] n_buckets:[2 6 8 4 15 9 0 0 0 10 10 4]}} -eval instant at 3m avg_over_time(histogram_sum_over_time[3m:1m]) +eval instant at 3m avg_over_time(histogram_sum_over_time[4m:1m]) {} {{schema:0 count:26.75 sum:1172.8 z_bucket:3.5 z_bucket_w:0.001 buckets:[0.75 2 0.5 1.25 0.75 0.5 0.5] n_buckets:[0.5 1.5 2 1 3.75 2.25 0 0 0 2.5 2.5 1]}} + +clear + +# Test native histograms with sub operator. +load 10m + histogram_sub_1{idx="0"} {{schema:0 count:41 sum:2345.6 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}}x1 + histogram_sub_1{idx="1"} {{schema:0 count:11 sum:1234.5 z_bucket:3 z_bucket_w:0.001 buckets:[0 2 1] n_buckets:[0 0 3 2]}}x1 + histogram_sub_2{idx="0"} {{schema:0 count:41 sum:2345.6 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}}x1 + histogram_sub_2{idx="1"} {{schema:1 count:11 sum:1234.5 z_bucket:3 z_bucket_w:0.001 buckets:[0 2 1] n_buckets:[0 0 3 2]}}x1 + histogram_sub_3{idx="0"} {{schema:1 count:11 sum:1234.5 z_bucket:3 z_bucket_w:0.001 buckets:[0 2 1] n_buckets:[0 0 3 2]}}x1 + histogram_sub_3{idx="1"} {{schema:0 count:41 sum:2345.6 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}}x1 + +eval instant at 10m histogram_sub_1{idx="0"} - ignoring(idx) histogram_sub_1{idx="1"} + {} {{schema:0 count:30 sum:1111.1 z_bucket:2 z_bucket_w:0.001 buckets:[1 1 0 2 1 1 1] n_buckets:[0 1 1 0 7 0 0 0 0 5 5 2]}} + +eval instant at 10m histogram_sub_2{idx="0"} - ignoring(idx) histogram_sub_2{idx="1"} + {} {{schema:0 count:30 sum:1111.1 z_bucket:2 z_bucket_w:0.001 buckets:[1 0 1 2 1 1 1] n_buckets:[0 -2 2 2 7 0 0 0 0 5 5 2]}} + +eval instant at 10m histogram_sub_3{idx="0"} - ignoring(idx) histogram_sub_3{idx="1"} + {} {{schema:0 count:-30 sum:-1111.1 z_bucket:-2 z_bucket_w:0.001 buckets:[-1 0 -1 -2 -1 -1 -1] n_buckets:[0 2 -2 -2 -7 0 0 0 0 -5 -5 -2]}} + +clear diff --git a/promql/promqltest/testdata/operators.test b/promql/promqltest/testdata/operators.test index df2311b9ba..667989ca77 100644 --- a/promql/promqltest/testdata/operators.test +++ b/promql/promqltest/testdata/operators.test @@ -1,12 +1,13 @@ load 5m - http_requests{job="api-server", instance="0", group="production"} 0+10x10 - http_requests{job="api-server", instance="1", group="production"} 0+20x10 - http_requests{job="api-server", instance="0", group="canary"} 0+30x10 - http_requests{job="api-server", instance="1", group="canary"} 0+40x10 - http_requests{job="app-server", instance="0", group="production"} 0+50x10 - http_requests{job="app-server", instance="1", group="production"} 0+60x10 - http_requests{job="app-server", instance="0", group="canary"} 0+70x10 - http_requests{job="app-server", instance="1", group="canary"} 0+80x10 + http_requests_total{job="api-server", instance="0", group="production"} 0+10x10 + http_requests_total{job="api-server", instance="1", group="production"} 
0+20x10 + http_requests_total{job="api-server", instance="0", group="canary"} 0+30x10 + http_requests_total{job="api-server", instance="1", group="canary"} 0+40x10 + http_requests_total{job="app-server", instance="0", group="production"} 0+50x10 + http_requests_total{job="app-server", instance="1", group="production"} 0+60x10 + http_requests_total{job="app-server", instance="0", group="canary"} 0+70x10 + http_requests_total{job="app-server", instance="1", group="canary"} 0+80x10 + http_requests_histogram{job="app-server", instance="1", group="production"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}}x11 load 5m vector_matching_a{l="x"} 0+1x100 @@ -14,21 +15,21 @@ load 5m vector_matching_b{l="x"} 0+4x25 -eval instant at 50m SUM(http_requests) BY (job) - COUNT(http_requests) BY (job) +eval instant at 50m SUM(http_requests_total) BY (job) - COUNT(http_requests_total) BY (job) {job="api-server"} 996 {job="app-server"} 2596 -eval instant at 50m 2 - SUM(http_requests) BY (job) +eval instant at 50m 2 - SUM(http_requests_total) BY (job) {job="api-server"} -998 {job="app-server"} -2598 -eval instant at 50m -http_requests{job="api-server",instance="0",group="production"} +eval instant at 50m -http_requests_total{job="api-server",instance="0",group="production"} {job="api-server",instance="0",group="production"} -100 -eval instant at 50m +http_requests{job="api-server",instance="0",group="production"} - http_requests{job="api-server",instance="0",group="production"} 100 +eval instant at 50m +http_requests_total{job="api-server",instance="0",group="production"} + http_requests_total{job="api-server",instance="0",group="production"} 100 -eval instant at 50m - - - SUM(http_requests) BY (job) +eval instant at 50m - - - SUM(http_requests_total) BY (job) {job="api-server"} -1000 {job="app-server"} -2600 @@ -41,83 +42,83 @@ eval instant at 50m -2^---1*3 eval instant at 50m 2/-2^---1*3+2 -10 -eval instant at 50m -10^3 * - SUM(http_requests) BY (job) ^ -1 +eval instant at 50m -10^3 * - SUM(http_requests_total) BY (job) ^ -1 {job="api-server"} 1 {job="app-server"} 0.38461538461538464 -eval instant at 50m 1000 / SUM(http_requests) BY (job) +eval instant at 50m 1000 / SUM(http_requests_total) BY (job) {job="api-server"} 1 {job="app-server"} 0.38461538461538464 -eval instant at 50m SUM(http_requests) BY (job) - 2 +eval instant at 50m SUM(http_requests_total) BY (job) - 2 {job="api-server"} 998 {job="app-server"} 2598 -eval instant at 50m SUM(http_requests) BY (job) % 3 +eval instant at 50m SUM(http_requests_total) BY (job) % 3 {job="api-server"} 1 {job="app-server"} 2 -eval instant at 50m SUM(http_requests) BY (job) % 0.3 +eval instant at 50m SUM(http_requests_total) BY (job) % 0.3 {job="api-server"} 0.1 {job="app-server"} 0.2 -eval instant at 50m SUM(http_requests) BY (job) ^ 2 +eval instant at 50m SUM(http_requests_total) BY (job) ^ 2 {job="api-server"} 1000000 {job="app-server"} 6760000 -eval instant at 50m SUM(http_requests) BY (job) % 3 ^ 2 +eval instant at 50m SUM(http_requests_total) BY (job) % 3 ^ 2 {job="api-server"} 1 {job="app-server"} 8 -eval instant at 50m SUM(http_requests) BY (job) % 2 ^ (3 ^ 2) +eval instant at 50m SUM(http_requests_total) BY (job) % 2 ^ (3 ^ 2) {job="api-server"} 488 {job="app-server"} 40 -eval instant at 50m SUM(http_requests) BY (job) % 2 ^ 3 ^ 2 +eval instant at 50m SUM(http_requests_total) BY (job) % 2 ^ 3 ^ 2 {job="api-server"} 488 {job="app-server"} 40 -eval instant at 50m SUM(http_requests) BY (job) % 2 ^ 3 ^ 2 ^ 2 +eval instant at 50m SUM(http_requests_total) BY (job) 
% 2 ^ 3 ^ 2 ^ 2 {job="api-server"} 1000 {job="app-server"} 2600 -eval instant at 50m COUNT(http_requests) BY (job) ^ COUNT(http_requests) BY (job) +eval instant at 50m COUNT(http_requests_total) BY (job) ^ COUNT(http_requests_total) BY (job) {job="api-server"} 256 {job="app-server"} 256 -eval instant at 50m SUM(http_requests) BY (job) / 0 +eval instant at 50m SUM(http_requests_total) BY (job) / 0 {job="api-server"} +Inf {job="app-server"} +Inf -eval instant at 50m http_requests{group="canary", instance="0", job="api-server"} / 0 +eval instant at 50m http_requests_total{group="canary", instance="0", job="api-server"} / 0 {group="canary", instance="0", job="api-server"} +Inf -eval instant at 50m -1 * http_requests{group="canary", instance="0", job="api-server"} / 0 +eval instant at 50m -1 * http_requests_total{group="canary", instance="0", job="api-server"} / 0 {group="canary", instance="0", job="api-server"} -Inf -eval instant at 50m 0 * http_requests{group="canary", instance="0", job="api-server"} / 0 +eval instant at 50m 0 * http_requests_total{group="canary", instance="0", job="api-server"} / 0 {group="canary", instance="0", job="api-server"} NaN -eval instant at 50m 0 * http_requests{group="canary", instance="0", job="api-server"} % 0 +eval instant at 50m 0 * http_requests_total{group="canary", instance="0", job="api-server"} % 0 {group="canary", instance="0", job="api-server"} NaN -eval instant at 50m SUM(http_requests) BY (job) + SUM(http_requests) BY (job) +eval instant at 50m SUM(http_requests_total) BY (job) + SUM(http_requests_total) BY (job) {job="api-server"} 2000 {job="app-server"} 5200 -eval instant at 50m (SUM((http_requests)) BY (job)) + SUM(http_requests) BY (job) +eval instant at 50m (SUM((http_requests_total)) BY (job)) + SUM(http_requests_total) BY (job) {job="api-server"} 2000 {job="app-server"} 5200 -eval instant at 50m http_requests{job="api-server", group="canary"} - http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="canary", instance="1", job="api-server"} 400 +eval instant at 50m http_requests_total{job="api-server", group="canary"} + http_requests_total{group="canary", instance="0", job="api-server"} 300 + http_requests_total{group="canary", instance="1", job="api-server"} 400 -eval instant at 50m http_requests{job="api-server", group="canary"} + rate(http_requests{job="api-server"}[5m]) * 5 * 60 +eval instant at 50m http_requests_total{job="api-server", group="canary"} + rate(http_requests_total{job="api-server"}[10m]) * 5 * 60 {group="canary", instance="0", job="api-server"} 330 {group="canary", instance="1", job="api-server"} 440 -eval instant at 50m rate(http_requests[25m]) * 25 * 60 +eval instant at 50m rate(http_requests_total[25m]) * 25 * 60 {group="canary", instance="0", job="api-server"} 150 {group="canary", instance="0", job="app-server"} 350 {group="canary", instance="1", job="api-server"} 200 @@ -127,7 +128,7 @@ eval instant at 50m rate(http_requests[25m]) * 25 * 60 {group="production", instance="1", job="api-server"} 100 {group="production", instance="1", job="app-server"} 300 -eval instant at 50m (rate((http_requests[25m])) * 25) * 60 +eval instant at 50m (rate((http_requests_total[25m])) * 25) * 60 {group="canary", instance="0", job="api-server"} 150 {group="canary", instance="0", job="app-server"} 350 {group="canary", instance="1", job="api-server"} 200 @@ -138,53 +139,53 @@ eval instant at 50m (rate((http_requests[25m])) * 25) * 60 {group="production", instance="1", job="app-server"} 300 -eval instant at 50m 
http_requests{group="canary"} and http_requests{instance="0"} - http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="canary", instance="0", job="app-server"} 700 +eval instant at 50m http_requests_total{group="canary"} and http_requests_total{instance="0"} + http_requests_total{group="canary", instance="0", job="api-server"} 300 + http_requests_total{group="canary", instance="0", job="app-server"} 700 -eval instant at 50m (http_requests{group="canary"} + 1) and http_requests{instance="0"} +eval instant at 50m (http_requests_total{group="canary"} + 1) and http_requests_total{instance="0"} {group="canary", instance="0", job="api-server"} 301 {group="canary", instance="0", job="app-server"} 701 -eval instant at 50m (http_requests{group="canary"} + 1) and on(instance, job) http_requests{instance="0", group="production"} +eval instant at 50m (http_requests_total{group="canary"} + 1) and on(instance, job) http_requests_total{instance="0", group="production"} {group="canary", instance="0", job="api-server"} 301 {group="canary", instance="0", job="app-server"} 701 -eval instant at 50m (http_requests{group="canary"} + 1) and on(instance) http_requests{instance="0", group="production"} +eval instant at 50m (http_requests_total{group="canary"} + 1) and on(instance) http_requests_total{instance="0", group="production"} {group="canary", instance="0", job="api-server"} 301 {group="canary", instance="0", job="app-server"} 701 -eval instant at 50m (http_requests{group="canary"} + 1) and ignoring(group) http_requests{instance="0", group="production"} +eval instant at 50m (http_requests_total{group="canary"} + 1) and ignoring(group) http_requests_total{instance="0", group="production"} {group="canary", instance="0", job="api-server"} 301 {group="canary", instance="0", job="app-server"} 701 -eval instant at 50m (http_requests{group="canary"} + 1) and ignoring(group, job) http_requests{instance="0", group="production"} +eval instant at 50m (http_requests_total{group="canary"} + 1) and ignoring(group, job) http_requests_total{instance="0", group="production"} {group="canary", instance="0", job="api-server"} 301 {group="canary", instance="0", job="app-server"} 701 -eval instant at 50m http_requests{group="canary"} or http_requests{group="production"} - http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="canary", instance="0", job="app-server"} 700 - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="1", job="app-server"} 800 - http_requests{group="production", instance="0", job="api-server"} 100 - http_requests{group="production", instance="0", job="app-server"} 500 - http_requests{group="production", instance="1", job="api-server"} 200 - http_requests{group="production", instance="1", job="app-server"} 600 +eval instant at 50m http_requests_total{group="canary"} or http_requests_total{group="production"} + http_requests_total{group="canary", instance="0", job="api-server"} 300 + http_requests_total{group="canary", instance="0", job="app-server"} 700 + http_requests_total{group="canary", instance="1", job="api-server"} 400 + http_requests_total{group="canary", instance="1", job="app-server"} 800 + http_requests_total{group="production", instance="0", job="api-server"} 100 + http_requests_total{group="production", instance="0", job="app-server"} 500 + http_requests_total{group="production", instance="1", job="api-server"} 200 + http_requests_total{group="production", instance="1", 
job="app-server"} 600 # On overlap the rhs samples must be dropped. -eval instant at 50m (http_requests{group="canary"} + 1) or http_requests{instance="1"} +eval instant at 50m (http_requests_total{group="canary"} + 1) or http_requests_total{instance="1"} {group="canary", instance="0", job="api-server"} 301 {group="canary", instance="0", job="app-server"} 701 {group="canary", instance="1", job="api-server"} 401 {group="canary", instance="1", job="app-server"} 801 - http_requests{group="production", instance="1", job="api-server"} 200 - http_requests{group="production", instance="1", job="app-server"} 600 + http_requests_total{group="production", instance="1", job="api-server"} 200 + http_requests_total{group="production", instance="1", job="app-server"} 600 # Matching only on instance excludes everything that has instance=0/1 but includes # entries without the instance label. -eval instant at 50m (http_requests{group="canary"} + 1) or on(instance) (http_requests or cpu_count or vector_matching_a) +eval instant at 50m (http_requests_total{group="canary"} + 1) or on(instance) (http_requests_total or cpu_count or vector_matching_a) {group="canary", instance="0", job="api-server"} 301 {group="canary", instance="0", job="app-server"} 701 {group="canary", instance="1", job="api-server"} 401 @@ -192,7 +193,7 @@ eval instant at 50m (http_requests{group="canary"} + 1) or on(instance) (http_re vector_matching_a{l="x"} 10 vector_matching_a{l="y"} 20 -eval instant at 50m (http_requests{group="canary"} + 1) or ignoring(l, group, job) (http_requests or cpu_count or vector_matching_a) +eval instant at 50m (http_requests_total{group="canary"} + 1) or ignoring(l, group, job) (http_requests_total or cpu_count or vector_matching_a) {group="canary", instance="0", job="api-server"} 301 {group="canary", instance="0", job="app-server"} 701 {group="canary", instance="1", job="api-server"} 401 @@ -200,81 +201,81 @@ eval instant at 50m (http_requests{group="canary"} + 1) or ignoring(l, group, jo vector_matching_a{l="x"} 10 vector_matching_a{l="y"} 20 -eval instant at 50m http_requests{group="canary"} unless http_requests{instance="0"} - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="1", job="app-server"} 800 +eval instant at 50m http_requests_total{group="canary"} unless http_requests_total{instance="0"} + http_requests_total{group="canary", instance="1", job="api-server"} 400 + http_requests_total{group="canary", instance="1", job="app-server"} 800 -eval instant at 50m http_requests{group="canary"} unless on(job) http_requests{instance="0"} +eval instant at 50m http_requests_total{group="canary"} unless on(job) http_requests_total{instance="0"} -eval instant at 50m http_requests{group="canary"} unless on(job, instance) http_requests{instance="0"} - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="1", job="app-server"} 800 +eval instant at 50m http_requests_total{group="canary"} unless on(job, instance) http_requests_total{instance="0"} + http_requests_total{group="canary", instance="1", job="api-server"} 400 + http_requests_total{group="canary", instance="1", job="app-server"} 800 -eval instant at 50m http_requests{group="canary"} / on(instance,job) http_requests{group="production"} +eval instant at 50m http_requests_total{group="canary"} / on(instance,job) http_requests_total{group="production"} {instance="0", job="api-server"} 3 {instance="0", job="app-server"} 1.4 {instance="1", 
job="api-server"} 2 {instance="1", job="app-server"} 1.3333333333333333 -eval instant at 50m http_requests{group="canary"} unless ignoring(group, instance) http_requests{instance="0"} +eval instant at 50m http_requests_total{group="canary"} unless ignoring(group, instance) http_requests_total{instance="0"} -eval instant at 50m http_requests{group="canary"} unless ignoring(group) http_requests{instance="0"} - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="1", job="app-server"} 800 +eval instant at 50m http_requests_total{group="canary"} unless ignoring(group) http_requests_total{instance="0"} + http_requests_total{group="canary", instance="1", job="api-server"} 400 + http_requests_total{group="canary", instance="1", job="app-server"} 800 -eval instant at 50m http_requests{group="canary"} / ignoring(group) http_requests{group="production"} +eval instant at 50m http_requests_total{group="canary"} / ignoring(group) http_requests_total{group="production"} {instance="0", job="api-server"} 3 {instance="0", job="app-server"} 1.4 {instance="1", job="api-server"} 2 {instance="1", job="app-server"} 1.3333333333333333 # https://github.com/prometheus/prometheus/issues/1489 -eval instant at 50m http_requests AND ON (dummy) vector(1) - http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="canary", instance="0", job="app-server"} 700 - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="1", job="app-server"} 800 - http_requests{group="production", instance="0", job="api-server"} 100 - http_requests{group="production", instance="0", job="app-server"} 500 - http_requests{group="production", instance="1", job="api-server"} 200 - http_requests{group="production", instance="1", job="app-server"} 600 +eval instant at 50m http_requests_total AND ON (dummy) vector(1) + http_requests_total{group="canary", instance="0", job="api-server"} 300 + http_requests_total{group="canary", instance="0", job="app-server"} 700 + http_requests_total{group="canary", instance="1", job="api-server"} 400 + http_requests_total{group="canary", instance="1", job="app-server"} 800 + http_requests_total{group="production", instance="0", job="api-server"} 100 + http_requests_total{group="production", instance="0", job="app-server"} 500 + http_requests_total{group="production", instance="1", job="api-server"} 200 + http_requests_total{group="production", instance="1", job="app-server"} 600 -eval instant at 50m http_requests AND IGNORING (group, instance, job) vector(1) - http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="canary", instance="0", job="app-server"} 700 - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="1", job="app-server"} 800 - http_requests{group="production", instance="0", job="api-server"} 100 - http_requests{group="production", instance="0", job="app-server"} 500 - http_requests{group="production", instance="1", job="api-server"} 200 - http_requests{group="production", instance="1", job="app-server"} 600 +eval instant at 50m http_requests_total AND IGNORING (group, instance, job) vector(1) + http_requests_total{group="canary", instance="0", job="api-server"} 300 + http_requests_total{group="canary", instance="0", job="app-server"} 700 + http_requests_total{group="canary", instance="1", job="api-server"} 400 + http_requests_total{group="canary", instance="1", job="app-server"} 
800 + http_requests_total{group="production", instance="0", job="api-server"} 100 + http_requests_total{group="production", instance="0", job="app-server"} 500 + http_requests_total{group="production", instance="1", job="api-server"} 200 + http_requests_total{group="production", instance="1", job="app-server"} 600 # Comparisons. -eval instant at 50m SUM(http_requests) BY (job) > 1000 +eval instant at 50m SUM(http_requests_total) BY (job) > 1000 {job="app-server"} 2600 -eval instant at 50m 1000 < SUM(http_requests) BY (job) +eval instant at 50m 1000 < SUM(http_requests_total) BY (job) {job="app-server"} 2600 -eval instant at 50m SUM(http_requests) BY (job) <= 1000 +eval instant at 50m SUM(http_requests_total) BY (job) <= 1000 {job="api-server"} 1000 -eval instant at 50m SUM(http_requests) BY (job) != 1000 +eval instant at 50m SUM(http_requests_total) BY (job) != 1000 {job="app-server"} 2600 -eval instant at 50m SUM(http_requests) BY (job) == 1000 +eval instant at 50m SUM(http_requests_total) BY (job) == 1000 {job="api-server"} 1000 -eval instant at 50m SUM(http_requests) BY (job) == bool 1000 +eval instant at 50m SUM(http_requests_total) BY (job) == bool 1000 {job="api-server"} 1 {job="app-server"} 0 -eval instant at 50m SUM(http_requests) BY (job) == bool SUM(http_requests) BY (job) +eval instant at 50m SUM(http_requests_total) BY (job) == bool SUM(http_requests_total) BY (job) {job="api-server"} 1 {job="app-server"} 1 -eval instant at 50m SUM(http_requests) BY (job) != bool SUM(http_requests) BY (job) +eval instant at 50m SUM(http_requests_total) BY (job) != bool SUM(http_requests_total) BY (job) {job="api-server"} 0 {job="app-server"} 0 @@ -284,9 +285,29 @@ eval instant at 50m 0 == bool 1 eval instant at 50m 1 == bool 1 1 -eval instant at 50m http_requests{job="api-server", instance="0", group="production"} == bool 100 +eval instant at 50m http_requests_total{job="api-server", instance="0", group="production"} == bool 100 {job="api-server", instance="0", group="production"} 1 +# The histogram is ignored here so the result doesn't change but it has an info annotation now. +eval_info instant at 5m {job="app-server"} == 80 + http_requests_total{group="canary", instance="1", job="app-server"} 80 + +eval_info instant at 5m http_requests_histogram != 80 + +eval_info instant at 5m http_requests_histogram > 80 + +eval_info instant at 5m http_requests_histogram < 80 + +eval_info instant at 5m http_requests_histogram >= 80 + +eval_info instant at 5m http_requests_histogram <= 80 + +# Should produce valid results in case of (in)equality between two histograms. +eval instant at 5m http_requests_histogram == http_requests_histogram + http_requests_histogram{job="app-server", instance="1", group="production"} {{schema:1 sum:15 count:10 buckets:[3 2 5 7 9]}} + +eval instant at 5m http_requests_histogram != http_requests_histogram + # group_left/group_right. clear @@ -308,65 +329,65 @@ load 5m threshold{instance="abc",job="node",target="a@b.com"} 0 # Copy machine role to node variable. 
-eval instant at 5m node_role * on (instance) group_right (role) node_var
+eval instant at 1m node_role * on (instance) group_right (role) node_var
   {instance="abc",job="node",role="prometheus"} 2

-eval instant at 5m node_var * on (instance) group_left (role) node_role
+eval instant at 1m node_var * on (instance) group_left (role) node_role
   {instance="abc",job="node",role="prometheus"} 2

-eval instant at 5m node_var * ignoring (role) group_left (role) node_role
+eval instant at 1m node_var * ignoring (role) group_left (role) node_role
   {instance="abc",job="node",role="prometheus"} 2

-eval instant at 5m node_role * ignoring (role) group_right (role) node_var
+eval instant at 1m node_role * ignoring (role) group_right (role) node_var
   {instance="abc",job="node",role="prometheus"} 2

 # Copy machine role to node variable with instrumentation labels.
-eval instant at 5m node_cpu * ignoring (role, mode) group_left (role) node_role
+eval instant at 1m node_cpu * ignoring (role, mode) group_left (role) node_role
   {instance="abc",job="node",mode="idle",role="prometheus"} 3
   {instance="abc",job="node",mode="user",role="prometheus"} 1

-eval instant at 5m node_cpu * on (instance) group_left (role) node_role
+eval instant at 1m node_cpu * on (instance) group_left (role) node_role
   {instance="abc",job="node",mode="idle",role="prometheus"} 3
   {instance="abc",job="node",mode="user",role="prometheus"} 1

 # Ratio of total.
-eval instant at 5m node_cpu / on (instance) group_left sum by (instance,job)(node_cpu)
+eval instant at 1m node_cpu / on (instance) group_left sum by (instance,job)(node_cpu)
   {instance="abc",job="node",mode="idle"} .75
   {instance="abc",job="node",mode="user"} .25
   {instance="def",job="node",mode="idle"} .80
   {instance="def",job="node",mode="user"} .20

-eval instant at 5m sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu)
+eval instant at 1m sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu)
   {job="node",mode="idle"} 0.7857142857142857
   {job="node",mode="user"} 0.21428571428571427

-eval instant at 5m sum(sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu))
+eval instant at 1m sum(sum by (mode, job)(node_cpu) / on (job) group_left sum by (job)(node_cpu))
   {} 1.0

-eval instant at 5m node_cpu / ignoring (mode) group_left sum without (mode)(node_cpu)
+eval instant at 1m node_cpu / ignoring (mode) group_left sum without (mode)(node_cpu)
   {instance="abc",job="node",mode="idle"} .75
   {instance="abc",job="node",mode="user"} .25
   {instance="def",job="node",mode="idle"} .80
   {instance="def",job="node",mode="user"} .20

-eval instant at 5m node_cpu / ignoring (mode) group_left(dummy) sum without (mode)(node_cpu)
+eval instant at 1m node_cpu / ignoring (mode) group_left(dummy) sum without (mode)(node_cpu)
   {instance="abc",job="node",mode="idle"} .75
   {instance="abc",job="node",mode="user"} .25
   {instance="def",job="node",mode="idle"} .80
   {instance="def",job="node",mode="user"} .20

-eval instant at 5m sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu)
+eval instant at 1m sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu)
   {job="node",mode="idle"} 0.7857142857142857
   {job="node",mode="user"} 0.21428571428571427

-eval instant at 5m sum(sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu))
+eval instant at 1m sum(sum without (instance)(node_cpu) / ignoring (mode) group_left sum without (instance, mode)(node_cpu))
   {} 1.0
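A side note for readers following these group_left/group_right cases: the pairing hinges on a matching key computed from the on()/ignoring() labels, and group_left(role) then copies the extra label from the "one" side onto the result. A minimal sketch of that signature idea, assuming labels are plain string maps (the helper below is invented for illustration, not the engine's code):

package main

import (
	"fmt"
	"sort"
	"strings"
)

// signature reduces a label set to the key used for vector matching:
// with on=true only the listed labels are kept; with on=false (ignoring)
// the listed labels are dropped and all others are kept.
func signature(lbls map[string]string, names []string, on bool) string {
	listed := make(map[string]bool, len(names))
	for _, n := range names {
		listed[n] = true
	}
	var parts []string
	for k, v := range lbls {
		if listed[k] == on {
			parts = append(parts, k+"="+v)
		}
	}
	sort.Strings(parts) // deterministic key
	return strings.Join(parts, ",")
}

func main() {
	nodeCPU := map[string]string{"instance": "abc", "job": "node", "mode": "idle"}
	nodeRole := map[string]string{"instance": "abc", "job": "node", "role": "prometheus"}
	// on (instance): both sides reduce to the same key, so the samples pair
	// up many-to-one and group_left (role) copies role onto the result.
	fmt.Println(signature(nodeCPU, []string{"instance"}, true) ==
		signature(nodeRole, []string{"instance"}, true)) // true
}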
 # Copy over label from metric with no matching labels, without having to list cross-job target labels ('job' here).
-eval instant at 5m node_cpu + on(dummy) group_left(foo) random*0
+eval instant at 1m node_cpu + on(dummy) group_left(foo) random*0
   {instance="abc",job="node",mode="idle",foo="bar"} 3
   {instance="abc",job="node",mode="user",foo="bar"} 1
   {instance="def",job="node",mode="idle",foo="bar"} 8
@@ -374,12 +395,12 @@ eval instant at 5m node_cpu + on(dummy) group_left(foo) random*0

 # Use threshold from metric, and copy over target.
-eval instant at 5m node_cpu > on(job, instance) group_left(target) threshold
+eval instant at 1m node_cpu > on(job, instance) group_left(target) threshold
   node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3
   node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1

 # Use threshold from metric, and a default (1) if it's not present.
-eval instant at 5m node_cpu > on(job, instance) group_left(target) (threshold or on (job, instance) (sum by (job, instance)(node_cpu) * 0 + 1))
+eval instant at 1m node_cpu > on(job, instance) group_left(target) (threshold or on (job, instance) (sum by (job, instance)(node_cpu) * 0 + 1))
   node_cpu{instance="abc",job="node",mode="idle",target="a@b.com"} 3
   node_cpu{instance="abc",job="node",mode="user",target="a@b.com"} 1
   node_cpu{instance="def",job="node",mode="idle"} 8
@@ -387,37 +408,37 @@ eval instant at 5m node_cpu > on(job, instance) group_left(target) (threshold or

 # Check that binops drop the metric name.
-eval instant at 5m node_cpu + 2
+eval instant at 1m node_cpu + 2
   {instance="abc",job="node",mode="idle"} 5
   {instance="abc",job="node",mode="user"} 3
   {instance="def",job="node",mode="idle"} 10
   {instance="def",job="node",mode="user"} 4

-eval instant at 5m node_cpu - 2
+eval instant at 1m node_cpu - 2
   {instance="abc",job="node",mode="idle"} 1
   {instance="abc",job="node",mode="user"} -1
   {instance="def",job="node",mode="idle"} 6
   {instance="def",job="node",mode="user"} 0

-eval instant at 5m node_cpu / 2
+eval instant at 1m node_cpu / 2
   {instance="abc",job="node",mode="idle"} 1.5
   {instance="abc",job="node",mode="user"} 0.5
   {instance="def",job="node",mode="idle"} 4
   {instance="def",job="node",mode="user"} 1

-eval instant at 5m node_cpu * 2
+eval instant at 1m node_cpu * 2
   {instance="abc",job="node",mode="idle"} 6
   {instance="abc",job="node",mode="user"} 2
   {instance="def",job="node",mode="idle"} 16
   {instance="def",job="node",mode="user"} 4

-eval instant at 5m node_cpu ^ 2
+eval instant at 1m node_cpu ^ 2
   {instance="abc",job="node",mode="idle"} 9
   {instance="abc",job="node",mode="user"} 1
   {instance="def",job="node",mode="idle"} 64
   {instance="def",job="node",mode="user"} 4

-eval instant at 5m node_cpu % 2
+eval instant at 1m node_cpu % 2
   {instance="abc",job="node",mode="idle"} 1
   {instance="abc",job="node",mode="user"} 1
   {instance="def",job="node",mode="idle"} 0
@@ -432,14 +453,14 @@ load 5m
   metricB{baz="meh"} 4

 # On with no labels, for metrics with no common labels.
-eval instant at 5m random + on() metricA
+eval instant at 1m random + on() metricA
   {} 5

 # Ignoring with no labels is the same as no ignoring.
-eval instant at 5m metricA + ignoring() metricB
+eval instant at 1m metricA + ignoring() metricB
   {baz="meh"} 7

-eval instant at 5m metricA + metricB
+eval instant at 1m metricA + metricB
   {baz="meh"} 7

 clear
@@ -457,16 +478,16 @@ load 5m
   test_total{instance="localhost"} 50
   test_smaller{instance="localhost"} 10

-eval instant at 5m test_total > bool test_smaller
+eval instant at 1m test_total > bool test_smaller
   {instance="localhost"} 1

-eval instant at 5m test_total > test_smaller
+eval instant at 1m test_total > test_smaller
   test_total{instance="localhost"} 50

-eval instant at 5m test_total < bool test_smaller
+eval instant at 1m test_total < bool test_smaller
   {instance="localhost"} 0

-eval instant at 5m test_total < test_smaller
+eval instant at 1m test_total < test_smaller

 clear
@@ -476,14 +497,360 @@ load 5m
   trigx{} 20
   trigNaN{} NaN

-eval instant at 5m trigy atan2 trigx
+eval instant at 1m trigy atan2 trigx
   {} 0.4636476090008061

-eval instant at 5m trigy atan2 trigNaN
+eval instant at 1m trigy atan2 trigNaN
   {} NaN

-eval instant at 5m 10 atan2 20
+eval instant at 1m 10 atan2 20
   0.4636476090008061

-eval instant at 5m 10 atan2 NaN
+eval instant at 1m 10 atan2 NaN
   NaN
+
+clear
+
+# Test comparison operations with floats and histograms.
+load 6m
+  left_floats 1 2 _ _ 3 stale 4 5 NaN Inf -Inf
+  right_floats 4 _ _ 5 3 7 -1 20 NaN Inf -Inf
+  left_histograms {{schema:3 sum:4 count:4 buckets:[1 2 1]}} {{schema:3 sum:4.5 count:5 buckets:[1 3 1]}} _ _ {{schema:3 sum:4.5 count:5 buckets:[1 3 1]}}
+  right_histograms {{schema:3 sum:4 count:4 buckets:[1 2 1]}} {{schema:3 sum:4 count:4 buckets:[1 2 1]}} {{schema:3 sum:4 count:4 buckets:[1 2 1]}} _ _
+  right_floats_for_histograms 0 -1 2 3 4
+
+eval range from 0 to 60m step 6m left_floats == right_floats
+  left_floats _ _ _ _ 3 _ _ _ _ Inf -Inf
+
+eval range from 0 to 60m step 6m left_floats == bool right_floats
+  {} 0 _ _ _ 1 _ 0 0 0 1 1
+
+eval range from 0 to 60m step 6m left_floats == does_not_match
+  # No results.
+
+eval range from 0 to 24m step 6m left_histograms == right_histograms
+  left_histograms {{schema:3 sum:4 count:4 buckets:[1 2 1]}} _ _ _ _
+
+eval range from 0 to 24m step 6m left_histograms == bool right_histograms
+  {} 1 0 _ _ _
+
+eval_info range from 0 to 24m step 6m left_histograms == right_floats_for_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms == bool right_floats_for_histograms
+  # No results.
+
+eval range from 0 to 60m step 6m left_floats != right_floats
+  left_floats 1 _ _ _ _ _ 4 5 NaN _ _
+
+eval range from 0 to 60m step 6m left_floats != bool right_floats
+  {} 1 _ _ _ 0 _ 1 1 1 0 0
+
+eval range from 0 to 24m step 6m left_histograms != right_histograms
+  left_histograms _ {{schema:3 sum:4.5 count:5 buckets:[1 3 1]}} _ _ _
+
+eval range from 0 to 24m step 6m left_histograms != bool right_histograms
+  {} 0 1 _ _ _
+
+eval_info range from 0 to 24m step 6m left_histograms != right_floats_for_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms != bool right_floats_for_histograms
+  # No results.
+
+eval range from 0 to 60m step 6m left_floats > right_floats
+  left_floats _ _ _ _ _ _ 4 _ _ _ _
+
+eval range from 0 to 60m step 6m left_floats > bool right_floats
+  {} 0 _ _ _ 0 _ 1 0 0 0 0
+
+eval_info range from 0 to 24m step 6m left_histograms > right_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms > bool right_histograms
+  # No results.
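The pattern in the histogram cases above is uniform: == and != are defined between two native histograms, while ordering operators, and any comparison mixing a histogram with a float, drop the pair from the result and emit an info annotation (hence eval_info with no results). A toy sketch of that dispatch rule, with deliberately simplified parameters (not the engine's actual representation):

package main

import "fmt"

// compareDefined reports whether comparison op is defined for the operand
// kinds: a histogram-histogram pair only supports == and !=; any pair mixing
// a histogram with a float supports no comparison at all (the engine drops
// the pair and attaches an info annotation); float-float supports everything.
func compareDefined(op string, leftIsHist, rightIsHist bool) bool {
	switch {
	case leftIsHist && rightIsHist:
		return op == "==" || op == "!="
	case leftIsHist || rightIsHist:
		return false
	default:
		return true
	}
}

func main() {
	fmt.Println(compareDefined("==", true, true)) // true: pair kept in the result
	fmt.Println(compareDefined(">", true, true))  // false: dropped with info annotation
	fmt.Println(compareDefined(">", true, false)) // false: histogram vs float
}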
+
+eval_info range from 0 to 24m step 6m left_histograms > right_floats_for_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms > bool right_floats_for_histograms
+  # No results.
+
+eval range from 0 to 60m step 6m left_floats >= right_floats
+  left_floats _ _ _ _ 3 _ 4 _ _ Inf -Inf
+
+eval range from 0 to 60m step 6m left_floats >= bool right_floats
+  {} 0 _ _ _ 1 _ 1 0 0 1 1
+
+eval_info range from 0 to 24m step 6m left_histograms >= right_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms >= bool right_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms >= right_floats_for_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms >= bool right_floats_for_histograms
+  # No results.
+
+eval range from 0 to 60m step 6m left_floats < right_floats
+  left_floats 1 _ _ _ _ _ _ 5 _ _ _
+
+eval range from 0 to 60m step 6m left_floats < bool right_floats
+  {} 1 _ _ _ 0 _ 0 1 0 0 0
+
+eval_info range from 0 to 24m step 6m left_histograms < right_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms < bool right_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms < right_floats_for_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms < bool right_floats_for_histograms
+  # No results.
+
+eval range from 0 to 60m step 6m left_floats <= right_floats
+  left_floats 1 _ _ _ 3 _ _ 5 _ Inf -Inf
+
+eval range from 0 to 60m step 6m left_floats <= bool right_floats
+  {} 1 _ _ _ 1 _ 0 1 0 1 1
+
+eval_info range from 0 to 24m step 6m left_histograms <= right_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms <= bool right_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms <= right_floats_for_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms <= bool right_floats_for_histograms
+  # No results.
+
+# Vector / scalar combinations with scalar on right side
+eval range from 0 to 60m step 6m left_floats == 3
+  left_floats _ _ _ _ 3 _ _ _ _ _ _
+
+eval range from 0 to 60m step 6m left_floats != 3
+  left_floats 1 2 _ _ _ _ 4 5 NaN Inf -Inf
+
+eval range from 0 to 60m step 6m left_floats > 3
+  left_floats _ _ _ _ _ _ 4 5 _ Inf _
+
+eval range from 0 to 60m step 6m left_floats >= 3
+  left_floats _ _ _ _ 3 _ 4 5 _ Inf _
+
+eval range from 0 to 60m step 6m left_floats < 3
+  left_floats 1 2 _ _ _ _ _ _ _ _ -Inf
+
+eval range from 0 to 60m step 6m left_floats <= 3
+  left_floats 1 2 _ _ 3 _ _ _ _ _ -Inf
+
+eval range from 0 to 60m step 6m left_floats == bool 3
+  {} 0 0 _ _ 1 _ 0 0 0 0 0
+
+eval range from 0 to 60m step 6m left_floats == Inf
+  left_floats _ _ _ _ _ _ _ _ _ Inf _
+
+eval range from 0 to 60m step 6m left_floats == bool Inf
+  {} 0 0 _ _ 0 _ 0 0 0 1 0
+
+eval range from 0 to 60m step 6m left_floats == NaN
+  # No results.
+
+eval range from 0 to 60m step 6m left_floats == bool NaN
+  {} 0 0 _ _ 0 _ 0 0 0 0 0
+
+eval_info range from 0 to 24m step 6m left_histograms == 3
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms == 0
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms != 3
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms != 0
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms > 3
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms > 0
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms >= 3
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms >= 0
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms < 3
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms < 0
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms <= 3
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms <= 0
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms == bool 3
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms == bool 0
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms != bool 3
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms != bool 0
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms > bool 3
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms > bool 0
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms >= bool 3
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms >= bool 0
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms < bool 3
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms < bool 0
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms <= bool 3
+  # No results.
+
+eval_info range from 0 to 24m step 6m left_histograms <= bool 0
+  # No results.
+
+# Vector / scalar combinations with scalar on left side
+eval range from 0 to 60m step 6m 3 == left_floats
+  left_floats _ _ _ _ 3 _ _ _ _ _ _
+
+eval range from 0 to 60m step 6m 3 != left_floats
+  left_floats 1 2 _ _ _ _ 4 5 NaN Inf -Inf
+
+eval range from 0 to 60m step 6m 3 < left_floats
+  left_floats _ _ _ _ _ _ 4 5 _ Inf _
+
+eval range from 0 to 60m step 6m 3 <= left_floats
+  left_floats _ _ _ _ 3 _ 4 5 _ Inf _
+
+eval range from 0 to 60m step 6m 3 > left_floats
+  left_floats 1 2 _ _ _ _ _ _ _ _ -Inf
+
+eval range from 0 to 60m step 6m 3 >= left_floats
+  left_floats 1 2 _ _ 3 _ _ _ _ _ -Inf
+
+eval range from 0 to 60m step 6m 3 == bool left_floats
+  {} 0 0 _ _ 1 _ 0 0 0 0 0
+
+eval range from 0 to 60m step 6m Inf == left_floats
+  left_floats _ _ _ _ _ _ _ _ _ Inf _
+
+eval range from 0 to 60m step 6m Inf == bool left_floats
+  {} 0 0 _ _ 0 _ 0 0 0 1 0
+
+eval range from 0 to 60m step 6m NaN == left_floats
+  # No results.
+
+eval range from 0 to 60m step 6m NaN == bool left_floats
+  {} 0 0 _ _ 0 _ 0 0 0 0 0
+
+eval_info range from 0 to 24m step 6m 3 == left_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m 0 == left_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m 3 != left_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m 0 != left_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m 3 < left_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m 0 < left_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m 3 <= left_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m 0 <= left_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m 3 > left_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m 0 > left_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m 3 >= left_histograms
+  # No results.
+
+eval_info range from 0 to 24m step 6m 0 >= left_histograms
+  # No results.
+
+clear
+
+# Test completely discarding or completely including series in results with "and on"
+load_with_nhcb 5m
+  testhistogram_bucket{le="0.1", id="1"} 0+5x10
+  testhistogram_bucket{le="0.2", id="1"} 0+7x10
+  testhistogram_bucket{le="+Inf", id="1"} 0+12x10
+  testhistogram_bucket{le="0.1", id="2"} 0+4x10
+  testhistogram_bucket{le="0.2", id="2"} 0+6x10
+  testhistogram_bucket{le="+Inf", id="2"} 0+11x10
+
+# Include all series when "and on" with non-empty vector.
+eval instant at 10m (testhistogram_bucket) and on() (vector(1) == 1)
+  {__name__="testhistogram_bucket", le="0.1", id="1"} 10.0
+  {__name__="testhistogram_bucket", le="0.2", id="1"} 14.0
+  {__name__="testhistogram_bucket", le="+Inf", id="1"} 24.0
+  {__name__="testhistogram_bucket", le="0.1", id="2"} 8.0
+  {__name__="testhistogram_bucket", le="0.2", id="2"} 12.0
+  {__name__="testhistogram_bucket", le="+Inf", id="2"} 22.0
+
+eval range from 0 to 10m step 5m (testhistogram_bucket) and on() (vector(1) == 1)
+  {__name__="testhistogram_bucket", le="0.1", id="1"} 0.0 5.0 10.0
+  {__name__="testhistogram_bucket", le="0.2", id="1"} 0.0 7.0 14.0
+  {__name__="testhistogram_bucket", le="+Inf", id="1"} 0.0 12.0 24.0
+  {__name__="testhistogram_bucket", le="0.1", id="2"} 0.0 4.0 8.0
+  {__name__="testhistogram_bucket", le="0.2", id="2"} 0.0 6.0 12.0
+  {__name__="testhistogram_bucket", le="+Inf", id="2"} 0.0 11.0 22.0
+
+# Exclude all series when "and on" with empty vector.
+eval instant at 10m (testhistogram_bucket) and on() (vector(-1) == 1)
+
+eval range from 0 to 10m step 5m (testhistogram_bucket) and on() (vector(-1) == 1)
+
+# Include all native histogram series when "and on" with non-empty vector.
+eval instant at 10m (testhistogram) and on() (vector(1) == 1)
+  {__name__="testhistogram", id="1"} {{schema:-53 sum:0 count:24 buckets:[10 4 10] custom_values:[0.1 0.2]}}
+  {__name__="testhistogram", id="2"} {{schema:-53 sum:0 count:22 buckets:[8 4 10] custom_values:[0.1 0.2]}}
+
+eval range from 0 to 10m step 5m (testhistogram) and on() (vector(1) == 1)
+  {__name__="testhistogram", id="1"} {{schema:-53 sum:0 count:0 custom_values:[0.1 0.2]}} {{schema:-53 sum:0 count:12 buckets:[5 2 5] custom_values:[0.1 0.2]}} {{schema:-53 sum:0 count:24 buckets:[10 4 10] custom_values:[0.1 0.2]}}
+  {__name__="testhistogram", id="2"} {{schema:-53 sum:0 count:0 custom_values:[0.1 0.2]}} {{schema:-53 sum:0 count:11 buckets:[4 2 5] custom_values:[0.1 0.2]}} {{schema:-53 sum:0 count:22 buckets:[8 4 10] custom_values:[0.1 0.2]}}
+
+# Exclude all native histogram series when "and on" with empty vector.
+eval instant at 10m (testhistogram) and on() (vector(-1) == 1)
+
+eval range from 0 to 10m step 5m (testhistogram) and on() (vector(-1) == 1)
+
+clear
diff --git a/promql/promqltest/testdata/range_queries.test b/promql/promqltest/testdata/range_queries.test
index e695109602..3bfe2ce4cb 100644
--- a/promql/promqltest/testdata/range_queries.test
+++ b/promql/promqltest/testdata/range_queries.test
@@ -1,18 +1,18 @@
 # sum_over_time with all values
-load 30s
+load 15s
   bar 0 1 10 100 1000

-eval range from 0 to 2m step 1m sum_over_time(bar[30s])
+eval range from 0 to 1m step 30s sum_over_time(bar[30s])
   {} 0 11 1100

 clear

 # sum_over_time with trailing values
-load 30s
+load 15s
   bar 0 1 10 100 1000 0 0 0 0

 eval range from 0 to 2m step 1m sum_over_time(bar[30s])
-  {} 0 11 1100
+  {} 0 1100 0

 clear

@@ -21,15 +21,15 @@ load 30s
   bar 0 1 10 100 1000 10000 100000 1000000 10000000

 eval range from 0 to 4m step 1m sum_over_time(bar[30s])
-  {} 0 11 1100 110000 11000000
+  {} 0 10 1000 100000 10000000

 clear

 # sum_over_time with all values random
-load 30s
+load 15s
   bar 5 17 42 2 7 905 51

-eval range from 0 to 3m step 1m sum_over_time(bar[30s])
+eval range from 0 to 90s step 30s sum_over_time(bar[30s])
   {} 5 59 9 956

 clear
diff --git a/promql/promqltest/testdata/selectors.test b/promql/promqltest/testdata/selectors.test
index 6742d83e99..3a1f5263dd 100644
--- a/promql/promqltest/testdata/selectors.test
+++ b/promql/promqltest/testdata/selectors.test
@@ -1,109 +1,109 @@
 load 10s
-  http_requests{job="api-server", instance="0", group="production"} 0+10x1000 100+30x1000
-  http_requests{job="api-server", instance="1", group="production"} 0+20x1000 200+30x1000
-  http_requests{job="api-server", instance="0", group="canary"} 0+30x1000 300+80x1000
-  http_requests{job="api-server", instance="1", group="canary"} 0+40x2000
+  http_requests_total{job="api-server", instance="0", group="production"} 0+10x1000 100+30x1000
+  http_requests_total{job="api-server", instance="1", group="production"} 0+20x1000 200+30x1000
+  http_requests_total{job="api-server", instance="0", group="canary"} 0+30x1000 300+80x1000
+  http_requests_total{job="api-server", instance="1", group="canary"} 0+40x2000

-eval instant at 8000s rate(http_requests[1m])
+eval instant at 8000s rate(http_requests_total[1m])
   {job="api-server", instance="0", group="production"} 1
   {job="api-server", instance="1", group="production"} 2
   {job="api-server", instance="0", group="canary"} 3
   {job="api-server", instance="1", group="canary"} 4

-eval instant at 18000s rate(http_requests[1m])
+eval instant at 18000s rate(http_requests_total[1m])
   {job="api-server", instance="0", group="production"} 3
   {job="api-server", instance="1", group="production"} 3
   {job="api-server", instance="0", group="canary"} 8
   {job="api-server", instance="1", group="canary"} 4

-eval instant at 8000s rate(http_requests{group=~"pro.*"}[1m])
+eval instant at 8000s rate(http_requests_total{group=~"pro.*"}[1m])
   {job="api-server", instance="0", group="production"} 1
   {job="api-server", instance="1", group="production"} 2

-eval instant at 18000s rate(http_requests{group=~".*ry", instance="1"}[1m])
+eval instant at 18000s rate(http_requests_total{group=~".*ry", instance="1"}[1m])
   {job="api-server", instance="1", group="canary"} 4

-eval instant at 18000s rate(http_requests{instance!="3"}[1m] offset 10000s)
+eval instant at 18000s rate(http_requests_total{instance!="3"}[1m] offset 10000s)
   {job="api-server", instance="0", group="production"} 1
   {job="api-server", instance="1", group="production"} 2
   {job="api-server", instance="0", group="canary"} 3
   {job="api-server", instance="1", group="canary"} 4

-eval instant at 4000s rate(http_requests{instance!="3"}[1m] offset -4000s)
+eval instant at 4000s rate(http_requests_total{instance!="3"}[1m] offset -4000s)
   {job="api-server", instance="0", group="production"} 1
   {job="api-server", instance="1", group="production"} 2
   {job="api-server", instance="0", group="canary"} 3
   {job="api-server", instance="1", group="canary"} 4

-eval instant at 18000s rate(http_requests[40s]) - rate(http_requests[1m] offset 10000s)
+eval instant at 18000s rate(http_requests_total[40s]) - rate(http_requests_total[1m] offset 10000s)
   {job="api-server", instance="0", group="production"} 2
   {job="api-server", instance="1", group="production"} 1
   {job="api-server", instance="0", group="canary"} 5
   {job="api-server", instance="1", group="canary"} 0

 # https://github.com/prometheus/prometheus/issues/3575
-eval instant at 0s http_requests{foo!="bar"}
-  http_requests{job="api-server", instance="0", group="production"} 0
-  http_requests{job="api-server", instance="1", group="production"} 0
-  http_requests{job="api-server", instance="0", group="canary"} 0
-  http_requests{job="api-server", instance="1", group="canary"} 0
+eval instant at 0s http_requests_total{foo!="bar"}
+  http_requests_total{job="api-server", instance="0", group="production"} 0
+  http_requests_total{job="api-server", instance="1", group="production"} 0
+  http_requests_total{job="api-server", instance="0", group="canary"} 0
+  http_requests_total{job="api-server", instance="1", group="canary"} 0

-eval instant at 0s http_requests{foo!="bar", job="api-server"}
-  http_requests{job="api-server", instance="0", group="production"} 0
-  http_requests{job="api-server", instance="1", group="production"} 0
-  http_requests{job="api-server", instance="0", group="canary"} 0
-  http_requests{job="api-server", instance="1", group="canary"} 0
+eval instant at 0s http_requests_total{foo!="bar", job="api-server"}
+  http_requests_total{job="api-server", instance="0", group="production"} 0
+  http_requests_total{job="api-server", instance="1", group="production"} 0
+  http_requests_total{job="api-server", instance="0", group="canary"} 0
+  http_requests_total{job="api-server", instance="1", group="canary"} 0

-eval instant at 0s http_requests{foo!~"bar", job="api-server"}
-  http_requests{job="api-server", instance="0", group="production"} 0
-  http_requests{job="api-server", instance="1", group="production"} 0
-  http_requests{job="api-server", instance="0", group="canary"} 0
-  http_requests{job="api-server", instance="1", group="canary"} 0
+eval instant at 0s http_requests_total{foo!~"bar", job="api-server"}
+  http_requests_total{job="api-server", instance="0", group="production"} 0
+  http_requests_total{job="api-server", instance="1", group="production"} 0
+  http_requests_total{job="api-server", instance="0", group="canary"} 0
+  http_requests_total{job="api-server", instance="1", group="canary"} 0

-eval instant at 0s http_requests{foo!~"bar", job="api-server", instance="1", x!="y", z="", group!=""}
-  http_requests{job="api-server", instance="1", group="production"} 0
-  http_requests{job="api-server", instance="1", group="canary"} 0
+eval instant at 0s http_requests_total{foo!~"bar", job="api-server", instance="1", x!="y", z="", group!=""}
+  http_requests_total{job="api-server", instance="1", group="production"} 0
+  http_requests_total{job="api-server", instance="1", group="canary"} 0

 # https://github.com/prometheus/prometheus/issues/7994
rate(http_requests{group=~"(?i:PRO).*"}[1m]) +eval instant at 8000s rate(http_requests_total{group=~"(?i:PRO).*"}[1m]) {job="api-server", instance="0", group="production"} 1 {job="api-server", instance="1", group="production"} 2 -eval instant at 8000s rate(http_requests{group=~".*?(?i:PRO).*"}[1m]) +eval instant at 8000s rate(http_requests_total{group=~".*?(?i:PRO).*"}[1m]) {job="api-server", instance="0", group="production"} 1 {job="api-server", instance="1", group="production"} 2 -eval instant at 8000s rate(http_requests{group=~".*(?i:DUC).*"}[1m]) +eval instant at 8000s rate(http_requests_total{group=~".*(?i:DUC).*"}[1m]) {job="api-server", instance="0", group="production"} 1 {job="api-server", instance="1", group="production"} 2 -eval instant at 8000s rate(http_requests{group=~".*(?i:TION)"}[1m]) +eval instant at 8000s rate(http_requests_total{group=~".*(?i:TION)"}[1m]) {job="api-server", instance="0", group="production"} 1 {job="api-server", instance="1", group="production"} 2 -eval instant at 8000s rate(http_requests{group=~".*(?i:TION).*?"}[1m]) +eval instant at 8000s rate(http_requests_total{group=~".*(?i:TION).*?"}[1m]) {job="api-server", instance="0", group="production"} 1 {job="api-server", instance="1", group="production"} 2 -eval instant at 8000s rate(http_requests{group=~"((?i)PRO).*"}[1m]) +eval instant at 8000s rate(http_requests_total{group=~"((?i)PRO).*"}[1m]) {job="api-server", instance="0", group="production"} 1 {job="api-server", instance="1", group="production"} 2 -eval instant at 8000s rate(http_requests{group=~".*((?i)DUC).*"}[1m]) +eval instant at 8000s rate(http_requests_total{group=~".*((?i)DUC).*"}[1m]) {job="api-server", instance="0", group="production"} 1 {job="api-server", instance="1", group="production"} 2 -eval instant at 8000s rate(http_requests{group=~".*((?i)TION)"}[1m]) +eval instant at 8000s rate(http_requests_total{group=~".*((?i)TION)"}[1m]) {job="api-server", instance="0", group="production"} 1 {job="api-server", instance="1", group="production"} 2 -eval instant at 8000s rate(http_requests{group=~"(?i:PRODUCTION)"}[1m]) +eval instant at 8000s rate(http_requests_total{group=~"(?i:PRODUCTION)"}[1m]) {job="api-server", instance="0", group="production"} 1 {job="api-server", instance="1", group="production"} 2 -eval instant at 8000s rate(http_requests{group=~".*(?i:C).*"}[1m]) +eval instant at 8000s rate(http_requests_total{group=~".*(?i:C).*"}[1m]) {job="api-server", instance="0", group="production"} 1 {job="api-server", instance="1", group="production"} 2 {job="api-server", instance="0", group="canary"} 3 @@ -133,14 +133,14 @@ load 5m label_grouping_test{a="a", b="abb"} 0+20x10 load 5m - http_requests{job="api-server", instance="0", group="production"} 0+10x10 - http_requests{job="api-server", instance="1", group="production"} 0+20x10 - http_requests{job="api-server", instance="0", group="canary"} 0+30x10 - http_requests{job="api-server", instance="1", group="canary"} 0+40x10 - http_requests{job="app-server", instance="0", group="production"} 0+50x10 - http_requests{job="app-server", instance="1", group="production"} 0+60x10 - http_requests{job="app-server", instance="0", group="canary"} 0+70x10 - http_requests{job="app-server", instance="1", group="canary"} 0+80x10 + http_requests_total{job="api-server", instance="0", group="production"} 0+10x10 + http_requests_total{job="api-server", instance="1", group="production"} 0+20x10 + http_requests_total{job="api-server", instance="0", group="canary"} 0+30x10 + http_requests_total{job="api-server", 
instance="1", group="canary"} 0+40x10 + http_requests_total{job="app-server", instance="0", group="production"} 0+50x10 + http_requests_total{job="app-server", instance="1", group="production"} 0+60x10 + http_requests_total{job="app-server", instance="0", group="canary"} 0+70x10 + http_requests_total{job="app-server", instance="1", group="canary"} 0+80x10 # Single-letter label names and values. eval instant at 50m x{y="testvalue"} @@ -148,14 +148,14 @@ eval instant at 50m x{y="testvalue"} # Basic Regex eval instant at 50m {__name__=~".+"} - http_requests{group="canary", instance="0", job="api-server"} 300 - http_requests{group="canary", instance="0", job="app-server"} 700 - http_requests{group="canary", instance="1", job="api-server"} 400 - http_requests{group="canary", instance="1", job="app-server"} 800 - http_requests{group="production", instance="0", job="api-server"} 100 - http_requests{group="production", instance="0", job="app-server"} 500 - http_requests{group="production", instance="1", job="api-server"} 200 - http_requests{group="production", instance="1", job="app-server"} 600 + http_requests_total{group="canary", instance="0", job="api-server"} 300 + http_requests_total{group="canary", instance="0", job="app-server"} 700 + http_requests_total{group="canary", instance="1", job="api-server"} 400 + http_requests_total{group="canary", instance="1", job="app-server"} 800 + http_requests_total{group="production", instance="0", job="api-server"} 100 + http_requests_total{group="production", instance="0", job="app-server"} 500 + http_requests_total{group="production", instance="1", job="api-server"} 200 + http_requests_total{group="production", instance="1", job="app-server"} 600 x{y="testvalue"} 100 label_grouping_test{a="a", b="abb"} 200 label_grouping_test{a="aa", b="bb"} 100 @@ -164,34 +164,34 @@ eval instant at 50m {__name__=~".+"} cpu_count{instance="0", type="numa"} 300 eval instant at 50m {job=~".+-server", job!~"api-.+"} - http_requests{group="canary", instance="0", job="app-server"} 700 - http_requests{group="canary", instance="1", job="app-server"} 800 - http_requests{group="production", instance="0", job="app-server"} 500 - http_requests{group="production", instance="1", job="app-server"} 600 + http_requests_total{group="canary", instance="0", job="app-server"} 700 + http_requests_total{group="canary", instance="1", job="app-server"} 800 + http_requests_total{group="production", instance="0", job="app-server"} 500 + http_requests_total{group="production", instance="1", job="app-server"} 600 -eval instant at 50m http_requests{group!="canary"} - http_requests{group="production", instance="1", job="app-server"} 600 - http_requests{group="production", instance="0", job="app-server"} 500 - http_requests{group="production", instance="1", job="api-server"} 200 - http_requests{group="production", instance="0", job="api-server"} 100 +eval instant at 50m http_requests_total{group!="canary"} + http_requests_total{group="production", instance="1", job="app-server"} 600 + http_requests_total{group="production", instance="0", job="app-server"} 500 + http_requests_total{group="production", instance="1", job="api-server"} 200 + http_requests_total{group="production", instance="0", job="api-server"} 100 -eval instant at 50m http_requests{job=~".+-server",group!="canary"} - http_requests{group="production", instance="1", job="app-server"} 600 - http_requests{group="production", instance="0", job="app-server"} 500 - http_requests{group="production", instance="1", job="api-server"} 200 - 
http_requests{group="production", instance="0", job="api-server"} 100 +eval instant at 50m http_requests_total{job=~".+-server",group!="canary"} + http_requests_total{group="production", instance="1", job="app-server"} 600 + http_requests_total{group="production", instance="0", job="app-server"} 500 + http_requests_total{group="production", instance="1", job="api-server"} 200 + http_requests_total{group="production", instance="0", job="api-server"} 100 -eval instant at 50m http_requests{job!~"api-.+",group!="canary"} - http_requests{group="production", instance="1", job="app-server"} 600 - http_requests{group="production", instance="0", job="app-server"} 500 +eval instant at 50m http_requests_total{job!~"api-.+",group!="canary"} + http_requests_total{group="production", instance="1", job="app-server"} 600 + http_requests_total{group="production", instance="0", job="app-server"} 500 -eval instant at 50m http_requests{group="production",job=~"api-.+"} - http_requests{group="production", instance="0", job="api-server"} 100 - http_requests{group="production", instance="1", job="api-server"} 200 +eval instant at 50m http_requests_total{group="production",job=~"api-.+"} + http_requests_total{group="production", instance="0", job="api-server"} 100 + http_requests_total{group="production", instance="1", job="api-server"} 200 -eval instant at 50m http_requests{group="production",job="api-server"} offset 5m - http_requests{group="production", instance="0", job="api-server"} 90 - http_requests{group="production", instance="1", job="api-server"} 180 +eval instant at 50m http_requests_total{group="production",job="api-server"} offset 5m + http_requests_total{group="production", instance="0", job="api-server"} 90 + http_requests_total{group="production", instance="1", job="api-server"} 180 clear diff --git a/promql/promqltest/testdata/staleness.test b/promql/promqltest/testdata/staleness.test index 4fdbc997b7..a48473d439 100644 --- a/promql/promqltest/testdata/staleness.test +++ b/promql/promqltest/testdata/staleness.test @@ -14,10 +14,10 @@ eval instant at 40s metric {__name__="metric"} 2 # It goes stale 5 minutes after the last sample. -eval instant at 330s metric +eval instant at 329s metric {__name__="metric"} 2 -eval instant at 331s metric +eval instant at 330s metric # Range vector ignores stale sample. @@ -30,9 +30,13 @@ eval instant at 10s count_over_time(metric[1s]) eval instant at 20s count_over_time(metric[1s]) eval instant at 20s count_over_time(metric[10s]) + +eval instant at 20s count_over_time(metric[20s]) {} 1 eval instant at 20s count_over_time(metric[10]) + +eval instant at 20s count_over_time(metric[20]) {} 1 @@ -48,7 +52,7 @@ eval instant at 0s metric eval instant at 150s metric {__name__="metric"} 0 -eval instant at 300s metric +eval instant at 299s metric {__name__="metric"} 0 -eval instant at 301s metric +eval instant at 300s metric diff --git a/promql/promqltest/testdata/subquery.test b/promql/promqltest/testdata/subquery.test index 1d338d9764..5a5e4e0092 100644 --- a/promql/promqltest/testdata/subquery.test +++ b/promql/promqltest/testdata/subquery.test @@ -1,41 +1,41 @@ load 10s - metric 1 2 + metric_total 1 2 # Evaluation before 0s gets no sample. -eval instant at 10s sum_over_time(metric[50s:10s]) +eval instant at 10s sum_over_time(metric_total[50s:10s]) {} 3 -eval instant at 10s sum_over_time(metric[50s:5s]) +eval instant at 10s sum_over_time(metric_total[50s:5s]) {} 4 # Every evaluation yields the last value, i.e. 
diff --git a/promql/promqltest/testdata/subquery.test b/promql/promqltest/testdata/subquery.test
index 1d338d9764..5a5e4e0092 100644
--- a/promql/promqltest/testdata/subquery.test
+++ b/promql/promqltest/testdata/subquery.test
@@ -1,41 +1,41 @@
 load 10s
-  metric 1 2
+  metric_total 1 2

 # Evaluation before 0s gets no sample.
-eval instant at 10s sum_over_time(metric[50s:10s])
+eval instant at 10s sum_over_time(metric_total[50s:10s])
   {} 3

-eval instant at 10s sum_over_time(metric[50s:5s])
+eval instant at 10s sum_over_time(metric_total[50s:5s])
   {} 4

 # Every evaluation yields the last value, i.e. 2
-eval instant at 5m sum_over_time(metric[50s:10s])
-  {} 12
+eval instant at 5m sum_over_time(metric_total[50s:10s])
+  {} 10

-# Series becomes stale at 5m10s (5m after last sample)
-# Hence subquery gets a single sample at 6m-50s=5m10s.
-eval instant at 6m sum_over_time(metric[50s:10s])
+# Series becomes stale at 5m10s (5m after last sample).
+# Hence subquery gets a single sample at 5m10s.
+eval instant at 5m59s sum_over_time(metric_total[60s:10s])
   {} 2

-eval instant at 10s rate(metric[20s:10s])
+eval instant at 10s rate(metric_total[20s:10s])
   {} 0.1

-eval instant at 20s rate(metric[20s:5s])
-  {} 0.05
+eval instant at 20s rate(metric_total[20s:5s])
+  {} 0.06666666666666667

 clear

 load 10s
-  http_requests{job="api-server", instance="1", group="production"} 0+20x1000 200+30x1000
-  http_requests{job="api-server", instance="0", group="production"} 0+10x1000 100+30x1000
-  http_requests{job="api-server", instance="0", group="canary"} 0+30x1000 300+80x1000
-  http_requests{job="api-server", instance="1", group="canary"} 0+40x2000
+  http_requests_total{job="api-server", instance="1", group="production"} 0+20x1000 200+30x1000
+  http_requests_total{job="api-server", instance="0", group="production"} 0+10x1000 100+30x1000
+  http_requests_total{job="api-server", instance="0", group="canary"} 0+30x1000 300+80x1000
+  http_requests_total{job="api-server", instance="1", group="canary"} 0+40x2000

-eval instant at 8000s rate(http_requests{group=~"pro.*"}[1m:10s])
+eval instant at 8000s rate(http_requests_total{group=~"pro.*"}[1m:10s])
   {job="api-server", instance="0", group="production"} 1
   {job="api-server", instance="1", group="production"} 2

-eval instant at 20000s avg_over_time(rate(http_requests[1m])[1m:1s])
+eval instant at 20000s avg_over_time(rate(http_requests_total[1m])[1m:1s])
   {job="api-server", instance="0", group="canary"} 8
   {job="api-server", instance="1", group="canary"} 4
   {job="api-server", instance="1", group="production"} 3
@@ -44,89 +44,93 @@ eval instant at 20000s avg_over_time(rate(http_requests[1m])[1m:1s])

 clear

 load 10s
-  metric1 0+1x1000
-  metric2 0+2x1000
-  metric3 0+3x1000
+  metric1_total 0+1x1000
+  metric2_total 0+2x1000
+  metric3_total 0+3x1000

-eval instant at 1000s sum_over_time(metric1[30s:10s])
-  {} 394
+eval instant at 1000s sum_over_time(metric1_total[30s:10s])
+  {} 297

-# This is (394*2 - 100), because other than the last 100 at 1000s,
+# This is (97 + 98*2 + 99*2 + 100), because other than 97@975s and 100@1000s,
 # everything else is repeated with the 5s step.
-eval instant at 1000s sum_over_time(metric1[30s:5s])
-  {} 688
+eval instant at 1000s sum_over_time(metric1_total[30s:5s])
+  {} 591

-# Offset is aligned with the step.
-eval instant at 1010s sum_over_time(metric1[30s:10s] offset 10s)
-  {} 394
+# Offset is aligned with the step, so this is from [98@980s, 99@990s, 100@1000s].
+eval instant at 1010s sum_over_time(metric1_total[30s:10s] offset 10s)
+  {} 297

 # Same result for different offsets due to step alignment.
-eval instant at 1010s sum_over_time(metric1[30s:10s] offset 9s)
+eval instant at 1010s sum_over_time(metric1_total[30s:10s] offset 9s)
   {} 297

-eval instant at 1010s sum_over_time(metric1[30s:10s] offset 7s)
+eval instant at 1010s sum_over_time(metric1_total[30s:10s] offset 7s)
   {} 297

-eval instant at 1010s sum_over_time(metric1[30s:10s] offset 5s)
+eval instant at 1010s sum_over_time(metric1_total[30s:10s] offset 5s)
   {} 297

-eval instant at 1010s sum_over_time(metric1[30s:10s] offset 3s)
+eval instant at 1010s sum_over_time(metric1_total[30s:10s] offset 3s)
   {} 297

-eval instant at 1010s sum_over_time((metric1)[30s:10s] offset 3s)
+eval instant at 1010s sum_over_time((metric1_total)[30s:10s] offset 3s)
   {} 297

-eval instant at 1010s sum_over_time(metric1[30:10] offset 3)
+eval instant at 1010s sum_over_time(metric1_total[30:10] offset 3)
   {} 297

-eval instant at 1010s sum_over_time((metric1)[30:10s] offset 3s)
+eval instant at 1010s sum_over_time((metric1_total)[30:10s] offset 3s)
   {} 297

-eval instant at 1010s sum_over_time((metric1)[30:10s] offset 3s)
+eval instant at 1010s sum_over_time((metric1_total)[30:10s] offset 3s)
   {} 297

-eval instant at 1010s sum_over_time((metric1)[30:10] offset 3s)
+eval instant at 1010s sum_over_time((metric1_total)[30:10] offset 3s)
   {} 297

-eval instant at 1010s sum_over_time((metric1)[30:10] offset 3)
+eval instant at 1010s sum_over_time((metric1_total)[30:10] offset 3)
   {} 297

-# Nested subqueries
-eval instant at 1000s rate(sum_over_time(metric1[30s:10s])[50s:10s])
-  {} 0.4
+# Nested subqueries.
+eval instant at 1000s rate(sum_over_time(metric1_total[30s:10s])[50s:10s])
+  {} 0.30000000000000004

-eval instant at 1000s rate(sum_over_time(metric2[30s:10s])[50s:10s])
-  {} 0.8
+eval instant at 1000s rate(sum_over_time(metric2_total[30s:10s])[50s:10s])
+  {} 0.6000000000000001

-eval instant at 1000s rate(sum_over_time(metric3[30s:10s])[50s:10s])
-  {} 1.2
+eval instant at 1000s rate(sum_over_time(metric3_total[30s:10s])[50s:10s])
+  {} 0.9

-eval instant at 1000s rate(sum_over_time((metric1+metric2+metric3)[30s:10s])[30s:10s])
-  {} 2.4
+eval instant at 1000s rate(sum_over_time((metric1_total+metric2_total+metric3_total)[30s:10s])[30s:10s])
+  {} 1.8

 clear
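All the offset variants above return 297 because subquery evaluation timestamps are aligned to absolute multiples of the step, independent of the query time or offset; with step 10s, any window ending between 1000s and 1009s contains exactly the samples at 980s, 990s and 1000s, i.e. 98+99+100. A small sketch of that alignment rule (an illustration of the behaviour the tests pin down, assuming non-negative times, not the engine's code):

package main

import "fmt"

// subqueryTimestamps returns the evaluation timestamps (in seconds) that a
// subquery range[rng:step] with the given offset uses at query time ts.
// Timestamps are multiples of step within the half-open window (start, end].
func subqueryTimestamps(ts, offset, rng, step int64) []int64 {
	end := ts - offset
	start := end - rng
	first := (start/step + 1) * step // first multiple of step after start
	var out []int64
	for t := first; t <= end; t += step {
		out = append(out, t)
	}
	return out
}

func main() {
	for _, off := range []int64{10, 9, 7, 5, 3} {
		fmt.Println(off, subqueryTimestamps(1010, off, 30, 10))
	}
	// Every offset prints [980 990 1000]: values 98+99+100 = 297.
}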
 # Fibonacci sequence, to ensure the rate is not constant.
 # Additional note: using subqueries unnecessarily is unwise.
 load 7s
-  metric 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987 1597 2584 4181 6765 10946 17711 28657 46368 75025 121393 196418 317811 514229 832040 1346269 2178309 3524578 5702887 9227465 14930352 24157817 39088169 63245986 102334155 165580141 267914296 433494437 701408733 1134903170 1836311903 2971215073 4807526976 7778742049 12586269025 20365011074 32951280099 53316291173 86267571272 139583862445 225851433717 365435296162 591286729879 956722026041 1548008755920 2504730781961 4052739537881 6557470319842 10610209857723 17167680177565 27777890035288 44945570212853 72723460248141 117669030460994 190392490709135 308061521170129 498454011879264 806515533049393 1304969544928657 2111485077978050 3416454622906707 5527939700884757 8944394323791464 14472334024676221 23416728348467685 37889062373143906 61305790721611591 99194853094755497 160500643816367088 259695496911122585 420196140727489673 679891637638612258 1100087778366101931 1779979416004714189 2880067194370816120 4660046610375530309 7540113804746346429 12200160415121876738 19740274219868223167 31940434634990099905 51680708854858323072 83621143489848422977 135301852344706746049 218922995834555169026 354224848179261915075 573147844013817084101 927372692193078999176 1500520536206896083277 2427893228399975082453 3928413764606871165730 6356306993006846248183 10284720757613717413913 16641027750620563662096 26925748508234281076009 43566776258854844738105 70492524767089125814114 114059301025943970552219 184551825793033096366333 298611126818977066918552 483162952612010163284885 781774079430987230203437 1264937032042997393488322 2046711111473984623691759 3311648143516982017180081 5358359254990966640871840 8670007398507948658051921 14028366653498915298923761 22698374052006863956975682 36726740705505779255899443 59425114757512643212875125 96151855463018422468774568 155576970220531065681649693 251728825683549488150424261 407305795904080553832073954 659034621587630041982498215 1066340417491710595814572169 1725375039079340637797070384 2791715456571051233611642553 4517090495650391871408712937 7308805952221443105020355490 11825896447871834976429068427 19134702400093278081449423917 30960598847965113057878492344 50095301248058391139327916261 81055900096023504197206408605 131151201344081895336534324866 212207101440105399533740733471 343358302784187294870275058337 555565404224292694404015791808 898923707008479989274290850145 1454489111232772683678306641953 2353412818241252672952597492098 3807901929474025356630904134051 6161314747715278029583501626149 9969216677189303386214405760200 16130531424904581415797907386349 26099748102093884802012313146549 42230279526998466217810220532898 68330027629092351019822533679447 110560307156090817237632754212345 178890334785183168257455287891792 289450641941273985495088042104137 468340976726457153752543329995929 757791618667731139247631372100066 1226132595394188293000174702095995 1983924214061919432247806074196061 3210056809456107725247980776292056 5193981023518027157495786850488117 8404037832974134882743767626780173 13598018856492162040239554477268290 22002056689466296922983322104048463 35600075545958458963222876581316753 57602132235424755886206198685365216 93202207781383214849429075266681969 150804340016807970735635273952047185 244006547798191185585064349218729154 394810887814999156320699623170776339 638817435613190341905763972389505493 1033628323428189498226463595560281832 1672445759041379840132227567949787325 2706074082469569338358691163510069157 4378519841510949178490918731459856482 7084593923980518516849609894969925639 11463113765491467695340528626429782121 18547707689471986212190138521399707760
+  metric_total 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987 1597 2584 4181 6765 10946 17711 28657 46368 75025 121393 196418 317811 514229 832040 1346269 2178309 3524578 5702887 9227465 14930352 24157817 39088169 63245986 102334155 165580141 267914296 433494437 701408733 1134903170 1836311903 2971215073 4807526976 7778742049 12586269025 20365011074 32951280099 53316291173 86267571272 139583862445 225851433717 365435296162 591286729879 956722026041 1548008755920 2504730781961 4052739537881 6557470319842 10610209857723 17167680177565 27777890035288 44945570212853 72723460248141 117669030460994 190392490709135 308061521170129 498454011879264 806515533049393 1304969544928657 2111485077978050 3416454622906707 5527939700884757 8944394323791464 14472334024676221 23416728348467685 37889062373143906 61305790721611591 99194853094755497 160500643816367088 259695496911122585 420196140727489673 679891637638612258 1100087778366101931 1779979416004714189 2880067194370816120 4660046610375530309 7540113804746346429 12200160415121876738 19740274219868223167 31940434634990099905 51680708854858323072 83621143489848422977 135301852344706746049 218922995834555169026 354224848179261915075 573147844013817084101 927372692193078999176 1500520536206896083277 2427893228399975082453 3928413764606871165730 6356306993006846248183 10284720757613717413913 16641027750620563662096 26925748508234281076009 43566776258854844738105 70492524767089125814114 114059301025943970552219 184551825793033096366333 298611126818977066918552 483162952612010163284885 781774079430987230203437 1264937032042997393488322 2046711111473984623691759 3311648143516982017180081 5358359254990966640871840 8670007398507948658051921 14028366653498915298923761 22698374052006863956975682 36726740705505779255899443 59425114757512643212875125 96151855463018422468774568 155576970220531065681649693 251728825683549488150424261 407305795904080553832073954 659034621587630041982498215 1066340417491710595814572169 1725375039079340637797070384 2791715456571051233611642553 4517090495650391871408712937 7308805952221443105020355490 11825896447871834976429068427 19134702400093278081449423917 30960598847965113057878492344 50095301248058391139327916261 81055900096023504197206408605 131151201344081895336534324866 212207101440105399533740733471 343358302784187294870275058337 555565404224292694404015791808 898923707008479989274290850145 1454489111232772683678306641953 2353412818241252672952597492098 3807901929474025356630904134051 6161314747715278029583501626149 9969216677189303386214405760200 16130531424904581415797907386349 26099748102093884802012313146549 42230279526998466217810220532898 68330027629092351019822533679447 110560307156090817237632754212345 178890334785183168257455287891792 289450641941273985495088042104137 468340976726457153752543329995929 757791618667731139247631372100066 1226132595394188293000174702095995 1983924214061919432247806074196061 3210056809456107725247980776292056 5193981023518027157495786850488117 8404037832974134882743767626780173 13598018856492162040239554477268290 22002056689466296922983322104048463 35600075545958458963222876581316753 57602132235424755886206198685365216 93202207781383214849429075266681969 150804340016807970735635273952047185 244006547798191185585064349218729154 394810887814999156320699623170776339 638817435613190341905763972389505493 1033628323428189498226463595560281832 1672445759041379840132227567949787325 2706074082469569338358691163510069157 4378519841510949178490918731459856482 7084593923980518516849609894969925639 11463113765491467695340528626429782121 18547707689471986212190138521399707760

 # Extrapolated from [3@21, 144@77]: (144 - 3) / (77 - 21)
-eval instant at 80s rate(metric[1m])
+eval instant at 80s rate(metric_total[1m])
   {} 2.517857143

-# No extrapolation, [2@20, 144@80]: (144 - 2) / 60
-eval instant at 80s rate(metric[1m:10s])
-  {} 2.366666667
+# Extrapolated to range start for counter, [2@20, 144@80]: (144 - 2) / (80 - 20)
+eval instant at 80s rate(metric_total[1m500ms:10s])
+  {} 2.3666666666666667
+
+# Extrapolated to zero value for counter, [2@20, 144@80]: (144 - 0) / 61
+eval instant at 80s rate(metric_total[1m1s:10s])
+  {} 2.360655737704918

 # Only one value between 10s and 20s, 2@14
-eval instant at 20s min_over_time(metric[10s])
+eval instant at 20s min_over_time(metric_total[10s])
   {} 2

 # min(1@10, 2@20)
-eval instant at 20s min_over_time(metric[10s:10s])
+eval instant at 20s min_over_time(metric_total[15s:10s])
   {} 1

-eval instant at 20m min_over_time(rate(metric[5m])[20m:1m])
+eval instant at 20m min_over_time(rate(metric_total[5m])[20m:1m])
   {} 0.12119047619047618
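The three rate() expectations above follow from extrapolation: the slope between the first and last samples in the window is extended towards the window boundaries, by at most half the average sample spacing on each side, and, for counters, never past the point where the series would cross zero. A simplified sketch that reproduces all three numbers (an illustration, not the engine's exact code path):

package main

import (
	"fmt"
	"math"
)

// simplifiedRate extends the raw slope between the first and last sample to
// the window boundaries (capped at half the average sample spacing per side,
// and at the counter's zero crossing on the left) and divides by the window.
func simplifiedRate(firstT, firstV, lastT, lastV, winStart, winEnd float64, samples int) float64 {
	sampled := lastT - firstT
	slope := (lastV - firstV) / sampled
	half := sampled / float64(samples-1) / 2

	left := math.Min(firstT-winStart, half)
	left = math.Min(left, firstV/slope) // counter: stop at the zero crossing
	right := math.Min(winEnd-lastT, half)

	return slope * (sampled + left + right) / (winEnd - winStart)
}

func main() {
	fmt.Println(simplifiedRate(21, 3, 77, 144, 20, 80, 9))   // 2.517857142857143
	fmt.Println(simplifiedRate(20, 2, 80, 144, 19.5, 80, 7)) // 2.3666666666666667
	fmt.Println(simplifiedRate(20, 2, 80, 144, 19, 80, 7))   // 2.360655737704918
}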
diff --git a/promql/promqltest/testdata/trig_functions.test b/promql/promqltest/testdata/trig_functions.test
index fa5f94651b..036621193d 100644
--- a/promql/promqltest/testdata/trig_functions.test
+++ b/promql/promqltest/testdata/trig_functions.test
@@ -5,92 +5,92 @@ load 5m
   trig{l="y"} 20
   trig{l="NaN"} NaN

-eval instant at 5m sin(trig)
+eval instant at 1m sin(trig)
   {l="x"} -0.5440211108893699
   {l="y"} 0.9129452507276277
   {l="NaN"} NaN

-eval instant at 5m cos(trig)
+eval instant at 1m cos(trig)
   {l="x"} -0.8390715290764524
   {l="y"} 0.40808206181339196
   {l="NaN"} NaN

-eval instant at 5m tan(trig)
+eval instant at 1m tan(trig)
   {l="x"} 0.6483608274590867
   {l="y"} 2.2371609442247427
   {l="NaN"} NaN

-eval instant at 5m asin(trig - 10.1)
+eval instant at 1m asin(trig - 10.1)
   {l="x"} -0.10016742116155944
   {l="y"} NaN
   {l="NaN"} NaN

-eval instant at 5m acos(trig - 10.1)
+eval instant at 1m acos(trig - 10.1)
   {l="x"} 1.670963747956456
   {l="y"} NaN
   {l="NaN"} NaN

-eval instant at 5m atan(trig)
+eval instant at 1m atan(trig)
   {l="x"} 1.4711276743037345
   {l="y"} 1.5208379310729538
   {l="NaN"} NaN

-eval instant at 5m sinh(trig)
+eval instant at 1m sinh(trig)
   {l="x"} 11013.232874703393
   {l="y"} 2.4258259770489514e+08
   {l="NaN"} NaN

-eval instant at 5m cosh(trig)
+eval instant at 1m cosh(trig)
   {l="x"} 11013.232920103324
   {l="y"} 2.4258259770489514e+08
   {l="NaN"} NaN

-eval instant at 5m tanh(trig)
+eval instant at 1m tanh(trig)
   {l="x"} 0.9999999958776927
   {l="y"} 1
   {l="NaN"} NaN

-eval instant at 5m asinh(trig)
+eval instant at 1m asinh(trig)
   {l="x"} 2.99822295029797
   {l="y"} 3.6895038689889055
   {l="NaN"} NaN

-eval instant at 5m acosh(trig)
+eval instant at 1m acosh(trig)
   {l="x"} 2.993222846126381
   {l="y"} 3.6882538673612966
   {l="NaN"} NaN

-eval instant at 5m atanh(trig - 10.1)
+eval instant at 1m atanh(trig - 10.1)
   {l="x"} -0.10033534773107522
   {l="y"} NaN
   {l="NaN"} NaN

-eval instant at 5m rad(trig)
+eval instant at 1m rad(trig)
   {l="x"} 0.17453292519943295
   {l="y"} 0.3490658503988659
   {l="NaN"} NaN

-eval instant at 5m rad(trig - 10)
+eval instant at 1m rad(trig - 10)
   {l="x"} 0
   {l="y"} 0.17453292519943295
   {l="NaN"} NaN

-eval instant at 5m rad(trig - 20)
+eval instant at 1m rad(trig - 20)
   {l="x"} -0.17453292519943295
   {l="y"} 0
   {l="NaN"} NaN

-eval instant at 5m deg(trig)
+eval instant at 1m deg(trig)
   {l="x"} 572.9577951308232
   {l="y"} 1145.9155902616465
   {l="NaN"} NaN

-eval instant at 5m deg(trig - 10)
+eval instant at 1m deg(trig - 10)
   {l="x"} 0
   {l="y"} 572.9577951308232
   {l="NaN"} NaN

-eval instant at 5m deg(trig - 20)
+eval instant at 1m deg(trig - 20)
   {l="x"} -572.9577951308232
   {l="y"} 0
   {l="NaN"} NaN
diff --git a/promql/quantile.go b/promql/quantile.go
index 7ddb76acba..a6851810c5 100644
--- a/promql/quantile.go
+++ b/promql/quantile.go
@@ -51,20 +51,22 @@ var excludedLabels = []string{
 	labels.BucketLabel,
 }

-type bucket struct {
-	upperBound float64
-	count      float64
+// Bucket represents a bucket of a classic histogram. It is used internally by the promql
+// package, but it is nevertheless exported for potential use in other PromQL engines.
+type Bucket struct {
+	UpperBound float64
+	Count      float64
 }

-// buckets implements sort.Interface.
-type buckets []bucket
+// Buckets implements sort.Interface.
+type Buckets []Bucket

 type metricWithBuckets struct {
 	metric  labels.Labels
-	buckets buckets
+	buckets Buckets
 }

-// bucketQuantile calculates the quantile 'q' based on the given buckets. The
+// BucketQuantile calculates the quantile 'q' based on the given buckets. The
 // buckets will be sorted by upperBound by this function (i.e. no sorting
 // needed before calling this function). The quantile value is interpolated
 // assuming a linear distribution within a bucket. However, if the quantile
@@ -95,7 +97,14 @@ type metricWithBuckets struct {
 // and another bool to indicate if small differences between buckets (that
 // are likely artifacts of floating point precision issues) have been
 // ignored.
-func bucketQuantile(q float64, buckets buckets) (float64, bool, bool) {
+//
+// Generically speaking, BucketQuantile is for calculating the
+// histogram_quantile() of classic histograms. See also: HistogramQuantile
+// for native histograms.
+//
+// BucketQuantile is exported as a useful quantile function over a set of
+// given buckets. It may be used by other PromQL engine implementations.
+func BucketQuantile(q float64, buckets Buckets) (float64, bool, bool) {
 	if math.IsNaN(q) {
 		return math.NaN(), false, false
 	}
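With Bucket, Buckets and BucketQuantile exported as above, an external engine can call the classic-histogram quantile logic directly. A small worked example against that API (the bucket counts are invented for illustration):

package main

import (
	"fmt"
	"math"

	"github.com/prometheus/prometheus/promql"
)

func main() {
	// A classic histogram: cumulative counts per upper bound, +Inf last.
	buckets := promql.Buckets{
		{UpperBound: 0.1, Count: 5},
		{UpperBound: 0.2, Count: 7},
		{UpperBound: math.Inf(+1), Count: 12},
	}
	// Median: rank 0.5*12 = 6 falls into the (0.1, 0.2] bucket, which covers
	// cumulative counts 5..7, so linear interpolation gives
	// 0.1 + (0.2-0.1)*(6-5)/(7-5) = 0.15.
	q, forcedMonotonic, fixedPrecision := promql.BucketQuantile(0.5, buckets)
	fmt.Println(q, forcedMonotonic, fixedPrecision) // ~0.15 false false
}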
- if a.upperBound < b.upperBound { + if a.UpperBound < b.UpperBound { return -1 } - if a.upperBound > b.upperBound { + if a.UpperBound > b.UpperBound { return +1 } return 0 }) - if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) { + if !math.IsInf(buckets[len(buckets)-1].UpperBound, +1) { return math.NaN(), false, false } @@ -125,47 +134,59 @@ func bucketQuantile(q float64, buckets buckets) (float64, bool, bool) { if len(buckets) < 2 { return math.NaN(), false, false } - observations := buckets[len(buckets)-1].count + observations := buckets[len(buckets)-1].Count if observations == 0 { return math.NaN(), false, false } rank := q * observations - b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank }) + b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].Count >= rank }) if b == len(buckets)-1 { - return buckets[len(buckets)-2].upperBound, forcedMonotonic, fixedPrecision + return buckets[len(buckets)-2].UpperBound, forcedMonotonic, fixedPrecision } - if b == 0 && buckets[0].upperBound <= 0 { - return buckets[0].upperBound, forcedMonotonic, fixedPrecision + if b == 0 && buckets[0].UpperBound <= 0 { + return buckets[0].UpperBound, forcedMonotonic, fixedPrecision } var ( bucketStart float64 - bucketEnd = buckets[b].upperBound - count = buckets[b].count + bucketEnd = buckets[b].UpperBound + count = buckets[b].Count ) if b > 0 { - bucketStart = buckets[b-1].upperBound - count -= buckets[b-1].count - rank -= buckets[b-1].count + bucketStart = buckets[b-1].UpperBound + count -= buckets[b-1].Count + rank -= buckets[b-1].Count } return bucketStart + (bucketEnd-bucketStart)*(rank/count), forcedMonotonic, fixedPrecision } -// histogramQuantile calculates the quantile 'q' based on the given histogram. +// HistogramQuantile calculates the quantile 'q' based on the given histogram. // -// The quantile value is interpolated assuming a linear distribution within a -// bucket. -// TODO(beorn7): Find an interpolation method that is a better fit for -// exponential buckets (and think about configurable interpolation). +// For custom buckets, the result is interpolated linearly, i.e. it is assumed +// the observations are uniformly distributed within each bucket. (This is a +// quite blunt assumption, but it is consistent with the interpolation method +// used for classic histograms so far.) +// +// For exponential buckets, the interpolation is done under the assumption that +// the samples within each bucket are distributed in a way that they would +// uniformly populate the buckets in a hypothetical histogram with higher +// resolution. For example, if the rank calculation suggests that the requested +// quantile is right in the middle of the population of the (1,2] bucket, we +// assume the quantile would be right at the bucket boundary between the two +// buckets the (1,2] bucket would be divided into if the histogram had double +// the resolution, which is 2**2**-1 = 1.4142... We call this exponential +// interpolation. +// +// However, for a quantile that ends up in the zero bucket, this method isn't +// very helpful (because there is an infinite number of buckets close to zero, +// so we would have to assume zero as the result). Therefore, we return to +// linear interpolation in the zero bucket. // // A natural lower bound of 0 is assumed if the histogram has only positive // buckets. Likewise, a natural upper bound of 0 is assumed if the histogram has // only negative buckets. -// TODO(beorn7): Come to terms if we want that. 
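// As a concrete sketch of the exponential interpolation described above
// (assuming a positive bucket): interpolate the log2 of the bucket
// boundaries and exponentiate back. With lower=1, upper=2, fraction=0.5 this
// returns 2^0.5 = 1.4142..., i.e. the boundary at which the (1,2] bucket
// would be split in a histogram of twice the resolution.
//
//	func interpolateExp(lower, upper, fraction float64) float64 {
//		logLower := math.Log2(lower)
//		logUpper := math.Log2(upper)
//		return math.Exp2(logLower + (logUpper-logLower)*fraction)
//	}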
// -// There are a number of special cases (once we have a way to report errors -// happening during evaluations of AST functions, we should report those -// explicitly): +// There are a number of special cases: // // If the histogram has 0 observations, NaN is returned. // @@ -174,7 +195,13 @@ func bucketQuantile(q float64, buckets buckets) (float64, bool, bool) { // If q>1, +Inf is returned. // // If q is NaN, NaN is returned. -func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 { +// +// HistogramQuantile is for calculating the histogram_quantile() of native +// histograms. See also: BucketQuantile for classic histograms. +// +// HistogramQuantile is exported as it may be used by other PromQL engine +// implementations. +func HistogramQuantile(q float64, h *histogram.FloatHistogram) float64 { if q < 0 { return math.Inf(-1) } @@ -193,9 +220,9 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 { rank float64 ) - // if there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator - // if the q < 0.5, use the forward iterator - // if the q >= 0.5, use the reverse iterator + // If there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator. + // If q < 0.5, use the forward iterator. + // If q >= 0.5, use the reverse iterator. if math.IsNaN(h.Sum) || q < 0.5 { it = h.AllBucketIterator() rank = q * h.Count @@ -260,19 +287,40 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 { rank = count - rank } - // TODO(codesome): Use a better estimation than linear. - return bucket.Lower + (bucket.Upper-bucket.Lower)*(rank/bucket.Count) + // The fraction of how far we are into the current bucket. + fraction := rank / bucket.Count + + // Return linear interpolation for custom buckets and for quantiles that + // end up in the zero bucket. + if h.UsesCustomBuckets() || (bucket.Lower <= 0 && bucket.Upper >= 0) { + return bucket.Lower + (bucket.Upper-bucket.Lower)*fraction + } + + // For exponential buckets, we interpolate on a logarithmic scale. On a + // logarithmic scale, the exponential bucket boundaries (for any schema) + // become linear (every bucket has the same width). Therefore, after + // taking the logarithm of both bucket boundaries, we can use the + // calculated fraction in the same way as for linear interpolation (see + // above). Finally, we return to the normal scale by applying the + // exponential function to the result. + logLower := math.Log2(math.Abs(bucket.Lower)) + logUpper := math.Log2(math.Abs(bucket.Upper)) + if bucket.Lower > 0 { // Positive bucket. + return math.Exp2(logLower + (logUpper-logLower)*fraction) + } + // Otherwise, we are in a negative bucket and have to mirror things. + return -math.Exp2(logUpper + (logLower-logUpper)*(1-fraction)) } -// histogramFraction calculates the fraction of observations between the +// HistogramFraction calculates the fraction of observations between the // provided lower and upper bounds, based on the provided histogram. // -// histogramFraction is in a certain way the inverse of histogramQuantile. If -// histogramQuantile(0.9, h) returns 123.4, then histogramFraction(-Inf, 123.4, h) +// HistogramFraction is in a certain way the inverse of histogramQuantile. If +// HistogramQuantile(0.9, h) returns 123.4, then HistogramFraction(-Inf, 123.4, h) // returns 0.9. // -// The same notes (and TODOs) with regard to interpolation and assumptions about -// the zero bucket boundaries apply as for histogramQuantile. 
+// The same notes with regard to interpolation and assumptions about the zero +// bucket boundaries apply as for histogramQuantile. // // Whether either boundary is inclusive or exclusive doesn’t actually matter as // long as interpolation has to be performed anyway. In the case of a boundary @@ -295,7 +343,10 @@ func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 { // If lower or upper is NaN, NaN is returned. // // If lower >= upper and the histogram has at least 1 observation, zero is returned. -func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float64 { +// +// HistogramFraction is exported as it may be used by other PromQL engine +// implementations. +func HistogramFraction(lower, upper float64, h *histogram.FloatHistogram) float64 { if h.Count == 0 || math.IsNaN(lower) || math.IsNaN(upper) { return math.NaN() } @@ -310,7 +361,35 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6 ) for it.Next() { b := it.At() - if b.Lower < 0 && b.Upper > 0 { + zeroBucket := false + + // interpolateLinearly is used for custom buckets to be + // consistent with the linear interpolation known from classic + // histograms. It is also used for the zero bucket. + interpolateLinearly := func(v float64) float64 { + return rank + b.Count*(v-b.Lower)/(b.Upper-b.Lower) + } + + // interpolateExponentially is using the same exponential + // interpolation method as above for histogramQuantile. This + // method is a better fit for exponential bucketing. + interpolateExponentially := func(v float64) float64 { + var ( + logLower = math.Log2(math.Abs(b.Lower)) + logUpper = math.Log2(math.Abs(b.Upper)) + logV = math.Log2(math.Abs(v)) + fraction float64 + ) + if v > 0 { + fraction = (logV - logLower) / (logUpper - logLower) + } else { + fraction = 1 - ((logV - logUpper) / (logLower - logUpper)) + } + return rank + b.Count*fraction + } + + if b.Lower <= 0 && b.Upper >= 0 { + zeroBucket = true switch { case len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0: // This is the zero bucket and the histogram has only @@ -325,10 +404,12 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6 } } if !lowerSet && b.Lower >= lower { + // We have hit the lower value at the lower bucket boundary. lowerRank = rank lowerSet = true } if !upperSet && b.Lower >= upper { + // We have hit the upper value at the lower bucket boundary. upperRank = rank upperSet = true } @@ -336,11 +417,21 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6 break } if !lowerSet && b.Lower < lower && b.Upper > lower { - lowerRank = rank + b.Count*(lower-b.Lower)/(b.Upper-b.Lower) + // The lower value is in this bucket. + if h.UsesCustomBuckets() || zeroBucket { + lowerRank = interpolateLinearly(lower) + } else { + lowerRank = interpolateExponentially(lower) + } lowerSet = true } if !upperSet && b.Lower < upper && b.Upper > upper { - upperRank = rank + b.Count*(upper-b.Lower)/(b.Upper-b.Lower) + // The upper value is in this bucket. + if h.UsesCustomBuckets() || zeroBucket { + upperRank = interpolateLinearly(upper) + } else { + upperRank = interpolateExponentially(upper) + } upperSet = true } if lowerSet && upperSet { @@ -361,12 +452,12 @@ func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float6 // coalesceBuckets merges buckets with the same upper bound. // // The input buckets must be sorted. 
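// A short usage sketch of the inverse relationship documented above (fh is
// assumed to be a populated *histogram.FloatHistogram):
//
//	v := promql.HistogramQuantile(0.9, fh)
//	f := promql.HistogramFraction(math.Inf(-1), v, fh)
//	// f is now, up to interpolation error, 0.9.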
-func coalesceBuckets(buckets buckets) buckets { +func coalesceBuckets(buckets Buckets) Buckets { last := buckets[0] i := 0 for _, b := range buckets[1:] { - if b.upperBound == last.upperBound { - last.count += b.count + if b.UpperBound == last.UpperBound { + last.Count += b.Count } else { buckets[i] = last last = b @@ -403,11 +494,11 @@ func coalesceBuckets(buckets buckets) buckets { // // We return a bool to indicate if this monotonicity was forced or not, and // another bool to indicate if small deltas were ignored or not. -func ensureMonotonicAndIgnoreSmallDeltas(buckets buckets, tolerance float64) (bool, bool) { +func ensureMonotonicAndIgnoreSmallDeltas(buckets Buckets, tolerance float64) (bool, bool) { var forcedMonotonic, fixedPrecision bool - prev := buckets[0].count + prev := buckets[0].Count for i := 1; i < len(buckets); i++ { - curr := buckets[i].count // Assumed always positive. + curr := buckets[i].Count // Assumed always positive. if curr == prev { // No correction needed if the counts are identical between buckets. continue @@ -416,14 +507,14 @@ func ensureMonotonicAndIgnoreSmallDeltas(buckets buckets, tolerance float64) (bo // Silently correct numerically insignificant differences from floating // point precision errors, regardless of direction. // Do not update the 'prev' value as we are ignoring the difference. - buckets[i].count = prev + buckets[i].Count = prev fixedPrecision = true continue } if curr < prev { // Force monotonicity by removing any decreases regardless of magnitude. // Do not update the 'prev' value as we are ignoring the decrease. - buckets[i].count = prev + buckets[i].Count = prev forcedMonotonic = true continue } diff --git a/promql/quantile_test.go b/promql/quantile_test.go index 84f4c0b06d..a1047d73f4 100644 --- a/promql/quantile_test.go +++ b/promql/quantile_test.go @@ -24,29 +24,29 @@ func TestBucketQuantile_ForcedMonotonicity(t *testing.T) { eps := 1e-12 for name, tc := range map[string]struct { - getInput func() buckets // The buckets can be modified in-place so return a new one each time. + getInput func() Buckets // The buckets can be modified in-place so return a new one each time. expectedForced bool expectedFixed bool expectedValues map[float64]float64 }{ "simple - monotonic": { - getInput: func() buckets { - return buckets{ + getInput: func() Buckets { + return Buckets{ { - upperBound: 10, - count: 10, + UpperBound: 10, + Count: 10, }, { - upperBound: 15, - count: 15, + UpperBound: 15, + Count: 15, }, { - upperBound: 20, - count: 15, + UpperBound: 20, + Count: 15, }, { - upperBound: 30, - count: 15, + UpperBound: 30, + Count: 15, }, { - upperBound: math.Inf(1), - count: 15, + UpperBound: math.Inf(1), + Count: 15, }, } }, @@ -60,23 +60,23 @@ func TestBucketQuantile_ForcedMonotonicity(t *testing.T) { }, }, "simple - non-monotonic middle": { - getInput: func() buckets { - return buckets{ + getInput: func() Buckets { + return Buckets{ { - upperBound: 10, - count: 10, + UpperBound: 10, + Count: 10, }, { - upperBound: 15, - count: 15, + UpperBound: 15, + Count: 15, }, { - upperBound: 20, - count: 15.00000000001, // Simulate the case there's a small imprecision in float64. + UpperBound: 20, + Count: 15.00000000001, // Simulate the case there's a small imprecision in float64. 
}, { - upperBound: 30, - count: 15, + UpperBound: 30, + Count: 15, }, { - upperBound: math.Inf(1), - count: 15, + UpperBound: math.Inf(1), + Count: 15, }, } }, @@ -90,41 +90,41 @@ func TestBucketQuantile_ForcedMonotonicity(t *testing.T) { }, }, "real example - monotonic": { - getInput: func() buckets { - return buckets{ + getInput: func() Buckets { + return Buckets{ { - upperBound: 1, - count: 6454661.3014166197, + UpperBound: 1, + Count: 6454661.3014166197, }, { - upperBound: 5, - count: 8339611.2001912938, + UpperBound: 5, + Count: 8339611.2001912938, }, { - upperBound: 10, - count: 14118319.2444762159, + UpperBound: 10, + Count: 14118319.2444762159, }, { - upperBound: 25, - count: 14130031.5272856522, + UpperBound: 25, + Count: 14130031.5272856522, }, { - upperBound: 50, - count: 46001270.3030008152, + UpperBound: 50, + Count: 46001270.3030008152, }, { - upperBound: 64, - count: 46008473.8585563600, + UpperBound: 64, + Count: 46008473.8585563600, }, { - upperBound: 80, - count: 46008473.8585563600, + UpperBound: 80, + Count: 46008473.8585563600, }, { - upperBound: 100, - count: 46008473.8585563600, + UpperBound: 100, + Count: 46008473.8585563600, }, { - upperBound: 250, - count: 46008473.8585563600, + UpperBound: 250, + Count: 46008473.8585563600, }, { - upperBound: 1000, - count: 46008473.8585563600, + UpperBound: 1000, + Count: 46008473.8585563600, }, { - upperBound: math.Inf(1), - count: 46008473.8585563600, + UpperBound: math.Inf(1), + Count: 46008473.8585563600, }, } }, @@ -138,41 +138,41 @@ func TestBucketQuantile_ForcedMonotonicity(t *testing.T) { }, }, "real example - non-monotonic": { - getInput: func() buckets { - return buckets{ + getInput: func() Buckets { + return Buckets{ { - upperBound: 1, - count: 6454661.3014166225, + UpperBound: 1, + Count: 6454661.3014166225, }, { - upperBound: 5, - count: 8339611.2001912957, + UpperBound: 5, + Count: 8339611.2001912957, }, { - upperBound: 10, - count: 14118319.2444762159, + UpperBound: 10, + Count: 14118319.2444762159, }, { - upperBound: 25, - count: 14130031.5272856504, + UpperBound: 25, + Count: 14130031.5272856504, }, { - upperBound: 50, - count: 46001270.3030008227, + UpperBound: 50, + Count: 46001270.3030008227, }, { - upperBound: 64, - count: 46008473.8585563824, + UpperBound: 64, + Count: 46008473.8585563824, }, { - upperBound: 80, - count: 46008473.8585563898, + UpperBound: 80, + Count: 46008473.8585563898, }, { - upperBound: 100, - count: 46008473.8585563824, + UpperBound: 100, + Count: 46008473.8585563824, }, { - upperBound: 250, - count: 46008473.8585563824, + UpperBound: 250, + Count: 46008473.8585563824, }, { - upperBound: 1000, - count: 46008473.8585563898, + UpperBound: 1000, + Count: 46008473.8585563898, }, { - upperBound: math.Inf(1), - count: 46008473.8585563824, + UpperBound: math.Inf(1), + Count: 46008473.8585563824, }, } }, @@ -186,53 +186,53 @@ func TestBucketQuantile_ForcedMonotonicity(t *testing.T) { }, }, "real example 2 - monotonic": { - getInput: func() buckets { - return buckets{ + getInput: func() Buckets { + return Buckets{ { - upperBound: 0.005, - count: 9.6, + UpperBound: 0.005, + Count: 9.6, }, { - upperBound: 0.01, - count: 9.688888889, + UpperBound: 0.01, + Count: 9.688888889, }, { - upperBound: 0.025, - count: 9.755555556, + UpperBound: 0.025, + Count: 9.755555556, }, { - upperBound: 0.05, - count: 9.844444444, + UpperBound: 0.05, + Count: 9.844444444, }, { - upperBound: 0.1, - count: 9.888888889, + UpperBound: 0.1, + Count: 9.888888889, }, { - upperBound: 0.25, - count: 9.888888889, + UpperBound: 
0.25, + Count: 9.888888889, }, { - upperBound: 0.5, - count: 9.888888889, + UpperBound: 0.5, + Count: 9.888888889, }, { - upperBound: 1, - count: 9.888888889, + UpperBound: 1, + Count: 9.888888889, }, { - upperBound: 2.5, - count: 9.888888889, + UpperBound: 2.5, + Count: 9.888888889, }, { - upperBound: 5, - count: 9.888888889, + UpperBound: 5, + Count: 9.888888889, }, { - upperBound: 10, - count: 9.888888889, + UpperBound: 10, + Count: 9.888888889, }, { - upperBound: 25, - count: 9.888888889, + UpperBound: 25, + Count: 9.888888889, }, { - upperBound: 50, - count: 9.888888889, + UpperBound: 50, + Count: 9.888888889, }, { - upperBound: 100, - count: 9.888888889, + UpperBound: 100, + Count: 9.888888889, }, { - upperBound: math.Inf(1), - count: 9.888888889, + UpperBound: math.Inf(1), + Count: 9.888888889, }, } }, @@ -246,53 +246,53 @@ func TestBucketQuantile_ForcedMonotonicity(t *testing.T) { }, }, "real example 2 - non-monotonic": { - getInput: func() buckets { - return buckets{ + getInput: func() Buckets { + return Buckets{ { - upperBound: 0.005, - count: 9.6, + UpperBound: 0.005, + Count: 9.6, }, { - upperBound: 0.01, - count: 9.688888889, + UpperBound: 0.01, + Count: 9.688888889, }, { - upperBound: 0.025, - count: 9.755555556, + UpperBound: 0.025, + Count: 9.755555556, }, { - upperBound: 0.05, - count: 9.844444444, + UpperBound: 0.05, + Count: 9.844444444, }, { - upperBound: 0.1, - count: 9.888888889, + UpperBound: 0.1, + Count: 9.888888889, }, { - upperBound: 0.25, - count: 9.888888889, + UpperBound: 0.25, + Count: 9.888888889, }, { - upperBound: 0.5, - count: 9.888888889, + UpperBound: 0.5, + Count: 9.888888889, }, { - upperBound: 1, - count: 9.888888889, + UpperBound: 1, + Count: 9.888888889, }, { - upperBound: 2.5, - count: 9.888888889, + UpperBound: 2.5, + Count: 9.888888889, }, { - upperBound: 5, - count: 9.888888889, + UpperBound: 5, + Count: 9.888888889, }, { - upperBound: 10, - count: 9.888888889001, // Simulate the case there's a small imprecision in float64. + UpperBound: 10, + Count: 9.888888889001, // Simulate the case there's a small imprecision in float64. }, { - upperBound: 25, - count: 9.888888889, + UpperBound: 25, + Count: 9.888888889, }, { - upperBound: 50, - count: 9.888888888999, // Simulate the case there's a small imprecision in float64. + UpperBound: 50, + Count: 9.888888888999, // Simulate the case there's a small imprecision in float64. 
}, { - upperBound: 100, - count: 9.888888889, + UpperBound: 100, + Count: 9.888888889, }, { - upperBound: math.Inf(1), - count: 9.888888889, + UpperBound: math.Inf(1), + Count: 9.888888889, }, } }, @@ -308,7 +308,7 @@ func TestBucketQuantile_ForcedMonotonicity(t *testing.T) { } { t.Run(name, func(t *testing.T) { for q, v := range tc.expectedValues { - res, forced, fixed := bucketQuantile(q, tc.getInput()) + res, forced, fixed := BucketQuantile(q, tc.getInput()) require.Equal(t, tc.expectedForced, forced) require.Equal(t, tc.expectedFixed, fixed) require.InEpsilon(t, v, res, eps) diff --git a/promql/query_logger.go b/promql/query_logger.go index 7e06ebb97f..c0a70b66d7 100644 --- a/promql/query_logger.go +++ b/promql/query_logger.go @@ -19,6 +19,7 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "strings" @@ -26,14 +27,12 @@ import ( "unicode/utf8" "github.com/edsrzf/mmap-go" - "github.com/go-kit/log" - "github.com/go-kit/log/level" ) type ActiveQueryTracker struct { - mmapedFile []byte + mmappedFile []byte getNextIndex chan int - logger log.Logger + logger *slog.Logger closer io.Closer maxConcurrent int } @@ -63,11 +62,11 @@ func parseBrokenJSON(brokenJSON []byte) (string, bool) { return queries, true } -func logUnfinishedQueries(filename string, filesize int, logger log.Logger) { +func logUnfinishedQueries(filename string, filesize int, logger *slog.Logger) { if _, err := os.Stat(filename); err == nil { fd, err := os.Open(filename) if err != nil { - level.Error(logger).Log("msg", "Failed to open query log file", "err", err) + logger.Error("Failed to open query log file", "err", err) return } defer fd.Close() @@ -75,7 +74,7 @@ func logUnfinishedQueries(filename string, filesize int, logger log.Logger) { brokenJSON := make([]byte, filesize) _, err = fd.Read(brokenJSON) if err != nil { - level.Error(logger).Log("msg", "Failed to read query log file", "err", err) + logger.Error("Failed to read query log file", "err", err) return } @@ -83,72 +82,72 @@ func logUnfinishedQueries(filename string, filesize int, logger log.Logger) { if !queriesExist { return } - level.Info(logger).Log("msg", "These queries didn't finish in prometheus' last run:", "queries", queries) + logger.Info("These queries didn't finish in prometheus' last run:", "queries", queries) } } -type mmapedFile struct { +type mmappedFile struct { f io.Closer m mmap.MMap } -func (f *mmapedFile) Close() error { +func (f *mmappedFile) Close() error { err := f.m.Unmap() if err != nil { - err = fmt.Errorf("mmapedFile: unmapping: %w", err) + err = fmt.Errorf("mmappedFile: unmapping: %w", err) } if fErr := f.f.Close(); fErr != nil { - return errors.Join(fmt.Errorf("close mmapedFile.f: %w", fErr), err) + return errors.Join(fmt.Errorf("close mmappedFile.f: %w", fErr), err) } return err } -func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, io.Closer, error) { +func getMMappedFile(filename string, filesize int, logger *slog.Logger) ([]byte, io.Closer, error) { file, err := os.OpenFile(filename, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o666) if err != nil { absPath, pathErr := filepath.Abs(filename) if pathErr != nil { absPath = filename } - level.Error(logger).Log("msg", "Error opening query log file", "file", absPath, "err", err) + logger.Error("Error opening query log file", "file", absPath, "err", err) return nil, nil, err } err = file.Truncate(int64(filesize)) if err != nil { file.Close() - level.Error(logger).Log("msg", "Error setting filesize.", "filesize", filesize, "err", err) + logger.Error("Error 
setting filesize.", "filesize", filesize, "err", err) return nil, nil, err } fileAsBytes, err := mmap.Map(file, mmap.RDWR, 0) if err != nil { file.Close() - level.Error(logger).Log("msg", "Failed to mmap", "file", filename, "Attempted size", filesize, "err", err) + logger.Error("Failed to mmap", "file", filename, "Attempted size", filesize, "err", err) return nil, nil, err } - return fileAsBytes, &mmapedFile{f: file, m: fileAsBytes}, err + return fileAsBytes, &mmappedFile{f: file, m: fileAsBytes}, err } -func NewActiveQueryTracker(localStoragePath string, maxConcurrent int, logger log.Logger) *ActiveQueryTracker { +func NewActiveQueryTracker(localStoragePath string, maxConcurrent int, logger *slog.Logger) *ActiveQueryTracker { err := os.MkdirAll(localStoragePath, 0o777) if err != nil { - level.Error(logger).Log("msg", "Failed to create directory for logging active queries") + logger.Error("Failed to create directory for logging active queries") } filename, filesize := filepath.Join(localStoragePath, "queries.active"), 1+maxConcurrent*entrySize logUnfinishedQueries(filename, filesize, logger) - fileAsBytes, closer, err := getMMapedFile(filename, filesize, logger) + fileAsBytes, closer, err := getMMappedFile(filename, filesize, logger) if err != nil { panic("Unable to create mmap-ed active query log") } copy(fileAsBytes, "[") activeQueryTracker := ActiveQueryTracker{ - mmapedFile: fileAsBytes, + mmappedFile: fileAsBytes, closer: closer, getNextIndex: make(chan int, maxConcurrent), logger: logger, @@ -174,18 +173,18 @@ func trimStringByBytes(str string, size int) string { return string(bytesStr[:trimIndex]) } -func _newJSONEntry(query string, timestamp int64, logger log.Logger) []byte { +func _newJSONEntry(query string, timestamp int64, logger *slog.Logger) []byte { entry := Entry{query, timestamp} jsonEntry, err := json.Marshal(entry) if err != nil { - level.Error(logger).Log("msg", "Cannot create json of query", "query", query) + logger.Error("Cannot create json of query", "query", query) return []byte{} } return jsonEntry } -func newJSONEntry(query string, logger log.Logger) []byte { +func newJSONEntry(query string, logger *slog.Logger) []byte { timestamp := time.Now().Unix() minEntryJSON := _newJSONEntry("", timestamp, logger) @@ -206,14 +205,14 @@ func (tracker ActiveQueryTracker) GetMaxConcurrent() int { } func (tracker ActiveQueryTracker) Delete(insertIndex int) { - copy(tracker.mmapedFile[insertIndex:], strings.Repeat("\x00", entrySize)) + copy(tracker.mmappedFile[insertIndex:], strings.Repeat("\x00", entrySize)) tracker.getNextIndex <- insertIndex } func (tracker ActiveQueryTracker) Insert(ctx context.Context, query string) (int, error) { select { case i := <-tracker.getNextIndex: - fileBytes := tracker.mmapedFile + fileBytes := tracker.mmappedFile entry := newJSONEntry(query, tracker.logger) start, end := i, i+entrySize diff --git a/promql/query_logger_test.go b/promql/query_logger_test.go index 7bd93781ec..eb06e513ef 100644 --- a/promql/query_logger_test.go +++ b/promql/query_logger_test.go @@ -26,7 +26,7 @@ import ( func TestQueryLogging(t *testing.T) { fileAsBytes := make([]byte, 4096) queryLogger := ActiveQueryTracker{ - mmapedFile: fileAsBytes, + mmappedFile: fileAsBytes, logger: nil, getNextIndex: make(chan int, 4), } @@ -70,7 +70,7 @@ func TestQueryLogging(t *testing.T) { func TestIndexReuse(t *testing.T) { queryBytes := make([]byte, 1+3*entrySize) queryLogger := ActiveQueryTracker{ - mmapedFile: queryBytes, + mmappedFile: queryBytes, logger: nil, getNextIndex: make(chan int, 
3), } @@ -106,10 +106,10 @@ func TestIndexReuse(t *testing.T) { func TestMMapFile(t *testing.T) { dir := t.TempDir() - fpath := filepath.Join(dir, "mmapedFile") + fpath := filepath.Join(dir, "mmappedFile") const data = "ab" - fileAsBytes, closer, err := getMMapedFile(fpath, 2, nil) + fileAsBytes, closer, err := getMMappedFile(fpath, 2, nil) require.NoError(t, err) copy(fileAsBytes, data) require.NoError(t, closer.Close()) diff --git a/promql/value.go b/promql/value.go index f25dbcd780..f19c0b5b58 100644 --- a/promql/value.go +++ b/promql/value.go @@ -526,7 +526,7 @@ func (ssi *storageSeriesIterator) Next() chunkenc.ValueType { ssi.currH = p.H return chunkenc.ValFloatHistogram default: - panic("storageSeriesIterater.Next failed to pick value type") + panic("storageSeriesIterator.Next failed to pick value type") } } diff --git a/rules/alerting.go b/rules/alerting.go index 2dc0917dce..e7f15baefe 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -15,14 +15,14 @@ package rules import ( "context" + "errors" "fmt" + "log/slog" "net/url" "strings" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" "go.uber.org/atomic" "gopkg.in/yaml.v2" @@ -141,7 +141,7 @@ type AlertingRule struct { // the fingerprint of the labelset they correspond to. active map[uint64]*Alert - logger log.Logger + logger *slog.Logger noDependentRules *atomic.Bool noDependencyRules *atomic.Bool @@ -151,7 +151,7 @@ type AlertingRule struct { func NewAlertingRule( name string, vec parser.Expr, hold, keepFiringFor time.Duration, labels, annotations, externalLabels labels.Labels, externalURL string, - restored bool, logger log.Logger, + restored bool, logger *slog.Logger, ) *AlertingRule { el := externalLabels.Map() @@ -381,7 +381,7 @@ func (r *AlertingRule) Eval(ctx context.Context, queryOffset time.Duration, ts t result, err := tmpl.Expand() if err != nil { result = fmt.Sprintf("", err) - level.Warn(r.logger).Log("msg", "Expanding alert template failed", "err", err, "data", tmplData) + r.logger.Warn("Expanding alert template failed", "err", err, "data", tmplData) } return result } @@ -404,7 +404,7 @@ func (r *AlertingRule) Eval(ctx context.Context, queryOffset time.Duration, ts t resultFPs[h] = struct{}{} if _, ok := alerts[h]; ok { - return nil, fmt.Errorf("vector contains metrics with the same labelset after applying alert labels") + return nil, errors.New("vector contains metrics with the same labelset after applying alert labels") } alerts[h] = &Alert{ diff --git a/rules/alerting_test.go b/rules/alerting_test.go index 67d683c851..f0aa339cc7 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -19,8 +19,8 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/histogram" @@ -276,7 +276,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) ruleWithExternalLabels := NewAlertingRule( "ExternalLabelExists", @@ -287,7 +287,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { labels.EmptyLabels(), labels.FromStrings("foo", "bar", "dings", "bums"), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) result := promql.Vector{ promql.Sample{ @@ -371,7 +371,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { labels.EmptyLabels(), 
labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) ruleWithExternalURL := NewAlertingRule( "ExternalURLExists", @@ -382,7 +382,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "http://localhost:1234", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) result := promql.Vector{ promql.Sample{ @@ -466,7 +466,7 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) result := promql.Vector{ promql.Sample{ @@ -527,7 +527,7 @@ instance: {{ $v.Labels.instance }}, value: {{ printf "%.0f" $v.Value }}; `), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) evalTime := time.Unix(0, 0) @@ -607,7 +607,7 @@ func TestAlertingRuleDuplicate(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) _, err := rule.Eval(ctx, 0, now, EngineQueryFunc(engine, storage), nil, 0) require.Error(t, err) @@ -651,7 +651,7 @@ func TestAlertingRuleLimit(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) evalTime := time.Unix(0, 0) @@ -779,7 +779,7 @@ func TestSendAlertsDontAffectActiveAlerts(t *testing.T) { }, }, } - nm := notifier.NewManager(&opts, log.NewNopLogger()) + nm := notifier.NewManager(&opts, promslog.NewNopLogger()) f := SendAlerts(nm, "") notifyFunc := func(ctx context.Context, expr string, alerts ...*Alert) { @@ -986,7 +986,7 @@ func TestAlertingEvalWithOrigin(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) _, err = rule.Eval(ctx, 0, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) { @@ -1008,7 +1008,7 @@ func TestAlertingRule_SetNoDependentRules(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) require.False(t, rule.NoDependentRules()) @@ -1029,7 +1029,7 @@ func TestAlertingRule_SetNoDependencyRules(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) require.False(t, rule.NoDependencyRules()) diff --git a/rules/fixtures/rules1.yaml b/rules/fixtures/rules1.yaml new file mode 100644 index 0000000000..76fbf71f3b --- /dev/null +++ b/rules/fixtures/rules1.yaml @@ -0,0 +1,5 @@ +groups: + - name: test_1 + rules: + - record: test_2 + expr: vector(2) diff --git a/rules/group.go b/rules/group.go index 201d3a67d7..1f7953188b 100644 --- a/rules/group.go +++ b/rules/group.go @@ -16,6 +16,7 @@ package rules import ( "context" "errors" + "log/slog" "math" "slices" "strings" @@ -26,10 +27,9 @@ import ( "github.com/prometheus/prometheus/promql/parser" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" @@ -65,7 +65,7 @@ type Group struct { terminated chan struct{} managerDone chan struct{} - logger log.Logger + logger *slog.Logger metrics *Metrics @@ -75,6 +75,7 @@ type Group struct { // concurrencyController controls the rules evaluation concurrency. 
concurrencyController RuleConcurrencyController + appOpts *storage.AppendOptions } // GroupEvalIterationFunc is used to implement and extend rule group @@ -98,9 +99,13 @@ type GroupOptions struct { // NewGroup makes a new Group with the given name, options, and rules. func NewGroup(o GroupOptions) *Group { - metrics := o.Opts.Metrics + opts := o.Opts + if opts == nil { + opts = &ManagerOptions{} + } + metrics := opts.Metrics if metrics == nil { - metrics = NewGroupMetrics(o.Opts.Registerer) + metrics = NewGroupMetrics(opts.Registerer) } key := GroupKey(o.File, o.Name) @@ -119,11 +124,15 @@ func NewGroup(o GroupOptions) *Group { evalIterationFunc = DefaultEvalIterationFunc } - concurrencyController := o.Opts.RuleConcurrencyController + concurrencyController := opts.RuleConcurrencyController if concurrencyController == nil { concurrencyController = sequentialRuleEvalController{} } + if opts.Logger == nil { + opts.Logger = promslog.NewNopLogger() + } + return &Group{ name: o.Name, file: o.File, @@ -132,15 +141,16 @@ func NewGroup(o GroupOptions) *Group { limit: o.Limit, rules: o.Rules, shouldRestore: o.ShouldRestore, - opts: o.Opts, + opts: opts, seriesInPreviousEval: make([]map[string]labels.Labels, len(o.Rules)), done: make(chan struct{}), managerDone: o.done, terminated: make(chan struct{}), - logger: log.With(o.Opts.Logger, "file", o.File, "group", o.Name), + logger: opts.Logger.With("file", o.File, "group", o.Name), metrics: metrics, evalIterationFunc: evalIterationFunc, concurrencyController: concurrencyController, + appOpts: &storage.AppendOptions{DiscardOutOfOrder: true}, } } @@ -188,7 +198,7 @@ func matchesMatcherSets(matcherSets [][]*labels.Matcher, lbls labels.Labels) boo return ok } -// Queryable returns the group's querable. +// Queryable returns the group's queryable. func (g *Group) Queryable() storage.Queryable { return g.opts.Queryable } // Context returns the group's context. @@ -200,7 +210,7 @@ func (g *Group) Interval() time.Duration { return g.interval } // Limit returns the group's limit. 
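// A minimal sketch of how the new appOpts field is meant to be used when
// appending rule results (see Eval below): the appender is marked so that
// out-of-order samples can be discarded rather than failing the append.
//
//	app := g.opts.Appendable.Appender(ctx)
//	app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true})
//	_, err := app.Append(0, s.Metric, s.T, s.F)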
func (g *Group) Limit() int { return g.limit } -func (g *Group) Logger() log.Logger { return g.logger } +func (g *Group) Logger() *slog.Logger { return g.logger } func (g *Group) run(ctx context.Context) { defer close(g.terminated) @@ -272,7 +282,7 @@ func (g *Group) run(ctx context.Context) { g.RestoreForState(restoreStartTime) totalRestoreTimeSeconds := time.Since(restoreStartTime).Seconds() g.metrics.GroupLastRestoreDuration.WithLabelValues(GroupKey(g.file, g.name)).Set(totalRestoreTimeSeconds) - level.Debug(g.logger).Log("msg", "'for' state restoration completed", "duration_seconds", totalRestoreTimeSeconds) + g.logger.Debug("'for' state restoration completed", "duration_seconds", totalRestoreTimeSeconds) g.shouldRestore = false } @@ -495,7 +505,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { defer cleanup() } - logger := log.WithPrefix(g.logger, "name", rule.Name(), "index", i) + logger := g.logger.With("name", rule.Name(), "index", i) ctx, sp := otel.Tracer("").Start(ctx, "rule") sp.SetAttributes(attribute.String("name", rule.Name())) defer func(t time.Time) { @@ -508,7 +518,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { }(time.Now()) if sp.SpanContext().IsSampled() && sp.SpanContext().HasTraceID() { - logger = log.WithPrefix(logger, "trace_id", sp.SpanContext().TraceID()) + logger = logger.With("trace_id", sp.SpanContext().TraceID()) } g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() @@ -524,7 +534,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { // happens on shutdown and thus we skip logging of any errors here. var eqc promql.ErrQueryCanceled if !errors.As(err, &eqc) { - level.Warn(logger).Log("msg", "Evaluating rule failed", "rule", rule, "err", err) + logger.Warn("Evaluating rule failed", "rule", rule, "err", err) } return } @@ -550,7 +560,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { sp.SetStatus(codes.Error, err.Error()) g.metrics.EvalFailures.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() - level.Warn(logger).Log("msg", "Rule sample appending failed", "err", err) + logger.Warn("Rule sample appending failed", "err", err) return } g.seriesInPreviousEval[i] = seriesReturned @@ -560,6 +570,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { if s.H != nil { _, err = app.AppendHistogram(0, s.Metric, s.T, nil, s.H) } else { + app.SetOptions(g.appOpts) _, err = app.Append(0, s.Metric, s.T, s.F) } @@ -574,15 +585,15 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { switch { case errors.Is(unwrappedErr, storage.ErrOutOfOrderSample): numOutOfOrder++ - level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s) + logger.Debug("Rule evaluation result discarded", "err", err, "sample", s) case errors.Is(unwrappedErr, storage.ErrTooOldSample): numTooOld++ - level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s) + logger.Debug("Rule evaluation result discarded", "err", err, "sample", s) case errors.Is(unwrappedErr, storage.ErrDuplicateSampleForTimestamp): numDuplicates++ - level.Debug(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s) + logger.Debug("Rule evaluation result discarded", "err", err, "sample", s) default: - level.Warn(logger).Log("msg", "Rule evaluation result discarded", "err", err, "sample", s) + logger.Warn("Rule evaluation result discarded", "err", err, "sample", s) } } else { buf := [1024]byte{} @@ -590,13 +601,13 @@ func (g *Group) Eval(ctx context.Context, ts 
time.Time) { } } if numOutOfOrder > 0 { - level.Warn(logger).Log("msg", "Error on ingesting out-of-order result from rule evaluation", "num_dropped", numOutOfOrder) + logger.Warn("Error on ingesting out-of-order result from rule evaluation", "num_dropped", numOutOfOrder) } if numTooOld > 0 { - level.Warn(logger).Log("msg", "Error on ingesting too old result from rule evaluation", "num_dropped", numTooOld) + logger.Warn("Error on ingesting too old result from rule evaluation", "num_dropped", numTooOld) } if numDuplicates > 0 { - level.Warn(logger).Log("msg", "Error on ingesting results from rule evaluation with different value but same timestamp", "num_dropped", numDuplicates) + logger.Warn("Error on ingesting results from rule evaluation with different value but same timestamp", "num_dropped", numDuplicates) } for metric, lset := range g.seriesInPreviousEval[i] { @@ -615,7 +626,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { // Do not count these in logging, as this is expected if series // is exposed from a different rule. default: - level.Warn(logger).Log("msg", "Adding stale sample failed", "sample", lset.String(), "err", err) + logger.Warn("Adding stale sample failed", "sample", lset.String(), "err", err) } } } @@ -656,6 +667,7 @@ func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) { return } app := g.opts.Appendable.Appender(ctx) + app.SetOptions(g.appOpts) queryOffset := g.QueryOffset() for _, s := range g.staleSeries { // Rule that produced series no longer configured, mark it stale. @@ -672,11 +684,11 @@ func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) { // Do not count these in logging, as this is expected if series // is exposed from a different rule. default: - level.Warn(g.logger).Log("msg", "Adding stale sample for previous configuration failed", "sample", s, "err", err) + g.logger.Warn("Adding stale sample for previous configuration failed", "sample", s, "err", err) } } if err := app.Commit(); err != nil { - level.Warn(g.logger).Log("msg", "Stale sample appending for previous configuration failed", "err", err) + g.logger.Warn("Stale sample appending for previous configuration failed", "err", err) } else { g.staleSeries = nil } @@ -691,12 +703,12 @@ func (g *Group) RestoreForState(ts time.Time) { mintMS := int64(model.TimeFromUnixNano(mint.UnixNano())) q, err := g.opts.Queryable.Querier(mintMS, maxtMS) if err != nil { - level.Error(g.logger).Log("msg", "Failed to get Querier", "err", err) + g.logger.Error("Failed to get Querier", "err", err) return } defer func() { if err := q.Close(); err != nil { - level.Error(g.logger).Log("msg", "Failed to close Querier", "err", err) + g.logger.Error("Failed to close Querier", "err", err) } }() @@ -717,8 +729,8 @@ func (g *Group) RestoreForState(ts time.Time) { sset, err := alertRule.QueryForStateSeries(g.opts.Context, q) if err != nil { - level.Error(g.logger).Log( - "msg", "Failed to restore 'for' state", + g.logger.Error( + "Failed to restore 'for' state", labels.AlertName, alertRule.Name(), "stage", "Select", "err", err, @@ -737,7 +749,7 @@ func (g *Group) RestoreForState(ts time.Time) { // No results for this alert rule. 
if len(seriesByLabels) == 0 { - level.Debug(g.logger).Log("msg", "No series found to restore the 'for' state of the alert rule", labels.AlertName, alertRule.Name()) + g.logger.Debug("No series found to restore the 'for' state of the alert rule", labels.AlertName, alertRule.Name()) alertRule.SetRestored(true) continue } @@ -757,7 +769,7 @@ func (g *Group) RestoreForState(ts time.Time) { t, v = it.At() } if it.Err() != nil { - level.Error(g.logger).Log("msg", "Failed to restore 'for' state", + g.logger.Error("Failed to restore 'for' state", labels.AlertName, alertRule.Name(), "stage", "Iterator", "err", it.Err()) return } @@ -799,7 +811,7 @@ func (g *Group) RestoreForState(ts time.Time) { } a.ActiveAt = restoredActiveAt - level.Debug(g.logger).Log("msg", "'for' state restored", + g.logger.Debug("'for' state restored", labels.AlertName, alertRule.Name(), "restored_time", a.ActiveAt.Format(time.RFC850), "labels", a.Labels.String()) }) diff --git a/rules/group_test.go b/rules/group_test.go index ff1ef3d6c1..3911640a82 100644 --- a/rules/group_test.go +++ b/rules/group_test.go @@ -17,9 +17,18 @@ import ( "testing" "time" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" ) +func TestNewGroup(t *testing.T) { + g := NewGroup(GroupOptions{ + File: "test-file", + Name: "test-name", + }) + require.Equal(t, promslog.NewNopLogger().With("file", "test-file", "group", "test-name"), g.logger) +} + func TestGroup_Equals(t *testing.T) { tests := map[string]struct { first *Group diff --git a/rules/manager.go b/rules/manager.go index 9e5b33fbc9..6e9bf64691 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -17,15 +17,15 @@ import ( "context" "errors" "fmt" + "log/slog" "net/url" "slices" "strings" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "golang.org/x/sync/semaphore" "github.com/prometheus/prometheus/model/labels" @@ -96,7 +96,7 @@ type Manager struct { done chan struct{} restored bool - logger log.Logger + logger *slog.Logger } // NotifyFunc sends notifications about a set of alerts generated by the given expression. @@ -110,7 +110,7 @@ type ManagerOptions struct { Context context.Context Appendable storage.Appendable Queryable storage.Queryable - Logger log.Logger + Logger *slog.Logger Registerer prometheus.Registerer OutageTolerance time.Duration ForGracePeriod time.Duration @@ -148,6 +148,10 @@ func NewManager(o *ManagerOptions) *Manager { o.RuleDependencyController = ruleDependencyController{} } + if o.Logger == nil { + o.Logger = promslog.NewNopLogger() + } + m := &Manager{ groups: map[string]*Group{}, opts: o, @@ -161,7 +165,7 @@ func NewManager(o *ManagerOptions) *Manager { // Run starts processing of the rule manager. It is blocking. func (m *Manager) Run() { - level.Info(m.logger).Log("msg", "Starting rule manager...") + m.logger.Info("Starting rule manager...") m.start() <-m.done } @@ -175,7 +179,7 @@ func (m *Manager) Stop() { m.mtx.Lock() defer m.mtx.Unlock() - level.Info(m.logger).Log("msg", "Stopping rule manager...") + m.logger.Info("Stopping rule manager...") for _, eg := range m.groups { eg.stop() @@ -185,7 +189,7 @@ func (m *Manager) Stop() { // staleness markers. close(m.done) - level.Info(m.logger).Log("msg", "Rule manager stopped") + m.logger.Info("Rule manager stopped") } // Update the rule manager's state as the config requires. 
If @@ -206,7 +210,7 @@ func (m *Manager) Update(interval time.Duration, files []string, externalLabels if errs != nil { for _, e := range errs { - level.Error(m.logger).Log("msg", "loading groups failed", "err", e) + m.logger.Error("loading groups failed", "err", e) } return errors.New("error loading rules, previous rule set restored") } @@ -312,25 +316,27 @@ func (m *Manager) LoadGroups( return nil, []error{fmt.Errorf("%s: %w", fn, err)} } + mLabels := FromMaps(rg.Labels, r.Labels) + if r.Alert.Value != "" { rules = append(rules, NewAlertingRule( r.Alert.Value, expr, time.Duration(r.For), time.Duration(r.KeepFiringFor), - labels.FromMap(r.Labels), + mLabels, labels.FromMap(r.Annotations), externalLabels, externalURL, m.restored, - log.With(m.logger, "alert", r.Alert), + m.logger.With("alert", r.Alert), )) continue } rules = append(rules, NewRecordingRule( r.Record.Value, expr, - labels.FromMap(r.Labels), + mLabels, )) } @@ -501,3 +507,16 @@ func (c sequentialRuleEvalController) Allow(_ context.Context, _ *Group, _ Rule) } func (c sequentialRuleEvalController) Done(_ context.Context) {} + +// FromMaps returns new sorted Labels from the given maps, overriding each other in order. +func FromMaps(maps ...map[string]string) labels.Labels { + mLables := make(map[string]string) + + for _, m := range maps { + for k, v := range m { + mLables[k] = v + } + } + + return labels.FromMap(mLables) +} diff --git a/rules/manager_test.go b/rules/manager_test.go index b9f6db3273..6afac993d8 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -26,10 +26,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/atomic" "gopkg.in/yaml.v2" @@ -374,7 +374,7 @@ func TestForStateRestore(t *testing.T) { Appendable: storage, Queryable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), NotifyFunc: func(ctx context.Context, expr string, alerts ...*Alert) {}, OutageTolerance: 30 * time.Minute, ForGracePeriod: 10 * time.Minute, @@ -547,7 +547,7 @@ func TestStaleness(t *testing.T) { Appendable: st, Queryable: st, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("a + 1") @@ -641,7 +641,7 @@ groups: require.NoError(t, err) m := NewManager(&ManagerOptions{ - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), DefaultRuleQueryOffset: func() time.Duration { return time.Minute }, @@ -781,7 +781,7 @@ func TestUpdate(t *testing.T) { Queryable: st, QueryFunc: EngineQueryFunc(engine, st), Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) ruleManager.start() defer ruleManager.Stop() @@ -855,10 +855,11 @@ type ruleGroupsTest struct { // ruleGroupTest forms a testing struct for running tests over rules. 
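// A worked sketch of the new FromMaps helper used in LoadGroups above: maps
// are applied in order, so rule labels override group labels of the same
// name (label values invented for illustration).
//
//	lbls := FromMaps(
//		map[string]string{"team": "infra", "env": "prod"}, // group labels
//		map[string]string{"env": "canary"},                // rule labels
//	)
//	// lbls: {env="canary", team="infra"}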
type ruleGroupTest struct { - Name string `yaml:"name"` - Interval model.Duration `yaml:"interval,omitempty"` - Limit int `yaml:"limit,omitempty"` - Rules []rulefmt.Rule `yaml:"rules"` + Name string `yaml:"name"` + Interval model.Duration `yaml:"interval,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []rulefmt.Rule `yaml:"rules"` + Labels map[string]string `yaml:"labels,omitempty"` } func formatRules(r *rulefmt.RuleGroups) ruleGroupsTest { @@ -881,6 +882,7 @@ func formatRules(r *rulefmt.RuleGroups) ruleGroupsTest { Interval: g.Interval, Limit: g.Limit, Rules: rtmp, + Labels: g.Labels, }) } return ruleGroupsTest{ @@ -923,14 +925,14 @@ func TestNotify(t *testing.T) { Appendable: storage, Queryable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), NotifyFunc: notifyFunc, ResendDelay: 2 * time.Second, } expr, err := parser.ParseExpr("a > 1") require.NoError(t, err) - rule := NewAlertingRule("aTooHigh", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, log.NewNopLogger()) + rule := NewAlertingRule("aTooHigh", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, promslog.NewNopLogger()) group := NewGroup(GroupOptions{ Name: "alert", Interval: time.Second, @@ -994,7 +996,7 @@ func TestMetricsUpdate(t *testing.T) { Queryable: storage, QueryFunc: EngineQueryFunc(engine, storage), Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), Registerer: registry, }) ruleManager.start() @@ -1068,7 +1070,7 @@ func TestGroupStalenessOnRemoval(t *testing.T) { Queryable: storage, QueryFunc: EngineQueryFunc(engine, storage), Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) var stopped bool ruleManager.start() @@ -1145,7 +1147,7 @@ func TestMetricsStalenessOnManagerShutdown(t *testing.T) { Queryable: storage, QueryFunc: EngineQueryFunc(engine, storage), Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) var stopped bool ruleManager.start() @@ -1193,6 +1195,53 @@ func countStaleNaN(t *testing.T, st storage.Storage) int { return c } +func TestRuleMovedBetweenGroups(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + storage := teststorage.New(t, 600000) + defer storage.Close() + opts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 10, + Timeout: 10 * time.Second, + } + engine := promql.NewEngine(opts) + ruleManager := NewManager(&ManagerOptions{ + Appendable: storage, + Queryable: storage, + QueryFunc: EngineQueryFunc(engine, storage), + Context: context.Background(), + Logger: promslog.NewNopLogger(), + }) + var stopped bool + ruleManager.start() + defer func() { + if !stopped { + ruleManager.Stop() + } + }() + + rule2 := "fixtures/rules2.yaml" + rule1 := "fixtures/rules1.yaml" + + // Load initial configuration of rules2 + require.NoError(t, ruleManager.Update(1*time.Second, []string{rule2}, labels.EmptyLabels(), "", nil)) + + // Wait for rule to be evaluated + time.Sleep(3 * time.Second) + + // Reload configuration of rules1 + require.NoError(t, ruleManager.Update(1*time.Second, []string{rule1}, labels.EmptyLabels(), "", nil)) + + // Wait for rule to be evaluated in new location and potential staleness marker + time.Sleep(3 * time.Second) + + require.Equal(t, 0, countStaleNaN(t, storage)) // Not expecting any stale markers. 
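	// The rule still exists after the reload, only its group changed, so
	// writing staleness markers for its series would be wrong.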
+} + func TestGroupHasAlertingRules(t *testing.T) { tests := []struct { group *Group @@ -1247,7 +1296,7 @@ func TestRuleHealthUpdates(t *testing.T) { Appendable: st, Queryable: st, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("a + 1") @@ -1345,7 +1394,7 @@ func TestRuleGroupEvalIterationFunc(t *testing.T) { Appendable: storage, Queryable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), NotifyFunc: func(ctx context.Context, expr string, alerts ...*Alert) {}, OutageTolerance: 30 * time.Minute, ForGracePeriod: 10 * time.Minute, @@ -1431,7 +1480,7 @@ func TestNativeHistogramsInRecordingRules(t *testing.T) { Appendable: storage, Queryable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum(histogram_metric)") @@ -1479,7 +1528,7 @@ func TestManager_LoadGroups_ShouldCheckWhetherEachRuleHasDependentsAndDependenci ruleManager := NewManager(&ManagerOptions{ Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), Appendable: storage, QueryFunc: func(ctx context.Context, q string, ts time.Time) (promql.Vector, error) { return nil, nil }, }) @@ -1535,7 +1584,7 @@ func TestDependencyMap(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum by (user) (rate(requests[1m]))") @@ -1544,7 +1593,7 @@ func TestDependencyMap(t *testing.T) { expr, err = parser.ParseExpr("user:requests:rate1m <= 0") require.NoError(t, err) - rule2 := NewAlertingRule("ZeroRequests", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, log.NewNopLogger()) + rule2 := NewAlertingRule("ZeroRequests", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, promslog.NewNopLogger()) expr, err = parser.ParseExpr("sum by (user) (rate(requests[5m]))") require.NoError(t, err) @@ -1584,7 +1633,7 @@ func TestNoDependency(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum by (user) (rate(requests[1m]))") @@ -1607,7 +1656,7 @@ func TestDependenciesEdgeCases(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } t.Run("empty group", func(t *testing.T) { @@ -1765,7 +1814,7 @@ func TestNoMetricSelector(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum by (user) (rate(requests[1m]))") @@ -1794,7 +1843,7 @@ func TestDependentRulesWithNonMetricExpression(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } expr, err := parser.ParseExpr("sum by (user) (rate(requests[1m]))") @@ -1803,7 +1852,7 @@ func TestDependentRulesWithNonMetricExpression(t *testing.T) { expr, err = parser.ParseExpr("user:requests:rate1m <= 0") require.NoError(t, err) - rule2 := NewAlertingRule("ZeroRequests", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, log.NewNopLogger()) + rule2 := NewAlertingRule("ZeroRequests", expr, 0, 0, labels.Labels{}, 
labels.Labels{}, labels.EmptyLabels(), "", true, promslog.NewNopLogger()) expr, err = parser.ParseExpr("3") require.NoError(t, err) @@ -1826,7 +1875,7 @@ func TestRulesDependentOnMetaMetrics(t *testing.T) { ctx := context.Background() opts := &ManagerOptions{ Context: ctx, - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), } // This rule is not dependent on any other rules in its group but it does depend on `ALERTS`, which is produced by @@ -1855,7 +1904,7 @@ func TestDependencyMapUpdatesOnGroupUpdate(t *testing.T) { files := []string{"fixtures/rules.yaml"} ruleManager := NewManager(&ManagerOptions{ Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) ruleManager.start() @@ -2107,7 +2156,7 @@ func TestUpdateWhenStopped(t *testing.T) { files := []string{"fixtures/rules.yaml"} ruleManager := NewManager(&ManagerOptions{ Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), }) ruleManager.start() err := ruleManager.Update(10*time.Second, files, labels.EmptyLabels(), "", nil) @@ -2129,7 +2178,7 @@ func optsFactory(storage storage.Storage, maxInflight, inflightQueries *atomic.I return &ManagerOptions{ Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), ConcurrentEvalsEnabled: concurrent, MaxConcurrentEvals: maxConcurrent, Appendable: storage, @@ -2158,3 +2207,18 @@ func optsFactory(storage storage.Storage, maxInflight, inflightQueries *atomic.I }, } } + +func TestLabels_FromMaps(t *testing.T) { + mLabels := FromMaps( + map[string]string{"aaa": "101", "bbb": "222"}, + map[string]string{"aaa": "111", "ccc": "333"}, + ) + + expected := labels.New( + labels.Label{Name: "aaa", Value: "111"}, + labels.Label{Name: "bbb", Value: "222"}, + labels.Label{Name: "ccc", Value: "333"}, + ) + + require.Equal(t, expected, mLabels, "unexpected labelset") +} diff --git a/rules/origin_test.go b/rules/origin_test.go index 75c83f9a4e..0bf428f3c1 100644 --- a/rules/origin_test.go +++ b/rules/origin_test.go @@ -19,9 +19,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" @@ -96,7 +97,7 @@ func TestNewRuleDetail(t *testing.T) { labels.EmptyLabels(), labels.EmptyLabels(), "", - true, log.NewNopLogger(), + true, promslog.NewNopLogger(), ) detail := NewRuleDetail(rule) diff --git a/rules/recording.go b/rules/recording.go index 17a75fdd1a..52c2a875ab 100644 --- a/rules/recording.go +++ b/rules/recording.go @@ -15,6 +15,7 @@ package rules import ( "context" + "errors" "fmt" "net/url" "time" @@ -103,7 +104,7 @@ func (rule *RecordingRule) Eval(ctx context.Context, queryOffset time.Duration, // Check that the rule does not produce identical metrics after applying // labels. 
if vector.ContainsSameLabelset() { - return nil, fmt.Errorf("vector contains metrics with the same labelset after applying rule labels") + return nil, errors.New("vector contains metrics with the same labelset after applying rule labels") } numSeries := len(vector) diff --git a/scrape/helpers_test.go b/scrape/helpers_test.go index 116fa5c94b..12a56d7071 100644 --- a/scrape/helpers_test.go +++ b/scrape/helpers_test.go @@ -43,6 +43,8 @@ func (a nopAppendable) Appender(_ context.Context) storage.Appender { type nopAppender struct{} +func (a nopAppender) SetOptions(opts *storage.AppendOptions) {} + func (a nopAppender) Append(storage.SeriesRef, labels.Labels, int64, float64) (storage.SeriesRef, error) { return 0, nil } @@ -55,6 +57,10 @@ func (a nopAppender) AppendHistogram(storage.SeriesRef, labels.Labels, int64, *h return 0, nil } +func (a nopAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + return 0, nil +} + func (a nopAppender) UpdateMetadata(storage.SeriesRef, labels.Labels, metadata.Metadata) (storage.SeriesRef, error) { return 0, nil } @@ -78,9 +84,10 @@ func equalFloatSamples(a, b floatSample) bool { } type histogramSample struct { - t int64 - h *histogram.Histogram - fh *histogram.FloatHistogram + metric labels.Labels + t int64 + h *histogram.Histogram + fh *histogram.FloatHistogram } type collectResultAppendable struct { @@ -109,6 +116,8 @@ type collectResultAppender struct { pendingMetadata []metadata.Metadata } +func (a *collectResultAppender) SetOptions(opts *storage.AppendOptions) {} + func (a *collectResultAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { a.mtx.Lock() defer a.mtx.Unlock() @@ -146,7 +155,7 @@ func (a *collectResultAppender) AppendExemplar(ref storage.SeriesRef, l labels.L func (a *collectResultAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { a.mtx.Lock() defer a.mtx.Unlock() - a.pendingHistograms = append(a.pendingHistograms, histogramSample{h: h, fh: fh, t: t}) + a.pendingHistograms = append(a.pendingHistograms, histogramSample{h: h, fh: fh, t: t, metric: l}) if a.next == nil { return 0, nil } @@ -154,6 +163,13 @@ func (a *collectResultAppender) AppendHistogram(ref storage.SeriesRef, l labels. 
return a.next.AppendHistogram(ref, l, t, h, fh) } +func (a *collectResultAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if h != nil { + return a.AppendHistogram(ref, l, ct, &histogram.Histogram{}, nil) + } + return a.AppendHistogram(ref, l, ct, nil, &histogram.FloatHistogram{}) +} + func (a *collectResultAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { a.mtx.Lock() defer a.mtx.Unlock() diff --git a/scrape/manager.go b/scrape/manager.go index e3dba5f0ee..04da3162e6 100644 --- a/scrape/manager.go +++ b/scrape/manager.go @@ -17,31 +17,32 @@ import ( "errors" "fmt" "hash/fnv" + "log/slog" "reflect" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/logging" "github.com/prometheus/prometheus/util/osutil" "github.com/prometheus/prometheus/util/pool" ) // NewManager is the Manager constructor. -func NewManager(o *Options, logger log.Logger, app storage.Appendable, registerer prometheus.Registerer) (*Manager, error) { +func NewManager(o *Options, logger *slog.Logger, newScrapeFailureLogger func(string) (*logging.JSONFileLogger, error), app storage.Appendable, registerer prometheus.Registerer) (*Manager, error) { if o == nil { o = &Options{} } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } sm, err := newScrapeMetrics(registerer) @@ -50,15 +51,16 @@ func NewManager(o *Options, logger log.Logger, app storage.Appendable, registere } m := &Manager{ - append: app, - opts: o, - logger: logger, - scrapeConfigs: make(map[string]*config.ScrapeConfig), - scrapePools: make(map[string]*scrapePool), - graceShut: make(chan struct{}), - triggerReload: make(chan struct{}, 1), - metrics: sm, - buffers: pool.New(1e3, 100e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) }), + append: app, + opts: o, + logger: logger, + newScrapeFailureLogger: newScrapeFailureLogger, + scrapeConfigs: make(map[string]*config.ScrapeConfig), + scrapePools: make(map[string]*scrapePool), + graceShut: make(chan struct{}), + triggerReload: make(chan struct{}, 1), + metrics: sm, + buffers: pool.New(1e3, 100e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) }), } m.metrics.setTargetMetadataCacheGatherer(m) @@ -68,8 +70,7 @@ func NewManager(o *Options, logger log.Logger, app storage.Appendable, registere // Options are the configuration parameters to the scrape manager. type Options struct { - ExtraMetrics bool - NoDefaultPort bool + ExtraMetrics bool // Option used by downstream scraper users like OpenTelemetry Collector // to help lookup metric metadata. Should be false for Prometheus. PassMetadataInContext bool @@ -93,22 +94,22 @@ type Options struct { skipOffsetting bool } -const DefaultNameEscapingScheme = model.ValueEncodingEscaping - // Manager maintains a set of scrape pools and manages start/stop cycles // when receiving new target groups from the discovery manager. 
 type Manager struct {
 	opts   *Options
-	logger log.Logger
+	logger *slog.Logger
 	append storage.Appendable
 
 	graceShut chan struct{}
 
-	offsetSeed    uint64 // Global offsetSeed seed is used to spread scrape workload across HA setup.
-	mtxScrape     sync.Mutex // Guards the fields below.
-	scrapeConfigs map[string]*config.ScrapeConfig
-	scrapePools   map[string]*scrapePool
-	targetSets    map[string][]*targetgroup.Group
-	buffers       *pool.Pool
+	offsetSeed             uint64     // Global offset seed, used to spread the scrape workload across an HA setup.
+	mtxScrape              sync.Mutex // Guards the fields below.
+	scrapeConfigs          map[string]*config.ScrapeConfig
+	scrapePools            map[string]*scrapePool
+	newScrapeFailureLogger func(string) (*logging.JSONFileLogger, error)
+	scrapeFailureLoggers   map[string]*logging.JSONFileLogger
+	targetSets             map[string][]*targetgroup.Group
+	buffers                *pool.Pool
 
 	triggerReload chan struct{}
@@ -172,17 +173,27 @@ func (m *Manager) reload() {
 		if _, ok := m.scrapePools[setName]; !ok {
 			scrapeConfig, ok := m.scrapeConfigs[setName]
 			if !ok {
-				level.Error(m.logger).Log("msg", "error reloading target set", "err", "invalid config id:"+setName)
+				m.logger.Error("error reloading target set", "err", "invalid config id:"+setName)
+				continue
+			}
+			if scrapeConfig.ConvertClassicHistogramsToNHCB && m.opts.EnableCreatedTimestampZeroIngestion {
+				// TODO(krajorama): fix https://github.com/prometheus/prometheus/issues/15137
+				m.logger.Error("error reloading target set", "err", "cannot convert classic histograms to native histograms with custom buckets and ingest created timestamp zero samples at the same time due to https://github.com/prometheus/prometheus/issues/15137")
 				continue
 			}
 			m.metrics.targetScrapePools.Inc()
-			sp, err := newScrapePool(scrapeConfig, m.append, m.offsetSeed, log.With(m.logger, "scrape_pool", setName), m.buffers, m.opts, m.metrics)
+			sp, err := newScrapePool(scrapeConfig, m.append, m.offsetSeed, m.logger.With("scrape_pool", setName), m.buffers, m.opts, m.metrics)
 			if err != nil {
 				m.metrics.targetScrapePoolsFailed.Inc()
-				level.Error(m.logger).Log("msg", "error creating new scrape pool", "err", err, "scrape_pool", setName)
+				m.logger.Error("error creating new scrape pool", "err", err, "scrape_pool", setName)
 				continue
 			}
 			m.scrapePools[setName] = sp
+			if l, ok := m.scrapeFailureLoggers[scrapeConfig.ScrapeFailureLogFile]; ok {
+				sp.SetScrapeFailureLogger(l)
+			} else {
+				sp.logger.Error("No logger found. This is a bug in Prometheus that should be reported upstream.", "scrape_pool", setName)
+			}
 		}
 
 		wg.Add(1)
@@ -238,11 +249,36 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error {
 	}
 
 	c := make(map[string]*config.ScrapeConfig)
+	scrapeFailureLoggers := map[string]*logging.JSONFileLogger{
+		"": nil, // An empty file name maps to a nil logger, i.e. scrape failure logging disabled.
+	}
 	for _, scfg := range scfgs {
 		c[scfg.JobName] = scfg
+		if _, ok := scrapeFailureLoggers[scfg.ScrapeFailureLogFile]; !ok {
+			// We promise to reopen the file on each reload.
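+			// Note: newScrapeFailureLogger may be nil (no factory was passed to
+			// NewManager, e.g. in tests); in that case the per-file logger stays nil.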
+ var ( + logger *logging.JSONFileLogger + err error + ) + if m.newScrapeFailureLogger != nil { + if logger, err = m.newScrapeFailureLogger(scfg.ScrapeFailureLogFile); err != nil { + return err + } + } + scrapeFailureLoggers[scfg.ScrapeFailureLogFile] = logger + } } m.scrapeConfigs = c + oldScrapeFailureLoggers := m.scrapeFailureLoggers + for _, s := range oldScrapeFailureLoggers { + if s != nil { + defer s.Close() + } + } + + m.scrapeFailureLoggers = scrapeFailureLoggers + if err := m.setOffsetSeed(cfg.GlobalConfig.ExternalLabels); err != nil { return err } @@ -257,9 +293,16 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { case !reflect.DeepEqual(sp.config, cfg): err := sp.reload(cfg) if err != nil { - level.Error(m.logger).Log("msg", "error reloading scrape pool", "err", err, "scrape_pool", name) + m.logger.Error("error reloading scrape pool", "err", err, "scrape_pool", name) failed = true } + fallthrough + case ok: + if l, ok := m.scrapeFailureLoggers[cfg.ScrapeFailureLogFile]; ok { + sp.SetScrapeFailureLogger(l) + } else { + sp.logger.Error("No logger found. This is a bug in Prometheus that should be reported upstream.", "scrape_pool", name) + } } } diff --git a/scrape/manager_test.go b/scrape/manager_test.go index c71691c95d..f446c99789 100644 --- a/scrape/manager_test.go +++ b/scrape/manager_test.go @@ -14,7 +14,9 @@ package scrape import ( + "bytes" "context" + "errors" "fmt" "net/http" "net/http/httptest" @@ -26,33 +28,42 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "google.golang.org/protobuf/types/known/timestamppb" "gopkg.in/yaml.v2" + "github.com/prometheus/prometheus/model/timestamp" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" _ "github.com/prometheus/prometheus/discovery/file" "github.com/prometheus/prometheus/discovery/targetgroup" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" + "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/runutil" "github.com/prometheus/prometheus/util/testutil" ) +func init() { + // This can be removed when the default validation scheme in common is updated. + model.NameValidationScheme = model.UTF8Validation +} + func TestPopulateLabels(t *testing.T) { cases := []struct { - in labels.Labels - cfg *config.ScrapeConfig - noDefaultPort bool - res labels.Labels - resOrig labels.Labels - err string + in labels.Labels + cfg *config.ScrapeConfig + res labels.Labels + resOrig labels.Labels + err string }{ // Regular population of scrape config options. 
{ @@ -106,8 +117,8 @@ func TestPopulateLabels(t *testing.T) { ScrapeTimeout: model.Duration(time.Second), }, res: labels.FromMap(map[string]string{ - model.AddressLabel: "1.2.3.4:80", - model.InstanceLabel: "1.2.3.4:80", + model.AddressLabel: "1.2.3.4", + model.InstanceLabel: "1.2.3.4", model.SchemeLabel: "http", model.MetricsPathLabel: "/custom", model.JobLabel: "custom-job", @@ -137,7 +148,7 @@ func TestPopulateLabels(t *testing.T) { ScrapeTimeout: model.Duration(time.Second), }, res: labels.FromMap(map[string]string{ - model.AddressLabel: "[::1]:443", + model.AddressLabel: "[::1]", model.InstanceLabel: "custom-instance", model.SchemeLabel: "https", model.MetricsPathLabel: "/metrics", @@ -360,7 +371,6 @@ func TestPopulateLabels(t *testing.T) { ScrapeInterval: model.Duration(time.Second), ScrapeTimeout: model.Duration(time.Second), }, - noDefaultPort: true, res: labels.FromMap(map[string]string{ model.AddressLabel: "1.2.3.4", model.InstanceLabel: "1.2.3.4", @@ -379,7 +389,7 @@ func TestPopulateLabels(t *testing.T) { model.ScrapeTimeoutLabel: "1s", }), }, - // Remove default port (http). + // verify that the default port is not removed (http). { in: labels.FromMap(map[string]string{ model.AddressLabel: "1.2.3.4:80", @@ -391,9 +401,8 @@ func TestPopulateLabels(t *testing.T) { ScrapeInterval: model.Duration(time.Second), ScrapeTimeout: model.Duration(time.Second), }, - noDefaultPort: true, res: labels.FromMap(map[string]string{ - model.AddressLabel: "1.2.3.4", + model.AddressLabel: "1.2.3.4:80", model.InstanceLabel: "1.2.3.4:80", model.SchemeLabel: "http", model.MetricsPathLabel: "/metrics", @@ -410,7 +419,7 @@ func TestPopulateLabels(t *testing.T) { model.ScrapeTimeoutLabel: "1s", }), }, - // Remove default port (https). + // verify that the default port is not removed (https). { in: labels.FromMap(map[string]string{ model.AddressLabel: "1.2.3.4:443", @@ -422,9 +431,8 @@ func TestPopulateLabels(t *testing.T) { ScrapeInterval: model.Duration(time.Second), ScrapeTimeout: model.Duration(time.Second), }, - noDefaultPort: true, res: labels.FromMap(map[string]string{ - model.AddressLabel: "1.2.3.4", + model.AddressLabel: "1.2.3.4:443", model.InstanceLabel: "1.2.3.4:443", model.SchemeLabel: "https", model.MetricsPathLabel: "/metrics", @@ -445,7 +453,7 @@ func TestPopulateLabels(t *testing.T) { for _, c := range cases { in := c.in.Copy() - res, orig, err := PopulateLabels(labels.NewBuilder(c.in), c.cfg, c.noDefaultPort) + res, orig, err := PopulateLabels(labels.NewBuilder(c.in), c.cfg) if c.err != "" { require.EqualError(t, err, c.err) } else { @@ -511,7 +519,7 @@ scrape_configs: ) opts := Options{} - scrapeManager, err := NewManager(&opts, nil, nil, testRegistry) + scrapeManager, err := NewManager(&opts, nil, nil, nil, testRegistry) require.NoError(t, err) newLoop := func(scrapeLoopOptions) loop { ch <- struct{}{} @@ -576,7 +584,7 @@ scrape_configs: func TestManagerTargetsUpdates(t *testing.T) { opts := Options{} testRegistry := prometheus.NewRegistry() - m, err := NewManager(&opts, nil, nil, testRegistry) + m, err := NewManager(&opts, nil, nil, nil, testRegistry) require.NoError(t, err) ts := make(chan map[string][]*targetgroup.Group) @@ -629,7 +637,7 @@ global: opts := Options{} testRegistry := prometheus.NewRegistry() - scrapeManager, err := NewManager(&opts, nil, nil, testRegistry) + scrapeManager, err := NewManager(&opts, nil, nil, nil, testRegistry) require.NoError(t, err) // Load the first config. 
@@ -706,7 +714,7 @@ scrape_configs: } opts := Options{} - scrapeManager, err := NewManager(&opts, nil, nil, testRegistry) + scrapeManager, err := NewManager(&opts, nil, nil, nil, testRegistry) require.NoError(t, err) reload(scrapeManager, cfg1) @@ -716,37 +724,256 @@ scrape_configs: require.ElementsMatch(t, []string{"job1", "job3"}, scrapeManager.ScrapePools()) } -// TestManagerCTZeroIngestion tests scrape manager for CT cases. +func setupScrapeManager(t *testing.T, honorTimestamps, enableCTZeroIngestion bool) (*collectResultAppender, *Manager) { + app := &collectResultAppender{} + scrapeManager, err := NewManager( + &Options{ + EnableCreatedTimestampZeroIngestion: enableCTZeroIngestion, + skipOffsetting: true, + }, + promslog.New(&promslog.Config{}), + nil, + &collectResultAppendable{app}, + prometheus.NewRegistry(), + ) + require.NoError(t, err) + + require.NoError(t, scrapeManager.ApplyConfig(&config.Config{ + GlobalConfig: config.GlobalConfig{ + // Disable regular scrapes. + ScrapeInterval: model.Duration(9999 * time.Minute), + ScrapeTimeout: model.Duration(5 * time.Second), + ScrapeProtocols: []config.ScrapeProtocol{config.OpenMetricsText1_0_0, config.PrometheusProto}, + }, + ScrapeConfigs: []*config.ScrapeConfig{{JobName: "test", HonorTimestamps: honorTimestamps}}, + })) + + return app, scrapeManager +} + +func setupTestServer(t *testing.T, typ string, toWrite []byte) *httptest.Server { + once := sync.Once{} + + server := httptest.NewServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fail := true + once.Do(func() { + fail = false + w.Header().Set("Content-Type", typ) + w.Write(toWrite) + }) + + if fail { + w.WriteHeader(http.StatusInternalServerError) + } + }), + ) + + t.Cleanup(func() { server.Close() }) + + return server +} + +// TestManagerCTZeroIngestion tests scrape manager for various CT cases. func TestManagerCTZeroIngestion(t *testing.T) { - const mName = "expected_counter" + const ( + // _total suffix is required, otherwise expfmt with OMText will mark metric as "unknown" + expectedMetricName = "expected_metric_total" + expectedCreatedMetricName = "expected_metric_created" + expectedSampleValue = 17.0 + ) + + for _, testFormat := range []config.ScrapeProtocol{config.PrometheusProto, config.OpenMetricsText1_0_0} { + t.Run(fmt.Sprintf("format=%s", testFormat), func(t *testing.T) { + for _, testWithCT := range []bool{false, true} { + t.Run(fmt.Sprintf("withCT=%v", testWithCT), func(t *testing.T) { + for _, testCTZeroIngest := range []bool{false, true} { + t.Run(fmt.Sprintf("ctZeroIngest=%v", testCTZeroIngest), func(t *testing.T) { + sampleTs := time.Now() + ctTs := time.Time{} + if testWithCT { + ctTs = sampleTs.Add(-2 * time.Minute) + } + + // TODO(bwplotka): Add more types than just counter? + encoded := prepareTestEncodedCounter(t, testFormat, expectedMetricName, expectedSampleValue, sampleTs, ctTs) + app, scrapeManager := setupScrapeManager(t, true, testCTZeroIngest) + + // Perform the test. + doOneScrape(t, scrapeManager, app, setupTestServer(t, config.ScrapeProtocolsHeaders[testFormat], encoded)) + + // Verify results. + // Verify what we got vs expectations around CT injection. 
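+					// Illustrative expectation (a sketch, not asserted verbatim below): with a CT
+					// present and CT-zero ingestion enabled, the appender sees two samples, e.g.
+					//   expected_metric_total 0.0  @ ctTs     (synthetic zero sample)
+					//   expected_metric_total 17.0 @ sampleTs (the scraped sample)
+					// With either of the two missing, only the scraped sample is ingested.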
+ samples := findSamplesForMetric(app.resultFloats, expectedMetricName) + if testWithCT && testCTZeroIngest { + require.Len(t, samples, 2) + require.Equal(t, 0.0, samples[0].f) + require.Equal(t, timestamp.FromTime(ctTs), samples[0].t) + require.Equal(t, expectedSampleValue, samples[1].f) + require.Equal(t, timestamp.FromTime(sampleTs), samples[1].t) + } else { + require.Len(t, samples, 1) + require.Equal(t, expectedSampleValue, samples[0].f) + require.Equal(t, timestamp.FromTime(sampleTs), samples[0].t) + } + + // Verify what we got vs expectations around additional _created series for OM text. + // enableCTZeroInjection also kills that _created line. + createdSeriesSamples := findSamplesForMetric(app.resultFloats, expectedCreatedMetricName) + if testFormat == config.OpenMetricsText1_0_0 && testWithCT && !testCTZeroIngest { + // For OM Text, when counter has CT, and feature flag disabled we should see _created lines. + require.Len(t, createdSeriesSamples, 1) + // Conversion taken from common/expfmt.writeOpenMetricsFloat. + // We don't check the ct timestamp as explicit ts was not implemented in expfmt.Encoder, + // but exists in OM https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#:~:text=An%20example%20with%20a%20Metric%20with%20no%20labels%2C%20and%20a%20MetricPoint%20with%20a%20timestamp%20and%20a%20created + // We can implement this, but we want to potentially get rid of OM 1.0 CT lines + require.Equal(t, float64(timestamppb.New(ctTs).AsTime().UnixNano())/1e9, createdSeriesSamples[0].f) + } else { + require.Empty(t, createdSeriesSamples) + } + }) + } + }) + } + }) + } +} + +func prepareTestEncodedCounter(t *testing.T, format config.ScrapeProtocol, mName string, v float64, ts, ct time.Time) (encoded []byte) { + t.Helper() + + counter := &dto.Counter{Value: proto.Float64(v)} + if !ct.IsZero() { + counter.CreatedTimestamp = timestamppb.New(ct) + } + ctrType := dto.MetricType_COUNTER + inputMetric := &dto.MetricFamily{ + Name: proto.String(mName), + Type: &ctrType, + Metric: []*dto.Metric{{ + TimestampMs: proto.Int64(timestamp.FromTime(ts)), + Counter: counter, + }}, + } + switch format { + case config.PrometheusProto: + return protoMarshalDelimited(t, inputMetric) + case config.OpenMetricsText1_0_0: + buf := &bytes.Buffer{} + require.NoError(t, expfmt.NewEncoder(buf, expfmt.NewFormat(expfmt.TypeOpenMetrics), expfmt.WithCreatedLines(), expfmt.WithUnit()).Encode(inputMetric)) + _, _ = buf.WriteString("# EOF") + + t.Log("produced OM text to expose:", buf.String()) + return buf.Bytes() + default: + t.Fatalf("not implemented format: %v", format) + return nil + } +} + +func doOneScrape(t *testing.T, manager *Manager, appender *collectResultAppender, server *httptest.Server) { + t.Helper() + + serverURL, err := url.Parse(server.URL) + require.NoError(t, err) + + // Add fake target directly into tsets + reload + manager.updateTsets(map[string][]*targetgroup.Group{ + "test": {{ + Targets: []model.LabelSet{{ + model.SchemeLabel: model.LabelValue(serverURL.Scheme), + model.AddressLabel: model.LabelValue(serverURL.Host), + }}, + }}, + }) + manager.reload() + + // Wait for one scrape. + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer cancel() + require.NoError(t, runutil.Retry(100*time.Millisecond, ctx.Done(), func() error { + appender.mtx.Lock() + defer appender.mtx.Unlock() + + // Check if scrape happened and grab the relevant samples. 
+ if len(appender.resultFloats) > 0 { + return nil + } + return errors.New("expected some float samples, got none") + }), "after 1 minute") + manager.Stop() +} + +func findSamplesForMetric(floats []floatSample, metricName string) (ret []floatSample) { + for _, f := range floats { + if f.metric.Get(model.MetricNameLabel) == metricName { + ret = append(ret, f) + } + } + return ret +} + +// generateTestHistogram generates the same thing as tsdbutil.GenerateTestHistogram, +// but in the form of dto.Histogram. +func generateTestHistogram(i int) *dto.Histogram { + helper := tsdbutil.GenerateTestHistogram(i) + h := &dto.Histogram{} + h.SampleCount = proto.Uint64(helper.Count) + h.SampleSum = proto.Float64(helper.Sum) + h.Schema = proto.Int32(helper.Schema) + h.ZeroThreshold = proto.Float64(helper.ZeroThreshold) + h.ZeroCount = proto.Uint64(helper.ZeroCount) + h.PositiveSpan = make([]*dto.BucketSpan, len(helper.PositiveSpans)) + for i, span := range helper.PositiveSpans { + h.PositiveSpan[i] = &dto.BucketSpan{ + Offset: proto.Int32(span.Offset), + Length: proto.Uint32(span.Length), + } + } + h.PositiveDelta = helper.PositiveBuckets + h.NegativeSpan = make([]*dto.BucketSpan, len(helper.NegativeSpans)) + for i, span := range helper.NegativeSpans { + h.NegativeSpan[i] = &dto.BucketSpan{ + Offset: proto.Int32(span.Offset), + Length: proto.Uint32(span.Length), + } + } + h.NegativeDelta = helper.NegativeBuckets + return h +} + +func TestManagerCTZeroIngestionHistogram(t *testing.T) { + const mName = "expected_histogram" for _, tc := range []struct { name string - counterSample *dto.Counter + inputHistSample *dto.Histogram enableCTZeroIngestion bool }{ { - name: "disabled with CT on counter", - counterSample: &dto.Counter{ - Value: proto.Float64(1.0), - // Timestamp does not matter as long as it exists in this test. - CreatedTimestamp: timestamppb.Now(), - }, + name: "disabled with CT on histogram", + inputHistSample: func() *dto.Histogram { + h := generateTestHistogram(0) + h.CreatedTimestamp = timestamppb.Now() + return h + }(), + enableCTZeroIngestion: false, }, { - name: "enabled with CT on counter", - counterSample: &dto.Counter{ - Value: proto.Float64(1.0), - // Timestamp does not matter as long as it exists in this test. - CreatedTimestamp: timestamppb.Now(), - }, + name: "enabled with CT on histogram", + inputHistSample: func() *dto.Histogram { + h := generateTestHistogram(0) + h.CreatedTimestamp = timestamppb.Now() + return h + }(), enableCTZeroIngestion: true, }, { - name: "enabled without CT on counter", - counterSample: &dto.Counter{ - Value: proto.Float64(1.0), - }, + name: "enabled without CT on histogram", + inputHistSample: func() *dto.Histogram { + h := generateTestHistogram(0) + return h + }(), enableCTZeroIngestion: true, }, } { @@ -755,9 +982,11 @@ func TestManagerCTZeroIngestion(t *testing.T) { scrapeManager, err := NewManager( &Options{ EnableCreatedTimestampZeroIngestion: tc.enableCTZeroIngestion, + EnableNativeHistogramsIngestion: true, skipOffsetting: true, }, - log.NewLogfmtLogger(os.Stderr), + promslog.New(&promslog.Config{}), + nil, &collectResultAppendable{app}, prometheus.NewRegistry(), ) @@ -779,16 +1008,16 @@ func TestManagerCTZeroIngestion(t *testing.T) { // Start fake HTTP target to that allow one scrape only. server := httptest.NewServer( http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - fail := true + fail := true // TODO(bwplotka): Kill or use? 
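+				// Serve the payload exactly once; every subsequent scrape gets a 500 so
+				// the test only ever observes samples from a single scrape.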
 			once.Do(func() {
 				fail = false
 				w.Header().Set("Content-Type", `application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited`)
 
-				ctrType := dto.MetricType_COUNTER
+				ctrType := dto.MetricType_HISTOGRAM
 				w.Write(protoMarshalDelimited(t, &dto.MetricFamily{
 					Name:   proto.String(mName),
 					Type:   &ctrType,
-					Metric: []*dto.Metric{{Counter: tc.counterSample}},
+					Metric: []*dto.Metric{{Histogram: tc.inputHistSample}},
 				}))
 			})
 
@@ -814,7 +1043,8 @@ func TestManagerCTZeroIngestion(t *testing.T) {
 			})
 			scrapeManager.reload()
 
-			var got []float64
+			var got []histogramSample
+
 			// Wait for one scrape.
 			ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
 			defer cancel()
@@ -822,32 +1052,35 @@ func TestManagerCTZeroIngestion(t *testing.T) {
 				app.mtx.Lock()
 				defer app.mtx.Unlock()
 
-				// Check if scrape happened and grab the relevant samples, they have to be there - or it's a bug
+				// Check if scrape happened and grab the relevant histograms; they have to be there, or it's a bug
 				// and it's not worth waiting.
-				for _, f := range app.resultFloats {
-					if f.metric.Get(model.MetricNameLabel) == mName {
-						got = append(got, f.f)
+				for _, h := range app.resultHistograms {
+					if h.metric.Get(model.MetricNameLabel) == mName {
+						got = append(got, h)
 					}
 				}
-				if len(app.resultFloats) > 0 {
+				if len(app.resultHistograms) > 0 {
 					return nil
 				}
-				return fmt.Errorf("expected some samples, got none")
+				return errors.New("expected some histogram samples, got none")
 			}), "after 1 minute")
 			scrapeManager.Stop()
 
-			// Check for zero samples, assuming we only injected always one sample.
+			// Check for zero samples, assuming we always injected only one histogram sample.
 			// Did it contain CT to inject? If yes, was CT zero enabled?
-			if tc.counterSample.CreatedTimestamp.IsValid() && tc.enableCTZeroIngestion {
+			if tc.inputHistSample.CreatedTimestamp.IsValid() && tc.enableCTZeroIngestion {
 				require.Len(t, got, 2)
-				require.Equal(t, 0.0, got[0])
-				require.Equal(t, tc.counterSample.GetValue(), got[1])
+				// Zero sample.
+				require.Equal(t, histogram.Histogram{}, *got[0].h)
+				// Quick soft check to make sure it's the same sample or at least not zero.
+				require.Equal(t, tc.inputHistSample.GetSampleSum(), got[1].h.Sum)
 				return
 			}
 			// Expect only one, valid sample.
 			require.Len(t, got, 1)
-			require.Equal(t, tc.counterSample.GetValue(), got[0])
+			// Quick soft check to make sure it's the same sample or at least not zero.
+			require.Equal(t, tc.inputHistSample.GetSampleSum(), got[0].h.Sum)
 		})
 	}
 }
@@ -857,7 +1090,7 @@ func TestUnregisterMetrics(t *testing.T) {
 	// Check that all metrics can be unregistered, allowing a second manager to be created.
 	for i := 0; i < 2; i++ {
 		opts := Options{}
-		manager, err := NewManager(&opts, nil, nil, reg)
+		manager, err := NewManager(&opts, nil, nil, nil, reg)
 		require.NotNil(t, manager)
 		require.NoError(t, err)
 		// Unregister all metrics.
@@ -893,7 +1126,7 @@ func runManagers(t *testing.T, ctx context.Context) (*discovery.Manager, *Manage require.NoError(t, err) discoveryManager := discovery.NewManager( ctx, - log.NewNopLogger(), + promslog.NewNopLogger(), reg, sdMetrics, discovery.Updatert(100*time.Millisecond), @@ -901,6 +1134,7 @@ func runManagers(t *testing.T, ctx context.Context) (*discovery.Manager, *Manage scrapeManager, err := NewManager( &Options{DiscoveryReloadInterval: model.Duration(100 * time.Millisecond)}, nil, + nil, nopAppendable{}, prometheus.NewRegistry(), ) @@ -1179,7 +1413,7 @@ scrape_configs: } // TestOnlyStaleTargetsAreDropped makes sure that when a job has multiple providers, when one of them should no -// longer discover targets, only the stale targets of that provier are dropped. +// longer discover targets, only the stale targets of that provider are dropped. func TestOnlyStaleTargetsAreDropped(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/scrape/scrape.go b/scrape/scrape.go index 2abd4691d6..22dff0d6d8 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -20,6 +20,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math" "net/http" "reflect" @@ -29,11 +30,10 @@ import ( "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/klauspost/compress/gzip" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/version" "github.com/prometheus/prometheus/config" @@ -47,6 +47,7 @@ import ( "github.com/prometheus/prometheus/model/timestamp" "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/logging" "github.com/prometheus/prometheus/util/pool" ) @@ -63,7 +64,7 @@ var errNameLabelMandatory = fmt.Errorf("missing metric name (%s label)", labels. // scrapePool manages scrapes for sets of targets. type scrapePool struct { appendable storage.Appendable - logger log.Logger + logger *slog.Logger cancel context.CancelFunc httpOpts []config_util.HTTPClientOption @@ -87,9 +88,10 @@ type scrapePool struct { // Constructor for new scrape loops. This is settable for testing convenience. newLoop func(scrapeLoopOptions) loop - noDefaultPort bool - metrics *scrapeMetrics + + scrapeFailureLogger *logging.JSONFileLogger + scrapeFailureLoggerMtx sync.RWMutex } type labelLimits struct { @@ -110,8 +112,10 @@ type scrapeLoopOptions struct { trackTimestampsStaleness bool interval time.Duration timeout time.Duration - scrapeClassicHistograms bool + alwaysScrapeClassicHist bool + convertClassicHistToNHCB bool validationScheme model.ValidationScheme + fallbackScrapeProtocol string mrc []*relabel.Config cache *scrapeCache @@ -123,9 +127,9 @@ const maxAheadTime = 10 * time.Minute // returning an empty label set is interpreted as "drop". type labelsMutator func(labels.Labels) labels.Labels -func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed uint64, logger log.Logger, buffers *pool.Pool, options *Options, metrics *scrapeMetrics) (*scrapePool, error) { +func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed uint64, logger *slog.Logger, buffers *pool.Pool, options *Options, metrics *scrapeMetrics) (*scrapePool, error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName, options.HTTPClientOptions...) 
@@ -146,7 +150,6 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed logger: logger, metrics: metrics, httpOpts: options.HTTPClientOptions, - noDefaultPort: options.NoDefaultPort, } sp.newLoop = func(opts scrapeLoopOptions) loop { // Update the targets retrieval function for metadata to a new scrape cache. @@ -159,7 +162,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed return newScrapeLoop( ctx, opts.scraper, - log.With(logger, "target", opts.target), + logger.With("target", opts.target), buffers, func(l labels.Labels) labels.Labels { return mutateSampleLabels(l, opts.target, opts.honorLabels, opts.mrc) @@ -178,7 +181,8 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed opts.labelLimits, opts.interval, opts.timeout, - opts.scrapeClassicHistograms, + opts.alwaysScrapeClassicHist, + opts.convertClassicHistToNHCB, options.EnableNativeHistogramsIngestion, options.EnableCreatedTimestampZeroIngestion, options.ExtraMetrics, @@ -188,6 +192,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed metrics, options.skipOffsetting, opts.validationScheme, + opts.fallbackScrapeProtocol, ) } sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit)) @@ -218,6 +223,27 @@ func (sp *scrapePool) DroppedTargetsCount() int { return sp.droppedTargetsCount } +func (sp *scrapePool) SetScrapeFailureLogger(l *logging.JSONFileLogger) { + sp.scrapeFailureLoggerMtx.Lock() + defer sp.scrapeFailureLoggerMtx.Unlock() + if l != nil { + l.With("job_name", sp.config.JobName) + } + sp.scrapeFailureLogger = l + + sp.targetMtx.Lock() + defer sp.targetMtx.Unlock() + for _, s := range sp.loops { + s.setScrapeFailureLogger(sp.scrapeFailureLogger) + } +} + +func (sp *scrapePool) getScrapeFailureLogger() *logging.JSONFileLogger { + sp.scrapeFailureLoggerMtx.RLock() + defer sp.scrapeFailureLoggerMtx.RUnlock() + return sp.scrapeFailureLogger +} + // stop terminates all scrape loops and returns after they all terminated. 
func (sp *scrapePool) stop() { sp.mtx.Lock() @@ -303,11 +329,12 @@ func (sp *scrapePool) restartLoops(reuseCache bool) { enableCompression = sp.config.EnableCompression trackTimestampsStaleness = sp.config.TrackTimestampsStaleness mrc = sp.config.MetricRelabelConfigs + fallbackScrapeProtocol = sp.config.ScrapeFallbackProtocol.HeaderMediaType() ) - validationScheme := model.LegacyValidation - if sp.config.MetricNameValidationScheme == config.UTF8ValidationConfig { - validationScheme = model.UTF8Validation + validationScheme := model.UTF8Validation + if sp.config.MetricNameValidationScheme == config.LegacyValidationConfig { + validationScheme = model.LegacyValidation } sp.targetMtx.Lock() @@ -323,12 +350,12 @@ func (sp *scrapePool) restartLoops(reuseCache bool) { } t := sp.activeTargets[fp] - interval, timeout, err := t.intervalAndTimeout(interval, timeout) + targetInterval, targetTimeout, err := t.intervalAndTimeout(interval, timeout) var ( s = &targetScraper{ Target: t, client: sp.client, - timeout: timeout, + timeout: targetTimeout, bodySizeLimit: bodySizeLimit, acceptHeader: acceptHeader(sp.config.ScrapeProtocols, validationScheme), acceptEncodingHeader: acceptEncodingHeader(enableCompression), @@ -346,9 +373,10 @@ func (sp *scrapePool) restartLoops(reuseCache bool) { trackTimestampsStaleness: trackTimestampsStaleness, mrc: mrc, cache: cache, - interval: interval, - timeout: timeout, + interval: targetInterval, + timeout: targetTimeout, validationScheme: validationScheme, + fallbackScrapeProtocol: fallbackScrapeProtocol, }) ) if err != nil { @@ -361,6 +389,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) { wg.Done() newLoop.setForcedError(forcedErr) + newLoop.setScrapeFailureLogger(sp.getScrapeFailureLogger()) newLoop.run(nil) }(oldLoop, newLoop) @@ -404,9 +433,9 @@ func (sp *scrapePool) Sync(tgs []*targetgroup.Group) { sp.droppedTargets = []*Target{} sp.droppedTargetsCount = 0 for _, tg := range tgs { - targets, failures := TargetsFromGroup(tg, sp.config, sp.noDefaultPort, targets, lb) + targets, failures := TargetsFromGroup(tg, sp.config, targets, lb) for _, err := range failures { - level.Error(sp.logger).Log("msg", "Creating target failed", "err", err) + sp.logger.Error("Creating target failed", "err", err) } sp.metrics.targetSyncFailed.WithLabelValues(sp.config.JobName).Add(float64(len(failures))) for _, t := range targets { @@ -457,12 +486,14 @@ func (sp *scrapePool) sync(targets []*Target) { enableCompression = sp.config.EnableCompression trackTimestampsStaleness = sp.config.TrackTimestampsStaleness mrc = sp.config.MetricRelabelConfigs - scrapeClassicHistograms = sp.config.ScrapeClassicHistograms + fallbackScrapeProtocol = sp.config.ScrapeFallbackProtocol.HeaderMediaType() + alwaysScrapeClassicHist = sp.config.AlwaysScrapeClassicHistograms + convertClassicHistToNHCB = sp.config.ConvertClassicHistogramsToNHCB ) - validationScheme := model.LegacyValidation - if sp.config.MetricNameValidationScheme == config.UTF8ValidationConfig { - validationScheme = model.UTF8Validation + validationScheme := model.UTF8Validation + if sp.config.MetricNameValidationScheme == config.LegacyValidationConfig { + validationScheme = model.LegacyValidation } sp.targetMtx.Lock() @@ -498,11 +529,15 @@ func (sp *scrapePool) sync(targets []*Target) { mrc: mrc, interval: interval, timeout: timeout, - scrapeClassicHistograms: scrapeClassicHistograms, + alwaysScrapeClassicHist: alwaysScrapeClassicHist, + convertClassicHistToNHCB: convertClassicHistToNHCB, + validationScheme: validationScheme, + 
fallbackScrapeProtocol: fallbackScrapeProtocol, }) if err != nil { l.setForcedError(err) } + l.setScrapeFailureLogger(sp.scrapeFailureLogger) sp.activeTargets[hash] = t sp.loops[hash] = l @@ -825,6 +860,7 @@ func (s *targetScraper) readResponse(ctx context.Context, resp *http.Response, w type loop interface { run(errc chan<- error) setForcedError(err error) + setScrapeFailureLogger(*logging.JSONFileLogger) stop() getCache() *scrapeCache disableEndOfRunStalenessMarkers() @@ -839,7 +875,9 @@ type cacheEntry struct { type scrapeLoop struct { scraper scraper - l log.Logger + l *slog.Logger + scrapeFailureLogger *logging.JSONFileLogger + scrapeFailureLoggerMtx sync.RWMutex cache *scrapeCache lastScrapeSize int buffers *pool.Pool @@ -855,8 +893,10 @@ type scrapeLoop struct { labelLimits *labelLimits interval time.Duration timeout time.Duration - scrapeClassicHistograms bool + alwaysScrapeClassicHist bool + convertClassicHistToNHCB bool validationScheme model.ValidationScheme + fallbackScrapeProtocol string // Feature flagged options. enableNativeHistogramIngestion bool @@ -1138,7 +1178,7 @@ func (c *scrapeCache) LengthMetadata() int { func newScrapeLoop(ctx context.Context, sc scraper, - l log.Logger, + l *slog.Logger, buffers *pool.Pool, sampleMutator labelsMutator, reportSampleMutator labelsMutator, @@ -1155,7 +1195,8 @@ func newScrapeLoop(ctx context.Context, labelLimits *labelLimits, interval time.Duration, timeout time.Duration, - scrapeClassicHistograms bool, + alwaysScrapeClassicHist bool, + convertClassicHistToNHCB bool, enableNativeHistogramIngestion bool, enableCTZeroIngestion bool, reportExtraMetrics bool, @@ -1165,9 +1206,10 @@ func newScrapeLoop(ctx context.Context, metrics *scrapeMetrics, skipOffsetting bool, validationScheme model.ValidationScheme, + fallbackScrapeProtocol string, ) *scrapeLoop { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } if buffers == nil { buffers = pool.New(1e3, 1e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) }) @@ -1209,7 +1251,8 @@ func newScrapeLoop(ctx context.Context, labelLimits: labelLimits, interval: interval, timeout: timeout, - scrapeClassicHistograms: scrapeClassicHistograms, + alwaysScrapeClassicHist: alwaysScrapeClassicHist, + convertClassicHistToNHCB: convertClassicHistToNHCB, enableNativeHistogramIngestion: enableNativeHistogramIngestion, enableCTZeroIngestion: enableCTZeroIngestion, reportExtraMetrics: reportExtraMetrics, @@ -1217,12 +1260,22 @@ func newScrapeLoop(ctx context.Context, metrics: metrics, skipOffsetting: skipOffsetting, validationScheme: validationScheme, + fallbackScrapeProtocol: fallbackScrapeProtocol, } sl.ctx, sl.cancel = context.WithCancel(ctx) return sl } +func (sl *scrapeLoop) setScrapeFailureLogger(l *logging.JSONFileLogger) { + sl.scrapeFailureLoggerMtx.Lock() + defer sl.scrapeFailureLoggerMtx.Unlock() + if ts, ok := sl.scraper.(fmt.Stringer); ok && l != nil { + l.With("target", ts.String()) + } + sl.scrapeFailureLogger = l +} + func (sl *scrapeLoop) run(errc chan<- error) { if !sl.skipOffsetting { select { @@ -1316,13 +1369,13 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er } err = app.Commit() if err != nil { - level.Error(sl.l).Log("msg", "Scrape commit failed", "err", err) + sl.l.Error("Scrape commit failed", "err", err) } }() defer func() { if err = sl.report(app, appendTime, time.Since(start), total, added, seriesAdded, bytesRead, scrapeErr); err != nil { - level.Warn(sl.l).Log("msg", "Appending scrape report failed", "err", err) + 
sl.l.Warn("Appending scrape report failed", "err", err) } }() @@ -1332,7 +1385,7 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er if _, _, _, err := sl.append(app, []byte{}, "", appendTime); err != nil { app.Rollback() app = sl.appender(sl.appenderCtx) - level.Warn(sl.l).Log("msg", "Append failed", "err", err) + sl.l.Warn("Append failed", "err", err) } if errc != nil { errc <- forcedErr @@ -1365,7 +1418,12 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er } bytesRead = len(b) } else { - level.Debug(sl.l).Log("msg", "Scrape failed", "err", scrapeErr) + sl.l.Debug("Scrape failed", "err", scrapeErr) + sl.scrapeFailureLoggerMtx.RLock() + if sl.scrapeFailureLogger != nil { + sl.scrapeFailureLogger.Log(context.Background(), slog.LevelError, scrapeErr.Error()) + } + sl.scrapeFailureLoggerMtx.RUnlock() if errc != nil { errc <- scrapeErr } @@ -1380,13 +1438,13 @@ func (sl *scrapeLoop) scrapeAndReport(last, appendTime time.Time, errc chan<- er if appErr != nil { app.Rollback() app = sl.appender(sl.appenderCtx) - level.Debug(sl.l).Log("msg", "Append failed", "err", appErr) + sl.l.Debug("Append failed", "err", appErr) // The append failed, probably due to a parse error or sample limit. // Call sl.append again with an empty scrape to trigger stale markers. if _, _, _, err := sl.append(app, []byte{}, "", appendTime); err != nil { app.Rollback() app = sl.appender(sl.appenderCtx) - level.Warn(sl.l).Log("msg", "Append failed", "err", err) + sl.l.Warn("Append failed", "err", err) } } @@ -1459,16 +1517,16 @@ func (sl *scrapeLoop) endOfRunStaleness(last time.Time, ticker *time.Ticker, int } err = app.Commit() if err != nil { - level.Warn(sl.l).Log("msg", "Stale commit failed", "err", err) + sl.l.Warn("Stale commit failed", "err", err) } }() if _, _, _, err = sl.append(app, []byte{}, "", staleTime); err != nil { app.Rollback() app = sl.appender(sl.appenderCtx) - level.Warn(sl.l).Log("msg", "Stale append failed", "err", err) + sl.l.Warn("Stale append failed", "err", err) } if err = sl.reportStale(app, staleTime); err != nil { - level.Warn(sl.l).Log("msg", "Stale report failed", "err", err) + sl.l.Warn("Stale report failed", "err", err) } } @@ -1494,18 +1552,56 @@ type appendErrors struct { numExemplarOutOfOrder int } +// Update the stale markers. +func (sl *scrapeLoop) updateStaleMarkers(app storage.Appender, defTime int64) (err error) { + sl.cache.forEachStale(func(lset labels.Labels) bool { + // Series no longer exposed, mark it stale. + app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true}) + _, err = app.Append(0, lset, defTime, math.Float64frombits(value.StaleNaN)) + app.SetOptions(nil) + switch { + case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrDuplicateSampleForTimestamp): + // Do not count these in logging, as this is expected if a target + // goes away and comes back again with a new scrape loop. + err = nil + } + return err == nil + }) + return +} + func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, ts time.Time) (total, added, seriesAdded int, err error) { - p, err := textparse.New(b, contentType, sl.scrapeClassicHistograms, sl.symbolTable) - if err != nil { - level.Debug(sl.l).Log( - "msg", "Invalid content type on scrape, using prometheus parser as fallback.", + defTime := timestamp.FromTime(ts) + + if len(b) == 0 { + // Empty scrape. Just update the stale makers and swap the cache (but don't flush it). 
+ err = sl.updateStaleMarkers(app, defTime) + sl.cache.iterDone(false) + return + } + + p, err := textparse.New(b, contentType, sl.fallbackScrapeProtocol, sl.alwaysScrapeClassicHist, sl.enableCTZeroIngestion, sl.symbolTable) + if p == nil { + sl.l.Error( + "Failed to determine correct type of scrape target.", "content_type", contentType, + "fallback_media_type", sl.fallbackScrapeProtocol, + "err", err, + ) + return + } + if sl.convertClassicHistToNHCB { + p = textparse.NewNHCBParser(p, sl.symbolTable, sl.alwaysScrapeClassicHist) + } + if err != nil { + sl.l.Debug( + "Invalid content type on scrape, using fallback setting.", + "content_type", contentType, + "fallback_media_type", sl.fallbackScrapeProtocol, "err", err, ) } - var ( - defTime = timestamp.FromTime(ts) appErrs = appendErrors{} sampleLimitErr error bucketLimitErr error @@ -1515,7 +1611,7 @@ func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, metadataChanged bool ) - exemplars := make([]exemplar.Exemplar, 1) + exemplars := make([]exemplar.Exemplar, 0, 1) // updateMetadata updates the current iteration's metadata object and the // metadataChanged value if we have metadata in the scrape cache AND the @@ -1546,9 +1642,8 @@ func (sl *scrapeLoop) append(app storage.Appender, b []byte, contentType string, if err != nil { return } - // Only perform cache cleaning if the scrape was not empty. - // An empty scrape (usually) is used to indicate a failed scrape. - sl.cache.iterDone(len(b) > 0) + // Flush and swap the cache as the scrape was non-empty. + sl.cache.iterDone(true) }() loop: @@ -1657,11 +1752,19 @@ loop: } else { if sl.enableCTZeroIngestion { if ctMs := p.CreatedTimestamp(); ctMs != nil { - ref, err = app.AppendCTZeroSample(ref, lset, t, *ctMs) + if isHistogram && sl.enableNativeHistogramIngestion { + if h != nil { + ref, err = app.AppendHistogramCTZeroSample(ref, lset, t, *ctMs, h, nil) + } else { + ref, err = app.AppendHistogramCTZeroSample(ref, lset, t, *ctMs, nil, fh) + } + } else { + ref, err = app.AppendCTZeroSample(ref, lset, t, *ctMs) + } if err != nil && !errors.Is(err, storage.ErrOutOfOrderCT) { // OOO is a common case, ignoring completely for now. // CT is an experimental feature. For now, we don't need to fail the // scrape on errors updating the created timestamp, log debug. - level.Debug(sl.l).Log("msg", "Error when appending CT in scrape loop", "series", string(met), "ct", *ctMs, "t", t, "err", err) + sl.l.Debug("Error when appending CT in scrape loop", "series", string(met), "ct", *ctMs, "t", t, "err", err) } } } @@ -1686,7 +1789,7 @@ loop: sampleAdded, err = sl.checkAddError(met, err, &sampleLimitErr, &bucketLimitErr, &appErrs) if err != nil { if !errors.Is(err, storage.ErrNotFound) { - level.Debug(sl.l).Log("msg", "Unexpected error", "series", string(met), "err", err) + sl.l.Debug("Unexpected error", "series", string(met), "err", err) } break loop } @@ -1738,21 +1841,21 @@ loop: outOfOrderExemplars++ default: // Since exemplar storage is still experimental, we don't fail the scrape on ingestion errors. - level.Debug(sl.l).Log("msg", "Error while adding exemplar in AddExemplar", "exemplar", fmt.Sprintf("%+v", e), "err", exemplarErr) + sl.l.Debug("Error while adding exemplar in AddExemplar", "exemplar", fmt.Sprintf("%+v", e), "err", exemplarErr) } } if outOfOrderExemplars > 0 && outOfOrderExemplars == len(exemplars) { // Only report out of order exemplars if all are out of order, otherwise this was a partial update // to some existing set of exemplars. 
appErrs.numExemplarOutOfOrder += outOfOrderExemplars - level.Debug(sl.l).Log("msg", "Out of order exemplars", "count", outOfOrderExemplars, "latest", fmt.Sprintf("%+v", exemplars[len(exemplars)-1])) + sl.l.Debug("Out of order exemplars", "count", outOfOrderExemplars, "latest", fmt.Sprintf("%+v", exemplars[len(exemplars)-1])) sl.metrics.targetScrapeExemplarOutOfOrder.Add(float64(outOfOrderExemplars)) } if sl.appendMetadataToWAL && metadataChanged { if _, merr := app.UpdateMetadata(ref, lset, meta); merr != nil { // No need to fail the scrape on errors appending metadata. - level.Debug(sl.l).Log("msg", "Error when appending metadata in scrape loop", "ref", fmt.Sprintf("%d", ref), "metadata", fmt.Sprintf("%+v", meta), "err", merr) + sl.l.Debug("Error when appending metadata in scrape loop", "ref", fmt.Sprintf("%d", ref), "metadata", fmt.Sprintf("%+v", meta), "err", merr) } } } @@ -1771,29 +1874,19 @@ loop: sl.metrics.targetScrapeNativeHistogramBucketLimit.Inc() } if appErrs.numOutOfOrder > 0 { - level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order samples", "num_dropped", appErrs.numOutOfOrder) + sl.l.Warn("Error on ingesting out-of-order samples", "num_dropped", appErrs.numOutOfOrder) } if appErrs.numDuplicates > 0 { - level.Warn(sl.l).Log("msg", "Error on ingesting samples with different value but same timestamp", "num_dropped", appErrs.numDuplicates) + sl.l.Warn("Error on ingesting samples with different value but same timestamp", "num_dropped", appErrs.numDuplicates) } if appErrs.numOutOfBounds > 0 { - level.Warn(sl.l).Log("msg", "Error on ingesting samples that are too old or are too far into the future", "num_dropped", appErrs.numOutOfBounds) + sl.l.Warn("Error on ingesting samples that are too old or are too far into the future", "num_dropped", appErrs.numOutOfBounds) } if appErrs.numExemplarOutOfOrder > 0 { - level.Warn(sl.l).Log("msg", "Error on ingesting out-of-order exemplars", "num_dropped", appErrs.numExemplarOutOfOrder) + sl.l.Warn("Error on ingesting out-of-order exemplars", "num_dropped", appErrs.numExemplarOutOfOrder) } if err == nil { - sl.cache.forEachStale(func(lset labels.Labels) bool { - // Series no longer exposed, mark it stale. - _, err = app.Append(0, lset, defTime, math.Float64frombits(value.StaleNaN)) - switch { - case errors.Is(err, storage.ErrOutOfOrderSample), errors.Is(err, storage.ErrDuplicateSampleForTimestamp): - // Do not count these in logging, as this is expected if a target - // goes away and comes back again with a new scrape loop. 
- err = nil - } - return err == nil - }) + err = sl.updateStaleMarkers(app, defTime) } return } @@ -1808,17 +1901,17 @@ func (sl *scrapeLoop) checkAddError(met []byte, err error, sampleLimitErr, bucke return false, storage.ErrNotFound case errors.Is(err, storage.ErrOutOfOrderSample): appErrs.numOutOfOrder++ - level.Debug(sl.l).Log("msg", "Out of order sample", "series", string(met)) + sl.l.Debug("Out of order sample", "series", string(met)) sl.metrics.targetScrapeSampleOutOfOrder.Inc() return false, nil case errors.Is(err, storage.ErrDuplicateSampleForTimestamp): appErrs.numDuplicates++ - level.Debug(sl.l).Log("msg", "Duplicate sample for timestamp", "series", string(met)) + sl.l.Debug("Duplicate sample for timestamp", "series", string(met)) sl.metrics.targetScrapeSampleDuplicate.Inc() return false, nil case errors.Is(err, storage.ErrOutOfBounds): appErrs.numOutOfBounds++ - level.Debug(sl.l).Log("msg", "Out of bounds metric", "series", string(met)) + sl.l.Debug("Out of bounds metric", "series", string(met)) sl.metrics.targetScrapeSampleOutOfBounds.Inc() return false, nil case errors.Is(err, errSampleLimit): @@ -1891,7 +1984,7 @@ func (sl *scrapeLoop) report(app storage.Appender, start time.Time, duration tim func (sl *scrapeLoop) reportStale(app storage.Appender, start time.Time) (err error) { ts := timestamp.FromTime(start) - + app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true}) stale := math.Float64frombits(value.StaleNaN) b := labels.NewBuilder(labels.EmptyLabels()) diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index b703f21d46..f8d2175244 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -29,16 +29,18 @@ import ( "strings" "sync" "testing" + "text/template" "time" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/google/go-cmp/cmp" + "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" dto "github.com/prometheus/client_model/go" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/config" @@ -53,6 +55,7 @@ import ( "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/prometheus/prometheus/util/logging" "github.com/prometheus/prometheus/util/pool" "github.com/prometheus/prometheus/util/teststorage" "github.com/prometheus/prometheus/util/testutil" @@ -62,10 +65,15 @@ func TestMain(m *testing.M) { testutil.TolerantVerifyLeak(m) } -func newTestScrapeMetrics(t testing.TB) *scrapeMetrics { +func newTestRegistryAndScrapeMetrics(t testing.TB) (*prometheus.Registry, *scrapeMetrics) { reg := prometheus.NewRegistry() metrics, err := newScrapeMetrics(reg) require.NoError(t, err) + return reg, metrics +} + +func newTestScrapeMetrics(t testing.TB) *scrapeMetrics { + _, metrics := newTestRegistryAndScrapeMetrics(t) return metrics } @@ -83,6 +91,97 @@ func TestNewScrapePool(t *testing.T) { require.NotNil(t, sp.newLoop, "newLoop function not initialized.") } +func TestStorageHandlesOutOfOrderTimestamps(t *testing.T) { + // Test with default OutOfOrderTimeWindow (0) + t.Run("Out-Of-Order Sample Disabled", func(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + + runScrapeLoopTest(t, s, false) + }) + + // Test with specific OutOfOrderTimeWindow (600000) + t.Run("Out-Of-Order Sample Enabled", 
func(t *testing.T) { + s := teststorage.New(t, 600000) + defer s.Close() + + runScrapeLoopTest(t, s, true) + }) +} + +func runScrapeLoopTest(t *testing.T, s *teststorage.TestStorage, expectOutOfOrder bool) { + // Create an appender for adding samples to the storage. + app := s.Appender(context.Background()) + capp := &collectResultAppender{next: app} + sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return capp }, 0) + + // Current time for generating timestamps. + now := time.Now() + + // Calculate timestamps for the samples based on the current time. + now = now.Truncate(time.Minute) // round down the now timestamp to the nearest minute + timestampInorder1 := now + timestampOutOfOrder := now.Add(-5 * time.Minute) + timestampInorder2 := now.Add(5 * time.Minute) + + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1`), "text/plain", timestampInorder1) + require.NoError(t, err) + + _, _, _, err = sl.append(slApp, []byte(`metric_a{a="1",b="1"} 2`), "text/plain", timestampOutOfOrder) + require.NoError(t, err) + + _, _, _, err = sl.append(slApp, []byte(`metric_a{a="1",b="1"} 3`), "text/plain", timestampInorder2) + require.NoError(t, err) + + require.NoError(t, slApp.Commit()) + + // Query the samples back from the storage. + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := s.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + + // Use a matcher to filter the metric name. + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "metric_a")) + + var results []floatSample + for series.Next() { + it := series.At().Iterator(nil) + for it.Next() == chunkenc.ValFloat { + t, v := it.At() + results = append(results, floatSample{ + metric: series.At().Labels(), + t: t, + f: v, + }) + } + require.NoError(t, it.Err()) + } + require.NoError(t, series.Err()) + + // Define the expected results + want := []floatSample{ + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: timestamp.FromTime(timestampInorder1), + f: 1, + }, + { + metric: labels.FromStrings("__name__", "metric_a", "a", "1", "b", "1"), + t: timestamp.FromTime(timestampInorder2), + f: 3, + }, + } + + if expectOutOfOrder { + require.NotEqual(t, want, results, "Expected results to include out-of-order sample:\n%s", results) + } else { + require.Equal(t, want, results, "Appended samples not as expected:\n%s", results) + } +} + func TestDroppedTargetsList(t *testing.T) { var ( app = &nopAppendable{} @@ -158,6 +257,9 @@ type testLoop struct { timeout time.Duration } +func (l *testLoop) setScrapeFailureLogger(*logging.JSONFileLogger) { +} + func (l *testLoop) run(errc chan<- error) { if l.runOnce { panic("loop must be started only once") @@ -273,6 +375,7 @@ func TestScrapePoolReload(t *testing.T) { return l } + reg, metrics := newTestRegistryAndScrapeMetrics(t) sp := &scrapePool{ appendable: &nopAppendable{}, activeTargets: map[uint64]*Target{}, @@ -280,7 +383,7 @@ func TestScrapePoolReload(t *testing.T) { newLoop: newLoop, logger: nil, client: http.DefaultClient, - metrics: newTestScrapeMetrics(t), + metrics: metrics, symbolTable: labels.NewSymbolTable(), } @@ -335,6 +438,12 @@ func TestScrapePoolReload(t *testing.T) { require.Equal(t, sp.activeTargets, beforeTargets, "Reloading affected target states unexpectedly") require.Len(t, sp.loops, numTargets, "Unexpected number of stopped loops after 
reload") + + got, err := gatherLabels(reg, "prometheus_target_reload_length_seconds") + require.NoError(t, err) + expectedName, expectedValue := "interval", "3s" + require.Equal(t, [][]*dto.LabelPair{{{Name: &expectedName, Value: &expectedValue}}}, got) + require.Equal(t, 1.0, prom_testutil.ToFloat64(sp.metrics.targetScrapePoolReloads)) } func TestScrapePoolReloadPreserveRelabeledIntervalTimeout(t *testing.T) { @@ -350,6 +459,7 @@ func TestScrapePoolReloadPreserveRelabeledIntervalTimeout(t *testing.T) { } return l } + reg, metrics := newTestRegistryAndScrapeMetrics(t) sp := &scrapePool{ appendable: &nopAppendable{}, activeTargets: map[uint64]*Target{ @@ -363,7 +473,7 @@ func TestScrapePoolReloadPreserveRelabeledIntervalTimeout(t *testing.T) { newLoop: newLoop, logger: nil, client: http.DefaultClient, - metrics: newTestScrapeMetrics(t), + metrics: metrics, symbolTable: labels.NewSymbolTable(), } @@ -371,6 +481,30 @@ func TestScrapePoolReloadPreserveRelabeledIntervalTimeout(t *testing.T) { if err != nil { t.Fatalf("unable to reload configuration: %s", err) } + // Check that the reload metric is labeled with the pool interval, not the overridden interval. + got, err := gatherLabels(reg, "prometheus_target_reload_length_seconds") + require.NoError(t, err) + expectedName, expectedValue := "interval", "3s" + require.Equal(t, [][]*dto.LabelPair{{{Name: &expectedName, Value: &expectedValue}}}, got) +} + +// Gather metrics from the provided Gatherer with specified familyName, +// and return all sets of name/value pairs. +func gatherLabels(g prometheus.Gatherer, familyName string) ([][]*dto.LabelPair, error) { + families, err := g.Gather() + if err != nil { + return nil, err + } + ret := make([][]*dto.LabelPair, 0) + for _, f := range families { + if f.GetName() == familyName { + for _, m := range f.GetMetric() { + ret = append(ret, m.GetLabel()) + } + break + } + } + return ret, nil } func TestScrapePoolTargetLimit(t *testing.T) { @@ -392,7 +526,7 @@ func TestScrapePoolTargetLimit(t *testing.T) { activeTargets: map[uint64]*Target{}, loops: map[uint64]loop{}, newLoop: newLoop, - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), client: http.DefaultClient, metrics: newTestScrapeMetrics(t), symbolTable: labels.NewSymbolTable(), @@ -437,7 +571,7 @@ func TestScrapePoolTargetLimit(t *testing.T) { lerr := l.(*testLoop).getForcedError() if shouldErr { require.Error(t, lerr, "error was expected for %d targets with a limit of %d", targets, limit) - require.Equal(t, fmt.Sprintf("target_limit exceeded (number of targets: %d, limit: %d)", targets, limit), lerr.Error()) + require.EqualError(t, lerr, fmt.Sprintf("target_limit exceeded (number of targets: %d, limit: %d)", targets, limit)) } else { require.NoError(t, lerr) } @@ -659,6 +793,10 @@ func TestScrapePoolScrapeLoopsStarted(t *testing.T) { } func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app func(ctx context.Context) storage.Appender, interval time.Duration) *scrapeLoop { + return newBasicScrapeLoopWithFallback(t, ctx, scraper, app, interval, "") +} + +func newBasicScrapeLoopWithFallback(t testing.TB, ctx context.Context, scraper scraper, app func(ctx context.Context) storage.Appender, interval time.Duration, fallback string) *scrapeLoop { return newScrapeLoop(ctx, scraper, nil, nil, @@ -680,11 +818,13 @@ func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app false, false, false, + false, nil, false, newTestScrapeMetrics(t), false, model.LegacyValidation, + fallback, ) } @@ -745,7 +885,8 @@ 
func TestScrapeLoopStop(t *testing.T) { app = func(ctx context.Context) storage.Appender { return appender } ) - sl := newBasicScrapeLoop(t, context.Background(), scraper, app, 10*time.Millisecond) + // Since we're writing samples directly below we need to provide a protocol fallback. + sl := newBasicScrapeLoopWithFallback(t, context.Background(), scraper, app, 10*time.Millisecond, "text/plain") // Terminate loop after 2 scrapes. numScrapes := 0 @@ -823,11 +964,13 @@ func TestScrapeLoopRun(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, false, model.LegacyValidation, + "", ) // The loop must terminate during the initial offset if the context @@ -909,7 +1052,7 @@ func TestScrapeLoopForcedErr(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) sl := newBasicScrapeLoop(t, ctx, scraper, app, time.Second) - forcedErr := fmt.Errorf("forced err") + forcedErr := errors.New("forced err") sl.setForcedError(forcedErr) scraper.scrapeFunc = func(context.Context, io.Writer) error { @@ -968,11 +1111,13 @@ func TestScrapeLoopMetadata(t *testing.T) { false, false, false, + false, nil, false, scrapeMetrics, false, model.LegacyValidation, + "", ) defer cancel() @@ -1023,7 +1168,7 @@ func TestScrapeLoopSeriesAdded(t *testing.T) { ctx, sl := simpleTestScrapeLoop(t) slApp := sl.appender(ctx) - total, added, seriesAdded, err := sl.append(slApp, []byte("test_metric 1\n"), "", time.Time{}) + total, added, seriesAdded, err := sl.append(slApp, []byte("test_metric 1\n"), "text/plain", time.Time{}) require.NoError(t, err) require.NoError(t, slApp.Commit()) require.Equal(t, 1, total) @@ -1031,7 +1176,7 @@ func TestScrapeLoopSeriesAdded(t *testing.T) { require.Equal(t, 1, seriesAdded) slApp = sl.appender(ctx) - total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1\n"), "", time.Time{}) + total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1\n"), "text/plain", time.Time{}) require.NoError(t, slApp.Commit()) require.NoError(t, err) require.Equal(t, 1, total) @@ -1040,6 +1185,7 @@ func TestScrapeLoopSeriesAdded(t *testing.T) { } func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { + model.NameValidationScheme = model.LegacyValidation s := teststorage.New(t) defer s.Close() ctx, cancel := context.WithCancel(context.Background()) @@ -1060,7 +1206,7 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { } slApp := sl.appender(ctx) - total, added, seriesAdded, err := sl.append(slApp, []byte("test_metric 1\n"), "", time.Time{}) + total, added, seriesAdded, err := sl.append(slApp, []byte("test_metric 1\n"), "text/plain", time.Time{}) require.ErrorContains(t, err, "invalid metric name or label names") require.NoError(t, slApp.Rollback()) require.Equal(t, 1, total) @@ -1084,7 +1230,7 @@ func TestScrapeLoopFailLegacyUnderUTF8(t *testing.T) { sl.validationScheme = model.LegacyValidation slApp := sl.appender(ctx) - total, added, seriesAdded, err := sl.append(slApp, []byte("{\"test.metric\"} 1\n"), "", time.Time{}) + total, added, seriesAdded, err := sl.append(slApp, []byte("{\"test.metric\"} 1\n"), "text/plain", time.Time{}) require.ErrorContains(t, err, "invalid metric name or label names") require.NoError(t, slApp.Rollback()) require.Equal(t, 1, total) @@ -1095,7 +1241,7 @@ func TestScrapeLoopFailLegacyUnderUTF8(t *testing.T) { sl.validationScheme = model.UTF8Validation slApp = sl.appender(ctx) - total, added, seriesAdded, err = sl.append(slApp, []byte("{\"test.metric\"} 1\n"), "", time.Time{}) + total, added, 
seriesAdded, err = sl.append(slApp, []byte("{\"test.metric\"} 1\n"), "text/plain", time.Time{}) require.NoError(t, err) require.Equal(t, 1, total) require.Equal(t, 1, added) @@ -1125,7 +1271,7 @@ func BenchmarkScrapeLoopAppend(b *testing.B) { for i := 0; i < b.N; i++ { ts = ts.Add(time.Second) - _, _, _, _ = sl.append(slApp, metrics, "", ts) + _, _, _, _ = sl.append(slApp, metrics, "text/plain", ts) } } @@ -1144,6 +1290,87 @@ func BenchmarkScrapeLoopAppendOM(b *testing.B) { } } +func TestSetOptionsHandlingStaleness(t *testing.T) { + s := teststorage.New(t, 600000) + defer s.Close() + + signal := make(chan struct{}, 1) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Function to run the scrape loop + runScrapeLoop := func(ctx context.Context, t *testing.T, cue int, action func(*scrapeLoop)) { + var ( + scraper = &testScraper{} + app = func(ctx context.Context) storage.Appender { + return s.Appender(ctx) + } + ) + sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + numScrapes := 0 + scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { + numScrapes++ + if numScrapes == cue { + action(sl) + } + w.Write([]byte(fmt.Sprintf("metric_a{a=\"1\",b=\"1\"} %d\n", 42+numScrapes))) + return nil + } + sl.run(nil) + } + go func() { + runScrapeLoop(ctx, t, 2, func(sl *scrapeLoop) { + go sl.stop() + // Wait a bit then start a new target. + time.Sleep(100 * time.Millisecond) + go func() { + runScrapeLoop(ctx, t, 4, func(_ *scrapeLoop) { + cancel() + }) + signal <- struct{}{} + }() + }) + }() + + select { + case <-signal: + case <-time.After(10 * time.Second): + t.Fatalf("Scrape wasn't stopped.") + } + + ctx1, cancel := context.WithCancel(context.Background()) + defer cancel() + + q, err := s.Querier(0, time.Now().UnixNano()) + + require.NoError(t, err) + defer q.Close() + + series := q.Select(ctx1, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "metric_a")) + + var results []floatSample + for series.Next() { + it := series.At().Iterator(nil) + for it.Next() == chunkenc.ValFloat { + t, v := it.At() + results = append(results, floatSample{ + metric: series.At().Labels(), + t: t, + f: v, + }) + } + require.NoError(t, it.Err()) + } + require.NoError(t, series.Err()) + var c int + for _, s := range results { + if value.IsStaleNaN(s.f) { + c++ + } + } + require.Equal(t, 0, c, "invalid count of staleness markers after stopping the engine") +} + func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { appender := &collectResultAppender{} var ( @@ -1153,7 +1380,8 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrape(t *testing.T) { ) ctx, cancel := context.WithCancel(context.Background()) - sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + // Since we're writing samples directly below we need to provide a protocol fallback. + sl := newBasicScrapeLoopWithFallback(t, ctx, scraper, app, 10*time.Millisecond, "text/plain") // Succeed once, several failures, then stop. numScrapes := 0 @@ -1199,7 +1427,8 @@ func TestScrapeLoopRunCreatesStaleMarkersOnParseFailure(t *testing.T) { ) ctx, cancel := context.WithCancel(context.Background()) - sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + // Since we're writing samples directly below we need to provide a protocol fallback. + sl := newBasicScrapeLoopWithFallback(t, ctx, scraper, app, 10*time.Millisecond, "text/plain") // Succeed once, several failures, then stop. 
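Note on the staleness hunks above: `reportStale` now calls `app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true})` before writing markers, and `TestSetOptionsHandlingStaleness` asserts that a restarted target scraping into an out-of-order window leaves no spurious stale markers behind. A minimal sketch of that pattern, assuming only the APIs shown in this diff (the appender gaining `SetOptions`) plus standard Prometheus model packages:

```go
// Sketch only: write a staleness marker without tripping out-of-order
// errors, mirroring the reportStale hunk in this diff. Storage setup and
// the label set are assumed to exist in the caller.
package sketch

import (
	"context"
	"math"
	"time"

	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/model/timestamp"
	"github.com/prometheus/prometheus/model/value"
	"github.com/prometheus/prometheus/storage"
)

func writeStaleMarker(ctx context.Context, st storage.Storage, lset labels.Labels, when time.Time) error {
	app := st.Appender(ctx)
	// Ask the appender to drop (rather than reject) samples older than what
	// the series already holds, so a racing newer sample wins.
	app.SetOptions(&storage.AppendOptions{DiscardOutOfOrder: true})
	stale := math.Float64frombits(value.StaleNaN)
	if _, err := app.Append(0, lset, timestamp.FromTime(when), stale); err != nil {
		_ = app.Rollback()
		return err
	}
	return app.Commit()
}
```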
scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { @@ -1250,7 +1479,8 @@ func TestScrapeLoopCache(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) // Decreasing the scrape interval could make the test fail, as multiple scrapes might be initiated at identical millisecond timestamps. // See https://github.com/prometheus/prometheus/issues/12727. - sl := newBasicScrapeLoop(t, ctx, scraper, app, 100*time.Millisecond) + // Since we're writing samples directly below we need to provide a protocol fallback. + sl := newBasicScrapeLoopWithFallback(t, ctx, scraper, app, 100*time.Millisecond, "text/plain") numScrapes := 0 @@ -1279,7 +1509,7 @@ func TestScrapeLoopCache(t *testing.T) { case 4: cancel() } - return fmt.Errorf("scrape failed") + return errors.New("scrape failed") } go func() { @@ -1415,7 +1645,7 @@ func TestScrapeLoopAppend(t *testing.T) { now := time.Now() slApp := sl.appender(context.Background()) - _, _, _, err := sl.append(slApp, []byte(test.scrapeLabels), "", now) + _, _, _, err := sl.append(slApp, []byte(test.scrapeLabels), "text/plain", now) require.NoError(t, err) require.NoError(t, slApp.Commit()) @@ -1496,7 +1726,7 @@ func TestScrapeLoopAppendForConflictingPrefixedLabels(t *testing.T) { return mutateSampleLabels(l, &Target{labels: labels.FromStrings(tc.targetLabels...)}, false, nil) } slApp := sl.appender(context.Background()) - _, _, _, err := sl.append(slApp, []byte(tc.exposedLabels), "", time.Date(2000, 1, 1, 1, 0, 0, 0, time.UTC)) + _, _, _, err := sl.append(slApp, []byte(tc.exposedLabels), "text/plain", time.Date(2000, 1, 1, 1, 0, 0, 0, time.UTC)) require.NoError(t, err) require.NoError(t, slApp.Commit()) @@ -1520,7 +1750,8 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { fakeRef := storage.SeriesRef(1) expValue := float64(1) metric := []byte(`metric{n="1"} 1`) - p, warning := textparse.New(metric, "", false, labels.NewSymbolTable()) + p, warning := textparse.New(metric, "text/plain", "", false, false, labels.NewSymbolTable()) + require.NotNil(t, p) require.NoError(t, warning) var lset labels.Labels @@ -1533,7 +1764,7 @@ func TestScrapeLoopAppendCacheEntryButErrNotFound(t *testing.T) { now := time.Now() slApp := sl.appender(context.Background()) - _, _, _, err := sl.append(slApp, metric, "", now) + _, _, _, err := sl.append(slApp, metric, "text/plain", now) require.NoError(t, err) require.NoError(t, slApp.Commit()) @@ -1570,7 +1801,7 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) { now := time.Now() slApp := sl.appender(context.Background()) - total, added, seriesAdded, err := sl.append(app, []byte("metric_a 1\nmetric_b 1\nmetric_c 1\n"), "", now) + total, added, seriesAdded, err := sl.append(app, []byte("metric_a 1\nmetric_b 1\nmetric_c 1\n"), "text/plain", now) require.ErrorIs(t, err, errSampleLimit) require.NoError(t, slApp.Rollback()) require.Equal(t, 3, total) @@ -1599,7 +1830,7 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) { now = time.Now() slApp = sl.appender(context.Background()) - total, added, seriesAdded, err = sl.append(slApp, []byte("metric_a 1\nmetric_b 1\nmetric_c{deleteme=\"yes\"} 1\nmetric_d 1\nmetric_e 1\nmetric_f 1\nmetric_g 1\nmetric_h{deleteme=\"yes\"} 1\nmetric_i{deleteme=\"yes\"} 1\n"), "", now) + total, added, seriesAdded, err = sl.append(slApp, []byte("metric_a 1\nmetric_b 1\nmetric_c{deleteme=\"yes\"} 1\nmetric_d 1\nmetric_e 1\nmetric_f 1\nmetric_g 1\nmetric_h{deleteme=\"yes\"} 1\nmetric_i{deleteme=\"yes\"} 1\n"), "text/plain", now) require.ErrorIs(t, err, errSampleLimit) 
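The pervasive `""` → `"text/plain"` churn in these hunks, together with the widened `textparse.New` signature (now taking a content type plus a fallback), reflects the new `fallback_scrape_protocol` behavior that the two `NoContentType` tests further below pin down. A hypothetical helper, not part of the diff, spelling out the decision those tests assert:

```go
// resolveContentType is illustrative only: an empty Content-Type header is
// fatal only when there is data to parse and no fallback is configured.
package sketch

import "errors"

func resolveContentType(header, fallback string, body []byte) (string, error) {
	if header != "" {
		return header, nil // the target declared its format
	}
	if len(body) == 0 {
		return "", nil // blank scrape or stale-marker append: nothing to parse
	}
	if fallback == "" {
		return "", errors.New("non-compliant scrape target sending blank Content-Type and no fallback_scrape_protocol specified for target")
	}
	return fallback, nil // e.g. "text/plain", as used throughout these tests
}
```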
require.NoError(t, slApp.Rollback())
 	require.Equal(t, 9, total)
@@ -1727,12 +1958,12 @@ func TestScrapeLoop_ChangingMetricString(t *testing.T) {
 	now := time.Now()
 	slApp := sl.appender(context.Background())
-	_, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1`), "", now)
+	_, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1`), "text/plain", now)
 	require.NoError(t, err)
 	require.NoError(t, slApp.Commit())
 	slApp = sl.appender(context.Background())
-	_, _, _, err = sl.append(slApp, []byte(`metric_a{b="1",a="1"} 2`), "", now.Add(time.Minute))
+	_, _, _, err = sl.append(slApp, []byte(`metric_a{b="1",a="1"} 2`), "text/plain", now.Add(time.Minute))
 	require.NoError(t, err)
 	require.NoError(t, slApp.Commit())
@@ -1751,6 +1982,33 @@
 	require.Equal(t, want, capp.resultFloats, "Appended samples not as expected:\n%s", appender)
 }
+func TestScrapeLoopAppendFailsWithNoContentType(t *testing.T) {
+	app := &collectResultAppender{}
+
+	// Explicitly setting the lack of fallback protocol here to make it obvious.
+	sl := newBasicScrapeLoopWithFallback(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0, "")
+
+	now := time.Now()
+	slApp := sl.appender(context.Background())
+	_, _, _, err := sl.append(slApp, []byte("metric_a 1\n"), "", now)
+	// We expect the appropriate error.
+	require.ErrorContains(t, err, "non-compliant scrape target sending blank Content-Type and no fallback_scrape_protocol specified for target", "Expected \"non-compliant scrape\" error but got: %s", err)
+}
+
+func TestScrapeLoopAppendEmptyWithNoContentType(t *testing.T) {
+	// This test ensures there are no errors when we get a blank scrape or just want to append a stale marker.
+	app := &collectResultAppender{}
+
+	// Explicitly setting the lack of fallback protocol here to make it obvious.
+ sl := newBasicScrapeLoopWithFallback(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0, "") + + now := time.Now() + slApp := sl.appender(context.Background()) + _, _, _, err := sl.append(slApp, []byte(""), "", now) + require.NoError(t, err) + require.NoError(t, slApp.Commit()) +} + func TestScrapeLoopAppendStaleness(t *testing.T) { app := &collectResultAppender{} @@ -1758,7 +2016,7 @@ func TestScrapeLoopAppendStaleness(t *testing.T) { now := time.Now() slApp := sl.appender(context.Background()) - _, _, _, err := sl.append(slApp, []byte("metric_a 1\n"), "", now) + _, _, _, err := sl.append(slApp, []byte("metric_a 1\n"), "text/plain", now) require.NoError(t, err) require.NoError(t, slApp.Commit()) @@ -1787,7 +2045,7 @@ func TestScrapeLoopAppendNoStalenessIfTimestamp(t *testing.T) { sl := newBasicScrapeLoop(t, context.Background(), nil, func(ctx context.Context) storage.Appender { return app }, 0) now := time.Now() slApp := sl.appender(context.Background()) - _, _, _, err := sl.append(slApp, []byte("metric_a 1 1000\n"), "", now) + _, _, _, err := sl.append(slApp, []byte("metric_a 1 1000\n"), "text/plain", now) require.NoError(t, err) require.NoError(t, slApp.Commit()) @@ -1813,7 +2071,7 @@ func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { now := time.Now() slApp := sl.appender(context.Background()) - _, _, _, err := sl.append(slApp, []byte("metric_a 1 1000\n"), "", now) + _, _, _, err := sl.append(slApp, []byte("metric_a 1 1000\n"), "text/plain", now) require.NoError(t, err) require.NoError(t, slApp.Commit()) @@ -1840,7 +2098,7 @@ func TestScrapeLoopAppendStalenessIfTrackTimestampStaleness(t *testing.T) { func TestScrapeLoopAppendExemplar(t *testing.T) { tests := []struct { title string - scrapeClassicHistograms bool + alwaysScrapeClassicHist bool enableNativeHistogramsIngestion bool scrapeText string contentType string @@ -1994,7 +2252,8 @@ metric: < `, contentType: "application/vnd.google.protobuf", histograms: []histogramSample{{ - t: 1234568, + t: 1234568, + metric: labels.FromStrings("__name__", "test_histogram"), h: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2108,7 +2367,7 @@ metric: < > `, - scrapeClassicHistograms: true, + alwaysScrapeClassicHist: true, contentType: "application/vnd.google.protobuf", floats: []floatSample{ {metric: labels.FromStrings("__name__", "test_histogram_count"), t: 1234568, f: 175}, @@ -2120,7 +2379,8 @@ metric: < {metric: labels.FromStrings("__name__", "test_histogram_bucket", "le", "+Inf"), t: 1234568, f: 175}, }, histograms: []histogramSample{{ - t: 1234568, + t: 1234568, + metric: labels.FromStrings("__name__", "test_histogram"), h: &histogram.Histogram{ Count: 175, ZeroCount: 2, @@ -2169,7 +2429,7 @@ metric: < sl.reportSampleMutator = func(l labels.Labels) labels.Labels { return mutateReportSampleLabels(l, discoveryLabels) } - sl.scrapeClassicHistograms = test.scrapeClassicHistograms + sl.alwaysScrapeClassicHist = test.alwaysScrapeClassicHist now := time.Now() @@ -2331,7 +2591,7 @@ func TestScrapeLoopAppendGracefullyIfAmendOrOutOfOrderOrOutOfBounds(t *testing.T now := time.Unix(1, 0) slApp := sl.appender(context.Background()) - total, added, seriesAdded, err := sl.append(slApp, []byte("out_of_order 1\namend 1\nnormal 1\nout_of_bounds 1\n"), "", now) + total, added, seriesAdded, err := sl.append(slApp, []byte("out_of_order 1\namend 1\nnormal 1\nout_of_bounds 1\n"), "text/plain", now) require.NoError(t, err) require.NoError(t, slApp.Commit()) @@ -2362,7 +2622,7 @@ func 
TestScrapeLoopOutOfBoundsTimeError(t *testing.T) { now := time.Now().Add(20 * time.Minute) slApp := sl.appender(context.Background()) - total, added, seriesAdded, err := sl.append(slApp, []byte("normal 1\n"), "", now) + total, added, seriesAdded, err := sl.append(slApp, []byte("normal 1\n"), "text/plain", now) require.NoError(t, err) require.NoError(t, slApp.Commit()) require.Equal(t, 1, total) @@ -2376,20 +2636,34 @@ func TestTargetScraperScrapeOK(t *testing.T) { expectedTimeout = "1.5" ) - var protobufParsing bool - var allowUTF8 bool + var ( + protobufParsing bool + allowUTF8 bool + qValuePattern = regexp.MustCompile(`q=([0-9]+(\.\d+)?)`) + ) server := httptest.NewServer( http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { accept := r.Header.Get("Accept") if allowUTF8 { - require.Truef(t, strings.Contains(accept, "escaping=allow-utf-8"), "Expected Accept header to allow utf8, got %q", accept) + require.Containsf(t, accept, "escaping=allow-utf-8", "Expected Accept header to allow utf8, got %q", accept) } if protobufParsing { require.True(t, strings.HasPrefix(accept, "application/vnd.google.protobuf;"), "Expected Accept header to prefer application/vnd.google.protobuf.") } + contentTypes := strings.Split(accept, ",") + for _, ct := range contentTypes { + match := qValuePattern.FindStringSubmatch(ct) + require.Len(t, match, 3) + qValue, err := strconv.ParseFloat(match[1], 64) + require.NoError(t, err, "Error parsing q value") + require.GreaterOrEqual(t, qValue, float64(0)) + require.LessOrEqual(t, qValue, float64(1)) + require.LessOrEqual(t, len(strings.Split(match[1], ".")[1]), 3, "q value should have at most 3 decimal places") + } + timeout := r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds") require.Equal(t, expectedTimeout, timeout, "Expected scrape timeout header.") @@ -2528,7 +2802,7 @@ func TestTargetScrapeScrapeNotFound(t *testing.T) { resp, err := ts.scrape(context.Background()) require.NoError(t, err) _, err = ts.readResponse(context.Background(), resp, io.Discard) - require.Contains(t, err.Error(), "404", "Expected \"404 NotFound\" error but got: %s", err) + require.ErrorContains(t, err, "404", "Expected \"404 NotFound\" error but got: %s", err) } func TestTargetScraperBodySizeLimit(t *testing.T) { @@ -2648,7 +2922,7 @@ func TestScrapeLoop_RespectTimestamps(t *testing.T) { now := time.Now() slApp := sl.appender(context.Background()) - _, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1 0`), "", now) + _, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1 0`), "text/plain", now) require.NoError(t, err) require.NoError(t, slApp.Commit()) @@ -2675,7 +2949,7 @@ func TestScrapeLoop_DiscardTimestamps(t *testing.T) { now := time.Now() slApp := sl.appender(context.Background()) - _, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1 0`), "", now) + _, _, _, err := sl.append(slApp, []byte(`metric_a{a="1",b="1"} 1 0`), "text/plain", now) require.NoError(t, err) require.NoError(t, slApp.Commit()) @@ -2699,7 +2973,7 @@ func TestScrapeLoopDiscardDuplicateLabels(t *testing.T) { // We add a good and a bad metric to check that both are discarded. 
slApp := sl.appender(ctx) - _, _, _, err := sl.append(slApp, []byte("test_metric{le=\"500\"} 1\ntest_metric{le=\"600\",le=\"700\"} 1\n"), "", time.Time{}) + _, _, _, err := sl.append(slApp, []byte("test_metric{le=\"500\"} 1\ntest_metric{le=\"600\",le=\"700\"} 1\n"), "text/plain", time.Time{}) require.Error(t, err) require.NoError(t, slApp.Rollback()) // We need to cycle staleness cache maps after a manual rollback. Otherwise they will have old entries in them, @@ -2714,7 +2988,7 @@ func TestScrapeLoopDiscardDuplicateLabels(t *testing.T) { // We add a good metric to check that it is recorded. slApp = sl.appender(ctx) - _, _, _, err = sl.append(slApp, []byte("test_metric{le=\"500\"} 1\n"), "", time.Time{}) + _, _, _, err = sl.append(slApp, []byte("test_metric{le=\"500\"} 1\n"), "text/plain", time.Time{}) require.NoError(t, err) require.NoError(t, slApp.Commit()) @@ -2743,7 +3017,7 @@ func TestScrapeLoopDiscardUnnamedMetrics(t *testing.T) { defer cancel() slApp := sl.appender(context.Background()) - _, _, _, err := sl.append(slApp, []byte("nok 1\nnok2{drop=\"drop\"} 1\n"), "", time.Time{}) + _, _, _, err := sl.append(slApp, []byte("nok 1\nnok2{drop=\"drop\"} 1\n"), "text/plain", time.Time{}) require.Error(t, err) require.NoError(t, slApp.Rollback()) require.Equal(t, errNameLabelMandatory, err) @@ -2989,7 +3263,7 @@ func TestScrapeAddFast(t *testing.T) { defer cancel() slApp := sl.appender(ctx) - _, _, _, err := sl.append(slApp, []byte("up 1\n"), "", time.Time{}) + _, _, _, err := sl.append(slApp, []byte("up 1\n"), "text/plain", time.Time{}) require.NoError(t, err) require.NoError(t, slApp.Commit()) @@ -3000,7 +3274,7 @@ func TestScrapeAddFast(t *testing.T) { } slApp = sl.appender(ctx) - _, _, _, err = sl.append(slApp, []byte("up 1\n"), "", time.Time{}.Add(time.Second)) + _, _, _, err = sl.append(slApp, []byte("up 1\n"), "text/plain", time.Time{}.Add(time.Second)) require.NoError(t, err) require.NoError(t, slApp.Commit()) } @@ -3040,7 +3314,7 @@ func TestReuseCacheRace(t *testing.T) { func TestCheckAddError(t *testing.T) { var appErrs appendErrors - sl := scrapeLoop{l: log.NewNopLogger(), metrics: newTestScrapeMetrics(t)} + sl := scrapeLoop{l: promslog.NewNopLogger(), metrics: newTestScrapeMetrics(t)} sl.checkAddError(nil, storage.ErrOutOfOrderSample, nil, nil, &appErrs) require.Equal(t, 1, appErrs.numOutOfOrder) } @@ -3055,14 +3329,15 @@ func TestScrapeReportSingleAppender(t *testing.T) { ) ctx, cancel := context.WithCancel(context.Background()) - sl := newBasicScrapeLoop(t, ctx, scraper, s.Appender, 10*time.Millisecond) + // Since we're writing samples directly below we need to provide a protocol fallback. 
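The `log.NewNopLogger()` → `promslog.NewNopLogger()` swap here is part of the repo-wide migration from go-kit/log to the standard library's log/slog via prometheus/common's promslog package. Assuming that package's API, the call-site change seen in the scrape.go hunks looks like this:

```go
package main

import "github.com/prometheus/common/promslog"

func main() {
	// promslog returns a standard *slog.Logger, so the former go-kit style
	//   level.Debug(sl.l).Log("msg", "Out of order sample", "series", s)
	// collapses to the plain slog call used throughout this diff.
	logger := promslog.New(&promslog.Config{})
	logger.Debug("Out of order sample", "series", `metric_a{a="1",b="1"}`)
}
```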
+ sl := newBasicScrapeLoopWithFallback(t, ctx, scraper, s.Appender, 10*time.Millisecond, "text/plain") numScrapes := 0 scraper.scrapeFunc = func(ctx context.Context, w io.Writer) error { numScrapes++ if numScrapes%4 == 0 { - return fmt.Errorf("scrape failed") + return errors.New("scrape failed") } w.Write([]byte("metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n")) return nil @@ -3111,18 +3386,7 @@ func TestScrapeReportLimit(t *testing.T) { ScrapeTimeout: model.Duration(100 * time.Millisecond), } - var ( - scrapes int - scrapedTwice = make(chan bool) - ) - - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - fmt.Fprint(w, "metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n") - scrapes++ - if scrapes == 2 { - close(scrapedTwice) - } - })) + ts, scrapedTwice := newScrapableServer("metric_a 44\nmetric_b 44\nmetric_c 44\nmetric_d 44\n") defer ts.Close() sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) @@ -3165,6 +3429,52 @@ func TestScrapeReportLimit(t *testing.T) { require.True(t, found) } +func TestScrapeUTF8(t *testing.T) { + s := teststorage.New(t) + defer s.Close() + model.NameValidationScheme = model.UTF8Validation + t.Cleanup(func() { model.NameValidationScheme = model.LegacyValidation }) + + cfg := &config.ScrapeConfig{ + JobName: "test", + Scheme: "http", + ScrapeInterval: model.Duration(100 * time.Millisecond), + ScrapeTimeout: model.Duration(100 * time.Millisecond), + MetricNameValidationScheme: config.UTF8ValidationConfig, + } + ts, scrapedTwice := newScrapableServer("{\"with.dots\"} 42\n") + defer ts.Close() + + sp, err := newScrapePool(cfg, s, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + require.NoError(t, err) + defer sp.stop() + + testURL, err := url.Parse(ts.URL) + require.NoError(t, err) + sp.Sync([]*targetgroup.Group{ + { + Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}}, + }, + }) + + select { + case <-time.After(5 * time.Second): + t.Fatalf("target was not scraped twice") + case <-scrapedTwice: + // If the target has been scraped twice, report samples from the first + // scrape have been inserted in the database. + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := s.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "with.dots")) + + require.True(t, series.Next(), "series not found in tsdb") +} + func TestScrapeLoopLabelLimit(t *testing.T) { tests := []struct { title string @@ -3247,7 +3557,7 @@ func TestScrapeLoopLabelLimit(t *testing.T) { sl.labelLimits = &test.labelLimits slApp := sl.appender(context.Background()) - _, _, _, err := sl.append(slApp, []byte(test.scrapeLabels), "", time.Now()) + _, _, _, err := sl.append(slApp, []byte(test.scrapeLabels), "text/plain", time.Now()) t.Logf("Test:%s", test.title) if test.expectErr { @@ -3361,16 +3671,7 @@ test_summary_count 199 // The expected "quantile" values do not have the trailing ".0". 
expectedQuantileValues := []string{"0.5", "0.9", "0.95", "0.99", "1"} - scrapeCount := 0 - scraped := make(chan bool) - - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - fmt.Fprint(w, metricsText) - scrapeCount++ - if scrapeCount > 2 { - close(scraped) - } - })) + ts, scrapedTwice := newScrapableServer(metricsText) defer ts.Close() sp, err := newScrapePool(config, simpleStorage, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) @@ -3389,7 +3690,7 @@ test_summary_count 199 select { case <-time.After(5 * time.Second): t.Fatalf("target was not scraped") - case <-scraped: + case <-scrapedTwice: } ctx, cancel := context.WithCancel(context.Background()) @@ -3421,6 +3722,524 @@ test_summary_count 199 checkValues("quantile", expectedQuantileValues, series) } +// Testing whether we can automatically convert scraped classic histograms into native histograms with custom buckets. +func TestConvertClassicHistogramsToNHCB(t *testing.T) { + genTestCounterText := func(name string, value int, withMetadata bool) string { + if withMetadata { + return fmt.Sprintf(` +# HELP %s some help text +# TYPE %s counter +%s{address="0.0.0.0",port="5001"} %d +`, name, name, name, value) + } + return fmt.Sprintf(` +%s %d +`, name, value) + } + genTestHistText := func(name string, withMetadata bool) string { + data := map[string]interface{}{ + "name": name, + } + b := &bytes.Buffer{} + if withMetadata { + template.Must(template.New("").Parse(` +# HELP {{.name}} This is a histogram with default buckets +# TYPE {{.name}} histogram +`)).Execute(b, data) + } + template.Must(template.New("").Parse(` +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.005"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.01"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.025"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.05"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.1"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.25"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="0.5"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="1"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="2.5"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="5"} 0 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="10"} 1 +{{.name}}_bucket{address="0.0.0.0",port="5001",le="+Inf"} 1 +{{.name}}_sum{address="0.0.0.0",port="5001"} 10 +{{.name}}_count{address="0.0.0.0",port="5001"} 1 +`)).Execute(b, data) + return b.String() + } + genTestCounterProto := func(name string, value int) string { + return fmt.Sprintf(` +name: "%s" +help: "some help text" +type: COUNTER +metric: < + label: < + name: "address" + value: "0.0.0.0" + > + label: < + name: "port" + value: "5001" + > + counter: < + value: %d + > +> +`, name, value) + } + genTestHistProto := func(name string, hasClassic, hasExponential bool) string { + var classic string + if hasClassic { + classic = ` +bucket: < + cumulative_count: 0 + upper_bound: 0.005 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.01 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.025 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.05 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.1 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.25 +> +bucket: < + cumulative_count: 0 + upper_bound: 0.5 +> +bucket: < + cumulative_count: 0 + upper_bound: 1 +> +bucket: < + cumulative_count: 0 + upper_bound: 2.5 +> +bucket: < + cumulative_count: 0 + upper_bound: 5 +> +bucket: < + cumulative_count: 1 + upper_bound: 10 +>` + } + var 
expo string + if hasExponential { + expo = ` +schema: 3 +zero_threshold: 2.938735877055719e-39 +zero_count: 0 +positive_span: < + offset: 2 + length: 1 +> +positive_delta: 1` + } + return fmt.Sprintf(` +name: "%s" +help: "This is a histogram with default buckets" +type: HISTOGRAM +metric: < + label: < + name: "address" + value: "0.0.0.0" + > + label: < + name: "port" + value: "5001" + > + histogram: < + sample_count: 1 + sample_sum: 10 + %s + %s + > + timestamp_ms: 1234568 +> +`, name, classic, expo) + } + + metricsTexts := map[string]struct { + text []string + contentType string + hasClassic bool + hasExponential bool + }{ + "text": { + text: []string{ + genTestCounterText("test_metric_1", 1, true), + genTestCounterText("test_metric_1_count", 1, true), + genTestCounterText("test_metric_1_sum", 1, true), + genTestCounterText("test_metric_1_bucket", 1, true), + genTestHistText("test_histogram_1", true), + genTestCounterText("test_metric_2", 1, true), + genTestCounterText("test_metric_2_count", 1, true), + genTestCounterText("test_metric_2_sum", 1, true), + genTestCounterText("test_metric_2_bucket", 1, true), + genTestHistText("test_histogram_2", true), + genTestCounterText("test_metric_3", 1, true), + genTestCounterText("test_metric_3_count", 1, true), + genTestCounterText("test_metric_3_sum", 1, true), + genTestCounterText("test_metric_3_bucket", 1, true), + genTestHistText("test_histogram_3", true), + }, + hasClassic: true, + }, + "text, in different order": { + text: []string{ + genTestCounterText("test_metric_1", 1, true), + genTestCounterText("test_metric_1_count", 1, true), + genTestCounterText("test_metric_1_sum", 1, true), + genTestCounterText("test_metric_1_bucket", 1, true), + genTestHistText("test_histogram_1", true), + genTestCounterText("test_metric_2", 1, true), + genTestCounterText("test_metric_2_count", 1, true), + genTestCounterText("test_metric_2_sum", 1, true), + genTestCounterText("test_metric_2_bucket", 1, true), + genTestHistText("test_histogram_2", true), + genTestHistText("test_histogram_3", true), + genTestCounterText("test_metric_3", 1, true), + genTestCounterText("test_metric_3_count", 1, true), + genTestCounterText("test_metric_3_sum", 1, true), + genTestCounterText("test_metric_3_bucket", 1, true), + }, + hasClassic: true, + }, + "protobuf": { + text: []string{ + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_1", true, false), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_2", true, false), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + genTestHistProto("test_histogram_3", true, false), + }, + contentType: "application/vnd.google.protobuf", + hasClassic: true, + }, + "protobuf, in different order": { + text: []string{ + genTestHistProto("test_histogram_1", true, false), + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_2", true, false), + genTestCounterProto("test_metric_2", 1), + 
genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_3", true, false), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + }, + contentType: "application/vnd.google.protobuf", + hasClassic: true, + }, + "protobuf, with additional native exponential histogram": { + text: []string{ + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_1", true, true), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_2", true, true), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + genTestHistProto("test_histogram_3", true, true), + }, + contentType: "application/vnd.google.protobuf", + hasClassic: true, + hasExponential: true, + }, + "protobuf, with only native exponential histogram": { + text: []string{ + genTestCounterProto("test_metric_1", 1), + genTestCounterProto("test_metric_1_count", 1), + genTestCounterProto("test_metric_1_sum", 1), + genTestCounterProto("test_metric_1_bucket", 1), + genTestHistProto("test_histogram_1", false, true), + genTestCounterProto("test_metric_2", 1), + genTestCounterProto("test_metric_2_count", 1), + genTestCounterProto("test_metric_2_sum", 1), + genTestCounterProto("test_metric_2_bucket", 1), + genTestHistProto("test_histogram_2", false, true), + genTestCounterProto("test_metric_3", 1), + genTestCounterProto("test_metric_3_count", 1), + genTestCounterProto("test_metric_3_sum", 1), + genTestCounterProto("test_metric_3_bucket", 1), + genTestHistProto("test_histogram_3", false, true), + }, + contentType: "application/vnd.google.protobuf", + hasExponential: true, + }, + } + + checkBucketValues := func(expectedCount int, series storage.SeriesSet) { + labelName := "le" + var expectedValues []string + if expectedCount > 0 { + expectedValues = []string{"0.005", "0.01", "0.025", "0.05", "0.1", "0.25", "0.5", "1.0", "2.5", "5.0", "10.0", "+Inf"} + } + foundLeValues := map[string]bool{} + + for series.Next() { + s := series.At() + v := s.Labels().Get(labelName) + require.NotContains(t, foundLeValues, v, "duplicate label value found") + foundLeValues[v] = true + } + + require.Equal(t, len(expectedValues), len(foundLeValues), "unexpected number of label values, expected %v but found %v", expectedValues, foundLeValues) + for _, v := range expectedValues { + require.Contains(t, foundLeValues, v, "label value not found") + } + } + + // Checks that the expected series is present and runs a basic sanity check of the float values. 
+ checkFloatSeries := func(series storage.SeriesSet, expectedCount int, expectedFloat float64) { + count := 0 + for series.Next() { + i := series.At().Iterator(nil) + loop: + for { + switch i.Next() { + case chunkenc.ValNone: + break loop + case chunkenc.ValFloat: + _, f := i.At() + require.Equal(t, expectedFloat, f) + case chunkenc.ValHistogram: + panic("unexpected value type: histogram") + case chunkenc.ValFloatHistogram: + panic("unexpected value type: float histogram") + default: + panic("unexpected value type") + } + } + count++ + } + require.Equal(t, expectedCount, count, "number of float series not as expected") + } + + // Checks that the expected series is present and runs a basic sanity check of the histogram values. + checkHistSeries := func(series storage.SeriesSet, expectedCount int, expectedSchema int32) { + count := 0 + for series.Next() { + i := series.At().Iterator(nil) + loop: + for { + switch i.Next() { + case chunkenc.ValNone: + break loop + case chunkenc.ValFloat: + panic("unexpected value type: float") + case chunkenc.ValHistogram: + _, h := i.AtHistogram(nil) + require.Equal(t, expectedSchema, h.Schema) + require.Equal(t, uint64(1), h.Count) + require.Equal(t, 10.0, h.Sum) + case chunkenc.ValFloatHistogram: + _, h := i.AtFloatHistogram(nil) + require.Equal(t, expectedSchema, h.Schema) + require.Equal(t, uint64(1), h.Count) + require.Equal(t, 10.0, h.Sum) + default: + panic("unexpected value type") + } + } + count++ + } + require.Equal(t, expectedCount, count, "number of histogram series not as expected") + } + + for metricsTextName, metricsText := range metricsTexts { + for name, tc := range map[string]struct { + alwaysScrapeClassicHistograms bool + convertClassicHistToNHCB bool + }{ + "convert with scrape": { + alwaysScrapeClassicHistograms: true, + convertClassicHistToNHCB: true, + }, + "convert without scrape": { + alwaysScrapeClassicHistograms: false, + convertClassicHistToNHCB: true, + }, + "scrape without convert": { + alwaysScrapeClassicHistograms: true, + convertClassicHistToNHCB: false, + }, + "neither scrape nor convert": { + alwaysScrapeClassicHistograms: false, + convertClassicHistToNHCB: false, + }, + } { + var expectedClassicHistCount, expectedNativeHistCount int + var expectCustomBuckets bool + if metricsText.hasExponential { + expectedNativeHistCount = 1 + expectCustomBuckets = false + expectedClassicHistCount = 0 + if metricsText.hasClassic && tc.alwaysScrapeClassicHistograms { + expectedClassicHistCount = 1 + } + } else if metricsText.hasClassic { + switch { + case tc.alwaysScrapeClassicHistograms && tc.convertClassicHistToNHCB: + expectedClassicHistCount = 1 + expectedNativeHistCount = 1 + expectCustomBuckets = true + case !tc.alwaysScrapeClassicHistograms && tc.convertClassicHistToNHCB: + expectedClassicHistCount = 0 + expectedNativeHistCount = 1 + expectCustomBuckets = true + case !tc.convertClassicHistToNHCB: + expectedClassicHistCount = 1 + expectedNativeHistCount = 0 + } + } + + t.Run(fmt.Sprintf("%s with %s", name, metricsTextName), func(t *testing.T) { + simpleStorage := teststorage.New(t) + defer simpleStorage.Close() + + config := &config.ScrapeConfig{ + JobName: "test", + SampleLimit: 100, + Scheme: "http", + ScrapeInterval: model.Duration(50 * time.Millisecond), + ScrapeTimeout: model.Duration(25 * time.Millisecond), + AlwaysScrapeClassicHistograms: tc.alwaysScrapeClassicHistograms, + ConvertClassicHistogramsToNHCB: tc.convertClassicHistToNHCB, + } + + scrapeCount := 0 + scraped := make(chan bool) + + ts := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if metricsText.contentType != "" { + w.Header().Set("Content-Type", `application/vnd.google.protobuf; proto=io.prometheus.client.MetricFamily; encoding=delimited`) + for _, text := range metricsText.text { + buf := &bytes.Buffer{} + // In case of protobuf, we have to create the binary representation. + pb := &dto.MetricFamily{} + // From text to proto message. + require.NoError(t, proto.UnmarshalText(text, pb)) + // From proto message to binary protobuf. + protoBuf, err := proto.Marshal(pb) + require.NoError(t, err) + + // Write first length, then binary protobuf. + varintBuf := binary.AppendUvarint(nil, uint64(len(protoBuf))) + buf.Write(varintBuf) + buf.Write(protoBuf) + w.Write(buf.Bytes()) + } + } else { + for _, text := range metricsText.text { + fmt.Fprint(w, text) + } + } + scrapeCount++ + if scrapeCount > 2 { + close(scraped) + } + })) + defer ts.Close() + + sp, err := newScrapePool(config, simpleStorage, 0, nil, nil, &Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond), EnableNativeHistogramsIngestion: true}, newTestScrapeMetrics(t)) + require.NoError(t, err) + defer sp.stop() + + testURL, err := url.Parse(ts.URL) + require.NoError(t, err) + sp.Sync([]*targetgroup.Group{ + { + Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}}, + }, + }) + require.Len(t, sp.ActiveTargets(), 1) + + select { + case <-time.After(5 * time.Second): + t.Fatalf("target was not scraped") + case <-scraped: + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := simpleStorage.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + + var series storage.SeriesSet + + for i := 1; i <= 3; i++ { + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d_count", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d_sum", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_metric_%d_bucket", i))) + checkFloatSeries(series, 1, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d_count", i))) + checkFloatSeries(series, expectedClassicHistCount, 1.) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d_sum", i))) + checkFloatSeries(series, expectedClassicHistCount, 10.) 
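TestConvertClassicHistogramsToNHCB drives four flag combinations against classic-only, classic-plus-exponential, and exponential-only exposition. For the classic-only case, the expectations reduce to this matrix, condensed from the switch above:

```go
// Expected series per flag combination when the target exposes only classic
// histograms; "NHCB" = native histogram with custom buckets
// (histogram.CustomBucketsSchema).
package sketch

var nhcbExpectations = map[string]struct{ classicSeries, nhcbSeries int }{
	"scrape+convert": {classicSeries: 1, nhcbSeries: 1}, // keep classic, also write NHCB
	"convert only":   {classicSeries: 0, nhcbSeries: 1}, // NHCB replaces classic
	"scrape only":    {classicSeries: 1, nhcbSeries: 0},
	"neither":        {classicSeries: 1, nhcbSeries: 0}, // unchanged default
}
```

When the exposition also carries an exponential histogram, it wins: the NHCB conversion is skipped and the classic series survive only if AlwaysScrapeClassicHistograms is set.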
+ + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d_bucket", i))) + checkBucketValues(expectedClassicHistCount, series) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", fmt.Sprintf("test_histogram_%d", i))) + + var expectedSchema int32 + if expectCustomBuckets { + expectedSchema = histogram.CustomBucketsSchema + } else { + expectedSchema = 3 + } + checkHistSeries(series, expectedNativeHistCount, expectedSchema) + } + }) + } + } +} + func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t *testing.T) { appender := &collectResultAppender{} var ( @@ -3430,7 +4249,8 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t * ) ctx, cancel := context.WithCancel(context.Background()) - sl := newBasicScrapeLoop(t, ctx, scraper, app, 10*time.Millisecond) + // Since we're writing samples directly below we need to provide a protocol fallback. + sl := newBasicScrapeLoopWithFallback(t, ctx, scraper, app, 10*time.Millisecond, "text/plain") sl.trackTimestampsStaleness = true // Succeed once, several failures, then stop. numScrapes := 0 @@ -3458,7 +4278,6 @@ func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t * case <-time.After(5 * time.Second): t.Fatalf("Scrape wasn't stopped.") } - // 1 successfully scraped sample, 1 stale marker after first fail, 5 report samples for // each scrape successful or not. require.Len(t, appender.resultFloats, 27, "Appended samples not as expected:\n%s", appender) @@ -3676,7 +4495,7 @@ func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) { ctx, sl := simpleTestScrapeLoop(t) slApp := sl.appender(ctx) - total, added, seriesAdded, err := sl.append(slApp, []byte("test_metric 1\ntest_metric 2\ntest_metric 3\n"), "", time.Time{}) + total, added, seriesAdded, err := sl.append(slApp, []byte("test_metric 1\ntest_metric 2\ntest_metric 3\n"), "text/plain", time.Time{}) require.NoError(t, err) require.NoError(t, slApp.Commit()) require.Equal(t, 3, total) @@ -3685,7 +4504,7 @@ func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) { require.Equal(t, 2.0, prom_testutil.ToFloat64(sl.metrics.targetScrapeSampleDuplicate)) slApp = sl.appender(ctx) - total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1\ntest_metric 1\ntest_metric 1\n"), "", time.Time{}) + total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1\ntest_metric 1\ntest_metric 1\n"), "text/plain", time.Time{}) require.NoError(t, err) require.NoError(t, slApp.Commit()) require.Equal(t, 3, total) @@ -3695,7 +4514,7 @@ func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) { // When different timestamps are supplied, multiple samples are accepted. 
slApp = sl.appender(ctx) - total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1 1001\ntest_metric 1 1002\ntest_metric 1 1003\n"), "", time.Time{}) + total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1 1001\ntest_metric 1 1002\ntest_metric 1 1003\n"), "text/plain", time.Time{}) require.NoError(t, err) require.NoError(t, slApp.Commit()) require.Equal(t, 3, total) @@ -3726,7 +4545,9 @@ func TestNativeHistogramMaxSchemaSet(t *testing.T) { }, } for name, tc := range testcases { + tc := tc t.Run(name, func(t *testing.T) { + t.Parallel() testNativeHistogramMaxSchemaSet(t, tc.minBucketFactor, tc.expectedSchema) }) } @@ -3768,8 +4589,9 @@ func testNativeHistogramMaxSchemaSet(t *testing.T, minBucketFactor string, expec // Create a scrape loop with the HTTP server as the target. configStr := fmt.Sprintf(` global: - scrape_interval: 1s - scrape_timeout: 1s + metric_name_validation_scheme: legacy + scrape_interval: 50ms + scrape_timeout: 25ms scrape_configs: - job_name: test %s @@ -3782,9 +4604,9 @@ scrape_configs: s.DB.EnableNativeHistograms() reg := prometheus.NewRegistry() - mng, err := NewManager(&Options{EnableNativeHistogramsIngestion: true}, nil, s, reg) + mng, err := NewManager(&Options{DiscoveryReloadInterval: model.Duration(10 * time.Millisecond), EnableNativeHistogramsIngestion: true}, nil, nil, s, reg) require.NoError(t, err) - cfg, err := config.Load(configStr, false, log.NewNopLogger()) + cfg, err := config.Load(configStr, promslog.NewNopLogger()) require.NoError(t, err) mng.ApplyConfig(cfg) tsets := make(chan map[string][]*targetgroup.Group) @@ -3813,7 +4635,7 @@ scrape_configs: countSeries++ } return countSeries > 0 - }, 15*time.Second, 100*time.Millisecond) + }, 5*time.Second, 100*time.Millisecond) // Check that native histogram schema is as expected. q, err := s.Querier(0, math.MaxInt64) @@ -3838,3 +4660,174 @@ scrape_configs: require.Equal(t, expectedSchema, h.Schema) } } + +func TestTargetScrapeConfigWithLabels(t *testing.T) { + const ( + configTimeout = 1500 * time.Millisecond + expectedTimeout = "1.5" + expectedTimeoutLabel = "1s500ms" + secondTimeout = 500 * time.Millisecond + secondTimeoutLabel = "500ms" + expectedParam = "value1" + secondParam = "value2" + expectedPath = "/metric-ok" + secondPath = "/metric-nok" + httpScheme = "http" + paramLabel = "__param_param" + jobName = "test" + ) + + createTestServer := func(t *testing.T, done chan struct{}) *url.URL { + server := httptest.NewServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + defer close(done) + require.Equal(t, expectedTimeout, r.Header.Get("X-Prometheus-Scrape-Timeout-Seconds")) + require.Equal(t, expectedParam, r.URL.Query().Get("param")) + require.Equal(t, expectedPath, r.URL.Path) + + w.Header().Set("Content-Type", `text/plain; version=0.0.4`) + w.Write([]byte("metric_a 1\nmetric_b 2\n")) + }), + ) + t.Cleanup(server.Close) + serverURL, err := url.Parse(server.URL) + require.NoError(t, err) + return serverURL + } + + run := func(t *testing.T, cfg *config.ScrapeConfig, targets []*targetgroup.Group) chan struct{} { + done := make(chan struct{}) + srvURL := createTestServer(t, done) + + // Update target addresses to use the dynamically created server URL. 
+ for _, target := range targets { + for i := range target.Targets { + target.Targets[i][model.AddressLabel] = model.LabelValue(srvURL.Host) + } + } + + sp, err := newScrapePool(cfg, &nopAppendable{}, 0, nil, nil, &Options{}, newTestScrapeMetrics(t)) + require.NoError(t, err) + t.Cleanup(sp.stop) + + sp.Sync(targets) + return done + } + + cases := []struct { + name string + cfg *config.ScrapeConfig + targets []*targetgroup.Group + }{ + { + name: "Everything in scrape config", + cfg: &config.ScrapeConfig{ + ScrapeInterval: model.Duration(2 * time.Second), + ScrapeTimeout: model.Duration(configTimeout), + Params: url.Values{"param": []string{expectedParam}}, + JobName: jobName, + Scheme: httpScheme, + MetricsPath: expectedPath, + }, + targets: []*targetgroup.Group{ + { + Targets: []model.LabelSet{ + {model.AddressLabel: model.LabelValue("")}, + }, + }, + }, + }, + { + name: "Overridden in target", + cfg: &config.ScrapeConfig{ + ScrapeInterval: model.Duration(2 * time.Second), + ScrapeTimeout: model.Duration(secondTimeout), + JobName: jobName, + Scheme: httpScheme, + MetricsPath: secondPath, + Params: url.Values{"param": []string{secondParam}}, + }, + targets: []*targetgroup.Group{ + { + Targets: []model.LabelSet{ + { + model.AddressLabel: model.LabelValue(""), + model.ScrapeTimeoutLabel: expectedTimeoutLabel, + model.MetricsPathLabel: expectedPath, + paramLabel: expectedParam, + }, + }, + }, + }, + }, + { + name: "Overridden in relabel_config", + cfg: &config.ScrapeConfig{ + ScrapeInterval: model.Duration(2 * time.Second), + ScrapeTimeout: model.Duration(secondTimeout), + JobName: jobName, + Scheme: httpScheme, + MetricsPath: secondPath, + Params: url.Values{"param": []string{secondParam}}, + RelabelConfigs: []*relabel.Config{ + { + Action: relabel.DefaultRelabelConfig.Action, + Regex: relabel.DefaultRelabelConfig.Regex, + SourceLabels: relabel.DefaultRelabelConfig.SourceLabels, + TargetLabel: model.ScrapeTimeoutLabel, + Replacement: expectedTimeoutLabel, + }, + { + Action: relabel.DefaultRelabelConfig.Action, + Regex: relabel.DefaultRelabelConfig.Regex, + SourceLabels: relabel.DefaultRelabelConfig.SourceLabels, + TargetLabel: paramLabel, + Replacement: expectedParam, + }, + { + Action: relabel.DefaultRelabelConfig.Action, + Regex: relabel.DefaultRelabelConfig.Regex, + SourceLabels: relabel.DefaultRelabelConfig.SourceLabels, + TargetLabel: model.MetricsPathLabel, + Replacement: expectedPath, + }, + }, + }, + targets: []*targetgroup.Group{ + { + Targets: []model.LabelSet{ + { + model.AddressLabel: model.LabelValue(""), + model.ScrapeTimeoutLabel: secondTimeoutLabel, + model.MetricsPathLabel: secondPath, + paramLabel: secondParam, + }, + }, + }, + }, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + select { + case <-run(t, c.cfg, c.targets): + case <-time.After(10 * time.Second): + t.Fatal("timeout after 10 seconds") + } + }) + } +} + +func newScrapableServer(scrapeText string) (s *httptest.Server, scrapedTwice chan bool) { + var scrapes int + scrapedTwice = make(chan bool) + + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, scrapeText) + scrapes++ + if scrapes == 2 { + close(scrapedTwice) + } + })), scrapedTwice +} diff --git a/scrape/target.go b/scrape/target.go index 9ef4471fbd..06d4737ff9 100644 --- a/scrape/target.go +++ b/scrape/target.go @@ -17,7 +17,6 @@ import ( "errors" "fmt" "hash/fnv" - "net" "net/url" "strings" "sync" @@ -424,7 +423,7 @@ func (app *maxSchemaAppender) AppendHistogram(ref 
storage.SeriesRef, lset labels // PopulateLabels builds a label set from the given label set and scrape configuration. // It returns a label set before relabeling was applied as the second return value. // Returns the original discovered label set found before relabelling was applied if the target is dropped during relabeling. -func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort bool) (res, orig labels.Labels, err error) { +func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig) (res, orig labels.Labels, err error) { // Copy labels into the labelset for the target if they are not set already. scrapeLabels := []labels.Label{ {Name: model.JobLabel, Value: cfg.JobName}, @@ -441,8 +440,8 @@ func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort } // Encode scrape query parameters as labels. for k, v := range cfg.Params { - if len(v) > 0 { - lb.Set(model.ParamLabelPrefix+k, v[0]) + if name := model.ParamLabelPrefix + k; len(v) > 0 && lb.Get(name) == "" { + lb.Set(name, v[0]) } } @@ -457,51 +456,7 @@ func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort return labels.EmptyLabels(), labels.EmptyLabels(), errors.New("no address") } - // addPort checks whether we should add a default port to the address. - // If the address is not valid, we don't append a port either. - addPort := func(s string) (string, string, bool) { - // If we can split, a port exists and we don't have to add one. - if host, port, err := net.SplitHostPort(s); err == nil { - return host, port, false - } - // If adding a port makes it valid, the previous error - // was not due to an invalid address and we can append a port. - _, _, err := net.SplitHostPort(s + ":1234") - return "", "", err == nil - } - addr := lb.Get(model.AddressLabel) - scheme := lb.Get(model.SchemeLabel) - host, port, add := addPort(addr) - // If it's an address with no trailing port, infer it based on the used scheme - // unless the no-default-scrape-port feature flag is present. - if !noDefaultPort && add { - // Addresses reaching this point are already wrapped in [] if necessary. - switch scheme { - case "http", "": - addr += ":80" - case "https": - addr += ":443" - default: - return labels.EmptyLabels(), labels.EmptyLabels(), fmt.Errorf("invalid scheme: %q", cfg.Scheme) - } - lb.Set(model.AddressLabel, addr) - } - - if noDefaultPort { - // If it's an address with a trailing default port and the - // no-default-scrape-port flag is present, remove the port. - switch port { - case "80": - if scheme == "http" { - lb.Set(model.AddressLabel, host) - } - case "443": - if scheme == "https" { - lb.Set(model.AddressLabel, host) - } - } - } if err := config.CheckTargetAddress(model.LabelValue(addr)); err != nil { return labels.EmptyLabels(), labels.EmptyLabels(), err @@ -557,7 +512,7 @@ func PopulateLabels(lb *labels.Builder, cfg *config.ScrapeConfig, noDefaultPort } // TargetsFromGroup builds targets based on the given TargetGroup and config. 
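Two behavior changes land in PopulateLabels below: config params no longer overwrite a `__param_*` label that is already present, and the default-port inference (appending `:80`/`:443` by scheme, along with the no-default-scrape-port flag) is removed, so the `__address__` label is passed through verbatim. A standalone illustration of the new params guard, with hypothetical helper naming:

```go
// applyParams mirrors the new guard: a __param_<k> label that is already
// set (for example via relabeling) wins over the scrape config's params.
package sketch

import (
	"net/url"

	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/labels"
)

func applyParams(lb *labels.Builder, params url.Values) {
	for k, v := range params {
		if name := model.ParamLabelPrefix + k; len(v) > 0 && lb.Get(name) == "" {
			lb.Set(name, v[0])
		}
	}
}
```

With the port logic gone, an address like `example.com` under the https scheme is no longer rewritten to `example.com:443` in the target's labels.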
-func TargetsFromGroup(tg *targetgroup.Group, cfg *config.ScrapeConfig, noDefaultPort bool, targets []*Target, lb *labels.Builder) ([]*Target, []error) { +func TargetsFromGroup(tg *targetgroup.Group, cfg *config.ScrapeConfig, targets []*Target, lb *labels.Builder) ([]*Target, []error) { targets = targets[:0] failures := []error{} @@ -573,7 +528,7 @@ func TargetsFromGroup(tg *targetgroup.Group, cfg *config.ScrapeConfig, noDefault } } - lset, origLabels, err := PopulateLabels(lb, cfg, noDefaultPort) + lset, origLabels, err := PopulateLabels(lb, cfg) if err != nil { failures = append(failures, fmt.Errorf("instance %d in group %s: %w", i, tg, err)) } diff --git a/scrape/target_test.go b/scrape/target_test.go index 84fe078b2b..bd27952874 100644 --- a/scrape/target_test.go +++ b/scrape/target_test.go @@ -348,7 +348,7 @@ func TestTargetsFromGroup(t *testing.T) { ScrapeInterval: model.Duration(1 * time.Minute), } lb := labels.NewBuilder(labels.EmptyLabels()) - targets, failures := TargetsFromGroup(&targetgroup.Group{Targets: []model.LabelSet{{}, {model.AddressLabel: "localhost:9090"}}}, &cfg, false, nil, lb) + targets, failures := TargetsFromGroup(&targetgroup.Group{Targets: []model.LabelSet{{}, {model.AddressLabel: "localhost:9090"}}}, &cfg, nil, lb) require.Len(t, targets, 1) require.Len(t, failures, 1) require.EqualError(t, failures[0], expectedError) @@ -435,7 +435,7 @@ scrape_configs: lb := labels.NewBuilder(labels.EmptyLabels()) group := &targetgroup.Group{Targets: targets} for i := 0; i < b.N; i++ { - tgets, _ = TargetsFromGroup(group, config.ScrapeConfigs[0], false, tgets, lb) + tgets, _ = TargetsFromGroup(group, config.ScrapeConfigs[0], tgets, lb) if len(targets) != nTargets { b.Fatalf("Expected %d targets, got %d", nTargets, len(targets)) } diff --git a/scripts/compress_assets.sh b/scripts/compress_assets.sh index 6608677bbf..19e1e22486 100755 --- a/scripts/compress_assets.sh +++ b/scripts/compress_assets.sh @@ -4,6 +4,12 @@ set -euo pipefail +export STATIC_DIR=static +PREBUILT_ASSETS_STATIC_DIR=${PREBUILT_ASSETS_STATIC_DIR:-} +if [ -n "$PREBUILT_ASSETS_STATIC_DIR" ]; then + STATIC_DIR=$(realpath $PREBUILT_ASSETS_STATIC_DIR) +fi + cd web/ui cp embed.go.tmpl embed.go @@ -11,6 +17,19 @@ GZIP_OPTS="-fk" # gzip option '-k' may not always exist in the latest gzip available on different distros. if ! gzip -k -h &>/dev/null; then GZIP_OPTS="-f"; fi +mkdir -p static find static -type f -name '*.gz' -delete -find static -type f -exec gzip $GZIP_OPTS '{}' \; -print0 | xargs -0 -I % echo %.gz | sort | xargs echo //go:embed >> embed.go + +# Compress files from the prebuilt static directory and replicate the structure in the current static directory +find "${STATIC_DIR}" -type f ! 
-name '*.gz' -exec bash -c ' + for file; do + dest="${file#${STATIC_DIR}}" + mkdir -p "static/$(dirname "$dest")" + gzip '"$GZIP_OPTS"' "$file" -c > "static/${dest}.gz" + done +' bash {} + + +# Append the paths of gzipped files to embed.go +find static -type f -name '*.gz' -print0 | sort -z | xargs -0 echo //go:embed >> embed.go + echo var EmbedFS embed.FS >> embed.go diff --git a/scripts/get_module_version.sh b/scripts/get_module_version.sh new file mode 100755 index 0000000000..e385587044 --- /dev/null +++ b/scripts/get_module_version.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# if no version string is passed as an argument, read VERSION file +if [ $# -eq 0 ]; then + VERSION="$(< VERSION)" +else + VERSION=$1 +fi + + +# Remove leading 'v' if present +VERSION="${VERSION#v}" + +# Extract MAJOR, MINOR, and REST +MAJOR="${VERSION%%.*}" +MINOR="${VERSION#*.}"; MINOR="${MINOR%%.*}" +REST="${VERSION#*.*.}" + +# Format and output based on MAJOR version +if [[ "$MAJOR" == "2" ]]; then + echo "0.$MINOR.$REST" +elif [[ "$MAJOR" == "3" ]]; then + printf "0.3%02d.$REST\n" "$MINOR" +fi diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index f4a7385bb5..e645ba30ae 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -24,16 +24,16 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install Go - uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 + uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed # v5.1.0 with: - go-version: 1.22.x + go-version: 1.23.x - name: Install snmp_exporter/generator dependencies run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' - name: Lint - uses: golangci/golangci-lint-action@aaa42aa0628b4ae2578232a66b541047968fac86 # v6.1.0 + uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1 with: args: --verbose - version: v1.60.2 + version: v1.62.0 diff --git a/scripts/ui_release.sh b/scripts/ui_release.sh index ea4423d257..c1b872fd37 100755 --- a/scripts/ui_release.sh +++ b/scripts/ui_release.sh @@ -30,8 +30,8 @@ function publish() { cmd+=" --dry-run" fi for workspace in ${workspaces}; do - # package "app" is private so we shouldn't try to publish it. - if [[ "${workspace}" != "react-app" ]]; then + # package "mantine-ui" is private so we shouldn't try to publish it. + if [[ "${workspace}" != "mantine-ui" ]]; then cd "${workspace}" eval "${cmd}" cd "${root_ui_folder}" diff --git a/storage/buffer.go b/storage/buffer.go index 651e5c83e8..e847c10e61 100644 --- a/storage/buffer.go +++ b/storage/buffer.go @@ -187,6 +187,10 @@ func (s fSample) Type() chunkenc.ValueType { return chunkenc.ValFloat } +func (s fSample) Copy() chunks.Sample { + return s +} + type hSample struct { t int64 h *histogram.Histogram @@ -212,6 +216,10 @@ func (s hSample) Type() chunkenc.ValueType { return chunkenc.ValHistogram } +func (s hSample) Copy() chunks.Sample { + return hSample{t: s.t, h: s.h.Copy()} +} + type fhSample struct { t int64 fh *histogram.FloatHistogram @@ -237,13 +245,17 @@ func (s fhSample) Type() chunkenc.ValueType { return chunkenc.ValFloatHistogram } +func (s fhSample) Copy() chunks.Sample { + return fhSample{t: s.t, fh: s.fh.Copy()} +} + type sampleRing struct { delta int64 // Lookback buffers. 
We use iBuf for mixed samples, but one of the three - // concrete ones for homogenous samples. (Only one of the four bufs is + // concrete ones for homogeneous samples. (Only one of the four bufs is // allowed to be populated!) This avoids the overhead of the interface - // wrapper for the happy (and by far most common) case of homogenous + // wrapper for the happy (and by far most common) case of homogeneous // samples. iBuf []chunks.Sample fBuf []fSample @@ -268,7 +280,7 @@ const ( fhBuf ) -// newSampleRing creates a new sampleRing. If you do not know the prefereed +// newSampleRing creates a new sampleRing. If you do not know the preferred // value type yet, use a size of 0 (in which case the provided typ doesn't // matter). On the first add, a buffer of size 16 will be allocated with the // preferred type being the type of the first added sample. @@ -535,55 +547,8 @@ func (r *sampleRing) addFH(s fhSample) { } } -// genericAdd is a generic implementation of adding a chunks.Sample -// implementation to a buffer of a sample ring. However, the Go compiler -// currently (go1.20) decides to not expand the code during compile time, but -// creates dynamic code to handle the different types. That has a significant -// overhead during runtime, noticeable in PromQL benchmarks. For example, the -// "RangeQuery/expr=rate(a_hundred[1d]),steps=.*" benchmarks show about 7% -// longer runtime, 9% higher allocation size, and 10% more allocations. -// Therefore, genericAdd has been manually implemented for all the types -// (addSample, addF, addH, addFH) below. -// -// func genericAdd[T chunks.Sample](s T, buf []T, r *sampleRing) []T { -// l := len(buf) -// // Grow the ring buffer if it fits no more elements. -// if l == 0 { -// buf = make([]T, 16) -// l = 16 -// } -// if l == r.l { -// newBuf := make([]T, 2*l) -// copy(newBuf[l+r.f:], buf[r.f:]) -// copy(newBuf, buf[:r.f]) -// -// buf = newBuf -// r.i = r.f -// r.f += l -// l = 2 * l -// } else { -// r.i++ -// if r.i >= l { -// r.i -= l -// } -// } -// -// buf[r.i] = s -// r.l++ -// -// // Free head of the buffer of samples that just fell out of the range. -// tmin := s.T() - r.delta -// for buf[r.f].T() < tmin { -// r.f++ -// if r.f >= l { -// r.f -= l -// } -// r.l-- -// } -// return buf -// } - -// addSample is a handcoded specialization of genericAdd (see above). +// addSample adds a sample to a buffer of chunks.Sample, i.e. the general case +// using an interface as the type. func addSample(s chunks.Sample, buf []chunks.Sample, r *sampleRing) []chunks.Sample { l := len(buf) // Grow the ring buffer if it fits no more elements. @@ -607,7 +572,7 @@ func addSample(s chunks.Sample, buf []chunks.Sample, r *sampleRing) []chunks.Sam } } - buf[r.i] = s + buf[r.i] = s.Copy() r.l++ // Free head of the buffer of samples that just fell out of the range. @@ -622,7 +587,7 @@ func addSample(s chunks.Sample, buf []chunks.Sample, r *sampleRing) []chunks.Sam return buf } -// addF is a handcoded specialization of genericAdd (see above). +// addF adds an fSample to a (specialized) fSample buffer. func addF(s fSample, buf []fSample, r *sampleRing) []fSample { l := len(buf) // Grow the ring buffer if it fits no more elements. @@ -661,7 +626,7 @@ func addF(s fSample, buf []fSample, r *sampleRing) []fSample { return buf } -// addH is a handcoded specialization of genericAdd (see above). +// addH adds an hSample to a (specialized) hSample buffer. 
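A short sketch of why the Copy methods added above differ, using simplified stand-in types rather than the real ones: fSample holds its float by value, so returning the receiver already yields an independent copy, while hSample holds a *Histogram and must deep-copy it so that a sample buffered in the ring cannot be mutated later through the original pointer, which is the overwrite bug exercised by TestBufferedSeriesIteratorMixedFloatsAndHistograms further down.

package main

import "fmt"

type histogram struct{ count uint64 }

func (h *histogram) copy() *histogram {
	c := *h
	return &c
}

type hSample struct {
	t int64
	h *histogram
}

// deepCopy mirrors hSample.Copy: the histogram behind the pointer is cloned.
func (s hSample) deepCopy() hSample { return hSample{t: s.t, h: s.h.copy()} }

func main() {
	orig := hSample{t: 1, h: &histogram{count: 7}}
	shallow := orig         // struct copy; h still aliases orig.h
	deep := orig.deepCopy() // independent histogram

	orig.h.count = 99
	fmt.Println(shallow.h.count) // 99: the shallow copy sees the mutation
	fmt.Println(deep.h.count)    // 7: the deep copy is isolated
}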
func addH(s hSample, buf []hSample, r *sampleRing) []hSample { l := len(buf) // Grow the ring buffer if it fits no more elements. @@ -705,7 +670,7 @@ func addH(s hSample, buf []hSample, r *sampleRing) []hSample { return buf } -// addFH is a handcoded specialization of genericAdd (see above). +// addFH adds an fhSample to a (specialized) fhSample buffer. func addFH(s fhSample, buf []fhSample, r *sampleRing) []fhSample { l := len(buf) // Grow the ring buffer if it fits no more elements. diff --git a/storage/buffer_test.go b/storage/buffer_test.go index b5c6443ac5..6e8e83db8f 100644 --- a/storage/buffer_test.go +++ b/storage/buffer_test.go @@ -314,6 +314,56 @@ func TestBufferedSeriesIteratorMixedHistograms(t *testing.T) { require.Equal(t, histograms[1].ToFloat(nil), fh) } +func TestBufferedSeriesIteratorMixedFloatsAndHistograms(t *testing.T) { + histograms := tsdbutil.GenerateTestHistograms(5) + + it := NewBufferIterator(NewListSeriesIteratorWithCopy(samples{ + hSample{t: 1, h: histograms[0].Copy()}, + fSample{t: 2, f: 2}, + hSample{t: 3, h: histograms[1].Copy()}, + hSample{t: 4, h: histograms[2].Copy()}, + fhSample{t: 3, fh: histograms[3].ToFloat(nil)}, + fhSample{t: 4, fh: histograms[4].ToFloat(nil)}, + }), 6) + + require.Equal(t, chunkenc.ValNone, it.Seek(7)) + require.NoError(t, it.Err()) + + buf := it.Buffer() + + require.Equal(t, chunkenc.ValHistogram, buf.Next()) + _, h0 := buf.AtHistogram() + require.Equal(t, histograms[0], h0) + + require.Equal(t, chunkenc.ValFloat, buf.Next()) + _, v := buf.At() + require.Equal(t, 2.0, v) + + require.Equal(t, chunkenc.ValHistogram, buf.Next()) + _, h1 := buf.AtHistogram() + require.Equal(t, histograms[1], h1) + + require.Equal(t, chunkenc.ValHistogram, buf.Next()) + _, h2 := buf.AtHistogram() + require.Equal(t, histograms[2], h2) + + require.Equal(t, chunkenc.ValFloatHistogram, buf.Next()) + _, h3 := buf.AtFloatHistogram(nil) + require.Equal(t, histograms[3].ToFloat(nil), h3) + + require.Equal(t, chunkenc.ValFloatHistogram, buf.Next()) + _, h4 := buf.AtFloatHistogram(nil) + require.Equal(t, histograms[4].ToFloat(nil), h4) + + // Test for overwrite bug where the buffered histogram was reused + // between items in the buffer. + require.Equal(t, histograms[0], h0) + require.Equal(t, histograms[1], h1) + require.Equal(t, histograms[2], h2) + require.Equal(t, histograms[3].ToFloat(nil), h3) + require.Equal(t, histograms[4].ToFloat(nil), h4) +} + func BenchmarkBufferedSeriesIterator(b *testing.B) { // Simulate a 5 minute rate. it := NewBufferIterator(newFakeSeriesIterator(int64(b.N), 30), 5*60) diff --git a/storage/fanout.go b/storage/fanout.go index e52342bc7e..4d076788a7 100644 --- a/storage/fanout.go +++ b/storage/fanout.go @@ -15,9 +15,8 @@ package storage import ( "context" + "log/slog" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/exemplar" @@ -28,7 +27,7 @@ import ( ) type fanout struct { - logger log.Logger + logger *slog.Logger primary Storage secondaries []Storage @@ -43,7 +42,7 @@ type fanout struct { // and the error from the secondary querier will be returned as a warning. // // NOTE: In the case of Prometheus, it treats all remote storages as secondary / best effort. 
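A hedged usage sketch for the slog-based NewFanout signature introduced just below (newFanoutExample is an illustrative wrapper, not part of the package): appends go to the primary and are duplicated to every secondary, with secondary failures surfacing as warnings rather than hard errors.

package fanoutexample

import (
	"log/slog"
	"os"

	"github.com/prometheus/prometheus/storage"
)

// newFanoutExample wires a primary and best-effort secondaries behind a
// single Storage, logging through *slog.Logger instead of go-kit's log.Logger.
func newFanoutExample(primary storage.Storage, secondaries ...storage.Storage) storage.Storage {
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
	return storage.NewFanout(logger, primary, secondaries...)
}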
-func NewFanout(logger log.Logger, primary Storage, secondaries ...Storage) Storage { +func NewFanout(logger *slog.Logger, primary Storage, secondaries ...Storage) Storage { return &fanout{ logger: logger, primary: primary, @@ -142,12 +141,22 @@ func (f *fanout) Close() error { // fanoutAppender implements Appender. type fanoutAppender struct { - logger log.Logger + logger *slog.Logger primary Appender secondaries []Appender } +// SetOptions propagates the hints to both primary and secondary appenders. +func (f *fanoutAppender) SetOptions(opts *AppendOptions) { + if f.primary != nil { + f.primary.SetOptions(opts) + } + for _, appender := range f.secondaries { + appender.SetOptions(opts) + } +} + func (f *fanoutAppender) Append(ref SeriesRef, l labels.Labels, t int64, v float64) (SeriesRef, error) { ref, err := f.primary.Append(ref, l, t, v) if err != nil { @@ -190,6 +199,20 @@ func (f *fanoutAppender) AppendHistogram(ref SeriesRef, l labels.Labels, t int64 return ref, nil } +func (f *fanoutAppender) AppendHistogramCTZeroSample(ref SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (SeriesRef, error) { + ref, err := f.primary.AppendHistogramCTZeroSample(ref, l, t, ct, h, fh) + if err != nil { + return ref, err + } + + for _, appender := range f.secondaries { + if _, err := appender.AppendHistogramCTZeroSample(ref, l, t, ct, h, fh); err != nil { + return 0, err + } + } + return ref, nil +} + func (f *fanoutAppender) UpdateMetadata(ref SeriesRef, l labels.Labels, m metadata.Metadata) (SeriesRef, error) { ref, err := f.primary.UpdateMetadata(ref, l, m) if err != nil { @@ -226,7 +249,7 @@ func (f *fanoutAppender) Commit() (err error) { err = appender.Commit() } else { if rollbackErr := appender.Rollback(); rollbackErr != nil { - level.Error(f.logger).Log("msg", "Squashed rollback error on commit", "err", rollbackErr) + f.logger.Error("Squashed rollback error on commit", "err", rollbackErr) } } } @@ -242,7 +265,7 @@ func (f *fanoutAppender) Rollback() (err error) { case err == nil: err = rollbackErr case rollbackErr != nil: - level.Error(f.logger).Log("msg", "Squashed rollback error on rollback", "err", rollbackErr) + f.logger.Error("Squashed rollback error on rollback", "err", rollbackErr) } } return nil diff --git a/storage/fanout_test.go b/storage/fanout_test.go index 4613fe7572..3eef9e3cd0 100644 --- a/storage/fanout_test.go +++ b/storage/fanout_test.go @@ -173,16 +173,13 @@ func TestFanoutErrors(t *testing.T) { } if tc.err != nil { - require.Error(t, ss.Err()) - require.Equal(t, tc.err.Error(), ss.Err().Error()) + require.EqualError(t, ss.Err(), tc.err.Error()) } if tc.warning != nil { - require.NotEmpty(t, ss.Warnings(), "warnings expected") w := ss.Warnings() - require.Error(t, w.AsErrors()[0]) - warn, _ := w.AsStrings("", 0, 0) - require.Equal(t, tc.warning.Error(), warn[0]) + require.NotEmpty(t, w, "warnings expected") + require.EqualError(t, w.AsErrors()[0], tc.warning.Error()) } }) t.Run("chunks", func(t *testing.T) { @@ -200,16 +197,13 @@ func TestFanoutErrors(t *testing.T) { } if tc.err != nil { - require.Error(t, ss.Err()) - require.Equal(t, tc.err.Error(), ss.Err().Error()) + require.EqualError(t, ss.Err(), tc.err.Error()) } if tc.warning != nil { - require.NotEmpty(t, ss.Warnings(), "warnings expected") w := ss.Warnings() - require.Error(t, w.AsErrors()[0]) - warn, _ := w.AsStrings("", 0, 0) - require.Equal(t, tc.warning.Error(), warn[0]) + require.NotEmpty(t, w, "warnings expected") + require.EqualError(t, w.AsErrors()[0], 
tc.warning.Error()) } }) } diff --git a/storage/interface.go b/storage/interface.go index 2f125e5902..32b90cc10a 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -41,15 +41,17 @@ var ( ErrOutOfOrderExemplar = errors.New("out of order exemplar") ErrDuplicateExemplar = errors.New("duplicate exemplar") ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength) - ErrExemplarsDisabled = fmt.Errorf("exemplar storage is disabled or max exemplars is less than or equal to 0") - ErrNativeHistogramsDisabled = fmt.Errorf("native histograms are disabled") + ErrExemplarsDisabled = errors.New("exemplar storage is disabled or max exemplars is less than or equal to 0") + ErrNativeHistogramsDisabled = errors.New("native histograms are disabled") + ErrOOONativeHistogramsDisabled = errors.New("out-of-order native histogram ingestion is disabled") // ErrOutOfOrderCT indicates failed append of CT to the storage // due to CT being older the then newer sample. // NOTE(bwplotka): This can be both an instrumentation failure or commonly expected // behaviour, and we currently don't have a way to determine this. As a result // it's recommended to ignore this error for now. - ErrOutOfOrderCT = fmt.Errorf("created timestamp out of order, ignoring") + ErrOutOfOrderCT = errors.New("created timestamp out of order, ignoring") + ErrCTNewerThanSample = errors.New("CT is newer or the same as sample's timestamp, ignoring") ) // SeriesRef is a generic series reference. In prometheus it is either a @@ -112,6 +114,8 @@ type Querier interface { LabelQuerier // Select returns a set of series that matches the given label matchers. + // Results are not checked whether they match. Results that do not match + // may cause undefined behavior. // Caller can specify if it requires returned series to be sorted. Prefer not requiring sorting for better performance. // It allows passing hints that can help in optimising select, but it's up to implementation how this is used if used at all. Select(ctx context.Context, sortSeries bool, hints *SelectHints, matchers ...*labels.Matcher) SeriesSet @@ -150,6 +154,8 @@ type ChunkQuerier interface { LabelQuerier // Select returns a set of series that matches the given label matchers. + // Results are not checked whether they match. Results that do not match + // may cause undefined behavior. // Caller can specify if it requires returned series to be sorted. Prefer not requiring sorting for better performance. // It allows passing hints that can help in optimising select, but it's up to implementation how this is used if used at all. Select(ctx context.Context, sortSeries bool, hints *SelectHints, matchers ...*labels.Matcher) ChunkSeriesSet @@ -157,7 +163,7 @@ type ChunkQuerier interface { // LabelQuerier provides querying access over labels. type LabelQuerier interface { - // LabelValues returns all potential values for a label name. + // LabelValues returns all potential values for a label name in sorted order. // It is not safe to use the strings beyond the lifetime of the querier. // If matchers are specified the returned result set is reduced // to label values of metrics matching the matchers. @@ -237,6 +243,10 @@ func (f QueryableFunc) Querier(mint, maxt int64) (Querier, error) { return f(mint, maxt) } +type AppendOptions struct { + DiscardOutOfOrder bool +} + // Appender provides batched appends against a storage. 
// It must be completed with a call to Commit or Rollback and must not be reused afterwards. // @@ -265,6 +275,10 @@ type Appender interface { // Appender has to be discarded after rollback. Rollback() error + // SetOptions configures the appender with specific append options such as + // discarding out-of-order samples even if out-of-order is enabled in the TSDB. + SetOptions(opts *AppendOptions) + ExemplarAppender HistogramAppender MetadataUpdater @@ -312,6 +326,20 @@ type HistogramAppender interface { // pointer. AppendHistogram won't mutate the histogram, but in turn // depends on the caller to not mutate it either. AppendHistogram(ref SeriesRef, l labels.Labels, t int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (SeriesRef, error) + // AppendHistogramCTZeroSample adds synthetic zero sample for the given ct timestamp, + // which will be associated with given series, labels and the incoming + // sample's t (timestamp). AppendHistogramCTZeroSample returns error if zero sample can't be + // appended, for example when ct is too old, or when it would collide with + // incoming sample (sample has priority). + // + // AppendHistogramCTZeroSample has to be called before the corresponding histogram AppendHistogram. + // A series reference number is returned which can be used to modify the + // CT for the given series in the same or later transactions. + // Returned reference numbers are ephemeral and may be rejected in calls + // to AppendHistogramCTZeroSample() at any point. + // + // If the reference is 0 it must not be used for caching. + AppendHistogramCTZeroSample(ref SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (SeriesRef, error) } // MetadataUpdater provides an interface for associating metadata to stored series. diff --git a/storage/merge.go b/storage/merge.go index 2424b26ab7..1953d5df09 100644 --- a/storage/merge.go +++ b/storage/merge.go @@ -19,7 +19,6 @@ import ( "context" "fmt" "math" - "slices" "sync" "github.com/prometheus/prometheus/model/histogram" @@ -136,13 +135,17 @@ func filterChunkQueriers(qs []ChunkQuerier) []ChunkQuerier { // Select returns a set of series that matches the given label matchers. func (q *mergeGenericQuerier) Select(ctx context.Context, sortSeries bool, hints *SelectHints, matchers ...*labels.Matcher) genericSeriesSet { seriesSets := make([]genericSeriesSet, 0, len(q.queriers)) + var limit int + if hints != nil { + limit = hints.Limit + } if !q.concurrentSelect { for _, querier := range q.queriers { // We need to sort for merge to work. seriesSets = append(seriesSets, querier.Select(ctx, true, hints, matchers...)) } return &lazyGenericSeriesSet{init: func() (genericSeriesSet, bool) { - s := newGenericMergeSeriesSet(seriesSets, q.mergeFn) + s := newGenericMergeSeriesSet(seriesSets, limit, q.mergeFn) return s, s.Next() }} } @@ -153,13 +156,18 @@ func (q *mergeGenericQuerier) Select(ctx context.Context, sortSeries bool, hints ) // Schedule all Selects for all queriers we know about. for _, querier := range q.queriers { + // copy the matchers as some queriers may alter the slice. + // See https://github.com/prometheus/prometheus/issues/14723 + matchersCopy := make([]*labels.Matcher, len(matchers)) + copy(matchersCopy, matchers) + wg.Add(1) - go func(qr genericQuerier) { + go func(qr genericQuerier, m []*labels.Matcher) { defer wg.Done() // We need to sort for NewMergeSeriesSet to work. - seriesSetChan <- qr.Select(ctx, true, hints, matchers...) 
- }(querier) + seriesSetChan <- qr.Select(ctx, true, hints, m...) + }(querier, matchersCopy) } go func() { wg.Wait() @@ -170,7 +178,7 @@ func (q *mergeGenericQuerier) Select(ctx context.Context, sortSeries bool, hints seriesSets = append(seriesSets, r) } return &lazyGenericSeriesSet{init: func() (genericSeriesSet, bool) { - s := newGenericMergeSeriesSet(seriesSets, q.mergeFn) + s := newGenericMergeSeriesSet(seriesSets, limit, q.mergeFn) return s, s.Next() }} } @@ -188,35 +196,44 @@ func (l labelGenericQueriers) SplitByHalf() (labelGenericQueriers, labelGenericQ // If matchers are specified the returned result set is reduced // to label values of metrics matching the matchers. func (q *mergeGenericQuerier) LabelValues(ctx context.Context, name string, hints *LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { - res, ws, err := q.lvals(ctx, q.queriers, name, hints, matchers...) + res, ws, err := q.mergeResults(q.queriers, hints, func(q LabelQuerier) ([]string, annotations.Annotations, error) { + return q.LabelValues(ctx, name, hints, matchers...) + }) if err != nil { return nil, nil, fmt.Errorf("LabelValues() from merge generic querier for label %s: %w", name, err) } return res, ws, nil } -// lvals performs merge sort for LabelValues from multiple queriers. -func (q *mergeGenericQuerier) lvals(ctx context.Context, lq labelGenericQueriers, n string, hints *LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { +// mergeResults performs merge sort on the results of invoking the resultsFn against multiple queriers. +func (q *mergeGenericQuerier) mergeResults(lq labelGenericQueriers, hints *LabelHints, resultsFn func(q LabelQuerier) ([]string, annotations.Annotations, error)) ([]string, annotations.Annotations, error) { if lq.Len() == 0 { return nil, nil, nil } if lq.Len() == 1 { - return lq.Get(0).LabelValues(ctx, n, hints, matchers...) + return resultsFn(lq.Get(0)) } a, b := lq.SplitByHalf() var ws annotations.Annotations - s1, w, err := q.lvals(ctx, a, n, hints, matchers...) + s1, w, err := q.mergeResults(a, hints, resultsFn) ws.Merge(w) if err != nil { return nil, ws, err } - s2, ws, err := q.lvals(ctx, b, n, hints, matchers...) + s2, w, err := q.mergeResults(b, hints, resultsFn) ws.Merge(w) if err != nil { return nil, ws, err } - return mergeStrings(s1, s2), ws, nil + + s1 = truncateToLimit(s1, hints) + s2 = truncateToLimit(s2, hints) + + merged := mergeStrings(s1, s2) + merged = truncateToLimit(merged, hints) + + return merged, ws, nil } func mergeStrings(a, b []string) []string { @@ -248,33 +265,13 @@ func mergeStrings(a, b []string) []string { // LabelNames returns all the unique label names present in all queriers in sorted order. func (q *mergeGenericQuerier) LabelNames(ctx context.Context, hints *LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { - var ( - labelNamesMap = make(map[string]struct{}) - warnings annotations.Annotations - ) - for _, querier := range q.queriers { - names, wrn, err := querier.LabelNames(ctx, hints, matchers...) - if wrn != nil { - // TODO(bwplotka): We could potentially wrap warnings. - warnings.Merge(wrn) - } - if err != nil { - return nil, nil, fmt.Errorf("LabelNames() from merge generic querier: %w", err) - } - for _, name := range names { - labelNamesMap[name] = struct{}{} - } + res, ws, err := q.mergeResults(q.queriers, hints, func(q LabelQuerier) ([]string, annotations.Annotations, error) { + return q.LabelNames(ctx, hints, matchers...) 
+ }) + if err != nil { + return nil, nil, fmt.Errorf("LabelNames() from merge generic querier: %w", err) } - if len(labelNamesMap) == 0 { - return nil, warnings, nil - } - - labelNames := make([]string, 0, len(labelNamesMap)) - for name := range labelNamesMap { - labelNames = append(labelNames, name) - } - slices.Sort(labelNames) - return labelNames, warnings, nil + return res, ws, nil } // Close releases the resources of the generic querier. @@ -288,17 +285,25 @@ func (q *mergeGenericQuerier) Close() error { return errs.Err() } +func truncateToLimit(s []string, hints *LabelHints) []string { + if hints != nil && hints.Limit > 0 && len(s) > hints.Limit { + s = s[:hints.Limit] + } + return s +} + // VerticalSeriesMergeFunc returns merged series implementation that merges series with same labels together. // It has to handle time-overlapped series as well. type VerticalSeriesMergeFunc func(...Series) Series // NewMergeSeriesSet returns a new SeriesSet that merges many SeriesSets together. -func NewMergeSeriesSet(sets []SeriesSet, mergeFunc VerticalSeriesMergeFunc) SeriesSet { +// If limit is set, the SeriesSet will be limited up-to the limit. 0 means disabled. +func NewMergeSeriesSet(sets []SeriesSet, limit int, mergeFunc VerticalSeriesMergeFunc) SeriesSet { genericSets := make([]genericSeriesSet, 0, len(sets)) for _, s := range sets { genericSets = append(genericSets, &genericSeriesSetAdapter{s}) } - return &seriesSetAdapter{newGenericMergeSeriesSet(genericSets, (&seriesMergerAdapter{VerticalSeriesMergeFunc: mergeFunc}).Merge)} + return &seriesSetAdapter{newGenericMergeSeriesSet(genericSets, limit, (&seriesMergerAdapter{VerticalSeriesMergeFunc: mergeFunc}).Merge)} } // VerticalChunkSeriesMergeFunc returns merged chunk series implementation that merges potentially time-overlapping @@ -308,12 +313,12 @@ func NewMergeSeriesSet(sets []SeriesSet, mergeFunc VerticalSeriesMergeFunc) Seri type VerticalChunkSeriesMergeFunc func(...ChunkSeries) ChunkSeries // NewMergeChunkSeriesSet returns a new ChunkSeriesSet that merges many SeriesSet together. -func NewMergeChunkSeriesSet(sets []ChunkSeriesSet, mergeFunc VerticalChunkSeriesMergeFunc) ChunkSeriesSet { +func NewMergeChunkSeriesSet(sets []ChunkSeriesSet, limit int, mergeFunc VerticalChunkSeriesMergeFunc) ChunkSeriesSet { genericSets := make([]genericSeriesSet, 0, len(sets)) for _, s := range sets { genericSets = append(genericSets, &genericChunkSeriesSetAdapter{s}) } - return &chunkSeriesSetAdapter{newGenericMergeSeriesSet(genericSets, (&chunkSeriesMergerAdapter{VerticalChunkSeriesMergeFunc: mergeFunc}).Merge)} + return &chunkSeriesSetAdapter{newGenericMergeSeriesSet(genericSets, limit, (&chunkSeriesMergerAdapter{VerticalChunkSeriesMergeFunc: mergeFunc}).Merge)} } // genericMergeSeriesSet implements genericSeriesSet. @@ -321,9 +326,11 @@ type genericMergeSeriesSet struct { currentLabels labels.Labels mergeFunc genericSeriesMergeFunc - heap genericSeriesSetHeap - sets []genericSeriesSet - currentSets []genericSeriesSet + heap genericSeriesSetHeap + sets []genericSeriesSet + currentSets []genericSeriesSet + seriesLimit int + mergedSeries int // tracks the total number of series merged and returned. } // newGenericMergeSeriesSet returns a new genericSeriesSet that merges (and deduplicates) @@ -331,7 +338,8 @@ type genericMergeSeriesSet struct { // Each series set must return its series in labels order, otherwise // merged series set will be incorrect. // Overlapped situations are merged using provided mergeFunc. 
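A standalone sketch of the limit-aware merge sort used by mergeResults above, with truncateToLimit rewritten over plain slices (mergeSorted is an illustrative stand-in for mergeStrings): because both inputs are sorted, the first limit values of the full merge can only come from the first limit values of each side, so truncating before and after each merge is safe and bounds the work at every level of the recursion.

package main

import "fmt"

func truncateToLimit(s []string, limit int) []string {
	if limit > 0 && len(s) > limit {
		return s[:limit]
	}
	return s
}

// mergeSorted merges two sorted slices and deduplicates values that appear
// in both, like mergeStrings does for label names and values.
func mergeSorted(a, b []string) []string {
	out := make([]string, 0, len(a)+len(b))
	for len(a) > 0 && len(b) > 0 {
		switch {
		case a[0] < b[0]:
			out, a = append(out, a[0]), a[1:]
		case a[0] > b[0]:
			out, b = append(out, b[0]), b[1:]
		default:
			out, a, b = append(out, a[0]), a[1:], b[1:]
		}
	}
	return append(append(out, a...), b...)
}

func main() {
	const limit = 3
	a := truncateToLimit([]string{"a", "c", "e", "g"}, limit)
	b := truncateToLimit([]string{"b", "c", "f"}, limit)
	fmt.Println(truncateToLimit(mergeSorted(a, b), limit)) // [a b c]
}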
-func newGenericMergeSeriesSet(sets []genericSeriesSet, mergeFunc genericSeriesMergeFunc) genericSeriesSet { +// If seriesLimit is set, only limited series are returned. +func newGenericMergeSeriesSet(sets []genericSeriesSet, seriesLimit int, mergeFunc genericSeriesMergeFunc) genericSeriesSet { if len(sets) == 1 { return sets[0] } @@ -351,13 +359,19 @@ func newGenericMergeSeriesSet(sets []genericSeriesSet, mergeFunc genericSeriesMe } } return &genericMergeSeriesSet{ - mergeFunc: mergeFunc, - sets: sets, - heap: h, + mergeFunc: mergeFunc, + sets: sets, + heap: h, + seriesLimit: seriesLimit, } } func (c *genericMergeSeriesSet) Next() bool { + if c.seriesLimit > 0 && c.mergedSeries >= c.seriesLimit { + // Exit early if seriesLimit is set. + return false + } + // Run in a loop because the "next" series sets may not be valid anymore. // If, for the current label set, all the next series sets come from // failed remote storage sources, we want to keep trying with the next label set. @@ -388,6 +402,7 @@ func (c *genericMergeSeriesSet) Next() bool { break } } + c.mergedSeries++ return true } diff --git a/storage/merge_test.go b/storage/merge_test.go index b145743c86..04d4e92078 100644 --- a/storage/merge_test.go +++ b/storage/merge_test.go @@ -1345,7 +1345,7 @@ func makeMergeSeriesSet(serieses [][]Series) SeriesSet { for i, s := range serieses { seriesSets[i] = &genericSeriesSetAdapter{NewMockSeriesSet(s...)} } - return &seriesSetAdapter{newGenericMergeSeriesSet(seriesSets, (&seriesMergerAdapter{VerticalSeriesMergeFunc: ChainedSeriesMerge}).Merge)} + return &seriesSetAdapter{newGenericMergeSeriesSet(seriesSets, 0, (&seriesMergerAdapter{VerticalSeriesMergeFunc: ChainedSeriesMerge}).Merge)} } func benchmarkDrain(b *testing.B, makeSeriesSet func() SeriesSet) { @@ -1390,6 +1390,34 @@ func BenchmarkMergeSeriesSet(b *testing.B) { } } +func BenchmarkMergeLabelValuesWithLimit(b *testing.B) { + var queriers []genericQuerier + + for i := 0; i < 5; i++ { + var lbls []string + for j := 0; j < 100000; j++ { + lbls = append(lbls, fmt.Sprintf("querier_%d_label_%d", i, j)) + } + q := &mockQuerier{resp: lbls} + queriers = append(queriers, newGenericQuerierFrom(q)) + } + + mergeQuerier := &mergeGenericQuerier{ + queriers: queriers, // Assume querying 5 blocks. 
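+ // mergeFn only needs to return a representative label set here: the
+ // benchmark exercises the limit-aware LabelValues merge, not series merging.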
+ mergeFn: func(l ...Labels) Labels { + return l[0] + }, + } + + b.Run("benchmark", func(b *testing.B) { + ctx := context.Background() + hints := &LabelHints{ + Limit: 1000, + } + mergeQuerier.LabelValues(ctx, "name", hints) + }) +} + func visitMockQueriers(t *testing.T, qr Querier, f func(t *testing.T, q *mockQuerier)) int { count := 0 switch x := qr.(type) { @@ -1428,6 +1456,7 @@ func TestMergeQuerierWithSecondaries_ErrorHandling(t *testing.T) { name string primaries []Querier secondaries []Querier + limit int expectedSelectsSeries []labels.Labels expectedLabels []string @@ -1553,12 +1582,39 @@ func TestMergeQuerierWithSecondaries_ErrorHandling(t *testing.T) { expectedLabels: []string{"a", "b"}, expectedWarnings: annotations.New().Add(warnStorage), }, + { + name: "successful queriers with limit", + primaries: []Querier{ + &mockQuerier{resp: []string{"a", "d"}, warnings: annotations.New().Add(warnStorage), err: nil}, + }, + secondaries: []Querier{ + &mockQuerier{resp: []string{"b", "c"}, warnings: annotations.New().Add(warnStorage), err: nil}, + }, + limit: 2, + expectedSelectsSeries: []labels.Labels{ + labels.FromStrings("test", "a"), + labels.FromStrings("test", "b"), + }, + expectedLabels: []string{"a", "b"}, + expectedWarnings: annotations.New().Add(warnStorage), + }, } { + var labelHints *LabelHints + var selectHints *SelectHints + if tcase.limit > 0 { + labelHints = &LabelHints{ + Limit: tcase.limit, + } + selectHints = &SelectHints{ + Limit: tcase.limit, + } + } + t.Run(tcase.name, func(t *testing.T) { q := NewMergeQuerier(tcase.primaries, tcase.secondaries, func(s ...Series) Series { return s[0] }) t.Run("Select", func(t *testing.T) { - res := q.Select(context.Background(), false, nil) + res := q.Select(context.Background(), false, selectHints) var lbls []labels.Labels for res.Next() { lbls = append(lbls, res.At().Labels()) @@ -1577,7 +1633,7 @@ func TestMergeQuerierWithSecondaries_ErrorHandling(t *testing.T) { require.Equal(t, len(tcase.primaries)+len(tcase.secondaries), n) }) t.Run("LabelNames", func(t *testing.T) { - res, w, err := q.LabelNames(ctx, nil) + res, w, err := q.LabelNames(ctx, labelHints) require.Subset(t, tcase.expectedWarnings, w) require.ErrorIs(t, err, tcase.expectedErrs[1], "expected error doesn't match") requireEqualSlice(t, tcase.expectedLabels, res) @@ -1590,7 +1646,7 @@ func TestMergeQuerierWithSecondaries_ErrorHandling(t *testing.T) { }) }) t.Run("LabelValues", func(t *testing.T) { - res, w, err := q.LabelValues(ctx, "test", nil) + res, w, err := q.LabelValues(ctx, "test", labelHints) require.Subset(t, tcase.expectedWarnings, w) require.ErrorIs(t, err, tcase.expectedErrs[2], "expected error doesn't match") requireEqualSlice(t, tcase.expectedLabels, res) @@ -1604,7 +1660,7 @@ func TestMergeQuerierWithSecondaries_ErrorHandling(t *testing.T) { }) t.Run("LabelValuesWithMatchers", func(t *testing.T) { matcher := labels.MustNewMatcher(labels.MatchEqual, "otherLabel", "someValue") - res, w, err := q.LabelValues(ctx, "test2", nil, matcher) + res, w, err := q.LabelValues(ctx, "test2", labelHints, matcher) require.Subset(t, tcase.expectedWarnings, w) require.ErrorIs(t, err, tcase.expectedErrs[3], "expected error doesn't match") requireEqualSlice(t, tcase.expectedLabels, res) diff --git a/storage/remote/azuread/azuread.go b/storage/remote/azuread/azuread.go index 82f46b82f6..20ec53d6f6 100644 --- a/storage/remote/azuread/azuread.go +++ b/storage/remote/azuread/azuread.go @@ -16,7 +16,6 @@ package azuread import ( "context" "errors" - "fmt" "net/http" "strings" "sync" 
@@ -110,55 +109,55 @@ func (c *AzureADConfig) Validate() error { } if c.Cloud != AzureChina && c.Cloud != AzureGovernment && c.Cloud != AzurePublic { - return fmt.Errorf("must provide a cloud in the Azure AD config") + return errors.New("must provide a cloud in the Azure AD config") } if c.ManagedIdentity == nil && c.OAuth == nil && c.SDK == nil { - return fmt.Errorf("must provide an Azure Managed Identity, Azure OAuth or Azure SDK in the Azure AD config") + return errors.New("must provide an Azure Managed Identity, Azure OAuth or Azure SDK in the Azure AD config") } if c.ManagedIdentity != nil && c.OAuth != nil { - return fmt.Errorf("cannot provide both Azure Managed Identity and Azure OAuth in the Azure AD config") + return errors.New("cannot provide both Azure Managed Identity and Azure OAuth in the Azure AD config") } if c.ManagedIdentity != nil && c.SDK != nil { - return fmt.Errorf("cannot provide both Azure Managed Identity and Azure SDK in the Azure AD config") + return errors.New("cannot provide both Azure Managed Identity and Azure SDK in the Azure AD config") } if c.OAuth != nil && c.SDK != nil { - return fmt.Errorf("cannot provide both Azure OAuth and Azure SDK in the Azure AD config") + return errors.New("cannot provide both Azure OAuth and Azure SDK in the Azure AD config") } if c.ManagedIdentity != nil { if c.ManagedIdentity.ClientID == "" { - return fmt.Errorf("must provide an Azure Managed Identity client_id in the Azure AD config") + return errors.New("must provide an Azure Managed Identity client_id in the Azure AD config") } _, err := uuid.Parse(c.ManagedIdentity.ClientID) if err != nil { - return fmt.Errorf("the provided Azure Managed Identity client_id is invalid") + return errors.New("the provided Azure Managed Identity client_id is invalid") } } if c.OAuth != nil { if c.OAuth.ClientID == "" { - return fmt.Errorf("must provide an Azure OAuth client_id in the Azure AD config") + return errors.New("must provide an Azure OAuth client_id in the Azure AD config") } if c.OAuth.ClientSecret == "" { - return fmt.Errorf("must provide an Azure OAuth client_secret in the Azure AD config") + return errors.New("must provide an Azure OAuth client_secret in the Azure AD config") } if c.OAuth.TenantID == "" { - return fmt.Errorf("must provide an Azure OAuth tenant_id in the Azure AD config") + return errors.New("must provide an Azure OAuth tenant_id in the Azure AD config") } var err error _, err = uuid.Parse(c.OAuth.ClientID) if err != nil { - return fmt.Errorf("the provided Azure OAuth client_id is invalid") + return errors.New("the provided Azure OAuth client_id is invalid") } _, err = regexp.MatchString("^[0-9a-zA-Z-.]+$", c.OAuth.TenantID) if err != nil { - return fmt.Errorf("the provided Azure OAuth tenant_id is invalid") + return errors.New("the provided Azure OAuth tenant_id is invalid") } } @@ -168,7 +167,7 @@ func (c *AzureADConfig) Validate() error { if c.SDK.TenantID != "" { _, err = regexp.MatchString("^[0-9a-zA-Z-.]+$", c.SDK.TenantID) if err != nil { - return fmt.Errorf("the provided Azure OAuth tenant_id is invalid") + return errors.New("the provided Azure OAuth tenant_id is invalid") } } } diff --git a/storage/remote/azuread/azuread_test.go b/storage/remote/azuread/azuread_test.go index 7c97138120..08870382ec 100644 --- a/storage/remote/azuread/azuread_test.go +++ b/storage/remote/azuread/azuread_test.go @@ -68,7 +68,7 @@ func (ad *AzureAdTestSuite) TestAzureAdRoundTripper() { cases := []struct { cfg *AzureADConfig }{ - // AzureAd roundtripper with Managedidentity. 
+ // AzureAd roundtripper with ManagedIdentity. { cfg: &AzureADConfig{ Cloud: "AzurePublic", diff --git a/storage/remote/chunked_test.go b/storage/remote/chunked_test.go index 7c3993ca62..82ed866345 100644 --- a/storage/remote/chunked_test.go +++ b/storage/remote/chunked_test.go @@ -86,7 +86,7 @@ func TestChunkedReader_Overflow(t *testing.T) { _, err = NewChunkedReader(bytes.NewReader(b2), 11, nil).Next() require.Error(t, err, "expect exceed limit error") - require.Equal(t, "chunkedReader: message size exceeded the limit 11 bytes; got: 12 bytes", err.Error()) + require.EqualError(t, err, "chunkedReader: message size exceeded the limit 11 bytes; got: 12 bytes") } func TestChunkedReader_CorruptedFrame(t *testing.T) { @@ -102,5 +102,5 @@ func TestChunkedReader_CorruptedFrame(t *testing.T) { _, err = NewChunkedReader(bytes.NewReader(bs), 20, nil).Next() require.Error(t, err, "expected malformed frame") - require.Equal(t, "chunkedReader: corrupted frame; checksum mismatch", err.Error()) + require.EqualError(t, err, "chunkedReader: corrupted frame; checksum mismatch") } diff --git a/storage/remote/client.go b/storage/remote/client.go index 62218cfba9..ad766be9bf 100644 --- a/storage/remote/client.go +++ b/storage/remote/client.go @@ -20,6 +20,7 @@ import ( "fmt" "io" "net/http" + "net/http/httptrace" "strconv" "strings" "time" @@ -29,8 +30,9 @@ import ( "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" - "github.com/prometheus/common/sigv4" "github.com/prometheus/common/version" + "github.com/prometheus/sigv4" + "go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/trace" @@ -143,6 +145,7 @@ type ClientConfig struct { RetryOnRateLimit bool WriteProtoMsg config.RemoteWriteProtoMsg ChunkedReadLimit uint64 + RoundRobinDNS bool } // ReadClient will request the STREAMED_XOR_CHUNKS method of remote read but can @@ -178,7 +181,11 @@ func NewReadClient(name string, conf *ClientConfig) (ReadClient, error) { // NewWriteClient creates a new client for remote write. func NewWriteClient(name string, conf *ClientConfig) (WriteClient, error) { - httpClient, err := config_util.NewClientFromConfig(conf.HTTPClientConfig, "remote_storage_write_client") + var httpOpts []config_util.HTTPClientOption + if conf.RoundRobinDNS { + httpOpts = []config_util.HTTPClientOption{config_util.WithDialContextFunc(newDialContextWithRoundRobinDNS().dialContextFn())} + } + httpClient, err := config_util.NewClientFromConfig(conf.HTTPClientConfig, "remote_storage_write_client", httpOpts...) 
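+ // When RoundRobinDNS is enabled, each new connection dials a randomly chosen
+ // IP from the DNS answer for the endpoint (see dial_context.go); otherwise
+ // the default transport dialer is used unchanged.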
if err != nil { return nil, err } @@ -213,8 +220,11 @@ func NewWriteClient(name string, conf *ClientConfig) (WriteClient, error) { if conf.WriteProtoMsg != "" { writeProtoMsg = conf.WriteProtoMsg } - - httpClient.Transport = otelhttp.NewTransport(t) + httpClient.Transport = otelhttp.NewTransport( + t, + otelhttp.WithClientTrace(func(ctx context.Context) *httptrace.ClientTrace { + return otelhttptrace.NewClientTrace(ctx, otelhttptrace.WithoutSubSpans()) + })) return &Client{ remoteName: name, urlString: conf.URL.String(), diff --git a/storage/remote/codec_test.go b/storage/remote/codec_test.go index 404f1add75..35cd4c4cd5 100644 --- a/storage/remote/codec_test.go +++ b/storage/remote/codec_test.go @@ -20,9 +20,9 @@ import ( "sync" "testing" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/config" @@ -253,8 +253,7 @@ func TestValidateLabelsAndMetricName(t *testing.T) { t.Run(test.description, func(t *testing.T) { err := validateLabelsAndMetricName(test.input) if test.expectedErr != "" { - require.Error(t, err) - require.Equal(t, test.expectedErr, err.Error()) + require.EqualError(t, err, test.expectedErr) } else { require.NoError(t, err) } @@ -527,7 +526,7 @@ func TestFromQueryResultWithDuplicates(t *testing.T) { require.True(t, isErrSeriesSet, "Expected resulting series to be an errSeriesSet") errMessage := errSeries.Err().Error() - require.Equal(t, "duplicate label with name: foo", errMessage, fmt.Sprintf("Expected error to be from duplicate label, but got: %s", errMessage)) + require.Equalf(t, "duplicate label with name: foo", errMessage, "Expected error to be from duplicate label, but got: %s", errMessage) } func TestNegotiateResponseType(t *testing.T) { @@ -551,7 +550,7 @@ func TestNegotiateResponseType(t *testing.T) { _, err = NegotiateResponseType([]prompb.ReadRequest_ResponseType{20}) require.Error(t, err, "expected error due to not supported requested response types") - require.Equal(t, "server does not support any of the requested response types: [20]; supported: map[SAMPLES:{} STREAMED_XOR_CHUNKS:{}]", err.Error()) + require.EqualError(t, err, "server does not support any of the requested response types: [20]; supported: map[SAMPLES:{} STREAMED_XOR_CHUNKS:{}]") } func TestMergeLabels(t *testing.T) { @@ -583,7 +582,7 @@ func TestDecodeWriteRequest(t *testing.T) { } func TestDecodeWriteV2Request(t *testing.T) { - buf, _, _, err := buildV2WriteRequest(log.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") + buf, _, _, err := buildV2WriteRequest(promslog.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") require.NoError(t, err) actual, err := DecodeWriteV2Request(bytes.NewReader(buf)) diff --git a/storage/remote/dial_context.go b/storage/remote/dial_context.go new file mode 100644 index 0000000000..b842728e4c --- /dev/null +++ b/storage/remote/dial_context.go @@ -0,0 +1,62 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package remote + +import ( + "context" + "math/rand" + "net" + "net/http" + "time" + + "github.com/prometheus/common/config" +) + +type hostResolver interface { + LookupHost(context.Context, string) ([]string, error) +} + +type dialContextWithRoundRobinDNS struct { + dialContext config.DialContextFunc + resolver hostResolver + rand *rand.Rand +} + +// newDialContextWithRoundRobinDNS creates a new dialContextWithRoundRobinDNS. +// We discourage creating new instances of struct dialContextWithRoundRobinDNS by explicitly setting its members, +// except for testing purposes, and recommend using newDialContextWithRoundRobinDNS. +func newDialContextWithRoundRobinDNS() *dialContextWithRoundRobinDNS { + return &dialContextWithRoundRobinDNS{ + dialContext: http.DefaultTransport.(*http.Transport).DialContext, + resolver: net.DefaultResolver, + rand: rand.New(rand.NewSource(time.Now().Unix())), + } +} + +func (dc *dialContextWithRoundRobinDNS) dialContextFn() config.DialContextFunc { + return func(ctx context.Context, network, addr string) (net.Conn, error) { + host, port, err := net.SplitHostPort(addr) + if err != nil { + return dc.dialContext(ctx, network, addr) + } + + addrs, err := dc.resolver.LookupHost(ctx, host) + if err != nil || len(addrs) == 0 { + return dc.dialContext(ctx, network, addr) + } + + randomAddr := net.JoinHostPort(addrs[dc.rand.Intn(len(addrs))], port) + return dc.dialContext(ctx, network, randomAddr) + } +} diff --git a/storage/remote/dial_context_test.go b/storage/remote/dial_context_test.go new file mode 100644 index 0000000000..d2716df53f --- /dev/null +++ b/storage/remote/dial_context_test.go @@ -0,0 +1,161 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
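A hedged sketch of what the dialer above amounts to when wired into a plain net/http transport (newRoundRobinTransport is an illustrative name, not the package's API): every new connection re-resolves the host and dials one of the returned addresses at random, spreading remote-write connections across all IPs behind a single DNS name instead of pinning to the first answer.

package main

import (
	"context"
	"math/rand"
	"net"
	"net/http"
	"time"
)

// newRoundRobinTransport clones the default transport and swaps in a dial
// function with the same fallback behavior as dialContextWithRoundRobinDNS.
func newRoundRobinTransport() *http.Transport {
	base := http.DefaultTransport.(*http.Transport).Clone()
	defaultDial := base.DialContext
	rng := rand.New(rand.NewSource(time.Now().Unix()))

	base.DialContext = func(ctx context.Context, network, addr string) (net.Conn, error) {
		host, port, err := net.SplitHostPort(addr)
		if err != nil {
			return defaultDial(ctx, network, addr) // no port in the address: fall back
		}
		addrs, err := net.DefaultResolver.LookupHost(ctx, host)
		if err != nil || len(addrs) == 0 {
			return defaultDial(ctx, network, addr) // resolution failed: fall back
		}
		return defaultDial(ctx, network, net.JoinHostPort(addrs[rng.Intn(len(addrs))], port))
	}
	return base
}

func main() {
	client := &http.Client{Transport: newRoundRobinTransport()}
	_ = client // use like any other *http.Client
}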
+ +package remote + +import ( + "context" + "errors" + "math/rand" + "net" + "sync" + "testing" + "time" + + "github.com/prometheus/common/config" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +const ( + testNetwork = "tcp" + testAddrWithoutPort = "this-is-my-addr.without-port" + testAddrWithPort = "this-is-my-addr.without-port:123" + testPort = "123" + ip1 = "1.2.3.4" + ip2 = "5.6.7.8" + ip3 = "9.0.1.2" + randSeed int64 = 123456789 +) + +var ( + errMockLookupHost = errors.New("this is a mocked error") + testLookupResult = []string{ip1, ip2, ip3} + testLookupResultWithPort = []string{net.JoinHostPort(ip1, testPort), net.JoinHostPort(ip2, testPort), net.JoinHostPort(ip3, testPort)} +) + +type mockDialContext struct { + mock.Mock + + addrFrequencyMu sync.Mutex + addrFrequency map[string]int +} + +func newMockDialContext(acceptableAddresses []string) *mockDialContext { + m := &mockDialContext{ + addrFrequencyMu: sync.Mutex{}, + addrFrequency: make(map[string]int), + } + for _, acceptableAddr := range acceptableAddresses { + m.On("dialContext", mock.Anything, mock.Anything, acceptableAddr).Return(nil, nil) + } + return m +} + +func (dc *mockDialContext) dialContext(ctx context.Context, network, addr string) (net.Conn, error) { + dc.addrFrequencyMu.Lock() + defer dc.addrFrequencyMu.Unlock() + args := dc.MethodCalled("dialContext", ctx, network, addr) + dc.addrFrequency[addr]++ + return nil, args.Error(1) +} + +func (dc *mockDialContext) getCount(addr string) int { + dc.addrFrequencyMu.Lock() + defer dc.addrFrequencyMu.Unlock() + return dc.addrFrequency[addr] +} + +type mockedLookupHost struct { + withErr bool + result []string +} + +func (lh *mockedLookupHost) LookupHost(context.Context, string) ([]string, error) { + if lh.withErr { + return nil, errMockLookupHost + } + return lh.result, nil +} + +func createDialContextWithRoundRobinDNS(dialContext config.DialContextFunc, resolver hostResolver, r *rand.Rand) dialContextWithRoundRobinDNS { + return dialContextWithRoundRobinDNS{ + dialContext: dialContext, + resolver: resolver, + rand: r, + } +} + +func TestDialContextWithRandomConnections(t *testing.T) { + numberOfRuns := 2 * len(testLookupResult) + var mdc *mockDialContext + testCases := map[string]struct { + addr string + setup func() dialContextWithRoundRobinDNS + check func() + }{ + "if address contains no port call default DialContext": { + addr: testAddrWithoutPort, + setup: func() dialContextWithRoundRobinDNS { + mdc = newMockDialContext([]string{testAddrWithoutPort}) + return createDialContextWithRoundRobinDNS(mdc.dialContext, &mockedLookupHost{withErr: false}, rand.New(rand.NewSource(time.Now().Unix()))) + }, + check: func() { + require.Equal(t, numberOfRuns, mdc.getCount(testAddrWithoutPort)) + }, + }, + "if lookup host returns error call default DialContext": { + addr: testAddrWithPort, + setup: func() dialContextWithRoundRobinDNS { + mdc = newMockDialContext([]string{testAddrWithPort}) + return createDialContextWithRoundRobinDNS(mdc.dialContext, &mockedLookupHost{withErr: true}, rand.New(rand.NewSource(time.Now().Unix()))) + }, + check: func() { + require.Equal(t, numberOfRuns, mdc.getCount(testAddrWithPort)) + }, + }, + "if lookup returns no addresses call default DialContext": { + addr: testAddrWithPort, + setup: func() dialContextWithRoundRobinDNS { + mdc = newMockDialContext([]string{testAddrWithPort}) + return createDialContextWithRoundRobinDNS(mdc.dialContext, &mockedLookupHost{}, rand.New(rand.NewSource(time.Now().Unix()))) + }, + check:
func() { + require.Equal(t, numberOfRuns, mdc.getCount(testAddrWithPort)) + }, + }, + "if lookup host is successful, shuffle results": { + addr: testAddrWithPort, + setup: func() dialContextWithRoundRobinDNS { + mdc = newMockDialContext(testLookupResultWithPort) + return createDialContextWithRoundRobinDNS(mdc.dialContext, &mockedLookupHost{result: testLookupResult}, rand.New(rand.NewSource(randSeed))) + }, + check: func() { + // we ensure that not all runs will choose the first element of the lookup + require.NotEqual(t, numberOfRuns, mdc.getCount(testLookupResultWithPort[0])) + }, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + dc := tc.setup() + require.NotNil(t, dc) + for i := 0; i < numberOfRuns; i++ { + _, err := dc.dialContextFn()(context.Background(), testNetwork, tc.addr) + require.NoError(t, err) + } + tc.check() + }) + } +} diff --git a/storage/remote/intern_test.go b/storage/remote/intern_test.go index 917c42e912..98e16ef80d 100644 --- a/storage/remote/intern_test.go +++ b/storage/remote/intern_test.go @@ -19,7 +19,6 @@ package remote import ( - "fmt" "testing" "time" @@ -33,7 +32,7 @@ func TestIntern(t *testing.T) { interned, ok := interner.pool[testString] require.True(t, ok) - require.Equal(t, int64(1), interned.refs.Load(), fmt.Sprintf("expected refs to be 1 but it was %d", interned.refs.Load())) + require.Equalf(t, int64(1), interned.refs.Load(), "expected refs to be 1 but it was %d", interned.refs.Load()) } func TestIntern_MultiRef(t *testing.T) { @@ -44,13 +43,13 @@ func TestIntern_MultiRef(t *testing.T) { interned, ok := interner.pool[testString] require.True(t, ok) - require.Equal(t, int64(1), interned.refs.Load(), fmt.Sprintf("expected refs to be 1 but it was %d", interned.refs.Load())) + require.Equalf(t, int64(1), interned.refs.Load(), "expected refs to be 1 but it was %d", interned.refs.Load()) interner.intern(testString) interned, ok = interner.pool[testString] require.True(t, ok) - require.Equal(t, int64(2), interned.refs.Load(), fmt.Sprintf("expected refs to be 2 but it was %d", interned.refs.Load())) + require.Equalf(t, int64(2), interned.refs.Load(), "expected refs to be 2 but it was %d", interned.refs.Load()) } func TestIntern_DeleteRef(t *testing.T) { @@ -61,7 +60,7 @@ func TestIntern_DeleteRef(t *testing.T) { interned, ok := interner.pool[testString] require.True(t, ok) - require.Equal(t, int64(1), interned.refs.Load(), fmt.Sprintf("expected refs to be 1 but it was %d", interned.refs.Load())) + require.Equalf(t, int64(1), interned.refs.Load(), "expected refs to be 1 but it was %d", interned.refs.Load()) interner.release(testString) _, ok = interner.pool[testString] @@ -75,7 +74,7 @@ func TestIntern_MultiRef_Concurrent(t *testing.T) { interner.intern(testString) interned, ok := interner.pool[testString] require.True(t, ok) - require.Equal(t, int64(1), interned.refs.Load(), fmt.Sprintf("expected refs to be 1 but it was %d", interned.refs.Load())) + require.Equalf(t, int64(1), interned.refs.Load(), "expected refs to be 1 but it was %d", interned.refs.Load()) go interner.release(testString) @@ -87,5 +86,5 @@ func TestIntern_MultiRef_Concurrent(t *testing.T) { interned, ok = interner.pool[testString] interner.mtx.RUnlock() require.True(t, ok) - require.Equal(t, int64(1), interned.refs.Load(), fmt.Sprintf("expected refs to be 1 but it was %d", interned.refs.Load())) + require.Equalf(t, int64(1), interned.refs.Load(), "expected refs to be 1 but it was %d", interned.refs.Load()) } diff --git a/storage/remote/metadata_watcher.go 
b/storage/remote/metadata_watcher.go index fdcd668f56..9306dcb4c2 100644 --- a/storage/remote/metadata_watcher.go +++ b/storage/remote/metadata_watcher.go @@ -16,11 +16,11 @@ package remote import ( "context" "errors" + "log/slog" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/scrape" ) @@ -44,7 +44,7 @@ func (noop *noopScrapeManager) Get() (*scrape.Manager, error) { // MetadataWatcher watches the Scrape Manager for a given WriteMetadataTo. type MetadataWatcher struct { name string - logger log.Logger + logger *slog.Logger managerGetter ReadyScrapeManager manager Watchable @@ -62,9 +62,9 @@ type MetadataWatcher struct { } // NewMetadataWatcher builds a new MetadataWatcher. -func NewMetadataWatcher(l log.Logger, mg ReadyScrapeManager, name string, w MetadataAppender, interval model.Duration, deadline time.Duration) *MetadataWatcher { +func NewMetadataWatcher(l *slog.Logger, mg ReadyScrapeManager, name string, w MetadataAppender, interval model.Duration, deadline time.Duration) *MetadataWatcher { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } if mg == nil { @@ -87,7 +87,7 @@ func NewMetadataWatcher(l log.Logger, mg ReadyScrapeManager, name string, w Meta // Start the MetadataWatcher. func (mw *MetadataWatcher) Start() { - level.Info(mw.logger).Log("msg", "Starting scraped metadata watcher") + mw.logger.Info("Starting scraped metadata watcher") mw.hardShutdownCtx, mw.hardShutdownCancel = context.WithCancel(context.Background()) mw.softShutdownCtx, mw.softShutdownCancel = context.WithCancel(mw.hardShutdownCtx) go mw.loop() @@ -95,15 +95,15 @@ func (mw *MetadataWatcher) Start() { // Stop the MetadataWatcher. func (mw *MetadataWatcher) Stop() { - level.Info(mw.logger).Log("msg", "Stopping metadata watcher...") - defer level.Info(mw.logger).Log("msg", "Scraped metadata watcher stopped") + mw.logger.Info("Stopping metadata watcher...") + defer mw.logger.Info("Scraped metadata watcher stopped") mw.softShutdownCancel() select { case <-mw.done: return case <-time.After(mw.deadline): - level.Error(mw.logger).Log("msg", "Failed to flush metadata") + mw.logger.Error("Failed to flush metadata") } mw.hardShutdownCancel() diff --git a/storage/remote/otlptranslator/prometheus/helpers_from_stdlib.go b/storage/remote/otlptranslator/prometheus/helpers_from_stdlib.go new file mode 100644 index 0000000000..cb9257d073 --- /dev/null +++ b/storage/remote/otlptranslator/prometheus/helpers_from_stdlib.go @@ -0,0 +1,106 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// Provenance-includes-location: https://github.com/golang/go/blob/f2d118fd5f7e872804a5825ce29797f81a28b0fa/src/strings/strings.go +// Provenance-includes-license: BSD-3-Clause +// Provenance-includes-copyright: Copyright The Go Authors. 
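A compact sketch of the go-kit-to-slog migration pattern applied in metadata_watcher.go above (start is a hypothetical function used only for illustration): leveled level.Info(logger).Log("msg", ...) calls become direct *slog.Logger methods, and promslog.NewNopLogger() takes over from log.NewNopLogger() as the nil default.

package main

import (
	"log/slog"

	"github.com/prometheus/common/promslog"
)

func start(logger *slog.Logger) {
	if logger == nil {
		logger = promslog.NewNopLogger() // same nil-guard as NewMetadataWatcher
	}
	// Before: level.Info(logger).Log("msg", "Starting scraped metadata watcher")
	logger.Info("Starting scraped metadata watcher")
	// Before: level.Error(logger).Log("msg", "Failed to flush metadata")
	logger.Error("Failed to flush metadata")
}

func main() { start(nil) }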
+ +package prometheus + +import "strings" + +// fieldsFunc is a copy of strings.FieldsFunc from the Go standard library, +// but it also returns the separators as part of the result. +func fieldsFunc(s string, f func(rune) bool) ([]string, []string) { + // A span is used to record a slice of s of the form s[start:end]. + // The start index is inclusive and the end index is exclusive. + type span struct { + start int + end int + } + spans := make([]span, 0, 32) + separators := make([]string, 0, 32) + + // Find the field start and end indices. + // Doing this in a separate pass (rather than slicing the string s + // and collecting the result substrings right away) is significantly + // more efficient, possibly due to cache effects. + start := -1 // valid span start if >= 0 + for end, rune := range s { + if f(rune) { + if start >= 0 { + spans = append(spans, span{start, end}) + // Set start to a negative value. + // Note: using -1 here consistently and reproducibly + // slows down this code by a several percent on amd64. + start = ^start + separators = append(separators, string(s[end])) + } + } else { + if start < 0 { + start = end + } + } + } + + // Last field might end at EOF. + if start >= 0 { + spans = append(spans, span{start, len(s)}) + } + + // Create strings from recorded field indices. + a := make([]string, len(spans)) + for i, span := range spans { + a[i] = s[span.start:span.end] + } + + return a, separators +} + +// join is a copy of strings.Join from the Go standard library, +// but it also accepts a slice of separators to join the elements with. +// If the slice of separators is shorter than the slice of elements, use a default value. +// We also don't check for integer overflow. +func join(elems []string, separators []string, def string) string { + switch len(elems) { + case 0: + return "" + case 1: + return elems[0] + } + + var n int + var sep string + sepLen := len(separators) + for i, elem := range elems { + if i >= sepLen { + sep = def + } else { + sep = separators[i] + } + n += len(sep) + len(elem) + } + + var b strings.Builder + b.Grow(n) + b.WriteString(elems[0]) + for i, s := range elems[1:] { + if i >= sepLen { + sep = def + } else { + sep = separators[i] + } + b.WriteString(sep) + b.WriteString(s) + } + return b.String() +} diff --git a/storage/remote/otlptranslator/prometheus/normalize_label.go b/storage/remote/otlptranslator/prometheus/normalize_label.go index a112b9bbce..b928e6888d 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_label.go +++ b/storage/remote/otlptranslator/prometheus/normalize_label.go @@ -19,6 +19,8 @@ package prometheus import ( "strings" "unicode" + + "github.com/prometheus/prometheus/util/strutil" ) // Normalizes the specified label to follow Prometheus label names standard. @@ -26,16 +28,14 @@ import ( // See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels. // // Labels that start with non-letter rune will be prefixed with "key_". -// // An exception is made for double-underscores which are allowed. 
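To make the two stdlib-derived helpers above concrete, here is a small round-trip sketch (the input string is hypothetical): fieldsFunc returns the tokens plus the separators it consumed, and join stitches them back together, falling back to the default separator once the separator slice runs out.

    isSep := func(r rune) bool { return r == '.' || r == '/' }
    tokens, seps := fieldsFunc("system.io/foo", isSep)
    // tokens == []string{"system", "io", "foo"}
    // seps   == []string{".", "/"}

    // With every separator available, the original string comes back.
    _ = join(tokens, seps, "_") // "system.io/foo"
    // With fewer separators than gaps, the default "_" fills in.
    _ = join(tokens, seps[:1], "_") // "system.io_foo"

This is what later lets normalizeName preserve the original separators when UTF-8 names are allowed, while still joining with underscores otherwise.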
-func NormalizeLabel(label string) string { +func NormalizeLabel(label string, allowUTF8 bool) string { // Trivial case - if len(label) == 0 { + if len(label) == 0 || allowUTF8 { return label } - // Replace all non-alphanumeric runes with underscores - label = strings.Map(sanitizeRune, label) + label = strutil.SanitizeLabelName(label) // If label starts with a number, prepend with "key_" if unicode.IsDigit(rune(label[0])) { @@ -46,11 +46,3 @@ func NormalizeLabel(label string) string { return label } - -// Return '_' for anything non-alphanumeric. -func sanitizeRune(r rune) rune { - if unicode.IsLetter(r) || unicode.IsDigit(r) { - return r - } - return '_' -} diff --git a/storage/remote/otlptranslator/prometheus/normalize_label_test.go b/storage/remote/otlptranslator/prometheus/normalize_label_test.go new file mode 100644 index 0000000000..19ab6cd173 --- /dev/null +++ b/storage/remote/otlptranslator/prometheus/normalize_label_test.go @@ -0,0 +1,48 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package prometheus + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestNormalizeLabel(t *testing.T) { + tests := []struct { + label string + expected string + expectedUTF8 string + }{ + {"", "", ""}, + {"label:with:colons", "label_with_colons", "label:with:colons"}, // Without UTF-8 support, colons are only allowed in metric names + {"LabelWithCapitalLetters", "LabelWithCapitalLetters", "LabelWithCapitalLetters"}, + {"label!with&special$chars)", "label_with_special_chars_", "label!with&special$chars)"}, + {"label_with_foreign_characters_字符", "label_with_foreign_characters___", "label_with_foreign_characters_字符"}, + {"label.with.dots", "label_with_dots", "label.with.dots"}, + {"123label", "key_123label", "123label"}, + {"_label_starting_with_underscore", "key_label_starting_with_underscore", "_label_starting_with_underscore"}, + {"__label_starting_with_2underscores", "__label_starting_with_2underscores", "__label_starting_with_2underscores"}, + } + + for i, test := range tests { + t.Run(fmt.Sprintf("test_%d", i), func(t *testing.T) { + result := NormalizeLabel(test.label, false) + require.Equal(t, test.expected, result) + uTF8result := NormalizeLabel(test.label, true) + require.Equal(t, test.expectedUTF8, uTF8result) + }) + } +} diff --git a/storage/remote/otlptranslator/prometheus/normalize_name.go b/storage/remote/otlptranslator/prometheus/normalize_name.go index 0f472b80a0..335705aa8d 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name.go @@ -17,9 +17,12 @@ package prometheus import ( + "regexp" + "slices" "strings" "unicode" + "github.com/prometheus/prometheus/util/strutil" "go.opentelemetry.io/collector/pdata/pmetric" ) @@ -84,38 +87,52 @@ var perUnitMap = map[string]string{ // // See rules at https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels, // 
https://prometheus.io/docs/practices/naming/#metric-and-label-naming -// and https://github.com/open-telemetry/opentelemetry-specification/blob/v1.33.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. -func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffixes bool) string { - var metricName string - +// and https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. +func BuildCompliantName(metric pmetric.Metric, namespace string, addMetricSuffixes, allowUTF8 bool) string { // Full normalization following standard Prometheus naming conventions if addMetricSuffixes { - return normalizeName(metric, namespace) + return normalizeName(metric, namespace, allowUTF8) } - // Simple case (no full normalization, no units, etc.), we simply trim out forbidden chars - metricName = RemovePromForbiddenRunes(metric.Name()) + var metricName string + if !allowUTF8 { + // Regexp for metric name characters that should be replaced with _. + invalidMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:_]`) + + // Simple case (no full normalization, no units, etc.). + metricName = strings.Join(strings.FieldsFunc(metric.Name(), func(r rune) bool { + return invalidMetricCharRE.MatchString(string(r)) + }), "_") + } else { + metricName = metric.Name() + } // Namespace? if namespace != "" { return namespace + "_" + metricName } - // Metric name starts with a digit? Prefix it with an underscore - if metricName != "" && unicode.IsDigit(rune(metricName[0])) { + // Metric name starts with a digit and utf8 not allowed? Prefix it with an underscore. + if metricName != "" && unicode.IsDigit(rune(metricName[0])) && !allowUTF8 { metricName = "_" + metricName } return metricName } -// Build a normalized name for the specified metric -func normalizeName(metric pmetric.Metric, namespace string) string { - // Split metric name into "tokens" (remove all non-alphanumerics) - nameTokens := strings.FieldsFunc( - metric.Name(), - func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) }, - ) +// Build a normalized name for the specified metric. +func normalizeName(metric pmetric.Metric, namespace string, allowUTF8 bool) string { + var translationFunc func(rune) bool + if !allowUTF8 { + nonTokenMetricCharRE := regexp.MustCompile(`[^a-zA-Z0-9:]`) + translationFunc = func(r rune) bool { return nonTokenMetricCharRE.MatchString(string(r)) } + } else { + translationFunc = func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != ':' } + } + // Split metric name into "tokens" (of supported metric name runes). + // Note that this has the side effect of replacing multiple consecutive underscores with a single underscore. + // This is part of the OTel to Prometheus specification: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. 
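Pausing mid-hunk (normalizeName continues below): the observable effect of the new allowUTF8 flag on BuildCompliantName, with the expected strings taken from the updated tests in this patch, is roughly this sketch:

    m := pmetric.NewMetric()
    m.SetName("system.io")
    m.SetUnit("By")
    m.SetEmptySum().SetIsMonotonic(true) // a counter

    // Suffixes requested, UTF-8 disallowed: full normalization.
    _ = BuildCompliantName(m, "", true, false) // "system_io_bytes_total"
    // No suffixes, UTF-8 allowed: the name passes through untouched.
    _ = BuildCompliantName(m, "", false, true) // "system.io"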
+ nameTokens, separators := fieldsFunc(metric.Name(), translationFunc) // Split unit at the '/' if any unitTokens := strings.SplitN(metric.Unit(), "/", 2) @@ -123,11 +140,15 @@ func normalizeName(metric pmetric.Metric, namespace string) string { // Main unit // Append if not blank, doesn't contain '{}', and is not present in metric name already if len(unitTokens) > 0 { + var mainUnitProm, perUnitProm string mainUnitOTel := strings.TrimSpace(unitTokens[0]) if mainUnitOTel != "" && !strings.ContainsAny(mainUnitOTel, "{}") { - mainUnitProm := CleanUpString(unitMapGetOrDefault(mainUnitOTel)) - if mainUnitProm != "" && !contains(nameTokens, mainUnitProm) { - nameTokens = append(nameTokens, mainUnitProm) + mainUnitProm = unitMapGetOrDefault(mainUnitOTel) + if !allowUTF8 { + mainUnitProm = cleanUpUnit(mainUnitProm) + } + if slices.Contains(nameTokens, mainUnitProm) { + mainUnitProm = "" } } @@ -136,13 +157,29 @@ func normalizeName(metric pmetric.Metric, namespace string) string { if len(unitTokens) > 1 && unitTokens[1] != "" { perUnitOTel := strings.TrimSpace(unitTokens[1]) if perUnitOTel != "" && !strings.ContainsAny(perUnitOTel, "{}") { - perUnitProm := CleanUpString(perUnitMapGetOrDefault(perUnitOTel)) - if perUnitProm != "" && !contains(nameTokens, perUnitProm) { - nameTokens = append(nameTokens, "per", perUnitProm) + perUnitProm = perUnitMapGetOrDefault(perUnitOTel) + if !allowUTF8 { + perUnitProm = cleanUpUnit(perUnitProm) + } + } + if perUnitProm != "" { + perUnitProm = "per_" + perUnitProm + if slices.Contains(nameTokens, perUnitProm) { + perUnitProm = "" } } } + if perUnitProm != "" { + mainUnitProm = strings.TrimSuffix(mainUnitProm, "_") + } + + if mainUnitProm != "" { + nameTokens = append(nameTokens, mainUnitProm) + } + if perUnitProm != "" { + nameTokens = append(nameTokens, perUnitProm) + } } // Append _total for Counters @@ -164,8 +201,12 @@ func normalizeName(metric pmetric.Metric, namespace string) string { nameTokens = append([]string{namespace}, nameTokens...) } - // Build the string from the tokens, separated with underscores - normalizedName := strings.Join(nameTokens, "_") + // Build the string from the tokens + separators. + // If UTF-8 isn't allowed, we'll use underscores as separators. + if !allowUTF8 { + separators = []string{} + } + normalizedName := join(nameTokens, separators, "_") // Metric name cannot start with a digit, so prefix it with "_" in this case if normalizedName != "" && unicode.IsDigit(rune(normalizedName[0])) { @@ -235,13 +276,15 @@ func removeSuffix(tokens []string, suffix string) []string { return tokens } -// Clean up specified string so it's Prometheus compliant -func CleanUpString(s string) string { - return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) }), "_") -} - -func RemovePromForbiddenRunes(s string) string { - return strings.Join(strings.FieldsFunc(s, func(r rune) bool { return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_' && r != ':' }), "_") +// cleanUpUnit cleans up unit so it matches model.LabelNameRE. +func cleanUpUnit(unit string) string { + // Multiple consecutive underscores are replaced with a single underscore. + // This is part of the OTel to Prometheus specification: https://github.com/open-telemetry/opentelemetry-specification/blob/v1.38.0/specification/compatibility/prometheus_and_openmetrics.md#otlp-metric-points-to-prometheus. 
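cleanUpUnit, whose body continues below, is easiest to read off the updated test expectations: sanitize to label-name characters, collapse runs of underscores, then trim a leading underscore.

    cleanUpUnit("a b")          // "a_b"
    cleanUpUnit("hello, world") // "hello_world"
    cleanUpUnit("$1000")        // "1000" (sanitized to "_1000", prefix trimmed)
    cleanUpUnit("*+$^=)")       // ""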
+ multipleUnderscoresRE := regexp.MustCompile(`__+`) + return strings.TrimPrefix(multipleUnderscoresRE.ReplaceAllString( + strutil.SanitizeLabelName(unit), + "_", + ), "_") } // Retrieve the Prometheus "basic" unit corresponding to the specified "basic" unit @@ -262,16 +305,6 @@ func perUnitMapGetOrDefault(perUnit string) string { return perUnit } -// Returns whether the slice contains the specified value -func contains(slice []string, value string) bool { - for _, sliceEntry := range slice { - if sliceEntry == value { - return true - } - } - return false -} - // Remove the specified value from the slice func removeItem(slice []string, value string) []string { newSlice := make([]string, 0, len(slice)) diff --git a/storage/remote/otlptranslator/prometheus/normalize_name_test.go b/storage/remote/otlptranslator/prometheus/normalize_name_test.go index 07b9b0a784..d97e7a560a 100644 --- a/storage/remote/otlptranslator/prometheus/normalize_name_test.go +++ b/storage/remote/otlptranslator/prometheus/normalize_name_test.go @@ -25,92 +25,119 @@ import ( ) func TestByte(t *testing.T) { - require.Equal(t, "system_filesystem_usage_bytes", normalizeName(createGauge("system.filesystem.usage", "By"), "")) + require.Equal(t, "system_filesystem_usage_bytes", normalizeName(createGauge("system.filesystem.usage", "By"), "", false)) } func TestByteCounter(t *testing.T) { - require.Equal(t, "system_io_bytes_total", normalizeName(createCounter("system.io", "By"), "")) - require.Equal(t, "network_transmitted_bytes_total", normalizeName(createCounter("network_transmitted_bytes_total", "By"), "")) + require.Equal(t, "system_io_bytes_total", normalizeName(createCounter("system.io", "By"), "", false)) + require.Equal(t, "network_transmitted_bytes_total", normalizeName(createCounter("network_transmitted_bytes_total", "By"), "", false)) } func TestWhiteSpaces(t *testing.T) { - require.Equal(t, "system_filesystem_usage_bytes", normalizeName(createGauge("\t system.filesystem.usage ", " By\t"), "")) + require.Equal(t, "system_filesystem_usage_bytes", normalizeName(createGauge("\t system.filesystem.usage ", " By\t"), "", false)) } func TestNonStandardUnit(t *testing.T) { - require.Equal(t, "system_network_dropped", normalizeName(createGauge("system.network.dropped", "{packets}"), "")) + require.Equal(t, "system_network_dropped", normalizeName(createGauge("system.network.dropped", "{packets}"), "", false)) } func TestNonStandardUnitCounter(t *testing.T) { - require.Equal(t, "system_network_dropped_total", normalizeName(createCounter("system.network.dropped", "{packets}"), "")) + require.Equal(t, "system_network_dropped_total", normalizeName(createCounter("system.network.dropped", "{packets}"), "", false)) } func TestBrokenUnit(t *testing.T) { - require.Equal(t, "system_network_dropped_packets", normalizeName(createGauge("system.network.dropped", "packets"), "")) - require.Equal(t, "system_network_packets_dropped", normalizeName(createGauge("system.network.packets.dropped", "packets"), "")) - require.Equal(t, "system_network_packets", normalizeName(createGauge("system.network.packets", "packets"), "")) + require.Equal(t, "system_network_dropped_packets", normalizeName(createGauge("system.network.dropped", "packets"), "", false)) + require.Equal(t, "system_network_packets_dropped", normalizeName(createGauge("system.network.packets.dropped", "packets"), "", false)) + require.Equal(t, "system_network_packets", normalizeName(createGauge("system.network.packets", "packets"), "", false)) } func TestBrokenUnitCounter(t *testing.T) { 
- require.Equal(t, "system_network_dropped_packets_total", normalizeName(createCounter("system.network.dropped", "packets"), "")) - require.Equal(t, "system_network_packets_dropped_total", normalizeName(createCounter("system.network.packets.dropped", "packets"), "")) - require.Equal(t, "system_network_packets_total", normalizeName(createCounter("system.network.packets", "packets"), "")) + require.Equal(t, "system_network_dropped_packets_total", normalizeName(createCounter("system.network.dropped", "packets"), "", false)) + require.Equal(t, "system_network_packets_dropped_total", normalizeName(createCounter("system.network.packets.dropped", "packets"), "", false)) + require.Equal(t, "system_network_packets_total", normalizeName(createCounter("system.network.packets", "packets"), "", false)) } func TestRatio(t *testing.T) { - require.Equal(t, "hw_gpu_memory_utilization_ratio", normalizeName(createGauge("hw.gpu.memory.utilization", "1"), "")) - require.Equal(t, "hw_fan_speed_ratio", normalizeName(createGauge("hw.fan.speed_ratio", "1"), "")) - require.Equal(t, "objects_total", normalizeName(createCounter("objects", "1"), "")) + require.Equal(t, "hw_gpu_memory_utilization_ratio", normalizeName(createGauge("hw.gpu.memory.utilization", "1"), "", false)) + require.Equal(t, "hw_fan_speed_ratio", normalizeName(createGauge("hw.fan.speed_ratio", "1"), "", false)) + require.Equal(t, "objects_total", normalizeName(createCounter("objects", "1"), "", false)) } func TestHertz(t *testing.T) { - require.Equal(t, "hw_cpu_speed_limit_hertz", normalizeName(createGauge("hw.cpu.speed_limit", "Hz"), "")) + require.Equal(t, "hw_cpu_speed_limit_hertz", normalizeName(createGauge("hw.cpu.speed_limit", "Hz"), "", false)) } func TestPer(t *testing.T) { - require.Equal(t, "broken_metric_speed_km_per_hour", normalizeName(createGauge("broken.metric.speed", "km/h"), "")) - require.Equal(t, "astro_light_speed_limit_meters_per_second", normalizeName(createGauge("astro.light.speed_limit", "m/s"), "")) + require.Equal(t, "broken_metric_speed_km_per_hour", normalizeName(createGauge("broken.metric.speed", "km/h"), "", false)) + require.Equal(t, "astro_light_speed_limit_meters_per_second", normalizeName(createGauge("astro.light.speed_limit", "m/s"), "", false)) } func TestPercent(t *testing.T) { - require.Equal(t, "broken_metric_success_ratio_percent", normalizeName(createGauge("broken.metric.success_ratio", "%"), "")) - require.Equal(t, "broken_metric_success_percent", normalizeName(createGauge("broken.metric.success_percent", "%"), "")) + require.Equal(t, "broken_metric_success_ratio_percent", normalizeName(createGauge("broken.metric.success_ratio", "%"), "", false)) + require.Equal(t, "broken_metric_success_percent", normalizeName(createGauge("broken.metric.success_percent", "%"), "", false)) } func TestEmpty(t *testing.T) { - require.Equal(t, "test_metric_no_unit", normalizeName(createGauge("test.metric.no_unit", ""), "")) - require.Equal(t, "test_metric_spaces", normalizeName(createGauge("test.metric.spaces", " \t "), "")) + require.Equal(t, "test_metric_no_unit", normalizeName(createGauge("test.metric.no_unit", ""), "", false)) + require.Equal(t, "test_metric_spaces", normalizeName(createGauge("test.metric.spaces", " \t "), "", false)) } -func TestUnsupportedRunes(t *testing.T) { - require.Equal(t, "unsupported_metric_temperature_F", normalizeName(createGauge("unsupported.metric.temperature", "°F"), "")) - require.Equal(t, "unsupported_metric_weird", normalizeName(createGauge("unsupported.metric.weird", "+=.:,!* & #"), "")) 
- require.Equal(t, "unsupported_metric_redundant_test_per_C", normalizeName(createGauge("unsupported.metric.redundant", "__test $/°C"), "")) +func TestAllowUTF8(t *testing.T) { + t.Run("allow UTF8", func(t *testing.T) { + require.Equal(t, "unsupported.metric.temperature_°F", normalizeName(createGauge("unsupported.metric.temperature", "°F"), "", true)) + require.Equal(t, "unsupported.metric.weird_+=.:,!* & #", normalizeName(createGauge("unsupported.metric.weird", "+=.:,!* & #"), "", true)) + require.Equal(t, "unsupported.metric.redundant___test $_per_°C", normalizeName(createGauge("unsupported.metric.redundant", "__test $/°C"), "", true)) + require.Equal(t, "metric_with_字符_foreign_characters_ど", normalizeName(createGauge("metric_with_字符_foreign_characters", "ど"), "", true)) + }) + t.Run("disallow UTF8", func(t *testing.T) { + require.Equal(t, "unsupported_metric_temperature_F", normalizeName(createGauge("unsupported.metric.temperature", "°F"), "", false)) + require.Equal(t, "unsupported_metric_weird", normalizeName(createGauge("unsupported.metric.weird", "+=.:,!* & #"), "", false)) + require.Equal(t, "unsupported_metric_redundant_test_per_C", normalizeName(createGauge("unsupported.metric.redundant", "__test $/°C"), "", false)) + require.Equal(t, "metric_with_foreign_characters", normalizeName(createGauge("metric_with_字符_foreign_characters", "ど"), "", false)) + }) +} + +func TestAllowUTF8KnownBugs(t *testing.T) { + // Due to historical reasons, the translator code was copied from OpenTelemetry collector codebase. + // Over there, they tried to provide means to translate metric names following Prometheus conventions that are documented here: + // https://prometheus.io/docs/practices/naming/ + // + // Althogh not explicitly said, it was implied that words should be separated by a single underscore and the codebase was written + // with that in mind. + // + // Now that we're allowing OTel users to have their original names stored in prometheus without any transformation, we're facing problems + // where two (or more) UTF-8 characters are being used to separate words. + // TODO(arthursens): Fix it! + + // We're asserting on 'NotEqual', which proves the bug. + require.NotEqual(t, "metric....split_=+by_//utf8characters", normalizeName(createGauge("metric....split_=+by_//utf8characters", ""), "", true)) + // Here we're asserting on 'Equal', showing the current behavior. 
+ require.Equal(t, "metric.split_by_utf8characters", normalizeName(createGauge("metric....split_=+by_//utf8characters", ""), "", true)) } func TestOTelReceivers(t *testing.T) { - require.Equal(t, "active_directory_ds_replication_network_io_bytes_total", normalizeName(createCounter("active_directory.ds.replication.network.io", "By"), "")) - require.Equal(t, "active_directory_ds_replication_sync_object_pending_total", normalizeName(createCounter("active_directory.ds.replication.sync.object.pending", "{objects}"), "")) - require.Equal(t, "active_directory_ds_replication_object_rate_per_second", normalizeName(createGauge("active_directory.ds.replication.object.rate", "{objects}/s"), "")) - require.Equal(t, "active_directory_ds_name_cache_hit_rate_percent", normalizeName(createGauge("active_directory.ds.name_cache.hit_rate", "%"), "")) - require.Equal(t, "active_directory_ds_ldap_bind_last_successful_time_milliseconds", normalizeName(createGauge("active_directory.ds.ldap.bind.last_successful.time", "ms"), "")) - require.Equal(t, "apache_current_connections", normalizeName(createGauge("apache.current_connections", "connections"), "")) - require.Equal(t, "apache_workers_connections", normalizeName(createGauge("apache.workers", "connections"), "")) - require.Equal(t, "apache_requests_total", normalizeName(createCounter("apache.requests", "1"), "")) - require.Equal(t, "bigip_virtual_server_request_count_total", normalizeName(createCounter("bigip.virtual_server.request.count", "{requests}"), "")) - require.Equal(t, "system_cpu_utilization_ratio", normalizeName(createGauge("system.cpu.utilization", "1"), "")) - require.Equal(t, "system_disk_operation_time_seconds_total", normalizeName(createCounter("system.disk.operation_time", "s"), "")) - require.Equal(t, "system_cpu_load_average_15m_ratio", normalizeName(createGauge("system.cpu.load_average.15m", "1"), "")) - require.Equal(t, "memcached_operation_hit_ratio_percent", normalizeName(createGauge("memcached.operation_hit_ratio", "%"), "")) - require.Equal(t, "mongodbatlas_process_asserts_per_second", normalizeName(createGauge("mongodbatlas.process.asserts", "{assertions}/s"), "")) - require.Equal(t, "mongodbatlas_process_journaling_data_files_mebibytes", normalizeName(createGauge("mongodbatlas.process.journaling.data_files", "MiBy"), "")) - require.Equal(t, "mongodbatlas_process_network_io_bytes_per_second", normalizeName(createGauge("mongodbatlas.process.network.io", "By/s"), "")) - require.Equal(t, "mongodbatlas_process_oplog_rate_gibibytes_per_hour", normalizeName(createGauge("mongodbatlas.process.oplog.rate", "GiBy/h"), "")) - require.Equal(t, "mongodbatlas_process_db_query_targeting_scanned_per_returned", normalizeName(createGauge("mongodbatlas.process.db.query_targeting.scanned_per_returned", "{scanned}/{returned}"), "")) - require.Equal(t, "nginx_requests", normalizeName(createGauge("nginx.requests", "requests"), "")) - require.Equal(t, "nginx_connections_accepted", normalizeName(createGauge("nginx.connections_accepted", "connections"), "")) - require.Equal(t, "nsxt_node_memory_usage_kilobytes", normalizeName(createGauge("nsxt.node.memory.usage", "KBy"), "")) - require.Equal(t, "redis_latest_fork_microseconds", normalizeName(createGauge("redis.latest_fork", "us"), "")) + require.Equal(t, "active_directory_ds_replication_network_io_bytes_total", normalizeName(createCounter("active_directory.ds.replication.network.io", "By"), "", false)) + require.Equal(t, "active_directory_ds_replication_sync_object_pending_total", 
normalizeName(createCounter("active_directory.ds.replication.sync.object.pending", "{objects}"), "", false)) + require.Equal(t, "active_directory_ds_replication_object_rate_per_second", normalizeName(createGauge("active_directory.ds.replication.object.rate", "{objects}/s"), "", false)) + require.Equal(t, "active_directory_ds_name_cache_hit_rate_percent", normalizeName(createGauge("active_directory.ds.name_cache.hit_rate", "%"), "", false)) + require.Equal(t, "active_directory_ds_ldap_bind_last_successful_time_milliseconds", normalizeName(createGauge("active_directory.ds.ldap.bind.last_successful.time", "ms"), "", false)) + require.Equal(t, "apache_current_connections", normalizeName(createGauge("apache.current_connections", "connections"), "", false)) + require.Equal(t, "apache_workers_connections", normalizeName(createGauge("apache.workers", "connections"), "", false)) + require.Equal(t, "apache_requests_total", normalizeName(createCounter("apache.requests", "1"), "", false)) + require.Equal(t, "bigip_virtual_server_request_count_total", normalizeName(createCounter("bigip.virtual_server.request.count", "{requests}"), "", false)) + require.Equal(t, "system_cpu_utilization_ratio", normalizeName(createGauge("system.cpu.utilization", "1"), "", false)) + require.Equal(t, "system_disk_operation_time_seconds_total", normalizeName(createCounter("system.disk.operation_time", "s"), "", false)) + require.Equal(t, "system_cpu_load_average_15m_ratio", normalizeName(createGauge("system.cpu.load_average.15m", "1"), "", false)) + require.Equal(t, "memcached_operation_hit_ratio_percent", normalizeName(createGauge("memcached.operation_hit_ratio", "%"), "", false)) + require.Equal(t, "mongodbatlas_process_asserts_per_second", normalizeName(createGauge("mongodbatlas.process.asserts", "{assertions}/s"), "", false)) + require.Equal(t, "mongodbatlas_process_journaling_data_files_mebibytes", normalizeName(createGauge("mongodbatlas.process.journaling.data_files", "MiBy"), "", false)) + require.Equal(t, "mongodbatlas_process_network_io_bytes_per_second", normalizeName(createGauge("mongodbatlas.process.network.io", "By/s"), "", false)) + require.Equal(t, "mongodbatlas_process_oplog_rate_gibibytes_per_hour", normalizeName(createGauge("mongodbatlas.process.oplog.rate", "GiBy/h"), "", false)) + require.Equal(t, "mongodbatlas_process_db_query_targeting_scanned_per_returned", normalizeName(createGauge("mongodbatlas.process.db.query_targeting.scanned_per_returned", "{scanned}/{returned}"), "", false)) + require.Equal(t, "nginx_requests", normalizeName(createGauge("nginx.requests", "requests"), "", false)) + require.Equal(t, "nginx_connections_accepted", normalizeName(createGauge("nginx.connections_accepted", "connections"), "", false)) + require.Equal(t, "nsxt_node_memory_usage_kilobytes", normalizeName(createGauge("nsxt.node.memory.usage", "KBy"), "", false)) + require.Equal(t, "redis_latest_fork_microseconds", normalizeName(createGauge("redis.latest_fork", "us"), "", false)) } func TestTrimPromSuffixes(t *testing.T) { @@ -144,17 +171,17 @@ func TestTrimPromSuffixes(t *testing.T) { } func TestNamespace(t *testing.T) { - require.Equal(t, "space_test", normalizeName(createGauge("test", ""), "space")) - require.Equal(t, "space_test", normalizeName(createGauge("#test", ""), "space")) + require.Equal(t, "space_test", normalizeName(createGauge("test", ""), "space", false)) + require.Equal(t, "space_test", normalizeName(createGauge("#test", ""), "space", false)) } -func TestCleanUpString(t *testing.T) { - require.Equal(t, "", 
CleanUpString("")) - require.Equal(t, "a_b", CleanUpString("a b")) - require.Equal(t, "hello_world", CleanUpString("hello, world!")) - require.Equal(t, "hello_you_2", CleanUpString("hello you 2")) - require.Equal(t, "1000", CleanUpString("$1000")) - require.Equal(t, "", CleanUpString("*+$^=)")) +func TestCleanUpUnit(t *testing.T) { + require.Equal(t, "", cleanUpUnit("")) + require.Equal(t, "a_b", cleanUpUnit("a b")) + require.Equal(t, "hello_world", cleanUpUnit("hello, world")) + require.Equal(t, "hello_you_2", cleanUpUnit("hello you 2")) + require.Equal(t, "1000", cleanUpUnit("$1000")) + require.Equal(t, "", cleanUpUnit("*+$^=)")) } func TestUnitMapGetOrDefault(t *testing.T) { @@ -179,27 +206,29 @@ func TestRemoveItem(t *testing.T) { require.Equal(t, []string{"b", "c"}, removeItem([]string{"a", "b", "c"}, "a")) } -func TestBuildCompliantNameWithNormalize(t *testing.T) { - require.Equal(t, "system_io_bytes_total", BuildCompliantName(createCounter("system.io", "By"), "", true)) - require.Equal(t, "system_network_io_bytes_total", BuildCompliantName(createCounter("network.io", "By"), "system", true)) - require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", ""), "", true)) - require.Equal(t, "envoy_rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", true)) - require.Equal(t, "foo_bar", BuildCompliantName(createGauge(":foo::bar", ""), "", true)) - require.Equal(t, "foo_bar_total", BuildCompliantName(createCounter(":foo::bar", ""), "", true)) +func TestBuildCompliantNameWithSuffixes(t *testing.T) { + require.Equal(t, "system_io_bytes_total", BuildCompliantName(createCounter("system.io", "By"), "", true, false)) + require.Equal(t, "system_network_io_bytes_total", BuildCompliantName(createCounter("network.io", "By"), "system", true, false)) + require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", ""), "", true, false)) + require.Equal(t, "envoy_rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", true, false)) + require.Equal(t, ":foo::bar", BuildCompliantName(createGauge(":foo::bar", ""), "", true, false)) + require.Equal(t, ":foo::bar_total", BuildCompliantName(createCounter(":foo::bar", ""), "", true, false)) // Gauges with unit 1 are considered ratios. - require.Equal(t, "foo_bar_ratio", BuildCompliantName(createGauge("foo.bar", "1"), "", true)) + require.Equal(t, "foo_bar_ratio", BuildCompliantName(createGauge("foo.bar", "1"), "", true, false)) // Slashes in units are converted. 
- require.Equal(t, "system_io_foo_per_bar_total", BuildCompliantName(createCounter("system.io", "foo/bar"), "", true)) + require.Equal(t, "system_io_foo_per_bar_total", BuildCompliantName(createCounter("system.io", "foo/bar"), "", true, false)) + require.Equal(t, "metric_with_foreign_characters_total", BuildCompliantName(createCounter("metric_with_字符_foreign_characters", ""), "", true, false)) } func TestBuildCompliantNameWithoutSuffixes(t *testing.T) { - require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "By"), "", false)) - require.Equal(t, "system_network_io", BuildCompliantName(createCounter("network.io", "By"), "system", false)) - require.Equal(t, "system_network_I_O", BuildCompliantName(createCounter("network (I/O)", "By"), "system", false)) - require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", "By"), "", false)) - require.Equal(t, "envoy__rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", false)) - require.Equal(t, ":foo::bar", BuildCompliantName(createGauge(":foo::bar", ""), "", false)) - require.Equal(t, ":foo::bar", BuildCompliantName(createCounter(":foo::bar", ""), "", false)) - require.Equal(t, "foo_bar", BuildCompliantName(createGauge("foo.bar", "1"), "", false)) - require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "foo/bar"), "", false)) + require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "By"), "", false, false)) + require.Equal(t, "system_network_io", BuildCompliantName(createCounter("network.io", "By"), "system", false, false)) + require.Equal(t, "system_network_I_O", BuildCompliantName(createCounter("network (I/O)", "By"), "system", false, false)) + require.Equal(t, "_3_14_digits", BuildCompliantName(createGauge("3.14 digits", "By"), "", false, false)) + require.Equal(t, "envoy__rule_engine_zlib_buf_error", BuildCompliantName(createGauge("envoy__rule_engine_zlib_buf_error", ""), "", false, false)) + require.Equal(t, ":foo::bar", BuildCompliantName(createGauge(":foo::bar", ""), "", false, false)) + require.Equal(t, ":foo::bar", BuildCompliantName(createCounter(":foo::bar", ""), "", false, false)) + require.Equal(t, "foo_bar", BuildCompliantName(createGauge("foo.bar", "1"), "", false, false)) + require.Equal(t, "system_io", BuildCompliantName(createCounter("system.io", "foo/bar"), "", false, false)) + require.Equal(t, "metric_with___foreign_characters", BuildCompliantName(createCounter("metric_with_字符_foreign_characters", ""), "", false, false)) } diff --git a/util/logging/ratelimit.go b/storage/remote/otlptranslator/prometheusremotewrite/context.go similarity index 53% rename from util/logging/ratelimit.go rename to storage/remote/otlptranslator/prometheusremotewrite/context.go index 32d1e249e6..5c6dd20f18 100644 --- a/util/logging/ratelimit.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/context.go @@ -1,4 +1,4 @@ -// Copyright 2019 The Prometheus Authors +// Copyright 2024 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -10,30 +10,28 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
+package prometheusremotewrite -package logging +import "context" -import ( - "github.com/go-kit/log" - "golang.org/x/time/rate" -) - -type ratelimiter struct { - limiter *rate.Limiter - next log.Logger +// everyNTimes supports checking for context error every n times. +type everyNTimes struct { + n int + i int + err error } -// RateLimit write to a logger. -func RateLimit(next log.Logger, limit rate.Limit) log.Logger { - return &ratelimiter{ - limiter: rate.NewLimiter(limit, int(limit)), - next: next, +// checkContext calls ctx.Err() every e.n times and returns an eventual error. +func (e *everyNTimes) checkContext(ctx context.Context) error { + if e.err != nil { + return e.err } -} -func (r *ratelimiter) Log(keyvals ...interface{}) error { - if r.limiter.Allow() { - return r.next.Log(keyvals...) + e.i++ + if e.i >= e.n { + e.i = 0 + e.err = ctx.Err() } - return nil + + return e.err } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/context_test.go b/storage/remote/otlptranslator/prometheusremotewrite/context_test.go new file mode 100644 index 0000000000..94b23be04f --- /dev/null +++ b/storage/remote/otlptranslator/prometheusremotewrite/context_test.go @@ -0,0 +1,40 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package prometheusremotewrite + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestEveryNTimes(t *testing.T) { + const n = 128 + ctx, cancel := context.WithCancel(context.Background()) + e := &everyNTimes{ + n: n, + } + + for i := 0; i < n; i++ { + require.NoError(t, e.checkContext(ctx)) + } + + cancel() + for i := 0; i < n-1; i++ { + require.NoError(t, e.checkContext(ctx)) + } + require.EqualError(t, e.checkContext(ctx), context.Canceled.Error()) + // e should remember the error. + require.EqualError(t, e.checkContext(ctx), context.Canceled.Error()) +} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper.go b/storage/remote/otlptranslator/prometheusremotewrite/helper.go index 67cf28119d..316dd60c63 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go @@ -17,6 +17,7 @@ package prometheusremotewrite import ( + "context" "encoding/hex" "fmt" "log" @@ -156,7 +157,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, setting // map ensures no duplicate label names. 
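For orientation inside createAttributes, which continues below: every label key now flows through NormalizeLabel with settings.AllowUTF8. Per the normalize_label tests earlier in this patch, that means:

    NormalizeLabel("label.with.dots", false) // "label_with_dots"
    NormalizeLabel("123label", false)        // "key_123label"
    NormalizeLabel("label.with.dots", true)  // unchanged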
l := make(map[string]string, maxLabelCount) for _, label := range labels { - var finalKey = prometheustranslator.NormalizeLabel(label.Name) + var finalKey = prometheustranslator.NormalizeLabel(label.Name, settings.AllowUTF8) if existingValue, alreadyExists := l[finalKey]; alreadyExists { l[finalKey] = existingValue + ";" + label.Value } else { @@ -165,7 +166,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, setting } for _, lbl := range promotedAttrs { - normalized := prometheustranslator.NormalizeLabel(lbl.Name) + normalized := prometheustranslator.NormalizeLabel(lbl.Name, settings.AllowUTF8) if _, exists := l[normalized]; !exists { l[normalized] = lbl.Value } @@ -204,7 +205,7 @@ func createAttributes(resource pcommon.Resource, attributes pcommon.Map, setting } // internal labels should be maintained if !(len(name) > 4 && name[:2] == "__" && name[len(name)-2:] == "__") { - name = prometheustranslator.NormalizeLabel(name) + name = prometheustranslator.NormalizeLabel(name, settings.AllowUTF8) } l[name] = extras[i+1] } @@ -241,9 +242,13 @@ func isValidAggregationTemporality(metric pmetric.Metric) bool { // with the user defined bucket boundaries of non-exponential OTel histograms. // However, work is under way to resolve this shortcoming through a feature called native histograms custom buckets: // https://github.com/prometheus/prometheus/issues/13485. -func (c *PrometheusConverter) addHistogramDataPoints(dataPoints pmetric.HistogramDataPointSlice, - resource pcommon.Resource, settings Settings, baseName string) { +func (c *PrometheusConverter) addHistogramDataPoints(ctx context.Context, dataPoints pmetric.HistogramDataPointSlice, + resource pcommon.Resource, settings Settings, baseName string) error { for x := 0; x < dataPoints.Len(); x++ { + if err := c.everyN.checkContext(ctx); err != nil { + return err + } + pt := dataPoints.At(x) timestamp := convertTimeStamp(pt.Timestamp()) baseLabels := createAttributes(resource, pt.Attributes(), settings, nil, false) @@ -284,6 +289,10 @@ func (c *PrometheusConverter) addHistogramDataPoints(dataPoints pmetric.Histogra // process each bound, based on histograms proto definition, # of buckets = # of explicit bounds + 1 for i := 0; i < pt.ExplicitBounds().Len() && i < pt.BucketCounts().Len(); i++ { + if err := c.everyN.checkContext(ctx); err != nil { + return err + } + bound := pt.ExplicitBounds().At(i) cumulativeCount += pt.BucketCounts().At(i) bucket := &prompb.Sample{ @@ -312,7 +321,9 @@ func (c *PrometheusConverter) addHistogramDataPoints(dataPoints pmetric.Histogra ts := c.addSample(infBucket, infLabels) bucketBounds = append(bucketBounds, bucketBoundsData{ts: ts, bound: math.Inf(1)}) - c.addExemplars(pt, bucketBounds) + if err := c.addExemplars(ctx, pt, bucketBounds); err != nil { + return err + } startTimestamp := pt.StartTimestamp() if settings.ExportCreatedMetric && startTimestamp != 0 { @@ -320,6 +331,8 @@ func (c *PrometheusConverter) addHistogramDataPoints(dataPoints pmetric.Histogra c.addTimeSeriesIfNeeded(labels, startTimestamp, pt.Timestamp()) } } + + return nil } type exemplarType interface { @@ -327,16 +340,28 @@ type exemplarType interface { Exemplars() pmetric.ExemplarSlice } -func getPromExemplars[T exemplarType](pt T) []prompb.Exemplar { +func getPromExemplars[T exemplarType](ctx context.Context, everyN *everyNTimes, pt T) ([]prompb.Exemplar, error) { promExemplars := make([]prompb.Exemplar, 0, pt.Exemplars().Len()) for i := 0; i < pt.Exemplars().Len(); i++ { + if err := everyN.checkContext(ctx); err != 
nil { + return nil, err + } + exemplar := pt.Exemplars().At(i) exemplarRunes := 0 promExemplar := prompb.Exemplar{ - Value: exemplar.DoubleValue(), Timestamp: timestamp.FromTime(exemplar.Timestamp().AsTime()), } + switch exemplar.ValueType() { + case pmetric.ExemplarValueTypeInt: + promExemplar.Value = float64(exemplar.IntValue()) + case pmetric.ExemplarValueTypeDouble: + promExemplar.Value = exemplar.DoubleValue() + default: + return nil, fmt.Errorf("unsupported exemplar value type: %v", exemplar.ValueType()) + } + if traceID := exemplar.TraceID(); !traceID.IsEmpty() { val := hex.EncodeToString(traceID[:]) exemplarRunes += utf8.RuneCountInString(traceIDKey) + utf8.RuneCountInString(val) @@ -379,7 +404,7 @@ func getPromExemplars[T exemplarType](pt T) []prompb.Exemplar { promExemplars = append(promExemplars, promExemplar) } - return promExemplars + return promExemplars, nil } // mostRecentTimestampInMetric returns the latest timestamp in a batch of metrics @@ -417,9 +442,13 @@ func mostRecentTimestampInMetric(metric pmetric.Metric) pcommon.Timestamp { return ts } -func (c *PrometheusConverter) addSummaryDataPoints(dataPoints pmetric.SummaryDataPointSlice, resource pcommon.Resource, - settings Settings, baseName string) { +func (c *PrometheusConverter) addSummaryDataPoints(ctx context.Context, dataPoints pmetric.SummaryDataPointSlice, resource pcommon.Resource, + settings Settings, baseName string) error { for x := 0; x < dataPoints.Len(); x++ { + if err := c.everyN.checkContext(ctx); err != nil { + return err + } + pt := dataPoints.At(x) timestamp := convertTimeStamp(pt.Timestamp()) baseLabels := createAttributes(resource, pt.Attributes(), settings, nil, false) @@ -468,6 +497,8 @@ func (c *PrometheusConverter) addSummaryDataPoints(dataPoints pmetric.SummaryDat c.addTimeSeriesIfNeeded(createdLabels, startTimestamp, pt.Timestamp()) } } + + return nil } // createLabels returns a copy of baseLabels, adding to it the pair model.MetricNameLabel=name. @@ -569,6 +600,10 @@ func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timesta } settings.PromoteResourceAttributes = nil + if settings.KeepIdentifyingResourceAttributes { + // Do not pass identifying attributes as ignoreAttrs below. 
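The comment above is the crux of the new KeepIdentifyingResourceAttributes setting: when set, the identifying service attributes are no longer stripped from target_info. With the values used in TestFromMetrics further down, that means (sketch):

    settings := Settings{KeepIdentifyingResourceAttributes: true}
    // target_info then keeps, alongside job and instance:
    //   service_name="test-service"
    //   service_namespace="test-namespace"
    //   service_instance_id="id1234"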
+ identifyingAttrs = nil + } labels := createAttributes(resource, attributes, settings, identifyingAttrs, false, model.MetricNameLabel, name) haveIdentifier := false for _, l := range labels { diff --git a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go index e02ebbf5de..b4bc704d4e 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go @@ -17,6 +17,7 @@ package prometheusremotewrite import ( + "context" "testing" "time" @@ -47,17 +48,45 @@ func TestCreateAttributes(t *testing.T) { resource.Attributes().PutStr(k, v) } attrs := pcommon.NewMap() - attrs.PutStr("__name__", "test_metric") attrs.PutStr("metric-attr", "metric value") + attrs.PutStr("metric-attr-other", "metric value other") testCases := []struct { name string promoteResourceAttributes []string + ignoreAttrs []string expectedLabels []prompb.Label }{ { name: "Successful conversion without resource attribute promotion", promoteResourceAttributes: nil, + expectedLabels: []prompb.Label{ + { + Name: "__name__", + Value: "test_metric", + }, + { + Name: "instance", + Value: "service ID", + }, + { + Name: "job", + Value: "service name", + }, + { + Name: "metric_attr", + Value: "metric value", + }, + { + Name: "metric_attr_other", + Value: "metric value other", + }, + }, + }, + { + name: "Successful conversion with some attributes ignored", + promoteResourceAttributes: nil, + ignoreAttrs: []string{"metric-attr-other"}, expectedLabels: []prompb.Label{ { Name: "__name__", @@ -97,6 +126,10 @@ func TestCreateAttributes(t *testing.T) { Name: "metric_attr", Value: "metric value", }, + { + Name: "metric_attr_other", + Value: "metric value other", + }, { Name: "existent_attr", Value: "resource value", @@ -127,6 +160,10 @@ func TestCreateAttributes(t *testing.T) { Name: "metric_attr", Value: "metric value", }, + { + Name: "metric_attr_other", + Value: "metric value other", + }, }, }, { @@ -153,6 +190,10 @@ func TestCreateAttributes(t *testing.T) { Name: "metric_attr", Value: "metric value", }, + { + Name: "metric_attr_other", + Value: "metric value other", + }, }, }, } @@ -161,7 +202,7 @@ func TestCreateAttributes(t *testing.T) { settings := Settings{ PromoteResourceAttributes: tc.promoteResourceAttributes, } - lbls := createAttributes(resource, attrs, settings, nil, false) + lbls := createAttributes(resource, attrs, settings, tc.ignoreAttrs, false, model.MetricNameLabel, "test_metric") assert.ElementsMatch(t, lbls, tc.expectedLabels) }) @@ -280,6 +321,7 @@ func TestPrometheusConverter_AddSummaryDataPoints(t *testing.T) { converter := NewPrometheusConverter() converter.addSummaryDataPoints( + context.Background(), metric.Summary().DataPoints(), pcommon.NewResource(), Settings{ @@ -390,6 +432,7 @@ func TestPrometheusConverter_AddHistogramDataPoints(t *testing.T) { converter := NewPrometheusConverter() converter.addHistogramDataPoints( + context.Background(), metric.Histogram().DataPoints(), pcommon.NewResource(), Settings{ @@ -403,3 +446,38 @@ func TestPrometheusConverter_AddHistogramDataPoints(t *testing.T) { }) } } + +func TestGetPromExemplars(t *testing.T) { + ctx := context.Background() + everyN := &everyNTimes{n: 1} + + t.Run("Exemplars with int value", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + exemplar.SetIntValue(42) + exemplars, err 
:= getPromExemplars(ctx, everyN, pt) + assert.NoError(t, err) + assert.Len(t, exemplars, 1) + assert.Equal(t, float64(42), exemplars[0].Value) + }) + + t.Run("Exemplars with double value", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + exemplar.SetDoubleValue(69.420) + exemplars, err := getPromExemplars(ctx, everyN, pt) + assert.NoError(t, err) + assert.Len(t, exemplars, 1) + assert.Equal(t, 69.420, exemplars[0].Value) + }) + + t.Run("Exemplars with unsupported value type", func(t *testing.T) { + pt := pmetric.NewNumberDataPoint() + exemplar := pt.Exemplars().AppendEmpty() + exemplar.SetTimestamp(pcommon.Timestamp(time.Now().UnixNano())) + _, err := getPromExemplars(ctx, everyN, pt) + assert.Error(t, err) + }) +} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go index ec93387fc6..8349d4f907 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go @@ -17,6 +17,7 @@ package prometheusremotewrite import ( + "context" "fmt" "math" @@ -33,10 +34,14 @@ const defaultZeroThreshold = 1e-128 // addExponentialHistogramDataPoints adds OTel exponential histogram data points to the corresponding time series // as native histogram samples. -func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice, +func (c *PrometheusConverter) addExponentialHistogramDataPoints(ctx context.Context, dataPoints pmetric.ExponentialHistogramDataPointSlice, resource pcommon.Resource, settings Settings, promName string) (annotations.Annotations, error) { var annots annotations.Annotations for x := 0; x < dataPoints.Len(); x++ { + if err := c.everyN.checkContext(ctx); err != nil { + return annots, err + } + pt := dataPoints.At(x) histogram, ws, err := exponentialToNativeHistogram(pt) @@ -57,15 +62,18 @@ func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetr ts, _ := c.getOrCreateTimeSeries(lbls) ts.Histograms = append(ts.Histograms, histogram) - exemplars := getPromExemplars[pmetric.ExponentialHistogramDataPoint](pt) + exemplars, err := getPromExemplars[pmetric.ExponentialHistogramDataPoint](ctx, &c.everyN, pt) + if err != nil { + return annots, err + } ts.Exemplars = append(ts.Exemplars, exemplars...) } return annots, nil } -// exponentialToNativeHistogram translates OTel Exponential Histogram data point -// to Prometheus Native Histogram. +// exponentialToNativeHistogram translates an OTel Exponential Histogram data point +// to a Prometheus Native Histogram. 
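One behavioral note on the exemplar change shown earlier: previously every exemplar was read with DoubleValue() regardless of its type, so int-valued exemplars were silently read as 0. A sketch of the new behavior, mirroring TestGetPromExemplars:

    pt := pmetric.NewNumberDataPoint()
    ex := pt.Exemplars().AppendEmpty()
    ex.SetIntValue(42)

    exemplars, err := getPromExemplars(ctx, &everyNTimes{n: 1}, pt)
    // err == nil and exemplars[0].Value == 42.0; an exemplar whose value
    // type is empty now yields an explicit error instead.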
func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, annotations.Annotations, error) { var annots annotations.Annotations scale := p.Scale() diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go index cd1c858ac1..dcd83b7f93 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go @@ -17,17 +17,19 @@ package prometheusremotewrite import ( + "context" "fmt" "testing" "time" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/prompb" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" + "github.com/prometheus/prometheus/prompb" + prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) @@ -170,7 +172,7 @@ func TestConvertBucketsLayout(t *testing.T) { }, // Downscale: // 4+2+0+2, 0+0+0+0, 0+0+0+0, 0+0+0+0, 1+0+0+0 = 8, 0, 0, 0, 1 - // Check from sclaing from previous: 6+2, 0+0, 0+0, 0+0, 1+0 = 8, 0, 0, 0, 1 + // Check from scaling from previous: 6+2, 0+0, 0+0, 0+0, 1+0 = 8, 0, 0, 0, 1 wantDeltas: []int64{8, -7}, }, }, @@ -221,7 +223,7 @@ func TestConvertBucketsLayout(t *testing.T) { }, // Downscale: // 4+2+0+2, 0+0+0+0, 0+0+0+0, 1+0+0+0 = 8, 0, 0, 1 - // Check from sclaing from previous: 6+2, 0+0, 0+0, 1+0 = 8, 0, 0, 1 + // Check from scaling from previous: 6+2, 0+0, 0+0, 1+0 = 8, 0, 0, 1 wantDeltas: []int64{8, -8, 0, 1}, }, }, @@ -754,12 +756,13 @@ func TestPrometheusConverter_addExponentialHistogramDataPoints(t *testing.T) { converter := NewPrometheusConverter() annots, err := converter.addExponentialHistogramDataPoints( + context.Background(), metric.ExponentialHistogram().DataPoints(), pcommon.NewResource(), Settings{ ExportCreatedMetric: true, }, - prometheustranslator.BuildCompliantName(metric, "", true), + prometheustranslator.BuildCompliantName(metric, "", true, true), ) require.NoError(t, err) require.Empty(t, annots) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index 9d76800809..6779c9ed80 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -17,6 +17,7 @@ package prometheusremotewrite import ( + "context" "errors" "fmt" "sort" @@ -31,19 +32,22 @@ import ( ) type Settings struct { - Namespace string - ExternalLabels map[string]string - DisableTargetInfo bool - ExportCreatedMetric bool - AddMetricSuffixes bool - SendMetadata bool - PromoteResourceAttributes []string + Namespace string + ExternalLabels map[string]string + DisableTargetInfo bool + ExportCreatedMetric bool + AddMetricSuffixes bool + AllowUTF8 bool + PromoteResourceAttributes []string + KeepIdentifyingResourceAttributes bool } // PrometheusConverter converts from OTel write format to Prometheus remote write format. type PrometheusConverter struct { unique map[uint64]*prompb.TimeSeries conflicts map[uint64][]*prompb.TimeSeries + everyN everyNTimes + metadata []prompb.MetricMetadata } func NewPrometheusConverter() *PrometheusConverter { @@ -54,8 +58,19 @@ func NewPrometheusConverter() *PrometheusConverter { } // FromMetrics converts pmetric.Metrics to Prometheus remote write format. 
-func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (annots annotations.Annotations, errs error) { +func (c *PrometheusConverter) FromMetrics(ctx context.Context, md pmetric.Metrics, settings Settings) (annots annotations.Annotations, errs error) { + c.everyN = everyNTimes{n: 128} resourceMetricsSlice := md.ResourceMetrics() + + numMetrics := 0 + for i := 0; i < resourceMetricsSlice.Len(); i++ { + scopeMetricsSlice := resourceMetricsSlice.At(i).ScopeMetrics() + for j := 0; j < scopeMetricsSlice.Len(); j++ { + numMetrics += scopeMetricsSlice.At(j).Metrics().Len() + } + } + c.metadata = make([]prompb.MetricMetadata, 0, numMetrics) + for i := 0; i < resourceMetricsSlice.Len(); i++ { resourceMetrics := resourceMetricsSlice.At(i) resource := resourceMetrics.Resource() @@ -68,6 +83,11 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) // TODO: decide if instrumentation library information should be exported as labels for k := 0; k < metricSlice.Len(); k++ { + if err := c.everyN.checkContext(ctx); err != nil { + errs = multierr.Append(errs, err) + return + } + metric := metricSlice.At(k) mostRecentTimestamp = max(mostRecentTimestamp, mostRecentTimestampInMetric(metric)) @@ -76,7 +96,13 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) continue } - promName := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes) + promName := prometheustranslator.BuildCompliantName(metric, settings.Namespace, settings.AddMetricSuffixes, settings.AllowUTF8) + c.metadata = append(c.metadata, prompb.MetricMetadata{ + Type: otelMetricTypeToPromMetricType(metric), + MetricFamilyName: promName, + Help: metric.Description(), + Unit: metric.Unit(), + }) // handle individual metrics based on type //exhaustive:enforce @@ -87,21 +113,36 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) break } - c.addGaugeNumberDataPoints(dataPoints, resource, settings, promName) + if err := c.addGaugeNumberDataPoints(ctx, dataPoints, resource, settings, promName); err != nil { + errs = multierr.Append(errs, err) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return + } + } case pmetric.MetricTypeSum: dataPoints := metric.Sum().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) break } - c.addSumNumberDataPoints(dataPoints, resource, metric, settings, promName) + if err := c.addSumNumberDataPoints(ctx, dataPoints, resource, metric, settings, promName); err != nil { + errs = multierr.Append(errs, err) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return + } + } case pmetric.MetricTypeHistogram: dataPoints := metric.Histogram().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. 
%s is dropped", metric.Name())) break } - c.addHistogramDataPoints(dataPoints, resource, settings, promName) + if err := c.addHistogramDataPoints(ctx, dataPoints, resource, settings, promName); err != nil { + errs = multierr.Append(errs, err) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return + } + } case pmetric.MetricTypeExponentialHistogram: dataPoints := metric.ExponentialHistogram().DataPoints() if dataPoints.Len() == 0 { @@ -109,20 +150,31 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) break } ws, err := c.addExponentialHistogramDataPoints( + ctx, dataPoints, resource, settings, promName, ) annots.Merge(ws) - errs = multierr.Append(errs, err) + if err != nil { + errs = multierr.Append(errs, err) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return + } + } case pmetric.MetricTypeSummary: dataPoints := metric.Summary().DataPoints() if dataPoints.Len() == 0 { errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) break } - c.addSummaryDataPoints(dataPoints, resource, settings, promName) + if err := c.addSummaryDataPoints(ctx, dataPoints, resource, settings, promName); err != nil { + errs = multierr.Append(errs, err) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return + } + } default: errs = multierr.Append(errs, errors.New("unsupported metric type")) } @@ -148,25 +200,33 @@ func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool { // addExemplars adds exemplars for the dataPoint. For each exemplar, if it can find a bucket bound corresponding to its value, // the exemplar is added to the bucket bound's time series, provided that the time series' has samples. -func (c *PrometheusConverter) addExemplars(dataPoint pmetric.HistogramDataPoint, bucketBounds []bucketBoundsData) { +func (c *PrometheusConverter) addExemplars(ctx context.Context, dataPoint pmetric.HistogramDataPoint, bucketBounds []bucketBoundsData) error { if len(bucketBounds) == 0 { - return + return nil } - exemplars := getPromExemplars(dataPoint) + exemplars, err := getPromExemplars(ctx, &c.everyN, dataPoint) + if err != nil { + return err + } if len(exemplars) == 0 { - return + return nil } sort.Sort(byBucketBoundsData(bucketBounds)) for _, exemplar := range exemplars { for _, bound := range bucketBounds { + if err := c.everyN.checkContext(ctx); err != nil { + return err + } if len(bound.ts.Samples) > 0 && exemplar.Value <= bound.bound { bound.ts.Exemplars = append(bound.ts.Exemplars, exemplar) break } } } + + return nil } // addSample finds a TimeSeries that corresponds to lbls, and adds sample to it. 
diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go index bdc1c9d0b2..05abc7743f 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go @@ -17,17 +17,108 @@ package prometheusremotewrite import ( + "context" "fmt" "testing" "time" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/require" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" "go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/prompb" + prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) func TestFromMetrics(t *testing.T) { + for _, keepIdentifyingResourceAttributes := range []bool{false, true} { + t.Run(fmt.Sprintf("successful/keepIdentifyingAttributes=%v", keepIdentifyingResourceAttributes), func(t *testing.T) { + converter := NewPrometheusConverter() + payload := createExportRequest(5, 128, 128, 2, 0) + var expMetadata []prompb.MetricMetadata + resourceMetricsSlice := payload.Metrics().ResourceMetrics() + for i := 0; i < resourceMetricsSlice.Len(); i++ { + scopeMetricsSlice := resourceMetricsSlice.At(i).ScopeMetrics() + for j := 0; j < scopeMetricsSlice.Len(); j++ { + metricSlice := scopeMetricsSlice.At(j).Metrics() + for k := 0; k < metricSlice.Len(); k++ { + metric := metricSlice.At(k) + promName := prometheustranslator.BuildCompliantName(metric, "", false, false) + expMetadata = append(expMetadata, prompb.MetricMetadata{ + Type: otelMetricTypeToPromMetricType(metric), + MetricFamilyName: promName, + Help: metric.Description(), + Unit: metric.Unit(), + }) + } + } + } + + annots, err := converter.FromMetrics( + context.Background(), + payload.Metrics(), + Settings{KeepIdentifyingResourceAttributes: keepIdentifyingResourceAttributes}, + ) + require.NoError(t, err) + require.Empty(t, annots) + + if diff := cmp.Diff(expMetadata, converter.Metadata()); diff != "" { + t.Errorf("mismatch (-want +got):\n%s", diff) + } + + ts := converter.TimeSeries() + require.Len(t, ts, 1408+1) // +1 for the target_info. + + target_info_count := 0 + for _, s := range ts { + b := labels.NewScratchBuilder(2) + lbls := s.ToLabels(&b, nil) + if lbls.Get(labels.MetricName) == "target_info" { + target_info_count++ + require.Equal(t, "test-namespace/test-service", lbls.Get("job")) + require.Equal(t, "id1234", lbls.Get("instance")) + if keepIdentifyingResourceAttributes { + require.Equal(t, "test-service", lbls.Get("service_name")) + require.Equal(t, "test-namespace", lbls.Get("service_namespace")) + require.Equal(t, "id1234", lbls.Get("service_instance_id")) + } else { + require.False(t, lbls.Has("service_name")) + require.False(t, lbls.Has("service_namespace")) + require.False(t, lbls.Has("service_instance_id")) + } + } + } + require.Equal(t, 1, target_info_count) + }) + } + + t.Run("context cancellation", func(t *testing.T) { + converter := NewPrometheusConverter() + ctx, cancel := context.WithCancel(context.Background()) + // Verify that converter.FromMetrics respects cancellation. 
+ cancel() + payload := createExportRequest(5, 128, 128, 2, 0) + + annots, err := converter.FromMetrics(ctx, payload.Metrics(), Settings{}) + require.ErrorIs(t, err, context.Canceled) + require.Empty(t, annots) + }) + + t.Run("context timeout", func(t *testing.T) { + converter := NewPrometheusConverter() + // Verify that converter.FromMetrics respects timeout. + ctx, cancel := context.WithTimeout(context.Background(), 0) + t.Cleanup(cancel) + payload := createExportRequest(5, 128, 128, 2, 0) + + annots, err := converter.FromMetrics(ctx, payload.Metrics(), Settings{}) + require.ErrorIs(t, err, context.DeadlineExceeded) + require.Empty(t, annots) + }) + t.Run("exponential histogram warnings for zero count and non-zero sum", func(t *testing.T) { request := pmetricotlp.NewExportRequest() rm := request.Metrics().ResourceMetrics().AppendEmpty() @@ -51,7 +142,7 @@ func TestFromMetrics(t *testing.T) { } converter := NewPrometheusConverter() - annots, err := converter.FromMetrics(request.Metrics(), Settings{}) + annots, err := converter.FromMetrics(context.Background(), request.Metrics(), Settings{}) require.NoError(t, err) require.NotEmpty(t, annots) ws, infos := annots.AsStrings("", 0, 0) @@ -81,13 +172,15 @@ func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) { for _, exemplarsPerSeries := range []int{0, 5, 10} { b.Run(fmt.Sprintf("exemplars per series: %v", exemplarsPerSeries), func(b *testing.B) { payload := createExportRequest(resourceAttributeCount, histogramCount, nonHistogramCount, labelsPerMetric, exemplarsPerSeries) + b.ResetTimer() - for i := 0; i < b.N; i++ { + for range b.N { converter := NewPrometheusConverter() - annots, err := converter.FromMetrics(payload.Metrics(), Settings{}) + annots, err := converter.FromMetrics(context.Background(), payload.Metrics(), Settings{}) require.NoError(b, err) require.Empty(b, annots) require.NotNil(b, converter.TimeSeries()) + require.NotNil(b, converter.Metadata()) } }) } @@ -107,6 +200,15 @@ func createExportRequest(resourceAttributeCount, histogramCount, nonHistogramCou rm := request.Metrics().ResourceMetrics().AppendEmpty() generateAttributes(rm.Resource().Attributes(), "resource", resourceAttributeCount) + // Fake some resource attributes. 
+ for k, v := range map[string]string{ + "service.name": "test-service", + "service.namespace": "test-namespace", + "service.instance.id": "id1234", + } { + rm.Resource().Attributes().PutStr(k, v) + } + metrics := rm.ScopeMetrics().AppendEmpty().Metrics() ts := pcommon.NewTimestampFromTime(time.Now()) @@ -114,6 +216,8 @@ func createExportRequest(resourceAttributeCount, histogramCount, nonHistogramCou m := metrics.AppendEmpty() m.SetEmptyHistogram() m.SetName(fmt.Sprintf("histogram-%v", i)) + m.SetDescription("histogram") + m.SetUnit("unit") m.Histogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) h := m.Histogram().DataPoints().AppendEmpty() h.SetTimestamp(ts) @@ -132,6 +236,8 @@ func createExportRequest(resourceAttributeCount, histogramCount, nonHistogramCou m := metrics.AppendEmpty() m.SetEmptySum() m.SetName(fmt.Sprintf("sum-%v", i)) + m.SetDescription("sum") + m.SetUnit("unit") m.Sum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) point := m.Sum().DataPoints().AppendEmpty() point.SetTimestamp(ts) @@ -144,6 +250,8 @@ func createExportRequest(resourceAttributeCount, histogramCount, nonHistogramCou m := metrics.AppendEmpty() m.SetEmptyGauge() m.SetName(fmt.Sprintf("gauge-%v", i)) + m.SetDescription("gauge") + m.SetUnit("unit") point := m.Gauge().DataPoints().AppendEmpty() point.SetTimestamp(ts) point.SetDoubleValue(1.23) diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go index 80ccb46c75..6cdab450e1 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points.go @@ -17,6 +17,7 @@ package prometheusremotewrite import ( + "context" "math" "github.com/prometheus/common/model" @@ -27,9 +28,13 @@ import ( "github.com/prometheus/prometheus/prompb" ) -func (c *PrometheusConverter) addGaugeNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, - resource pcommon.Resource, settings Settings, name string) { +func (c *PrometheusConverter) addGaugeNumberDataPoints(ctx context.Context, dataPoints pmetric.NumberDataPointSlice, + resource pcommon.Resource, settings Settings, name string) error { for x := 0; x < dataPoints.Len(); x++ { + if err := c.everyN.checkContext(ctx); err != nil { + return err + } + pt := dataPoints.At(x) labels := createAttributes( resource, @@ -55,11 +60,17 @@ func (c *PrometheusConverter) addGaugeNumberDataPoints(dataPoints pmetric.Number } c.addSample(sample, labels) } + + return nil } -func (c *PrometheusConverter) addSumNumberDataPoints(dataPoints pmetric.NumberDataPointSlice, - resource pcommon.Resource, metric pmetric.Metric, settings Settings, name string) { +func (c *PrometheusConverter) addSumNumberDataPoints(ctx context.Context, dataPoints pmetric.NumberDataPointSlice, + resource pcommon.Resource, metric pmetric.Metric, settings Settings, name string) error { for x := 0; x < dataPoints.Len(); x++ { + if err := c.everyN.checkContext(ctx); err != nil { + return err + } + pt := dataPoints.At(x) lbls := createAttributes( resource, @@ -85,7 +96,10 @@ func (c *PrometheusConverter) addSumNumberDataPoints(dataPoints pmetric.NumberDa } ts := c.addSample(sample, lbls) if ts != nil { - exemplars := getPromExemplars[pmetric.NumberDataPoint](pt) + exemplars, err := getPromExemplars[pmetric.NumberDataPoint](ctx, &c.everyN, pt) + if err != nil { + return err + } ts.Exemplars = append(ts.Exemplars, exemplars...) 
} @@ -93,7 +107,7 @@ func (c *PrometheusConverter) addSumNumberDataPoints(dataPoints pmetric.NumberDa if settings.ExportCreatedMetric && metric.Sum().IsMonotonic() { startTimestamp := pt.StartTimestamp() if startTimestamp == 0 { - return + return nil } createdLabels := make([]prompb.Label, len(lbls)) @@ -107,4 +121,6 @@ func (c *PrometheusConverter) addSumNumberDataPoints(dataPoints pmetric.NumberDa c.addTimeSeriesIfNeeded(createdLabels, startTimestamp, pt.Timestamp()) } } + + return nil } diff --git a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go index 41afc8c4c3..b01d2cb1fe 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go @@ -17,14 +17,16 @@ package prometheusremotewrite import ( + "context" "testing" "time" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/prompb" "github.com/stretchr/testify/assert" "go.opentelemetry.io/collector/pdata/pcommon" "go.opentelemetry.io/collector/pdata/pmetric" + + "github.com/prometheus/prometheus/prompb" ) func TestPrometheusConverter_addGaugeNumberDataPoints(t *testing.T) { @@ -66,6 +68,7 @@ func TestPrometheusConverter_addGaugeNumberDataPoints(t *testing.T) { converter := NewPrometheusConverter() converter.addGaugeNumberDataPoints( + context.Background(), metric.Gauge().DataPoints(), pcommon.NewResource(), Settings{ @@ -242,6 +245,7 @@ func TestPrometheusConverter_addSumNumberDataPoints(t *testing.T) { converter := NewPrometheusConverter() converter.addSumNumberDataPoints( + context.Background(), metric.Sum().DataPoints(), pcommon.NewResource(), metric, diff --git a/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go b/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go index ba48704193..359fc52522 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/otlp_to_openmetrics_metadata.go @@ -20,7 +20,6 @@ import ( "go.opentelemetry.io/collector/pdata/pmetric" "github.com/prometheus/prometheus/prompb" - prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" ) func otelMetricTypeToPromMetricType(otelMetric pmetric.Metric) prompb.MetricMetadata_MetricType { @@ -42,36 +41,3 @@ func otelMetricTypeToPromMetricType(otelMetric pmetric.Metric) prompb.MetricMeta } return prompb.MetricMetadata_UNKNOWN } - -func OtelMetricsToMetadata(md pmetric.Metrics, addMetricSuffixes bool) []*prompb.MetricMetadata { - resourceMetricsSlice := md.ResourceMetrics() - - metadataLength := 0 - for i := 0; i < resourceMetricsSlice.Len(); i++ { - scopeMetricsSlice := resourceMetricsSlice.At(i).ScopeMetrics() - for j := 0; j < scopeMetricsSlice.Len(); j++ { - metadataLength += scopeMetricsSlice.At(j).Metrics().Len() - } - } - - var metadata = make([]*prompb.MetricMetadata, 0, metadataLength) - for i := 0; i < resourceMetricsSlice.Len(); i++ { - resourceMetrics := resourceMetricsSlice.At(i) - scopeMetricsSlice := resourceMetrics.ScopeMetrics() - - for j := 0; j < scopeMetricsSlice.Len(); j++ { - scopeMetrics := scopeMetricsSlice.At(j) - for k := 0; k < scopeMetrics.Metrics().Len(); k++ { - metric := scopeMetrics.Metrics().At(k) - entry := prompb.MetricMetadata{ - Type: otelMetricTypeToPromMetricType(metric), - MetricFamilyName: 
prometheustranslator.BuildCompliantName(metric, "", addMetricSuffixes), - Help: metric.Description(), - } - metadata = append(metadata, &entry) - } - } - } - - return metadata -} diff --git a/storage/remote/otlptranslator/prometheusremotewrite/timeseries.go b/storage/remote/otlptranslator/prometheusremotewrite/timeseries.go index fe973761a2..abffbe6105 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/timeseries.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/timeseries.go @@ -39,3 +39,8 @@ func (c *PrometheusConverter) TimeSeries() []prompb.TimeSeries { return allTS } + +// Metadata returns a slice of the prompb.Metadata that were converted from OTel format. +func (c *PrometheusConverter) Metadata() []prompb.MetricMetadata { + return c.metadata +} diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go index b1c8997268..f6d6cbc7e9 100644 --- a/storage/remote/queue_manager.go +++ b/storage/remote/queue_manager.go @@ -17,17 +17,17 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "strconv" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" semconv "go.opentelemetry.io/otel/semconv/v1.21.0" @@ -407,7 +407,7 @@ type QueueManager struct { reshardDisableStartTimestamp atomic.Int64 // Time that reshard was disabled. reshardDisableEndTimestamp atomic.Int64 // Time that reshard is disabled until. - logger log.Logger + logger *slog.Logger flushDeadline time.Duration cfg config.QueueConfig mcfg config.MetadataConfig @@ -454,7 +454,7 @@ func NewQueueManager( metrics *queueManagerMetrics, watcherMetrics *wlog.WatcherMetrics, readerMetrics *wlog.LiveReaderMetrics, - logger log.Logger, + logger *slog.Logger, dir string, samplesIn *ewmaRate, cfg config.QueueConfig, @@ -471,7 +471,7 @@ func NewQueueManager( protoMsg config.RemoteWriteProtoMsg, ) *QueueManager { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } // Copy externalLabels into a slice, which we need for processExternalLabels. @@ -480,7 +480,7 @@ func NewQueueManager( extLabelsSlice = append(extLabelsSlice, l) }) - logger = log.With(logger, remoteName, client.Name(), endpoint, client.Endpoint()) + logger = logger.With(remoteName, client.Name(), endpoint, client.Endpoint()) t := &QueueManager{ logger: logger, flushDeadline: flushDeadline, @@ -526,7 +526,7 @@ func NewQueueManager( // ships them alongside series. If both mechanisms are set, the new one // takes precedence by implicitly disabling the older one. 
if t.mcfg.Send && t.protoMsg != config.RemoteWriteProtoMsgV1 { - level.Warn(logger).Log("msg", "usage of 'metadata_config.send' is redundant when using remote write v2 (or higher) as metadata will always be gathered from the WAL and included for every series within each write request") + logger.Warn("usage of 'metadata_config.send' is redundant when using remote write v2 (or higher) as metadata will always be gathered from the WAL and included for every series within each write request") t.mcfg.Send = false } @@ -567,7 +567,7 @@ func (t *QueueManager) AppendWatcherMetadata(ctx context.Context, metadata []scr err := t.sendMetadataWithBackoff(ctx, mm[i*t.mcfg.MaxSamplesPerSend:last], pBuf) if err != nil { t.metrics.failedMetadataTotal.Add(float64(last - (i * t.mcfg.MaxSamplesPerSend))) - level.Error(t.logger).Log("msg", "non-recoverable error while sending metadata", "count", last-(i*t.mcfg.MaxSamplesPerSend), "err", err) + t.logger.Error("non-recoverable error while sending metadata", "count", last-(i*t.mcfg.MaxSamplesPerSend), "err", err) } } } @@ -706,7 +706,7 @@ outer: if !ok { t.dataDropped.incr(1) if _, ok := t.droppedSeries[s.Ref]; !ok { - level.Info(t.logger).Log("msg", "Dropped sample for series that was not explicitly dropped via relabelling", "ref", s.Ref) + t.logger.Info("Dropped sample for series that was not explicitly dropped via relabelling", "ref", s.Ref) t.metrics.droppedSamplesTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc() } else { t.metrics.droppedSamplesTotal.WithLabelValues(reasonDroppedSeries).Inc() @@ -769,7 +769,7 @@ outer: // Track dropped exemplars in the same EWMA for sharding calc. t.dataDropped.incr(1) if _, ok := t.droppedSeries[e.Ref]; !ok { - level.Info(t.logger).Log("msg", "Dropped exemplar for series that was not explicitly dropped via relabelling", "ref", e.Ref) + t.logger.Info("Dropped exemplar for series that was not explicitly dropped via relabelling", "ref", e.Ref) t.metrics.droppedExemplarsTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc() } else { t.metrics.droppedExemplarsTotal.WithLabelValues(reasonDroppedSeries).Inc() @@ -825,7 +825,7 @@ outer: if !ok { t.dataDropped.incr(1) if _, ok := t.droppedSeries[h.Ref]; !ok { - level.Info(t.logger).Log("msg", "Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref) + t.logger.Info("Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref) t.metrics.droppedHistogramsTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc() } else { t.metrics.droppedHistogramsTotal.WithLabelValues(reasonDroppedSeries).Inc() @@ -880,7 +880,7 @@ outer: if !ok { t.dataDropped.incr(1) if _, ok := t.droppedSeries[h.Ref]; !ok { - level.Info(t.logger).Log("msg", "Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref) + t.logger.Info("Dropped histogram for series that was not explicitly dropped via relabelling", "ref", h.Ref) t.metrics.droppedHistogramsTotal.WithLabelValues(reasonUnintentionalDroppedSeries).Inc() } else { t.metrics.droppedHistogramsTotal.WithLabelValues(reasonDroppedSeries).Inc() @@ -944,8 +944,8 @@ func (t *QueueManager) Start() { // Stop stops sending samples to the remote storage and waits for pending // sends to complete. 
func (t *QueueManager) Stop() { - level.Info(t.logger).Log("msg", "Stopping remote storage...") - defer level.Info(t.logger).Log("msg", "Remote storage stopped.") + t.logger.Info("Stopping remote storage...") + defer t.logger.Info("Remote storage stopped.") close(t.quit) t.wg.Wait() @@ -957,13 +957,6 @@ func (t *QueueManager) Stop() { if t.mcfg.Send { t.metadataWatcher.Stop() } - - // On shutdown, release the strings in the labels from the intern pool. - t.seriesMtx.Lock() - for _, labels := range t.seriesLabels { - t.releaseLabels(labels) - } - t.seriesMtx.Unlock() t.metrics.unregister() } @@ -985,14 +978,6 @@ func (t *QueueManager) StoreSeries(series []record.RefSeries, index int) { continue } lbls := t.builder.Labels() - t.internLabels(lbls) - - // We should not ever be replacing a series labels in the map, but just - // in case we do we need to ensure we do not leak the replaced interned - // strings. - if orig, ok := t.seriesLabels[s.Ref]; ok { - t.releaseLabels(orig) - } t.seriesLabels[s.Ref] = lbls } } @@ -1037,7 +1022,6 @@ func (t *QueueManager) SeriesReset(index int) { for k, v := range t.seriesSegmentIndexes { if v < index { delete(t.seriesSegmentIndexes, k) - t.releaseLabels(t.seriesLabels[k]) delete(t.seriesLabels, k) delete(t.seriesMetadata, k) delete(t.droppedSeries, k) @@ -1059,14 +1043,6 @@ func (t *QueueManager) client() WriteClient { return t.storeClient } -func (t *QueueManager) internLabels(lbls labels.Labels) { - lbls.InternStrings(t.interner.intern) -} - -func (t *QueueManager) releaseLabels(ls labels.Labels) { - ls.ReleaseStrings(t.interner.release) -} - // processExternalLabels merges externalLabels into b. If b contains // a label in externalLabels, the value in b wins. func processExternalLabels(b *labels.Builder, externalLabels []labels.Label) { @@ -1093,10 +1069,10 @@ func (t *QueueManager) updateShardsLoop() { // to stay close to shardUpdateDuration. 
select { case t.reshardChan <- desiredShards: - level.Info(t.logger).Log("msg", "Remote storage resharding", "from", t.numShards, "to", desiredShards) + t.logger.Info("Remote storage resharding", "from", t.numShards, "to", desiredShards) t.numShards = desiredShards default: - level.Info(t.logger).Log("msg", "Currently resharding, skipping.") + t.logger.Info("Currently resharding, skipping.") } case <-t.quit: return @@ -1114,14 +1090,14 @@ func (t *QueueManager) shouldReshard(desiredShards int) bool { minSendTimestamp := time.Now().Add(-1 * shardUpdateDuration).Unix() lsts := t.lastSendTimestamp.Load() if lsts < minSendTimestamp { - level.Warn(t.logger).Log("msg", "Skipping resharding, last successful send was beyond threshold", "lastSendTimestamp", lsts, "minSendTimestamp", minSendTimestamp) + t.logger.Warn("Skipping resharding, last successful send was beyond threshold", "lastSendTimestamp", lsts, "minSendTimestamp", minSendTimestamp) return false } if disableTimestamp := t.reshardDisableEndTimestamp.Load(); time.Now().Unix() < disableTimestamp { disabledAt := time.Unix(t.reshardDisableStartTimestamp.Load(), 0) disabledFor := time.Until(time.Unix(disableTimestamp, 0)) - level.Warn(t.logger).Log("msg", "Skipping resharding, resharding is disabled while waiting for recoverable errors", "disabled_at", disabledAt, "disabled_for", disabledFor) + t.logger.Warn("Skipping resharding, resharding is disabled while waiting for recoverable errors", "disabled_at", disabledAt, "disabled_for", disabledFor) return false } return true @@ -1164,7 +1140,7 @@ func (t *QueueManager) calculateDesiredShards() int { desiredShards = timePerSample * (dataInRate*dataKeptRatio + backlogCatchup) ) t.metrics.desiredNumShards.Set(desiredShards) - level.Debug(t.logger).Log("msg", "QueueManager.calculateDesiredShards", + t.logger.Debug("QueueManager.calculateDesiredShards", "dataInRate", dataInRate, "dataOutRate", dataOutRate, "dataKeptRatio", dataKeptRatio, @@ -1182,7 +1158,7 @@ func (t *QueueManager) calculateDesiredShards() int { lowerBound = float64(t.numShards) * (1. - shardToleranceFraction) upperBound = float64(t.numShards) * (1. + shardToleranceFraction) ) - level.Debug(t.logger).Log("msg", "QueueManager.updateShardsLoop", + t.logger.Debug("QueueManager.updateShardsLoop", "lowerBound", lowerBound, "desiredShards", desiredShards, "upperBound", upperBound) desiredShards = math.Ceil(desiredShards) // Round up to be on the safe side. @@ -1193,7 +1169,7 @@ func (t *QueueManager) calculateDesiredShards() int { numShards := int(desiredShards) // Do not downshard if we are more than ten seconds back. if numShards < t.numShards && delay > 10.0 { - level.Debug(t.logger).Log("msg", "Not downsharding due to being too far behind") + t.logger.Debug("Not downsharding due to being too far behind") return t.numShards } @@ -1321,7 +1297,7 @@ func (s *shards) stop() { // Log error for any dropped samples, exemplars, or histograms. 
logDroppedError := func(t string, counter atomic.Uint32) { if dropped := counter.Load(); dropped > 0 { - level.Error(s.qm.logger).Log("msg", fmt.Sprintf("Failed to flush all %s on shutdown", t), "count", dropped) + s.qm.logger.Error(fmt.Sprintf("Failed to flush all %s on shutdown", t), "count", dropped) } } logDroppedError("samples", s.samplesDroppedOnHardShutdown) @@ -1564,7 +1540,7 @@ func (s *shards) runShard(ctx context.Context, shardID int, queue *queue) { nPendingSamples, nPendingExemplars, nPendingHistograms := populateTimeSeries(batch, pendingData, s.qm.sendExemplars, s.qm.sendNativeHistograms) n := nPendingSamples + nPendingExemplars + nPendingHistograms if timer { - level.Debug(s.qm.logger).Log("msg", "runShard timer ticked, sending buffered data", "samples", nPendingSamples, + s.qm.logger.Debug("runShard timer ticked, sending buffered data", "samples", nPendingSamples, "exemplars", nPendingExemplars, "shard", shardNum, "histograms", nPendingHistograms) } _ = s.sendSamples(ctx, pendingData[:n], nPendingSamples, nPendingExemplars, nPendingHistograms, pBuf, &buf, enc) @@ -1691,9 +1667,9 @@ func (s *shards) updateMetrics(_ context.Context, err error, sampleCount, exempl s.qm.metrics.failedExemplarsTotal.Add(float64(exemplarDiff)) } if err != nil { - level.Error(s.qm.logger).Log("msg", "non-recoverable error", "failedSampleCount", sampleDiff, "failedHistogramCount", histogramDiff, "failedExemplarCount", exemplarDiff, "err", err) + s.qm.logger.Error("non-recoverable error", "failedSampleCount", sampleDiff, "failedHistogramCount", histogramDiff, "failedExemplarCount", exemplarDiff, "err", err) } else if sampleDiff+exemplarDiff+histogramDiff > 0 { - level.Error(s.qm.logger).Log("msg", "we got 2xx status code from the Receiver yet statistics indicate some dat was not written; investigation needed", "failedSampleCount", sampleDiff, "failedHistogramCount", histogramDiff, "failedExemplarCount", exemplarDiff) + s.qm.logger.Error("we got a 2xx status code from the Receiver yet statistics indicate some data was not written; investigation needed", "failedSampleCount", sampleDiff, "failedHistogramCount", histogramDiff, "failedExemplarCount", exemplarDiff) } // These counters are used to calculate the dynamic sharding, and as such @@ -2018,16 +1994,16 @@ func (t *QueueManager) sendWriteRequestWithBackoff(ctx context.Context, attempt switch { case backoffErr.retryAfter > 0: sleepDuration = backoffErr.retryAfter - level.Info(t.logger).Log("msg", "Retrying after duration specified by Retry-After header", "duration", sleepDuration) + t.logger.Info("Retrying after duration specified by Retry-After header", "duration", sleepDuration) case backoffErr.retryAfter < 0: - level.Debug(t.logger).Log("msg", "retry-after cannot be in past, retrying using default backoff mechanism") + t.logger.Debug("retry-after cannot be in the past, retrying using default backoff mechanism") } // We should never reshard for a recoverable error; increasing shards could // make the problem worse, particularly if we're getting rate limited. // // reshardDisableTimestamp holds the unix timestamp until which resharding - // is diableld. We'll update that timestamp if the period we were just told + // is disabled. We'll update that timestamp if the period we were just told // to sleep for is newer than the existing disabled timestamp.
reshardWaitPeriod := time.Now().Add(time.Duration(sleepDuration) * 2) if oldTS, updated := setAtomicToNewer(&t.reshardDisableEndTimestamp, reshardWaitPeriod.Unix()); updated { @@ -2047,7 +2023,7 @@ func (t *QueueManager) sendWriteRequestWithBackoff(ctx context.Context, attempt // If we make it this far, we've encountered a recoverable error and will retry. onRetry() - level.Warn(t.logger).Log("msg", "Failed to send batch, retrying", "err", err) + t.logger.Warn("Failed to send batch, retrying", "err", err) backoff = sleepDuration * 2 @@ -2147,12 +2123,12 @@ func compressPayload(tmpbuf *[]byte, inp []byte, enc Compression) (compressed [] } } -func buildWriteRequest(logger log.Logger, timeSeries []prompb.TimeSeries, metadata []prompb.MetricMetadata, pBuf *proto.Buffer, buf *[]byte, filter func(prompb.TimeSeries) bool, enc Compression) (compressed []byte, highest, lowest int64, _ error) { +func buildWriteRequest(logger *slog.Logger, timeSeries []prompb.TimeSeries, metadata []prompb.MetricMetadata, pBuf *proto.Buffer, buf *[]byte, filter func(prompb.TimeSeries) bool, enc Compression) (compressed []byte, highest, lowest int64, _ error) { highest, lowest, timeSeries, droppedSamples, droppedExemplars, droppedHistograms := buildTimeSeries(timeSeries, filter) if droppedSamples > 0 || droppedExemplars > 0 || droppedHistograms > 0 { - level.Debug(logger).Log("msg", "dropped data due to their age", "droppedSamples", droppedSamples, "droppedExemplars", droppedExemplars, "droppedHistograms", droppedHistograms) + logger.Debug("dropped data due to their age", "droppedSamples", droppedSamples, "droppedExemplars", droppedExemplars, "droppedHistograms", droppedHistograms) } req := &prompb.WriteRequest{ @@ -2185,11 +2161,11 @@ func buildWriteRequest(logger log.Logger, timeSeries []prompb.TimeSeries, metada return compressed, highest, lowest, nil } -func buildV2WriteRequest(logger log.Logger, samples []writev2.TimeSeries, labels []string, pBuf, buf *[]byte, filter func(writev2.TimeSeries) bool, enc Compression) (compressed []byte, highest, lowest int64, _ error) { +func buildV2WriteRequest(logger *slog.Logger, samples []writev2.TimeSeries, labels []string, pBuf, buf *[]byte, filter func(writev2.TimeSeries) bool, enc Compression) (compressed []byte, highest, lowest int64, _ error) { highest, lowest, timeSeries, droppedSamples, droppedExemplars, droppedHistograms := buildV2TimeSeries(samples, filter) if droppedSamples > 0 || droppedExemplars > 0 || droppedHistograms > 0 { - level.Debug(logger).Log("msg", "dropped data due to their age", "droppedSamples", droppedSamples, "droppedExemplars", droppedExemplars, "droppedHistograms", droppedHistograms) + logger.Debug("dropped data due to their age", "droppedSamples", droppedSamples, "droppedExemplars", droppedExemplars, "droppedHistograms", droppedHistograms) } req := &writev2.Request{ diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index 032a1a92f7..8369da3f12 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -20,6 +20,7 @@ import ( "math" "math/rand" "os" + "path" "runtime/pprof" "sort" "strconv" @@ -28,13 +29,13 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" "github.com/google/go-cmp/cmp" "github.com/prometheus/client_golang/prometheus" client_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" 
"github.com/stretchr/testify/require" "go.uber.org/atomic" @@ -48,6 +49,7 @@ import ( "github.com/prometheus/prometheus/scrape" "github.com/prometheus/prometheus/tsdb/chunks" "github.com/prometheus/prometheus/tsdb/record" + "github.com/prometheus/prometheus/tsdb/wlog" "github.com/prometheus/prometheus/util/runutil" "github.com/prometheus/prometheus/util/testutil" ) @@ -351,7 +353,7 @@ func TestMetadataDelivery(t *testing.T) { require.Equal(t, 0.0, client_testutil.ToFloat64(m.metrics.failedMetadataTotal)) require.Len(t, c.receivedMetadata, numMetadata) - // One more write than the rounded qoutient should be performed in order to get samples that didn't + // One more write than the rounded quotient should be performed in order to get samples that didn't // fit into MaxSamplesPerSend. require.Equal(t, numMetadata/config.DefaultMetadataConfig.MaxSamplesPerSend+1, c.writesReceived) // Make sure the last samples were sent. @@ -763,7 +765,7 @@ func TestDisableReshardOnRetry(t *testing.T) { onStoreCalled() return WriteResponseStats{}, RecoverableError{ - error: fmt.Errorf("fake error"), + error: errors.New("fake error"), retryAfter: model.Duration(retryAfter), } }, @@ -1326,21 +1328,25 @@ func BenchmarkSampleSend(b *testing.B) { cfg.MaxShards = 20 // todo: test with new proto type(s) - m := newTestQueueManager(b, cfg, mcfg, defaultFlushDeadline, c, config.RemoteWriteProtoMsgV1) - m.StoreSeries(series, 0) + for _, format := range []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1, config.RemoteWriteProtoMsgV2} { + b.Run(string(format), func(b *testing.B) { + m := newTestQueueManager(b, cfg, mcfg, defaultFlushDeadline, c, format) + m.StoreSeries(series, 0) - // These should be received by the client. - m.Start() - defer m.Stop() + // These should be received by the client. + m.Start() + defer m.Stop() - b.ResetTimer() - for i := 0; i < b.N; i++ { - m.Append(samples) - m.UpdateSeriesSegment(series, i+1) // simulate what wlog.Watcher.garbageCollectSeries does - m.SeriesReset(i + 1) + b.ResetTimer() + for i := 0; i < b.N; i++ { + m.Append(samples) + m.UpdateSeriesSegment(series, i+1) // simulate what wlog.Watcher.garbageCollectSeries does + m.SeriesReset(i + 1) + } + // Do not include shutdown + b.StopTimer() + }) } - // Do not include shutdown - b.StopTimer() } // Check how long it takes to add N series, including external labels processing. @@ -1403,31 +1409,32 @@ func BenchmarkStartup(b *testing.B) { // Find the second largest segment; we will replay up to this. // (Second largest as WALWatcher will start tailing the largest). 
- dirents, err := os.ReadDir(dir) + dirents, err := os.ReadDir(path.Join(dir, "wal")) require.NoError(b, err) var segments []int for _, dirent := range dirents { - if i, err := strconv.Atoi(dirent.Name()); err != nil { + if i, err := strconv.Atoi(dirent.Name()); err == nil { segments = append(segments, i) } } sort.Ints(segments) - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stdout)) - logger = log.With(logger, "caller", log.DefaultCaller) + logger := promslog.New(&promslog.Config{}) cfg := testDefaultQueueConfig() mcfg := config.DefaultMetadataConfig for n := 0; n < b.N; n++ { metrics := newQueueManagerMetrics(nil, "", "") + watcherMetrics := wlog.NewWatcherMetrics(nil) c := NewTestBlockedWriteClient() // todo: test with new proto type(s) - m := NewQueueManager(metrics, nil, nil, logger, dir, + m := NewQueueManager(metrics, watcherMetrics, nil, logger, dir, newEWMARate(ewmaWeight, shardUpdateDuration), cfg, mcfg, labels.EmptyLabels(), nil, c, 1*time.Minute, newPool(), newHighestTimestampMetric(), nil, false, false, config.RemoteWriteProtoMsgV1) m.watcher.SetStartTime(timestamp.Time(math.MaxInt64)) m.watcher.MaxSegment = segments[len(segments)-2] + m.watcher.SetMetrics() err := m.watcher.Run() require.NoError(b, err) } @@ -1849,7 +1856,7 @@ func createDummyTimeSeries(instances int) []timeSeries { } func BenchmarkBuildWriteRequest(b *testing.B) { - noopLogger := log.NewNopLogger() + noopLogger := promslog.NewNopLogger() bench := func(b *testing.B, batch []timeSeries) { buff := make([]byte, 0) seriesBuff := make([]prompb.TimeSeries, len(batch)) @@ -1859,13 +1866,6 @@ func BenchmarkBuildWriteRequest(b *testing.B) { } pBuf := proto.NewBuffer(nil) - // Warmup buffers - for i := 0; i < 10; i++ { - populateTimeSeries(batch, seriesBuff, true, true) - buildWriteRequest(noopLogger, seriesBuff, nil, pBuf, &buff, nil, "snappy") - } - - b.ResetTimer() totalSize := 0 for i := 0; i < b.N; i++ { populateTimeSeries(batch, seriesBuff, true, true) @@ -1896,46 +1896,44 @@ func BenchmarkBuildWriteRequest(b *testing.B) { } func BenchmarkBuildV2WriteRequest(b *testing.B) { - noopLogger := log.NewNopLogger() - type testcase struct { - batch []timeSeries - } - testCases := []testcase{ - {createDummyTimeSeries(2)}, - {createDummyTimeSeries(10)}, - {createDummyTimeSeries(100)}, - } - for _, tc := range testCases { + noopLogger := promslog.NewNopLogger() + bench := func(b *testing.B, batch []timeSeries) { symbolTable := writev2.NewSymbolTable() buff := make([]byte, 0) - seriesBuff := make([]writev2.TimeSeries, len(tc.batch)) + seriesBuff := make([]writev2.TimeSeries, len(batch)) for i := range seriesBuff { seriesBuff[i].Samples = []writev2.Sample{{}} seriesBuff[i].Exemplars = []writev2.Exemplar{{}} } pBuf := []byte{} - // Warmup buffers - for i := 0; i < 10; i++ { - populateV2TimeSeries(&symbolTable, tc.batch, seriesBuff, true, true) - buildV2WriteRequest(noopLogger, seriesBuff, symbolTable.Symbols(), &pBuf, &buff, nil, "snappy") - } - - b.Run(fmt.Sprintf("%d-instances", len(tc.batch)), func(b *testing.B) { - totalSize := 0 - for j := 0; j < b.N; j++ { - populateV2TimeSeries(&symbolTable, tc.batch, seriesBuff, true, true) - b.ResetTimer() - req, _, _, err := buildV2WriteRequest(noopLogger, seriesBuff, symbolTable.Symbols(), &pBuf, &buff, nil, "snappy") - if err != nil { - b.Fatal(err) - } - symbolTable.Reset() - totalSize += len(req) - b.ReportMetric(float64(totalSize)/float64(b.N), "compressedSize/op") + totalSize := 0 + for i := 0; i < b.N; i++ { + populateV2TimeSeries(&symbolTable, batch, seriesBuff, true, 
true) + req, _, _, err := buildV2WriteRequest(noopLogger, seriesBuff, symbolTable.Symbols(), &pBuf, &buff, nil, "snappy") + if err != nil { + b.Fatal(err) } - }) + totalSize += len(req) + b.ReportMetric(float64(totalSize)/float64(b.N), "compressedSize/op") + } } + + twoBatch := createDummyTimeSeries(2) + tenBatch := createDummyTimeSeries(10) + hundredBatch := createDummyTimeSeries(100) + + b.Run("2 instances", func(b *testing.B) { + bench(b, twoBatch) + }) + + b.Run("10 instances", func(b *testing.B) { + bench(b, tenBatch) + }) + + b.Run("100 instances", func(b *testing.B) { + bench(b, hundredBatch) + }) } func TestDropOldTimeSeries(t *testing.T) { diff --git a/storage/remote/read_handler.go b/storage/remote/read_handler.go index ffc64c9c3f..8f2945f974 100644 --- a/storage/remote/read_handler.go +++ b/storage/remote/read_handler.go @@ -16,13 +16,12 @@ package remote import ( "context" "errors" + "log/slog" "net/http" "slices" "strings" "sync" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/config" @@ -34,7 +33,7 @@ import ( ) type readHandler struct { - logger log.Logger + logger *slog.Logger queryable storage.SampleAndChunkQueryable config func() config.Config remoteReadSampleLimit int @@ -46,7 +45,7 @@ type readHandler struct { // NewReadHandler creates a http.Handler that accepts remote read requests and // writes them to the provided queryable. -func NewReadHandler(logger log.Logger, r prometheus.Registerer, queryable storage.SampleAndChunkQueryable, config func() config.Config, remoteReadSampleLimit, remoteReadConcurrencyLimit, remoteReadMaxBytesInFrame int) http.Handler { +func NewReadHandler(logger *slog.Logger, r prometheus.Registerer, queryable storage.SampleAndChunkQueryable, config func() config.Config, remoteReadSampleLimit, remoteReadConcurrencyLimit, remoteReadMaxBytesInFrame int) http.Handler { h := &readHandler{ logger: logger, queryable: queryable, @@ -140,7 +139,7 @@ func (h *readHandler) remoteReadSamples( } defer func() { if err := querier.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error on querier close", "err", err.Error()) + h.logger.Warn("Error on querier close", "err", err.Error()) } }() @@ -163,7 +162,7 @@ func (h *readHandler) remoteReadSamples( return err } for _, w := range ws { - level.Warn(h.logger).Log("msg", "Warnings on remote read query", "err", w.Error()) + h.logger.Warn("Warnings on remote read query", "err", w.Error()) } for _, ts := range resp.Results[i].Timeseries { ts.Labels = MergeLabels(ts.Labels, sortedExternalLabels) @@ -208,7 +207,7 @@ func (h *readHandler) remoteReadStreamedXORChunks(ctx context.Context, w http.Re } defer func() { if err := querier.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error on chunk querier close", "err", err.Error()) + h.logger.Warn("Error on chunk querier close", "err", err.Error()) } }() @@ -239,7 +238,7 @@ func (h *readHandler) remoteReadStreamedXORChunks(ctx context.Context, w http.Re } for _, w := range ws { - level.Warn(h.logger).Log("msg", "Warnings on chunked remote read query", "warnings", w.Error()) + h.logger.Warn("Warnings on chunked remote read query", "warnings", w.Error()) } return nil }(); err != nil { diff --git a/storage/remote/read_handler_test.go b/storage/remote/read_handler_test.go index 4cd4647e72..fd7f3ad48d 100644 --- a/storage/remote/read_handler_test.go +++ b/storage/remote/read_handler_test.go @@ -334,7 +334,7 @@ func TestStreamReadEndpoint(t *testing.T) { Type:
prompb.Chunk_XOR, MinTimeMs: 7200000, MaxTimeMs: 7200000, - Data: []byte("\000\001\200\364\356\006@\307p\000\000\000\000\000\000"), + Data: []byte("\000\001\200\364\356\006@\307p\000\000\000\000\000"), }, }, }, @@ -381,7 +381,7 @@ func TestStreamReadEndpoint(t *testing.T) { Type: prompb.Chunk_XOR, MinTimeMs: 14400000, MaxTimeMs: 14400000, - Data: []byte("\000\001\200\350\335\r@\327p\000\000\000\000\000\000"), + Data: []byte("\000\001\200\350\335\r@\327p\000\000\000\000\000"), }, }, }, diff --git a/storage/remote/read_test.go b/storage/remote/read_test.go index d63cefc3fe..b78a8c6215 100644 --- a/storage/remote/read_test.go +++ b/storage/remote/read_test.go @@ -475,7 +475,9 @@ func TestSampleAndChunkQueryableClient(t *testing.T) { ) q, err := c.Querier(tc.mint, tc.maxt) require.NoError(t, err) - defer require.NoError(t, q.Close()) + defer func() { + require.NoError(t, q.Close()) + }() ss := q.Select(context.Background(), true, nil, tc.matchers...) require.NoError(t, err) diff --git a/storage/remote/storage.go b/storage/remote/storage.go index 05634f1798..14c3c87d93 100644 --- a/storage/remote/storage.go +++ b/storage/remote/storage.go @@ -18,12 +18,13 @@ import ( "crypto/md5" "encoding/hex" "fmt" + "log/slog" "sync" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/config" @@ -51,8 +52,9 @@ type startTimeCallback func() (int64, error) // Storage represents all the remote read and write endpoints. It implements // storage.Storage. type Storage struct { - logger *logging.Deduper - mtx sync.Mutex + deduper *logging.Deduper + logger *slog.Logger + mtx sync.Mutex rws *WriteStorage @@ -62,14 +64,16 @@ type Storage struct { } // NewStorage returns a remote.Storage. -func NewStorage(l log.Logger, reg prometheus.Registerer, stCallback startTimeCallback, walDir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWAL bool) *Storage { +func NewStorage(l *slog.Logger, reg prometheus.Registerer, stCallback startTimeCallback, walDir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWAL bool) *Storage { if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } - logger := logging.Dedupe(l, 1*time.Minute) + deduper := logging.Dedupe(l, 1*time.Minute) + logger := slog.New(deduper) s := &Storage{ logger: logger, + deduper: deduper, localStartTimeCallback: stCallback, } s.rws = NewWriteStorage(s.logger, reg, walDir, flushDeadline, sm, metadataInWAL) @@ -196,7 +200,7 @@ func (s *Storage) LowestSentTimestamp() int64 { // Close the background processing of the storage queues. func (s *Storage) Close() error { - s.logger.Stop() + s.deduper.Stop() s.mtx.Lock() defer s.mtx.Unlock() return s.rws.Close() diff --git a/storage/remote/write.go b/storage/remote/write.go index 3d2f1fdfcd..0363095444 100644 --- a/storage/remote/write.go +++ b/storage/remote/write.go @@ -17,13 +17,14 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "sync" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/exemplar" @@ -57,7 +58,7 @@ var ( // WriteStorage represents all the remote write storage. 
type WriteStorage struct { - logger log.Logger + logger *slog.Logger reg prometheus.Registerer mtx sync.Mutex @@ -78,9 +79,9 @@ type WriteStorage struct { } // NewWriteStorage creates and runs a WriteStorage. -func NewWriteStorage(logger log.Logger, reg prometheus.Registerer, dir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWal bool) *WriteStorage { +func NewWriteStorage(logger *slog.Logger, reg prometheus.Registerer, dir string, flushDeadline time.Duration, sm ReadyScrapeManager, metadataInWal bool) *WriteStorage { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } rws := &WriteStorage{ queues: make(map[string]*QueueManager), @@ -179,6 +180,7 @@ func (rws *WriteStorage) ApplyConfig(conf *config.Config) error { GoogleIAMConfig: rwConf.GoogleIAMConfig, Headers: rwConf.Headers, RetryOnRateLimit: rwConf.QueueConfig.RetryOnRateLimit, + RoundRobinDNS: rwConf.RoundRobinDNS, }) if err != nil { return err @@ -277,6 +279,7 @@ func (rws *WriteStorage) Close() error { type timestampTracker struct { writeStorage *WriteStorage + appendOptions *storage.AppendOptions samples int64 exemplars int64 histograms int64 @@ -284,6 +287,10 @@ type timestampTracker struct { highestRecvTimestamp *maxTimestamp } +func (t *timestampTracker) SetOptions(opts *storage.AppendOptions) { + t.appendOptions = opts +} + // Append implements storage.Appender. func (t *timestampTracker) Append(_ storage.SeriesRef, _ labels.Labels, ts int64, _ float64) (storage.SeriesRef, error) { t.samples++ @@ -306,14 +313,29 @@ func (t *timestampTracker) AppendHistogram(_ storage.SeriesRef, _ labels.Labels, return 0, nil } -func (t *timestampTracker) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, _ metadata.Metadata) (storage.SeriesRef, error) { - // TODO: Add and increment a `metadata` field when we get around to wiring metadata in remote_write. - // UpadteMetadata is no-op for remote write (where timestampTracker is being used) for now. +func (t *timestampTracker) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, ct int64) (storage.SeriesRef, error) { + t.samples++ + if ct > t.highestTimestamp { + // Theoretically, we should never see a CT zero sample with a timestamp higher than the highest timestamp we've seen so far. + // However, we're not going to enforce that here, as it is not the responsibility of the tracker to enforce this. + t.highestTimestamp = ct + } return 0, nil } -func (t *timestampTracker) AppendCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, _ int64) (storage.SeriesRef, error) { - // AppendCTZeroSample is no-op for remote-write for now. +func (t *timestampTracker) AppendHistogramCTZeroSample(_ storage.SeriesRef, _ labels.Labels, _, ct int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) { + t.histograms++ + if ct > t.highestTimestamp { + // Theoretically, we should never see a CT zero sample with a timestamp higher than the highest timestamp we've seen so far. + // However, we're not going to enforce that here, as it is not the responsibility of the tracker to enforce this. + t.highestTimestamp = ct + } + return 0, nil +} + +func (t *timestampTracker) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, _ metadata.Metadata) (storage.SeriesRef, error) { + // TODO: Add and increment a `metadata` field when we get around to wiring metadata in remote_write. + // UpdateMetadata is no-op for remote write (where timestampTracker is being used) for now. 
return 0, nil } diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index 58fb668cc1..afb50ef265 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -18,12 +18,11 @@ import ( "errors" "fmt" "io" + "log/slog" "net/http" "strings" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" "github.com/prometheus/client_golang/prometheus" @@ -42,7 +41,7 @@ import ( ) type writeHandler struct { - logger log.Logger + logger *slog.Logger appendable storage.Appendable samplesWithInvalidLabelsTotal prometheus.Counter @@ -58,7 +57,7 @@ const maxAheadTime = 10 * time.Minute // // NOTE(bwplotka): When accepting v2 proto and spec, partial writes are possible // as per https://prometheus.io/docs/specs/remote_write_spec_2_0/#partial-write. -func NewWriteHandler(logger log.Logger, reg prometheus.Registerer, appendable storage.Appendable, acceptedProtoMsgs []config.RemoteWriteProtoMsg) http.Handler { +func NewWriteHandler(logger *slog.Logger, reg prometheus.Registerer, appendable storage.Appendable, acceptedProtoMsgs []config.RemoteWriteProtoMsg) http.Handler { protoMsgs := map[config.RemoteWriteProtoMsg]struct{}{} for _, acc := range acceptedProtoMsgs { protoMsgs[acc] = struct{}{} @@ -119,7 +118,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { msgType, err := h.parseProtoMsg(contentType) if err != nil { - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err) + h.logger.Error("Error decoding remote write request", "err", err) http.Error(w, err.Error(), http.StatusUnsupportedMediaType) return } @@ -131,7 +130,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } return ret }()) - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err) + h.logger.Error("Error decoding remote write request", "err", err) http.Error(w, err.Error(), http.StatusUnsupportedMediaType) } @@ -142,14 +141,14 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { // We could give http.StatusUnsupportedMediaType, but let's assume snappy by default. } else if enc != string(SnappyBlockCompression) { err := fmt.Errorf("%v encoding (compression) is not accepted by this server; only %v is acceptable", enc, SnappyBlockCompression) - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err) + h.logger.Error("Error decoding remote write request", "err", err) http.Error(w, err.Error(), http.StatusUnsupportedMediaType) } // Read the request body. body, err := io.ReadAll(r.Body) if err != nil { - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err.Error()) + h.logger.Error("Error decoding remote write request", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -157,7 +156,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { decompressed, err := snappy.Decode(nil, body) if err != nil { // TODO(bwplotka): Add more context to responded error? 
- level.Error(h.logger).Log("msg", "Error decompressing remote write request", "err", err.Error()) + h.logger.Error("Error decompressing remote write request", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -169,7 +168,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { var req prompb.WriteRequest if err := proto.Unmarshal(decompressed, &req); err != nil { // TODO(bwplotka): Add more context to responded error? - level.Error(h.logger).Log("msg", "Error decoding v1 remote write request", "protobuf_message", msgType, "err", err.Error()) + h.logger.Error("Error decoding v1 remote write request", "protobuf_message", msgType, "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -180,7 +179,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusBadRequest) return default: - level.Error(h.logger).Log("msg", "Error while remote writing the v1 request", "err", err.Error()) + h.logger.Error("Error while remote writing the v1 request", "err", err.Error()) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -193,7 +192,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { var req writev2.Request if err := proto.Unmarshal(decompressed, &req); err != nil { // TODO(bwplotka): Add more context to responded error? - level.Error(h.logger).Log("msg", "Error decoding v2 remote write request", "protobuf_message", msgType, "err", err.Error()) + h.logger.Error("Error decoding v2 remote write request", "protobuf_message", msgType, "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -205,7 +204,7 @@ func (h *writeHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { if err != nil { if errHTTPCode/5 == 100 { // 5xx - level.Error(h.logger).Log("msg", "Error while remote writing the v2 request", "err", err.Error()) + h.logger.Error("Error while remote writing the v2 request", "err", err.Error()) } http.Error(w, err.Error(), errHTTPCode) return @@ -241,11 +240,11 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err // TODO(bwplotka): Even as per 1.0 spec, this should be a 400 error, while other samples are // potentially written. Perhaps unify with fixed writeV2 implementation a bit. 
if !ls.Has(labels.MetricName) || !ls.IsValid(model.NameValidationScheme) { - level.Warn(h.logger).Log("msg", "Invalid metric names or labels", "got", ls.String()) + h.logger.Warn("Invalid metric names or labels", "got", ls.String()) samplesWithInvalidLabels++ continue } else if duplicateLabel, hasDuplicate := ls.HasDuplicateLabelNames(); hasDuplicate { - level.Warn(h.logger).Log("msg", "Invalid labels for series.", "labels", ls.String(), "duplicated_label", duplicateLabel) + h.logger.Warn("Invalid labels for series.", "labels", ls.String(), "duplicated_label", duplicateLabel) samplesWithInvalidLabels++ continue } @@ -261,10 +260,10 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err switch { case errors.Is(err, storage.ErrOutOfOrderExemplar): outOfOrderExemplarErrs++ - level.Debug(h.logger).Log("msg", "Out of order exemplar", "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) + h.logger.Debug("Out of order exemplar", "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) default: // Since exemplar storage is still experimental, we don't fail the request on ingestion errors - level.Debug(h.logger).Log("msg", "Error while adding exemplar in AppendExemplar", "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e), "err", err) + h.logger.Debug("Error while adding exemplar in AppendExemplar", "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e), "err", err) } } } @@ -276,7 +275,7 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err } if outOfOrderExemplarErrs > 0 { - _ = level.Warn(h.logger).Log("msg", "Error on ingesting out-of-order exemplars", "num_dropped", outOfOrderExemplarErrs) + h.logger.Warn("Error on ingesting out-of-order exemplars", "num_dropped", outOfOrderExemplarErrs) } if samplesWithInvalidLabels > 0 { h.samplesWithInvalidLabelsTotal.Add(float64(samplesWithInvalidLabels)) @@ -293,7 +292,7 @@ func (h *writeHandler) appendV1Samples(app storage.Appender, ss []prompb.Sample, if errors.Is(err, storage.ErrOutOfOrderSample) || errors.Is(err, storage.ErrOutOfBounds) || errors.Is(err, storage.ErrDuplicateSampleForTimestamp) { - level.Error(h.logger).Log("msg", "Out of order sample from remote write", "err", err.Error(), "series", labels.String(), "timestamp", s.Timestamp) + h.logger.Error("Out of order sample from remote write", "err", err.Error(), "series", labels.String(), "timestamp", s.Timestamp) } return err } @@ -315,7 +314,7 @@ func (h *writeHandler) appendV1Histograms(app storage.Appender, hh []prompb.Hist if errors.Is(err, storage.ErrOutOfOrderSample) || errors.Is(err, storage.ErrOutOfBounds) || errors.Is(err, storage.ErrDuplicateSampleForTimestamp) { - level.Error(h.logger).Log("msg", "Out of order histogram from remote write", "err", err.Error(), "series", labels.String(), "timestamp", hp.Timestamp) + h.logger.Error("Out of order histogram from remote write", "err", err.Error(), "series", labels.String(), "timestamp", hp.Timestamp) } return err } @@ -345,7 +344,7 @@ func (h *writeHandler) writeV2(ctx context.Context, req *writev2.Request) (_ Wri // On 5xx, we always rollback, because we expect // sender to retry and TSDB is not idempotent. 
if rerr := app.Rollback(); rerr != nil { - level.Error(h.logger).Log("msg", "writev2 rollback failed on retry-able error", "err", rerr) + h.logger.Error("writev2 rollback failed on retry-able error", "err", rerr) } return WriteResponseStats{}, errHTTPCode, err } @@ -407,7 +406,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * errors.Is(err, storage.ErrDuplicateSampleForTimestamp) || errors.Is(err, storage.ErrTooOldSample) { // TODO(bwplotka): Not too spammy log? - level.Error(h.logger).Log("msg", "Out of order sample from remote write", "err", err.Error(), "series", ls.String(), "timestamp", s.Timestamp) + h.logger.Error("Out of order sample from remote write", "err", err.Error(), "series", ls.String(), "timestamp", s.Timestamp) badRequestErrs = append(badRequestErrs, fmt.Errorf("%w for series %v", err, ls.String())) continue } @@ -432,7 +431,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * errors.Is(err, storage.ErrOutOfBounds) || errors.Is(err, storage.ErrDuplicateSampleForTimestamp) { // TODO(bwplotka): Not too spammy log? - level.Error(h.logger).Log("msg", "Out of order histogram from remote write", "err", err.Error(), "series", ls.String(), "timestamp", hp.Timestamp) + h.logger.Error("Out of order histogram from remote write", "err", err.Error(), "series", ls.String(), "timestamp", hp.Timestamp) badRequestErrs = append(badRequestErrs, fmt.Errorf("%w for series %v", err, ls.String())) continue } @@ -450,18 +449,18 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * // Handle append error. if errors.Is(err, storage.ErrOutOfOrderExemplar) { outOfOrderExemplarErrs++ // Maintain old metrics, but technically not needed, given we fail here. - level.Error(h.logger).Log("msg", "Out of order exemplar", "err", err.Error(), "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) + h.logger.Error("Out of order exemplar", "err", err.Error(), "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) badRequestErrs = append(badRequestErrs, fmt.Errorf("%w for series %v", err, ls.String())) continue } // TODO(bwplotka): Add strict mode which would trigger rollback of everything if needed. // For now we keep the previously released flow (just error not debug level) of dropping them without rollback and 5xx. - level.Error(h.logger).Log("msg", "failed to ingest exemplar, emitting error log, but no error for PRW caller", "err", err.Error(), "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) + h.logger.Error("failed to ingest exemplar, emitting error log, but no error for PRW caller", "err", err.Error(), "series", ls.String(), "exemplar", fmt.Sprintf("%+v", e)) } m := ts.ToMetadata(req.Symbols) if _, err = app.UpdateMetadata(ref, ls, m); err != nil { - level.Debug(h.logger).Log("msg", "error while updating metadata from remote write", "err", err) + h.logger.Debug("error while updating metadata from remote write", "err", err) // Metadata is attached to each series, so since Prometheus does not reject samples without metadata information, // we don't report remote write error either. We increment metric instead.
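Nearly every hunk in these handler files follows the same mechanical rewrite: go-kit's level.X(logger).Log("msg", ...) becomes a leveled method on *slog.Logger, with the message as the first argument and the key/value pairs unchanged. A self-contained sketch of the two styles (the error value is a stand-in):

package main

import (
	"log/slog"
	"os"
)

func main() {
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
	err := os.ErrNotExist // stand-in for a decode/decompress error

	// Before (go-kit):
	//   level.Error(logger).Log("msg", "Error decoding v1 remote write request", "err", err.Error())
	// After (slog): the "msg" key is implied by the first argument.
	logger.Error("Error decoding v1 remote write request", "err", err.Error())

	// Tests make the matching swap: log.NewNopLogger() becomes
	// promslog.NewNopLogger(), which returns a *slog.Logger that
	// discards all records.
}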
samplesWithoutMetadata += rs.AllSamples() - allSamplesSoFar @@ -469,7 +468,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * } if outOfOrderExemplarErrs > 0 { - level.Warn(h.logger).Log("msg", "Error on ingesting out-of-order exemplars", "num_dropped", outOfOrderExemplarErrs) + h.logger.Warn("Error on ingesting out-of-order exemplars", "num_dropped", outOfOrderExemplarErrs) } h.samplesWithInvalidLabelsTotal.Add(float64(samplesWithInvalidLabels)) @@ -482,7 +481,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * // NewOTLPWriteHandler creates a http.Handler that accepts OTLP write requests and // writes them to the provided appendable. -func NewOTLPWriteHandler(logger log.Logger, appendable storage.Appendable, configFunc func() config.Config) http.Handler { +func NewOTLPWriteHandler(logger *slog.Logger, appendable storage.Appendable, configFunc func() config.Config) http.Handler { rwHandler := &writeHandler{ logger: logger, appendable: appendable, @@ -496,7 +495,7 @@ func NewOTLPWriteHandler(logger log.Logger, appendable storage.Appendable, confi } type otlpWriteHandler struct { - logger log.Logger + logger *slog.Logger rwHandler *writeHandler configFunc func() config.Config } @@ -504,7 +503,7 @@ type otlpWriteHandler struct { func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { req, err := DecodeOTLPWriteRequest(r) if err != nil { - level.Error(h.logger).Log("msg", "Error decoding remote write request", "err", err.Error()) + h.logger.Error("Error decoding remote write request", "err", err.Error()) http.Error(w, err.Error(), http.StatusBadRequest) return } @@ -512,20 +511,23 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { otlpCfg := h.configFunc().OTLPConfig converter := otlptranslator.NewPrometheusConverter() - annots, err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{ - AddMetricSuffixes: true, - PromoteResourceAttributes: otlpCfg.PromoteResourceAttributes, + annots, err := converter.FromMetrics(r.Context(), req.Metrics(), otlptranslator.Settings{ + AddMetricSuffixes: true, + AllowUTF8: otlpCfg.TranslationStrategy == config.NoUTF8EscapingWithSuffixes, + PromoteResourceAttributes: otlpCfg.PromoteResourceAttributes, + KeepIdentifyingResourceAttributes: otlpCfg.KeepIdentifyingResourceAttributes, }) if err != nil { - level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err) + h.logger.Warn("Error translating OTLP metrics to Prometheus write request", "err", err) } ws, _ := annots.AsStrings("", 0, 0) if len(ws) > 0 { - level.Warn(h.logger).Log("msg", "Warnings translating OTLP metrics to Prometheus write request", "warnings", ws) + h.logger.Warn("Warnings translating OTLP metrics to Prometheus write request", "warnings", ws) } err = h.rwHandler.write(r.Context(), &prompb.WriteRequest{ Timeseries: converter.TimeSeries(), + Metadata: converter.Metadata(), }) switch { @@ -535,7 +537,7 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusBadRequest) return default: - level.Error(h.logger).Log("msg", "Error appending remote write", "err", err.Error()) + h.logger.Error("Error appending remote write", "err", err.Error()) http.Error(w, err.Error(), http.StatusInternalServerError) return } diff --git a/storage/remote/write_handler_test.go b/storage/remote/write_handler_test.go index 5c89a1ab95..c40f227ea8 100644 --- 
a/storage/remote/write_handler_test.go +++ b/storage/remote/write_handler_test.go @@ -27,11 +27,12 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" @@ -129,7 +130,7 @@ func TestRemoteWriteHandlerHeadersHandling_V1Message(t *testing.T) { } appendable := &mockAppendable{} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -144,7 +145,7 @@ func TestRemoteWriteHandlerHeadersHandling_V1Message(t *testing.T) { } func TestRemoteWriteHandlerHeadersHandling_V2Message(t *testing.T) { - payload, _, _, err := buildV2WriteRequest(log.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") + payload, _, _, err := buildV2WriteRequest(promslog.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") require.NoError(t, err) for _, tc := range []struct { @@ -230,7 +231,7 @@ func TestRemoteWriteHandlerHeadersHandling_V2Message(t *testing.T) { } appendable := &mockAppendable{} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -255,7 +256,7 @@ func TestRemoteWriteHandler_V1Message(t *testing.T) { // in Prometheus, so keeping like this to not break existing 1.0 clients. 
appendable := &mockAppendable{} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -428,7 +429,7 @@ func TestRemoteWriteHandler_V2Message(t *testing.T) { }, } { t.Run(tc.desc, func(t *testing.T) { - payload, _, _, err := buildV2WriteRequest(log.NewNopLogger(), tc.input, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") + payload, _, _, err := buildV2WriteRequest(promslog.NewNopLogger(), tc.input, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") require.NoError(t, err) req, err := http.NewRequest("", "", bytes.NewReader(payload)) @@ -445,7 +446,7 @@ func TestRemoteWriteHandler_V2Message(t *testing.T) { appendExemplarErr: tc.appendExemplarErr, updateMetadataErr: tc.updateMetadataErr, } - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -544,7 +545,7 @@ func TestOutOfOrderSample_V1Message(t *testing.T) { require.NoError(t, err) appendable := &mockAppendable{latestSample: map[uint64]int64{labels.FromStrings("__name__", "test_metric").Hash(): 100}} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -586,7 +587,7 @@ func TestOutOfOrderExemplar_V1Message(t *testing.T) { require.NoError(t, err) appendable := &mockAppendable{latestSample: map[uint64]int64{labels.FromStrings("__name__", "test_metric").Hash(): 100}} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -624,7 +625,7 @@ func TestOutOfOrderHistogram_V1Message(t *testing.T) { require.NoError(t, err) appendable := &mockAppendable{latestSample: map[uint64]int64{labels.FromStrings("__name__", "test_metric").Hash(): 100}} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -655,7 +656,7 @@ func BenchmarkRemoteWriteHandler(b *testing.B) { appendable := &mockAppendable{} // TODO: test with other proto format(s) - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() b.ResetTimer() @@ -671,8 +672,8 @@ func TestCommitErr_V1Message(t *testing.T) { req, err := http.NewRequest("", "", bytes.NewReader(payload)) require.NoError(t, err) - appendable := 
&mockAppendable{commitErr: fmt.Errorf("commit error")} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + appendable := &mockAppendable{commitErr: errors.New("commit error")} + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -685,7 +686,7 @@ func TestCommitErr_V1Message(t *testing.T) { } func TestCommitErr_V2Message(t *testing.T) { - payload, _, _, err := buildV2WriteRequest(log.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") + payload, _, _, err := buildV2WriteRequest(promslog.NewNopLogger(), writeV2RequestFixture.Timeseries, writeV2RequestFixture.Symbols, nil, nil, nil, "snappy") require.NoError(t, err) req, err := http.NewRequest("", "", bytes.NewReader(payload)) @@ -695,8 +696,8 @@ func TestCommitErr_V2Message(t *testing.T) { req.Header.Set("Content-Encoding", string(SnappyBlockCompression)) req.Header.Set(RemoteWriteVersionHeader, RemoteWriteVersion20HeaderValue) - appendable := &mockAppendable{commitErr: fmt.Errorf("commit error")} - handler := NewWriteHandler(log.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) + appendable := &mockAppendable{commitErr: errors.New("commit error")} + handler := NewWriteHandler(promslog.NewNopLogger(), nil, appendable, []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV2}) recorder := httptest.NewRecorder() handler.ServeHTTP(recorder, req) @@ -723,7 +724,7 @@ func BenchmarkRemoteWriteOOOSamples(b *testing.B) { require.NoError(b, db.Close()) }) // TODO: test with other proto format(s) - handler := NewWriteHandler(log.NewNopLogger(), nil, db.Head(), []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) + handler := NewWriteHandler(promslog.NewNopLogger(), nil, db.Head(), []config.RemoteWriteProtoMsg{config.RemoteWriteProtoMsgV1}) buf, _, _, err := buildWriteRequest(nil, genSeriesWithSample(1000, 200*time.Minute.Milliseconds()), nil, nil, nil, nil, "snappy") require.NoError(b, err) @@ -832,6 +833,10 @@ func (m *mockAppendable) Appender(_ context.Context) storage.Appender { return m } +func (m *mockAppendable) SetOptions(opts *storage.AppendOptions) { + panic("unimplemented") +} + func (m *mockAppendable) Append(_ storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { if m.appendSampleErr != nil { return 0, m.appendSampleErr @@ -915,6 +920,13 @@ func (m *mockAppendable) AppendHistogram(_ storage.SeriesRef, l labels.Labels, t return 0, nil } +func (m *mockAppendable) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + // AppendCTZeroSample is no-op for remote-write for now. + // TODO(bwplotka/arthursens): Add support for PRW 2.0 for CT zero feature (but also we might + // replace this with in-metadata CT storage, see https://github.com/prometheus/prometheus/issues/14218). 
+ return 0, nil +} + func (m *mockAppendable) UpdateMetadata(_ storage.SeriesRef, l labels.Labels, mp metadata.Metadata) (storage.SeriesRef, error) { if m.updateMetadataErr != nil { return 0, m.updateMetadataErr diff --git a/storage/series.go b/storage/series.go index 70e3d0a199..a3dbec7088 100644 --- a/storage/series.go +++ b/storage/series.go @@ -171,6 +171,34 @@ func (it *listSeriesIterator) Seek(t int64) chunkenc.ValueType { func (it *listSeriesIterator) Err() error { return nil } +type listSeriesIteratorWithCopy struct { + *listSeriesIterator +} + +func NewListSeriesIteratorWithCopy(samples Samples) chunkenc.Iterator { + return &listSeriesIteratorWithCopy{ + listSeriesIterator: &listSeriesIterator{samples: samples, idx: -1}, + } +} + +func (it *listSeriesIteratorWithCopy) AtHistogram(h *histogram.Histogram) (int64, *histogram.Histogram) { + t, ih := it.listSeriesIterator.AtHistogram(nil) + if h == nil || ih == nil { + return t, ih + } + ih.CopyTo(h) + return t, h +} + +func (it *listSeriesIteratorWithCopy) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) { + t, ih := it.listSeriesIterator.AtFloatHistogram(nil) + if fh == nil || ih == nil { + return t, ih + } + ih.CopyTo(fh) + return t, fh +} + type listChunkSeriesIterator struct { chks []chunks.Meta idx int diff --git a/storage/series_test.go b/storage/series_test.go index f8ba2af67c..5309494069 100644 --- a/storage/series_test.go +++ b/storage/series_test.go @@ -14,7 +14,6 @@ package storage import ( - "fmt" "math" "testing" @@ -612,36 +611,36 @@ func testHistogramsSeriesToChunks(t *testing.T, test histogramTest) { encodedSample := encodedSamples[i] switch expectedSample := s.(type) { case hSample: - require.Equal(t, chunkenc.ValHistogram, encodedSample.Type(), "expect histogram", fmt.Sprintf("at idx %d", i)) + require.Equalf(t, chunkenc.ValHistogram, encodedSample.Type(), "expect histogram at idx %d", i) h := encodedSample.H() // Ignore counter reset if not gauge here, will check on chunk level. if expectedSample.h.CounterResetHint != histogram.GaugeType { h.CounterResetHint = histogram.UnknownCounterReset } if value.IsStaleNaN(expectedSample.h.Sum) { - require.True(t, value.IsStaleNaN(h.Sum), fmt.Sprintf("at idx %d", i)) + require.Truef(t, value.IsStaleNaN(h.Sum), "at idx %d", i) continue } - require.Equal(t, *expectedSample.h, *h, fmt.Sprintf("at idx %d", i)) + require.Equalf(t, *expectedSample.h, *h, "at idx %d", i) case fhSample: - require.Equal(t, chunkenc.ValFloatHistogram, encodedSample.Type(), "expect float histogram", fmt.Sprintf("at idx %d", i)) + require.Equalf(t, chunkenc.ValFloatHistogram, encodedSample.Type(), "expect float histogram at idx %d", i) fh := encodedSample.FH() // Ignore counter reset if not gauge here, will check on chunk level. 
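The assertion hunks around this point replace require.Equal plus a trailing fmt.Sprintf with testify's f-suffixed variants, which take the format string and its arguments directly and only build the message on failure. A small sketch (assumed to live in a _test.go file):

package main

import (
	"testing"

	"github.com/stretchr/testify/require"
)

func TestEqualfSketch(t *testing.T) {
	got := []int{1, 2, 3}
	for i, v := range got {
		// Equivalent to require.Equal(t, i+1, v, fmt.Sprintf("at idx %d", i)),
		// but the message is formatted lazily, only if the assertion fails.
		require.Equalf(t, i+1, v, "at idx %d", i)
	}
}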
if expectedSample.fh.CounterResetHint != histogram.GaugeType { fh.CounterResetHint = histogram.UnknownCounterReset } if value.IsStaleNaN(expectedSample.fh.Sum) { - require.True(t, value.IsStaleNaN(fh.Sum), fmt.Sprintf("at idx %d", i)) + require.Truef(t, value.IsStaleNaN(fh.Sum), "at idx %d", i) continue } - require.Equal(t, *expectedSample.fh, *fh, fmt.Sprintf("at idx %d", i)) + require.Equalf(t, *expectedSample.fh, *fh, "at idx %d", i) default: t.Error("internal error, unexpected type") } } for i, expectedCounterResetHint := range test.expectedCounterResetHeaders { - require.Equal(t, expectedCounterResetHint, getCounterResetHint(chks[i]), fmt.Sprintf("chunk at index %d", i)) + require.Equalf(t, expectedCounterResetHint, getCounterResetHint(chks[i]), "chunk at index %d", i) } } diff --git a/tracing/tracing.go b/tracing/tracing.go index 6b9319ecbd..4fdedf505b 100644 --- a/tracing/tracing.go +++ b/tracing/tracing.go @@ -16,11 +16,10 @@ package tracing import ( "context" "fmt" + "log/slog" "reflect" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/version" "go.opentelemetry.io/otel" @@ -43,14 +42,14 @@ const serviceName = "prometheus" // Manager is capable of building, (re)installing and shutting down // the tracer provider. type Manager struct { - logger log.Logger + logger *slog.Logger done chan struct{} config config.TracingConfig shutdownFunc func() error } // NewManager creates a new tracing manager. -func NewManager(logger log.Logger) *Manager { +func NewManager(logger *slog.Logger) *Manager { return &Manager{ logger: logger, done: make(chan struct{}), @@ -62,7 +61,7 @@ func NewManager(logger log.Logger) *Manager { func (m *Manager) Run() { otel.SetTextMapPropagator(propagation.TraceContext{}) otel.SetErrorHandler(otelErrHandler(func(err error) { - level.Error(m.logger).Log("msg", "OpenTelemetry handler returned an error", "err", err) + m.logger.Error("OpenTelemetry handler returned an error", "err", err.Error()) })) <-m.done } @@ -89,7 +88,7 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { m.config = cfg.TracingConfig m.shutdownFunc = nil otel.SetTracerProvider(noop.NewTracerProvider()) - level.Info(m.logger).Log("msg", "Tracing provider uninstalled.") + m.logger.Info("Tracing provider uninstalled.") return nil } @@ -102,7 +101,7 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error { m.config = cfg.TracingConfig otel.SetTracerProvider(tp) - level.Info(m.logger).Log("msg", "Successfully installed a new tracer provider.") + m.logger.Info("Successfully installed a new tracer provider.") return nil } @@ -115,10 +114,10 @@ func (m *Manager) Stop() { } if err := m.shutdownFunc(); err != nil { - level.Error(m.logger).Log("msg", "failed to shut down the tracer provider", "err", err) + m.logger.Error("failed to shut down the tracer provider", "err", err) } - level.Info(m.logger).Log("msg", "Tracing manager stopped") + m.logger.Info("Tracing manager stopped") } type otelErrHandler func(err error) diff --git a/tracing/tracing_test.go b/tracing/tracing_test.go index b7996c6104..e735e1a18a 100644 --- a/tracing/tracing_test.go +++ b/tracing/tracing_test.go @@ -16,8 +16,8 @@ package tracing import ( "testing" - "github.com/go-kit/log" config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/trace/noop" @@ -28,7 +28,7 @@ import ( func 
TestInstallingNewTracerProvider(t *testing.T) { tpBefore := otel.GetTracerProvider() - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", @@ -41,7 +41,7 @@ func TestInstallingNewTracerProvider(t *testing.T) { } func TestReinstallingTracerProvider(t *testing.T) { - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", @@ -76,7 +76,7 @@ func TestReinstallingTracerProvider(t *testing.T) { } func TestReinstallingTracerProviderWithTLS(t *testing.T) { - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", @@ -96,7 +96,7 @@ func TestReinstallingTracerProviderWithTLS(t *testing.T) { } func TestUninstallingTracerProvider(t *testing.T) { - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", @@ -118,7 +118,7 @@ func TestUninstallingTracerProvider(t *testing.T) { } func TestTracerProviderShutdown(t *testing.T) { - m := NewManager(log.NewNopLogger()) + m := NewManager(promslog.NewNopLogger()) cfg := config.Config{ TracingConfig: config.TracingConfig{ Endpoint: "localhost:1234", diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 9697739e00..3863e6cd99 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -17,14 +17,13 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "path/filepath" "sync" "time" "unicode/utf8" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "go.uber.org/atomic" @@ -226,7 +225,7 @@ func (m *dbMetrics) Unregister() { // DB represents a WAL-only storage. It implements storage.DB. type DB struct { mtx sync.RWMutex - logger log.Logger + logger *slog.Logger opts *Options rs *remote.Storage @@ -251,7 +250,7 @@ type DB struct { } // Open returns a new agent.DB in the given directory. -func Open(l log.Logger, reg prometheus.Registerer, rs *remote.Storage, dir string, opts *Options) (*DB, error) { +func Open(l *slog.Logger, reg prometheus.Registerer, rs *remote.Storage, dir string, opts *Options) (*DB, error) { opts = validateOptions(opts) locker, err := tsdbutil.NewDirLocker(dir, "agent", l, reg) @@ -306,11 +305,11 @@ func Open(l log.Logger, reg prometheus.Registerer, rs *remote.Storage, dir strin } if err := db.replayWAL(); err != nil { - level.Warn(db.logger).Log("msg", "encountered WAL read error, attempting repair", "err", err) + db.logger.Warn("encountered WAL read error, attempting repair", "err", err) if err := w.Repair(err); err != nil { return nil, fmt.Errorf("repair corrupted WAL: %w", err) } - level.Info(db.logger).Log("msg", "successfully repaired WAL") + db.logger.Info("successfully repaired WAL") } go db.run() @@ -335,7 +334,7 @@ func validateOptions(opts *Options) *Options { opts.WALCompression = wlog.CompressionNone } - // Revert Stripesize to DefaultStripsize if Stripsize is either 0 or not a power of 2. + // Revert StripeSize to DefaultStripeSize if StripeSize is either 0 or not a power of 2. 
if opts.StripeSize <= 0 || ((opts.StripeSize & (opts.StripeSize - 1)) != 0) { opts.StripeSize = tsdb.DefaultStripeSize } @@ -359,7 +358,7 @@ func validateOptions(opts *Options) *Options { } func (db *DB) replayWAL() error { - level.Info(db.logger).Log("msg", "replaying WAL, this may take a while", "dir", db.wal.Dir()) + db.logger.Info("replaying WAL, this may take a while", "dir", db.wal.Dir()) start := time.Now() dir, startFrom, err := wlog.LastCheckpoint(db.wal.Dir()) @@ -376,7 +375,7 @@ func (db *DB) replayWAL() error { } defer func() { if err := sr.Close(); err != nil { - level.Warn(db.logger).Log("msg", "error while closing the wal segments reader", "err", err) + db.logger.Warn("error while closing the wal segments reader", "err", err) } }() @@ -386,7 +385,7 @@ func (db *DB) replayWAL() error { return fmt.Errorf("backfill checkpoint: %w", err) } startFrom++ - level.Info(db.logger).Log("msg", "WAL checkpoint loaded") + db.logger.Info("WAL checkpoint loaded") } // Find the last segment. @@ -395,7 +394,7 @@ func (db *DB) replayWAL() error { return fmt.Errorf("finding WAL segments: %w", err) } - // Backfil segments from the most recent checkpoint onwards. + // Backfill segments from the most recent checkpoint onwards. for i := startFrom; i <= last; i++ { seg, err := wlog.OpenReadSegment(wlog.SegmentName(db.wal.Dir(), i)) if err != nil { @@ -405,12 +404,12 @@ func (db *DB) replayWAL() error { sr := wlog.NewSegmentBufReader(seg) err = db.loadWAL(wlog.NewReader(sr), multiRef) if err := sr.Close(); err != nil { - level.Warn(db.logger).Log("msg", "error while closing the wal segments reader", "err", err) + db.logger.Warn("error while closing the wal segments reader", "err", err) } if err != nil { return err } - level.Info(db.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", last) + db.logger.Info("WAL segment loaded", "segment", i, "maxSegment", last) } walReplayDuration := time.Since(start) @@ -571,7 +570,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H } if v := nonExistentSeriesRefs.Load(); v > 0 { - level.Warn(db.logger).Log("msg", "found sample referencing non-existing series", "skipped_series", v) + db.logger.Warn("found sample referencing non-existing series", "skipped_series", v) } db.nextRef.Store(uint64(lastRef)) @@ -616,9 +615,9 @@ Loop: ts = maxTS } - level.Debug(db.logger).Log("msg", "truncating the WAL", "ts", ts) + db.logger.Debug("truncating the WAL", "ts", ts) if err := db.truncate(ts); err != nil { - level.Warn(db.logger).Log("msg", "failed to truncate WAL", "err", err) + db.logger.Warn("failed to truncate WAL", "err", err) } } } @@ -631,7 +630,7 @@ func (db *DB) truncate(mint int64) error { start := time.Now() db.gc(mint) - level.Info(db.logger).Log("msg", "series GC completed", "duration", time.Since(start)) + db.logger.Info("series GC completed", "duration", time.Since(start)) first, last, err := wlog.Segments(db.wal.Dir()) if err != nil { @@ -679,7 +678,7 @@ func (db *DB) truncate(mint int64) error { // If truncating fails, we'll just try it again at the next checkpoint. // Leftover segments will still just be ignored in the future if there's a // checkpoint that supersedes them. 
- level.Error(db.logger).Log("msg", "truncating segments failed", "err", err) + db.logger.Error("truncating segments failed", "err", err) } // The checkpoint is written and segments before it are truncated, so we @@ -696,13 +695,13 @@ func (db *DB) truncate(mint int64) error { // Leftover old checkpoints do not cause problems down the line beyond // occupying disk space. They will just be ignored since a newer checkpoint // exists. - level.Error(db.logger).Log("msg", "delete old checkpoints", "err", err) + db.logger.Error("delete old checkpoints", "err", err) db.metrics.checkpointDeleteFail.Inc() } db.metrics.walTruncateDuration.Observe(time.Since(start).Seconds()) - level.Info(db.logger).Log("msg", "WAL checkpoint complete", "first", first, "last", last, "duration", time.Since(start)) + db.logger.Info("WAL checkpoint complete", "first", first, "last", last, "duration", time.Since(start)) return nil } @@ -764,6 +763,7 @@ func (db *DB) Close() error { type appender struct { *DB + hints *storage.AppendOptions pendingSeries []record.RefSeries pendingSamples []record.RefSample @@ -784,6 +784,10 @@ type appender struct { floatHistogramSeries []*memSeries } +func (a *appender) SetOptions(opts *storage.AppendOptions) { + a.hints = opts +} + func (a *appender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) { // series references and chunk references are identical for agent mode. headRef := chunks.HeadSeriesRef(ref) @@ -977,9 +981,134 @@ func (a *appender) UpdateMetadata(storage.SeriesRef, labels.Labels, metadata.Met return 0, nil } -func (a *appender) AppendCTZeroSample(storage.SeriesRef, labels.Labels, int64, int64) (storage.SeriesRef, error) { - // TODO(bwplotka): Wire metadata in the Agent's appender. - return 0, nil +func (a *appender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if h != nil { + if err := h.Validate(); err != nil { + return 0, err + } + } + if fh != nil { + if err := fh.Validate(); err != nil { + return 0, err + } + } + if ct >= t { + return 0, storage.ErrCTNewerThanSample + } + + series := a.series.GetByID(chunks.HeadSeriesRef(ref)) + if series == nil { + // Ensure no empty labels have gotten through. + l = l.WithoutEmpty() + if l.IsEmpty() { + return 0, fmt.Errorf("empty labelset: %w", tsdb.ErrInvalidSample) + } + + if lbl, dup := l.HasDuplicateLabelNames(); dup { + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, lbl, tsdb.ErrInvalidSample) + } + + var created bool + series, created = a.getOrCreate(l) + if created { + a.pendingSeries = append(a.pendingSeries, record.RefSeries{ + Ref: series.ref, + Labels: l, + }) + a.metrics.numActiveSeries.Inc() + } + } + + series.Lock() + defer series.Unlock() + + if ct <= a.minValidTime(series.lastTs) { + return 0, storage.ErrOutOfOrderCT + } + + if ct > series.lastTs { + series.lastTs = ct + } else { + // discard the sample if it's out of order. 
+ return 0, storage.ErrOutOfOrderCT + } + + switch { + case h != nil: + zeroHistogram := &histogram.Histogram{} + a.pendingHistograms = append(a.pendingHistograms, record.RefHistogramSample{ + Ref: series.ref, + T: ct, + H: zeroHistogram, + }) + a.histogramSeries = append(a.histogramSeries, series) + case fh != nil: + a.pendingFloatHistograms = append(a.pendingFloatHistograms, record.RefFloatHistogramSample{ + Ref: series.ref, + T: ct, + FH: &histogram.FloatHistogram{}, + }) + a.floatHistogramSeries = append(a.floatHistogramSeries, series) + } + + a.metrics.totalAppendedSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() + return storage.SeriesRef(series.ref), nil +} + +func (a *appender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) { + if ct >= t { + return 0, storage.ErrCTNewerThanSample + } + + series := a.series.GetByID(chunks.HeadSeriesRef(ref)) + if series == nil { + l = l.WithoutEmpty() + if l.IsEmpty() { + return 0, fmt.Errorf("empty labelset: %w", tsdb.ErrInvalidSample) + } + + if lbl, dup := l.HasDuplicateLabelNames(); dup { + return 0, fmt.Errorf(`label name "%s" is not unique: %w`, lbl, tsdb.ErrInvalidSample) + } + + newSeries, created := a.getOrCreate(l) + if created { + a.pendingSeries = append(a.pendingSeries, record.RefSeries{ + Ref: newSeries.ref, + Labels: l, + }) + a.metrics.numActiveSeries.Inc() + } + + series = newSeries + } + + series.Lock() + defer series.Unlock() + + if t <= a.minValidTime(series.lastTs) { + a.metrics.totalOutOfOrderSamples.Inc() + return 0, storage.ErrOutOfOrderSample + } + + if ct > series.lastTs { + series.lastTs = ct + } else { + // discard the sample if it's out of order. + return 0, storage.ErrOutOfOrderCT + } + + // NOTE: always modify pendingSamples and sampleSeries together. + a.pendingSamples = append(a.pendingSamples, record.RefSample{ + Ref: series.ref, + T: ct, + V: 0, + }) + a.sampleSeries = append(a.sampleSeries, series) + + a.metrics.totalAppendedSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + + return storage.SeriesRef(series.ref), nil } // Commit submits the collected samples and purges the batch. 
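The new agent implementations of AppendCTZeroSample and AppendHistogramCTZeroSample above share two ordering rules: the created timestamp (CT) must be strictly older than the sample it belongs to, and it must advance the series' last-seen timestamp before a synthetic zero sample is written at CT. A standalone sketch of just those guard clauses (helper name and error values are illustrative, not the real API):

package main

import (
	"errors"
	"fmt"
)

var (
	errCTNewerThanSample = errors.New("CT is newer or the same as sample's timestamp")
	errOutOfOrderCT      = errors.New("created timestamp out of order")
)

// validateCT mirrors the checks in the agent appender: on success the caller
// records a zero-valued sample at ct, followed by the real sample at t.
func validateCT(t, ct, lastTs int64) error {
	if ct >= t {
		return errCTNewerThanSample
	}
	if ct <= lastTs {
		return errOutOfOrderCT
	}
	return nil
}

func main() {
	fmt.Println(validateCT(100, 1, -1))  // <nil>: zero sample goes in at ct=1
	fmt.Println(validateCT(100, 100, 0)) // CT not older than the sample
	fmt.Println(validateCT(101, 1, 50))  // CT does not advance lastTs
}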
diff --git a/tsdb/agent/db_test.go b/tsdb/agent/db_test.go index b31041b1b9..b28c29095c 100644 --- a/tsdb/agent/db_test.go +++ b/tsdb/agent/db_test.go @@ -15,21 +15,23 @@ package agent import ( "context" + "errors" "fmt" + "io" "math" "path/filepath" "strconv" "testing" "time" - "github.com/go-kit/log" - "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/model" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/storage/remote" @@ -89,12 +91,12 @@ func createTestAgentDB(t testing.TB, reg prometheus.Registerer, opts *Options) * t.Helper() dbDir := t.TempDir() - rs := remote.NewStorage(log.NewNopLogger(), reg, startTime, dbDir, time.Second*30, nil, false) + rs := remote.NewStorage(promslog.NewNopLogger(), reg, startTime, dbDir, time.Second*30, nil, false) t.Cleanup(func() { require.NoError(t, rs.Close()) }) - db, err := Open(log.NewNopLogger(), reg, rs, dbDir, opts) + db, err := Open(promslog.NewNopLogger(), reg, rs, dbDir, opts) require.NoError(t, err) return db } @@ -583,7 +585,7 @@ func TestWALReplay(t *testing.T) { func TestLockfile(t *testing.T) { tsdbutil.TestDirLockerUsage(t, func(t *testing.T, data string, createLock bool) (*tsdbutil.DirLocker, testutil.Closer) { - logger := log.NewNopLogger() + logger := promslog.NewNopLogger() reg := prometheus.NewRegistry() rs := remote.NewStorage(logger, reg, startTime, data, time.Second*30, nil, false) t.Cleanup(func() { @@ -605,12 +607,12 @@ func TestLockfile(t *testing.T) { func Test_ExistingWAL_NextRef(t *testing.T) { dbDir := t.TempDir() - rs := remote.NewStorage(log.NewNopLogger(), nil, startTime, dbDir, time.Second*30, nil, false) + rs := remote.NewStorage(promslog.NewNopLogger(), nil, startTime, dbDir, time.Second*30, nil, false) defer func() { require.NoError(t, rs.Close()) }() - db, err := Open(log.NewNopLogger(), nil, rs, dbDir, DefaultOptions()) + db, err := Open(promslog.NewNopLogger(), nil, rs, dbDir, DefaultOptions()) require.NoError(t, err) seriesCount := 10 @@ -638,9 +640,11 @@ func Test_ExistingWAL_NextRef(t *testing.T) { require.NoError(t, db.Close()) // Create a new storage and see what nextRef is initialized to. - db, err = Open(log.NewNopLogger(), nil, rs, dbDir, DefaultOptions()) + db, err = Open(promslog.NewNopLogger(), nil, rs, dbDir, DefaultOptions()) require.NoError(t, err) - defer require.NoError(t, db.Close()) + defer func() { + require.NoError(t, db.Close()) + }() require.Equal(t, uint64(seriesCount+histogramCount), db.nextRef.Load(), "nextRef should be equal to the number of series written across the entire WAL") } @@ -932,6 +936,249 @@ func TestDBOutOfOrderTimeWindow(t *testing.T) { } } +type walSample struct { + t int64 + f float64 + h *histogram.Histogram + lbls labels.Labels + ref storage.SeriesRef +} + +func TestDBCreatedTimestampSamplesIngestion(t *testing.T) { + t.Parallel() + + type appendableSample struct { + t int64 + ct int64 + v float64 + lbls labels.Labels + h *histogram.Histogram + expectsError bool + } + + testHistogram := tsdbutil.GenerateTestHistograms(1)[0] + zeroHistogram := &histogram.Histogram{} + + lbls := labelsForTest(t.Name(), 1) + defLbls := labels.New(lbls[0]...) 
+ + testCases := []struct { + name string + inputSamples []appendableSample + expectedSamples []*walSample + expectedSeriesCount int + }{ + { + name: "in order ct+normal sample/floatSamples", + inputSamples: []appendableSample{ + {t: 100, ct: 1, v: 10, lbls: defLbls}, + {t: 101, ct: 1, v: 10, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 1, f: 0, lbls: defLbls}, + {t: 100, f: 10, lbls: defLbls}, + {t: 101, f: 10, lbls: defLbls}, + }, + }, + { + name: "CT+float && CT+histogram samples", + inputSamples: []appendableSample{ + { + t: 100, + ct: 30, + v: 20, + lbls: defLbls, + }, + { + t: 300, + ct: 230, + h: testHistogram, + lbls: defLbls, + }, + }, + expectedSamples: []*walSample{ + {t: 30, f: 0, lbls: defLbls}, + {t: 100, f: 20, lbls: defLbls}, + {t: 230, h: zeroHistogram, lbls: defLbls}, + {t: 300, h: testHistogram, lbls: defLbls}, + }, + expectedSeriesCount: 1, + }, + { + name: "CT+float && CT+histogram samples with error", + inputSamples: []appendableSample{ + { + // invalid CT + t: 100, + ct: 100, + v: 10, + lbls: defLbls, + expectsError: true, + }, + { + // invalid CT histogram + t: 300, + ct: 300, + h: testHistogram, + lbls: defLbls, + expectsError: true, + }, + }, + expectedSamples: []*walSample{ + {t: 100, f: 10, lbls: defLbls}, + {t: 300, h: testHistogram, lbls: defLbls}, + }, + expectedSeriesCount: 0, + }, + { + name: "In order ct+normal sample/histogram", + inputSamples: []appendableSample{ + {t: 100, h: testHistogram, ct: 1, lbls: defLbls}, + {t: 101, h: testHistogram, ct: 1, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 1, h: &histogram.Histogram{}}, + {t: 100, h: testHistogram}, + {t: 101, h: &histogram.Histogram{CounterResetHint: histogram.NotCounterReset}}, + }, + }, + { + name: "ct+normal then OOO sample/float", + inputSamples: []appendableSample{ + {t: 60_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 120_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 180_000, ct: 40_000, v: 10, lbls: defLbls}, + {t: 50_000, ct: 40_000, v: 10, lbls: defLbls}, + }, + expectedSamples: []*walSample{ + {t: 40_000, f: 0, lbls: defLbls}, + {t: 50_000, f: 10, lbls: defLbls}, + {t: 60_000, f: 10, lbls: defLbls}, + {t: 120_000, f: 10, lbls: defLbls}, + {t: 180_000, f: 10, lbls: defLbls}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + reg := prometheus.NewRegistry() + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 360_000 + s := createTestAgentDB(t, reg, opts) + app := s.Appender(context.TODO()) + + for _, sample := range tc.inputSamples { + // We're supposed to write a Histogram to the WAL + if sample.h != nil { + _, err := app.AppendHistogramCTZeroSample(0, sample.lbls, sample.t, sample.ct, zeroHistogram, nil) + if !errors.Is(err, storage.ErrOutOfOrderCT) { + require.Equal(t, sample.expectsError, err != nil, "expected error: %v, got: %v", sample.expectsError, err) + } + + _, err = app.AppendHistogram(0, sample.lbls, sample.t, sample.h, nil) + require.NoError(t, err) + } else { + // We're supposed to write a float sample to the WAL + _, err := app.AppendCTZeroSample(0, sample.lbls, sample.t, sample.ct) + if !errors.Is(err, storage.ErrOutOfOrderCT) { + require.Equal(t, sample.expectsError, err != nil, "expected error: %v, got: %v", sample.expectsError, err) + } + + _, err = app.Append(0, sample.lbls, sample.t, sample.v) + require.NoError(t, err) + } + } + + require.NoError(t, app.Commit()) + // Close the DB to ensure all data is flushed to the WAL + require.NoError(t, s.Close()) + + // Check that we don't have any
OOO samples in the WAL by checking metrics + families, err := reg.Gather() + require.NoError(t, err, "failed to gather metrics") + for _, f := range families { + if f.GetName() == "prometheus_agent_out_of_order_samples_total" { + t.Fatalf("unexpected metric %s", f.GetName()) + } + } + + outputSamples := readWALSamples(t, s.wal.Dir()) + + require.Equal(t, len(tc.expectedSamples), len(outputSamples), "Expected %d samples", len(tc.expectedSamples)) + + for i, expectedSample := range tc.expectedSamples { + for _, sample := range outputSamples { + if sample.t == expectedSample.t && sample.lbls.String() == expectedSample.lbls.String() { + if expectedSample.h != nil { + require.Equal(t, expectedSample.h, sample.h, "histogram value mismatch (sample index %d)", i) + } else { + require.Equal(t, expectedSample.f, sample.f, "value mismatch (sample index %d)", i) + } + } + } + } + }) + } +} + +func readWALSamples(t *testing.T, walDir string) []*walSample { + t.Helper() + sr, err := wlog.NewSegmentsReader(walDir) + require.NoError(t, err) + defer func(sr io.ReadCloser) { + err := sr.Close() + require.NoError(t, err) + }(sr) + + r := wlog.NewReader(sr) + dec := record.NewDecoder(labels.NewSymbolTable()) + + var ( + samples []record.RefSample + histograms []record.RefHistogramSample + + lastSeries record.RefSeries + outputSamples = make([]*walSample, 0) + ) + + for r.Next() { + rec := r.Record() + switch dec.Type(rec) { + case record.Series: + series, err := dec.Series(rec, nil) + require.NoError(t, err) + lastSeries = series[0] + case record.Samples: + samples, err = dec.Samples(rec, samples[:0]) + require.NoError(t, err) + for _, s := range samples { + outputSamples = append(outputSamples, &walSample{ + t: s.T, + f: s.V, + lbls: lastSeries.Labels.Copy(), + ref: storage.SeriesRef(lastSeries.Ref), + }) + } + case record.HistogramSamples: + histograms, err = dec.HistogramSamples(rec, histograms[:0]) + require.NoError(t, err) + for _, h := range histograms { + outputSamples = append(outputSamples, &walSample{ + t: h.T, + h: h.H, + lbls: lastSeries.Labels.Copy(), + ref: storage.SeriesRef(lastSeries.Ref), + }) + } + } + } + + return outputSamples +} + func BenchmarkCreateSeries(b *testing.B) { s := createTestAgentDB(b, nil, DefaultOptions()) defer s.Close() diff --git a/tsdb/block.go b/tsdb/block.go index 2f32733f8c..6483f8d8bb 100644 --- a/tsdb/block.go +++ b/tsdb/block.go @@ -20,15 +20,16 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "slices" "sync" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/oklog/ulid" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" @@ -81,6 +82,10 @@ type IndexReader interface { // If no postings are found having at least one matching label, an empty iterator is returned. PostingsForLabelMatching(ctx context.Context, name string, match func(value string) bool) index.Postings + // PostingsForAllLabelValues returns a sorted iterator over all postings having a label with the given name. + // If no postings are found with the label in question, an empty iterator is returned. + PostingsForAllLabelValues(ctx context.Context, name string) index.Postings + // SortedPostings returns a postings list that is reordered to be sorted // by the label set of the underlying series. 
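Alongside PostingsForLabelMatching, the IndexReader interface above gains PostingsForAllLabelValues. A toy model of its contract, assuming a nested map as the index (the real implementation walks the on-disk postings lists, and a series has exactly one value per label name, so the union is disjoint):

package main

import (
	"fmt"
	"sort"
)

// postingsForAllLabelValues unions the postings of every value of a label
// name and returns them sorted by series reference, mirroring the contract
// of the new IndexReader method.
func postingsForAllLabelValues(idx map[string]map[string][]int, name string) []int {
	var refs []int
	for _, p := range idx[name] {
		refs = append(refs, p...)
	}
	sort.Ints(refs)
	return refs
}

func main() {
	idx := map[string]map[string][]int{
		"job": {"api": {3, 1}, "db": {2}},
	}
	fmt.Println(postingsForAllLabelValues(idx, "job")) // [1 2 3]
}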
SortedPostings(index.Postings) index.Postings @@ -265,7 +270,7 @@ func readMetaFile(dir string) (*BlockMeta, int64, error) { return &m, int64(len(b)), nil } -func writeMetaFile(logger log.Logger, dir string, meta *BlockMeta) (int64, error) { +func writeMetaFile(logger *slog.Logger, dir string, meta *BlockMeta) (int64, error) { meta.Version = metaVersion1 // Make any changes to the file appear atomic. @@ -273,7 +278,7 @@ func writeMetaFile(logger log.Logger, dir string, meta *BlockMeta) (int64, error tmp := path + ".tmp" defer func() { if err := os.RemoveAll(tmp); err != nil { - level.Error(logger).Log("msg", "remove tmp file", "err", err.Error()) + logger.Error("remove tmp file", "err", err.Error()) } }() @@ -319,7 +324,7 @@ type Block struct { indexr IndexReader tombstones tombstones.Reader - logger log.Logger + logger *slog.Logger numBytesChunks int64 numBytesIndex int64 @@ -329,9 +334,9 @@ type Block struct { // OpenBlock opens the block in the directory. It can be passed a chunk pool, which is used // to instantiate chunk structs. -func OpenBlock(logger log.Logger, dir string, pool chunkenc.Pool) (pb *Block, err error) { +func OpenBlock(logger *slog.Logger, dir string, pool chunkenc.Pool, postingsDecoderFactory PostingsDecoderFactory) (pb *Block, err error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } var closers []io.Closer defer func() { @@ -350,7 +355,11 @@ func OpenBlock(logger log.Logger, dir string, pool chunkenc.Pool) (pb *Block, er } closers = append(closers, cr) - ir, err := index.NewFileReader(filepath.Join(dir, indexFilename)) + decoder := index.DecodePostingsRaw + if postingsDecoderFactory != nil { + decoder = postingsDecoderFactory(meta) + } + ir, err := index.NewFileReader(filepath.Join(dir, indexFilename), decoder) if err != nil { return nil, err } @@ -526,6 +535,10 @@ func (r blockIndexReader) PostingsForLabelMatching(ctx context.Context, name str return r.ir.PostingsForLabelMatching(ctx, name, match) } +func (r blockIndexReader) PostingsForAllLabelValues(ctx context.Context, name string) index.Postings { + return r.ir.PostingsForAllLabelValues(ctx, name) +} + func (r blockIndexReader) SortedPostings(p index.Postings) index.Postings { return r.ir.SortedPostings(p) } diff --git a/tsdb/block_test.go b/tsdb/block_test.go index f2569e35be..3e25cff3da 100644 --- a/tsdb/block_test.go +++ b/tsdb/block_test.go @@ -22,12 +22,13 @@ import ( "math/rand" "os" "path/filepath" + "slices" "sort" "strconv" "testing" - "github.com/go-kit/log" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/histogram" @@ -46,7 +47,7 @@ import ( func TestBlockMetaMustNeverBeVersion2(t *testing.T) { dir := t.TempDir() - _, err := writeMetaFile(log.NewNopLogger(), dir, &BlockMeta{}) + _, err := writeMetaFile(promslog.NewNopLogger(), dir, &BlockMeta{}) require.NoError(t, err) meta, _, err := readMetaFile(dir) @@ -58,14 +59,14 @@ func TestSetCompactionFailed(t *testing.T) { tmpdir := t.TempDir() blockDir := createBlock(t, tmpdir, genSeries(1, 1, 0, 1)) - b, err := OpenBlock(nil, blockDir, nil) + b, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) require.False(t, b.meta.Compaction.Failed) require.NoError(t, b.setCompactionFailed()) require.True(t, b.meta.Compaction.Failed) require.NoError(t, b.Close()) - b, err = OpenBlock(nil, blockDir, nil) + b, err = OpenBlock(nil, blockDir, nil, nil) require.NoError(t, 
err) require.True(t, b.meta.Compaction.Failed) require.NoError(t, b.Close()) @@ -73,7 +74,7 @@ func TestSetCompactionFailed(t *testing.T) { func TestCreateBlock(t *testing.T) { tmpdir := t.TempDir() - b, err := OpenBlock(nil, createBlock(t, tmpdir, genSeries(1, 1, 0, 10)), nil) + b, err := OpenBlock(nil, createBlock(t, tmpdir, genSeries(1, 1, 0, 10)), nil, nil) require.NoError(t, err) require.NoError(t, b.Close()) } @@ -83,7 +84,7 @@ func BenchmarkOpenBlock(b *testing.B) { blockDir := createBlock(b, tmpdir, genSeries(1e6, 20, 0, 10)) b.Run("benchmark", func(b *testing.B) { for i := 0; i < b.N; i++ { - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(b, err) require.NoError(b, block.Close()) } @@ -151,7 +152,7 @@ func TestCorruptedChunk(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Truncate(fi.Size()-1)) }, - iterErr: errors.New("cannot populate chunk 8 from block 00000000000000000000000000: segment doesn't include enough bytes to read the chunk - required:26, available:25"), + iterErr: errors.New("cannot populate chunk 8 from block 00000000000000000000000000: segment doesn't include enough bytes to read the chunk - required:25, available:24"), }, { name: "checksum mismatch", @@ -169,7 +170,7 @@ func TestCorruptedChunk(t *testing.T) { require.NoError(t, err) require.Equal(t, 1, n) }, - iterErr: errors.New("cannot populate chunk 8 from block 00000000000000000000000000: checksum mismatch expected:cfc0526c, actual:34815eae"), + iterErr: errors.New("cannot populate chunk 8 from block 00000000000000000000000000: checksum mismatch expected:231bddcf, actual:d85ad10d"), }, } { t.Run(tc.name, func(t *testing.T) { @@ -189,9 +190,9 @@ func TestCorruptedChunk(t *testing.T) { require.NoError(t, f.Close()) // Check open err. - b, err := OpenBlock(nil, blockDir, nil) + b, err := OpenBlock(nil, blockDir, nil, nil) if tc.openErr != nil { - require.Equal(t, tc.openErr.Error(), err.Error()) + require.EqualError(t, err, tc.openErr.Error()) return } defer func() { require.NoError(t, b.Close()) }() @@ -205,7 +206,7 @@ func TestCorruptedChunk(t *testing.T) { require.True(t, set.Next()) it := set.At().Iterator(nil) require.Equal(t, chunkenc.ValNone, it.Next()) - require.Equal(t, tc.iterErr.Error(), it.Err().Error()) + require.EqualError(t, it.Err(), tc.iterErr.Error()) }) } } @@ -244,7 +245,7 @@ func TestLabelValuesWithMatchers(t *testing.T) { require.NotEmpty(t, files, "No chunk created.") // Check open err. - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) defer func() { require.NoError(t, block.Close()) }() @@ -310,6 +311,33 @@ func TestLabelValuesWithMatchers(t *testing.T) { } } +func TestBlockQuerierReturnsSortedLabelValues(t *testing.T) { + tmpdir := t.TempDir() + ctx := context.Background() + + var seriesEntries []storage.Series + for i := 100; i > 0; i-- { + seriesEntries = append(seriesEntries, storage.NewListSeries(labels.FromStrings( + "__name__", fmt.Sprintf("value%d", i), + ), []chunks.Sample{sample{100, 0, nil, nil}})) + } + + blockDir := createBlock(t, tmpdir, seriesEntries) + + // Check open err. 
+ block, err := OpenBlock(nil, blockDir, nil, nil) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, block.Close()) }) + + q, err := newBlockBaseQuerier(block, 0, 100) + require.NoError(t, err) + t.Cleanup(func() { require.NoError(t, q.Close()) }) + + res, _, err := q.LabelValues(ctx, "__name__", nil) + require.NoError(t, err) + require.True(t, slices.IsSorted(res)) +} + // TestBlockSize ensures that the block size is calculated correctly. func TestBlockSize(t *testing.T) { tmpdir := t.TempDir() @@ -324,7 +352,7 @@ func TestBlockSize(t *testing.T) { // Create a block and compare the reported size vs actual disk size. { blockDirInit = createBlock(t, tmpdir, genSeries(10, 1, 1, 100)) - blockInit, err = OpenBlock(nil, blockDirInit, nil) + blockInit, err = OpenBlock(nil, blockDirInit, nil, nil) require.NoError(t, err) defer func() { require.NoError(t, blockInit.Close()) @@ -344,12 +372,12 @@ func TestBlockSize(t *testing.T) { require.NoError(t, err) require.Equal(t, expAfterDelete, actAfterDelete, "after a delete reported block size doesn't match actual disk size") - c, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{0}, nil, nil) + c, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{0}, nil, nil) require.NoError(t, err) blockDirsAfterCompact, err := c.Compact(tmpdir, []string{blockInit.Dir()}, nil) require.NoError(t, err) require.Len(t, blockDirsAfterCompact, 1) - blockAfterCompact, err := OpenBlock(nil, filepath.Join(tmpdir, blockDirsAfterCompact[0].String()), nil) + blockAfterCompact, err := OpenBlock(nil, filepath.Join(tmpdir, blockDirsAfterCompact[0].String()), nil, nil) require.NoError(t, err) defer func() { require.NoError(t, blockAfterCompact.Close()) @@ -380,7 +408,7 @@ func TestReadIndexFormatV1(t *testing.T) { */ blockDir := filepath.Join("testdata", "index_format_v1") - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) q, err := NewBlockQuerier(block, 0, 1000) @@ -417,7 +445,7 @@ func BenchmarkLabelValuesWithMatchers(b *testing.B) { require.NotEmpty(b, files, "No chunk created.") // Check open err. - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(b, err) defer func() { require.NoError(b, block.Close()) }() @@ -469,7 +497,7 @@ func TestLabelNamesWithMatchers(t *testing.T) { require.NotEmpty(t, files, "No chunk created.") // Check open err. - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, block.Close()) }) @@ -523,7 +551,7 @@ func TestBlockIndexReader_PostingsForLabelMatching(t *testing.T) { require.NoError(t, err) require.NotEmpty(t, files, "No chunk created.") - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, block.Close()) }) @@ -593,13 +621,13 @@ func testPostingsForLabelMatching(t *testing.T, offset storage.SeriesRef, setUp // createBlock creates a block with given set of series and returns its dir. 
func createBlock(tb testing.TB, dir string, series []storage.Series) string { - blockDir, err := CreateBlock(series, dir, 0, log.NewNopLogger()) + blockDir, err := CreateBlock(series, dir, 0, promslog.NewNopLogger()) require.NoError(tb, err) return blockDir } func createBlockFromHead(tb testing.TB, dir string, head *Head) string { - compactor, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{1000000}, nil, nil) + compactor, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{1000000}, nil, nil) require.NoError(tb, err) require.NoError(tb, os.MkdirAll(dir, 0o777)) @@ -613,7 +641,7 @@ func createBlockFromHead(tb testing.TB, dir string, head *Head) string { } func createBlockFromOOOHead(tb testing.TB, dir string, head *OOOCompactionHead) string { - compactor, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{1000000}, nil, nil) + compactor, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{1000000}, nil, nil) require.NoError(tb, err) require.NoError(tb, os.MkdirAll(dir, 0o777)) diff --git a/tsdb/blockwriter.go b/tsdb/blockwriter.go index 232ec2b914..63f82e28df 100644 --- a/tsdb/blockwriter.go +++ b/tsdb/blockwriter.go @@ -17,11 +17,10 @@ import ( "context" "errors" "fmt" + "log/slog" "math" "os" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/oklog/ulid" "github.com/prometheus/prometheus/model/timestamp" @@ -31,7 +30,7 @@ import ( // BlockWriter is a block writer that allows appending and flushing series to disk. type BlockWriter struct { - logger log.Logger + logger *slog.Logger destinationDir string head *Head @@ -50,7 +49,7 @@ var ErrNoSeriesAppended = errors.New("no series appended, aborting") // contains anything at all. It is the caller's responsibility to // ensure that the resulting blocks do not overlap etc. // Writer ensures the block flush is atomic (via rename). -func NewBlockWriter(logger log.Logger, dir string, blockSize int64) (*BlockWriter, error) { +func NewBlockWriter(logger *slog.Logger, dir string, blockSize int64) (*BlockWriter, error) { w := &BlockWriter{ logger: logger, destinationDir: dir, @@ -95,7 +94,7 @@ func (w *BlockWriter) Flush(ctx context.Context) (ulid.ULID, error) { // Add +1 millisecond to block maxt because block intervals are half-open: [b.MinTime, b.MaxTime). // Because of this block intervals are always +1 than the total samples it includes. 
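The comment just above describes the half-open interval convention the next line implements: a block covers [MinTime, MaxTime), so MaxTime must sit one millisecond past the last included sample. A worked one-liner of the arithmetic:

package main

import "fmt"

func main() {
	// Samples at t = 0..99 inclusive: the block interval is [0, 100),
	// so maxt is the last sample's timestamp plus one millisecond.
	lastSample := int64(99)
	mint, maxt := int64(0), lastSample+1
	fmt.Printf("[%d, %d) covers %d ms of samples\n", mint, maxt, maxt-mint)
}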
maxt := w.head.MaxTime() + 1 - level.Info(w.logger).Log("msg", "flushing", "series_count", w.head.NumSeries(), "mint", timestamp.Time(mint), "maxt", timestamp.Time(maxt)) + w.logger.Info("flushing", "series_count", w.head.NumSeries(), "mint", timestamp.Time(mint), "maxt", timestamp.Time(maxt)) compactor, err := NewLeveledCompactor(ctx, nil, @@ -121,7 +120,7 @@ func (w *BlockWriter) Flush(ctx context.Context) (ulid.ULID, error) { func (w *BlockWriter) Close() error { defer func() { if err := os.RemoveAll(w.chunkDir); err != nil { - level.Error(w.logger).Log("msg", "error in deleting BlockWriter files", "err", err) + w.logger.Error("error in deleting BlockWriter files", "err", err) } }() return w.head.Close() diff --git a/tsdb/blockwriter_test.go b/tsdb/blockwriter_test.go index d8240b53c6..2704b53566 100644 --- a/tsdb/blockwriter_test.go +++ b/tsdb/blockwriter_test.go @@ -19,9 +19,10 @@ import ( "path/filepath" "testing" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/tsdb/chunks" ) @@ -29,7 +30,7 @@ import ( func TestBlockWriter(t *testing.T) { ctx := context.Background() outputDir := t.TempDir() - w, err := NewBlockWriter(log.NewNopLogger(), outputDir, DefaultBlockDuration) + w, err := NewBlockWriter(promslog.NewNopLogger(), outputDir, DefaultBlockDuration) require.NoError(t, err) // Add some series. @@ -46,7 +47,7 @@ func TestBlockWriter(t *testing.T) { // Confirm the block has the correct data. blockpath := filepath.Join(outputDir, id.String()) - b, err := OpenBlock(nil, blockpath, nil) + b, err := OpenBlock(nil, blockpath, nil, nil) require.NoError(t, err) defer func() { require.NoError(t, b.Close()) }() q, err := NewBlockQuerier(b, math.MinInt64, math.MaxInt64) diff --git a/tsdb/chunkenc/bstream.go b/tsdb/chunkenc/bstream.go index 8cc59f3ea7..6e01798f72 100644 --- a/tsdb/chunkenc/bstream.go +++ b/tsdb/chunkenc/bstream.go @@ -86,8 +86,8 @@ func (b *bstream) writeBit(bit bit) { func (b *bstream) writeByte(byt byte) { if b.count == 0 { - b.stream = append(b.stream, 0) - b.count = 8 + b.stream = append(b.stream, byt) + return } i := len(b.stream) - 1 @@ -95,10 +95,8 @@ func (b *bstream) writeByte(byt byte) { // Complete the last byte with the leftmost b.count bits from byt. b.stream[i] |= byt >> (8 - b.count) - b.stream = append(b.stream, 0) - i++ // Write the remainder, if any. 
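The writeByte rewrite just below drops the append-a-zero-then-index-back pattern and appends the spilled bits directly. For orientation, a standalone illustration of how the two shifts split one byte around the b.count free bits of the current stream byte (values are arbitrary):

package main

import "fmt"

func main() {
	count := uint(3)        // bits still free in the last stream byte
	byt := byte(0b10110110) // byte being written

	hi := byt >> (8 - count) // 0b00000101: completes the current byte
	lo := byt << count       // 0b10110000: becomes the next stream byte

	fmt.Printf("hi=%08b lo=%08b\n", hi, lo)
}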
-	b.stream[i] = byt << b.count
+	b.stream = append(b.stream, byt<<b.count)
 }
diff --git a/tsdb/chunkenc/chunk_test.go b/tsdb/chunkenc/chunk_test.go
--- a/tsdb/chunkenc/chunk_test.go
+++ b/tsdb/chunkenc/chunk_test.go
@@ … @@ func benchmarkIterator(b *testing.B, newChunk func() Chunk) {
-			if j > 250 {
-				break
-			}
 			a.Append(p.t, p.v)
-			i++
-			j++
 		}
-		chunks = append(chunks, c)
 	}
-
-	fmt.Println("num", b.N, "created chunks", len(chunks))
 }
diff --git a/tsdb/chunkenc/float_histogram.go b/tsdb/chunkenc/float_histogram.go
index f18eb77dad..0da00dcda4 100644
--- a/tsdb/chunkenc/float_histogram.go
+++ b/tsdb/chunkenc/float_histogram.go
@@ -15,6 +15,7 @@ package chunkenc

 import (
 	"encoding/binary"
+	"errors"
 	"fmt"
 	"math"
@@ -761,9 +762,9 @@ func (a *FloatHistogramAppender) AppendFloatHistogram(prev *FloatHistogramAppend
 	if !okToAppend || counterReset {
 		if appendOnly {
 			if counterReset {
-				return nil, false, a, fmt.Errorf("float histogram counter reset")
+				return nil, false, a, errors.New("float histogram counter reset")
 			}
-			return nil, false, a, fmt.Errorf("float histogram schema change")
+			return nil, false, a, errors.New("float histogram schema change")
 		}
 		newChunk := NewFloatHistogramChunk()
 		app, err := newChunk.Appender()
@@ -812,7 +813,7 @@ func (a *FloatHistogramAppender) AppendFloatHistogram(prev *FloatHistogramAppend
 	pForwardInserts, nForwardInserts, pBackwardInserts, nBackwardInserts, pMergedSpans, nMergedSpans, okToAppend := a.appendableGauge(h)
 	if !okToAppend {
 		if appendOnly {
-			return nil, false, a, fmt.Errorf("float gauge histogram schema change")
+			return nil, false, a, errors.New("float gauge histogram schema change")
 		}
 		newChunk := NewFloatHistogramChunk()
 		app, err := newChunk.Appender()
@@ -975,6 +976,7 @@ func (it *floatHistogramIterator) Reset(b []byte) {
 		it.atFloatHistogramCalled = false
 		it.pBuckets, it.nBuckets = nil, nil
 		it.pSpans, it.nSpans = nil, nil
+		it.customValues = nil
 	} else {
 		it.pBuckets, it.nBuckets = it.pBuckets[:0], it.nBuckets[:0]
 	}
@@ -1070,7 +1072,7 @@ func (it *floatHistogramIterator) Next() ValueType {
 	// The case for the 2nd sample with single deltas is implicitly handled correctly with the double delta code,
 	// so we don't need a separate single delta logic for the 2nd sample.

-	// Recycle bucket and span slices that have not been returned yet. Otherwise, copy them.
+	// Recycle bucket, span and custom value slices that have not been returned yet. Otherwise, copy them.
 	// We can always recycle the slices for leading and trailing bits as they are
 	// never returned to the caller.
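The bstream.writeByte rewrite above drops one append-then-patch round trip: instead of appending a zero byte and OR-ing the value into it, the remainder byte is appended already shifted. A standalone model of the new code (a sketch only; per the bstream comments, count is the number of bits still writable in the last byte):

    package main

    import "fmt"

    type bstream struct {
    	stream []byte
    	count  uint8 // bits still writable in the last byte
    }

    func (b *bstream) writeByte(byt byte) {
    	if b.count == 0 {
    		b.stream = append(b.stream, byt) // was: append 0, then OR byt in
    		return
    	}
    	i := len(b.stream) - 1
    	b.stream[i] |= byt >> (8 - b.count)       // complete the last byte
    	b.stream = append(b.stream, byt<<b.count) // remainder opens a new byte
    }

    func main() {
    	b := &bstream{stream: []byte{0b1010_0000}, count: 5}
    	b.writeByte(0xFF)
    	fmt.Printf("%08b\n", b.stream) // [10111111 11100000]
    }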
if it.atFloatHistogramCalled { @@ -1103,6 +1105,13 @@ func (it *floatHistogramIterator) Next() ValueType { } else { it.nSpans = nil } + if len(it.customValues) > 0 { + newCustomValues := make([]float64, len(it.customValues)) + copy(newCustomValues, it.customValues) + it.customValues = newCustomValues + } else { + it.customValues = nil + } } tDod, err := readVarbitInt(&it.br) diff --git a/tsdb/chunkenc/float_histogram_test.go b/tsdb/chunkenc/float_histogram_test.go index 6092c0f636..e99fd1c160 100644 --- a/tsdb/chunkenc/float_histogram_test.go +++ b/tsdb/chunkenc/float_histogram_test.go @@ -31,22 +31,27 @@ func TestFirstFloatHistogramExplicitCounterReset(t *testing.T) { tests := map[string]struct { hint histogram.CounterResetHint expHeader CounterResetHeader + expHint histogram.CounterResetHint }{ "CounterReset": { hint: histogram.CounterReset, expHeader: CounterReset, + expHint: histogram.UnknownCounterReset, }, "NotCounterReset": { hint: histogram.NotCounterReset, expHeader: UnknownCounterReset, + expHint: histogram.UnknownCounterReset, }, "UnknownCounterReset": { hint: histogram.UnknownCounterReset, expHeader: UnknownCounterReset, + expHint: histogram.UnknownCounterReset, }, "Gauge": { hint: histogram.GaugeType, expHeader: GaugeType, + expHint: histogram.GaugeType, }, } for name, test := range tests { @@ -63,6 +68,7 @@ func TestFirstFloatHistogramExplicitCounterReset(t *testing.T) { require.False(t, recoded) require.Equal(t, app, newApp) require.Equal(t, test.expHeader, chk.GetCounterResetHeader()) + assertFirstFloatHistogramSampleHint(t, chk, test.expHint) }) } } @@ -338,7 +344,7 @@ func TestFloatHistogramChunkAppendable(t *testing.T) { _, _, _, _, ok, _ := hApp.appendable(h2) require.False(t, ok) - assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, UnknownCounterReset) + assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, UnknownCounterReset, histogram.UnknownCounterReset) } { // Zero threshold change. @@ -348,7 +354,7 @@ func TestFloatHistogramChunkAppendable(t *testing.T) { _, _, _, _, ok, _ := hApp.appendable(h2) require.False(t, ok) - assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, UnknownCounterReset) + assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, UnknownCounterReset, histogram.UnknownCounterReset) } { // New histogram that has more buckets. @@ -397,7 +403,7 @@ func TestFloatHistogramChunkAppendable(t *testing.T) { require.False(t, ok) // Need to cut a new chunk. require.True(t, cr) - assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { // New histogram that has buckets missing but the buckets missing were empty. @@ -480,7 +486,7 @@ func TestFloatHistogramChunkAppendable(t *testing.T) { require.False(t, ok) // Need to cut a new chunk. require.True(t, cr) - assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { // New histogram that has a counter reset while new buckets were added. @@ -503,7 +509,7 @@ func TestFloatHistogramChunkAppendable(t *testing.T) { require.False(t, ok) // Need to cut a new chunk. 
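The customValues handling added to floatHistogramIterator.Reset and Next above extends the existing copy-on-return discipline to custom bucket bounds: once a slice has been handed out via AtFloatHistogram, Next must detach a fresh copy before decoding over it. A condensed, hypothetical model of that pattern:

    package main

    import "fmt"

    type iter struct {
    	customValues []float64
    	returned     bool // models atFloatHistogramCalled above
    }

    func (it *iter) Next() {
    	if it.returned {
    		it.returned = false
    		if len(it.customValues) > 0 {
    			cp := make([]float64, len(it.customValues))
    			copy(cp, it.customValues)
    			it.customValues = cp // caller keeps the old backing array
    		} else {
    			it.customValues = nil
    		}
    	}
    	// ... decode the next sample into it.customValues ...
    }

    func (it *iter) At() []float64 {
    	it.returned = true
    	return it.customValues
    }

    func main() {
    	it := &iter{customValues: []float64{10, 11, 12, 13}}
    	got := it.At()
    	it.Next()
    	it.customValues[0] = 99 // decode step mutating iterator state
    	fmt.Println(got[0])     // still 10: the returned slice was detached
    }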
require.True(t, cr) - assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { @@ -532,7 +538,7 @@ func TestFloatHistogramChunkAppendable(t *testing.T) { require.False(t, ok) // Need to cut a new chunk. require.True(t, cr) - assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { // New histogram that has an explicit counter reset. @@ -540,7 +546,7 @@ func TestFloatHistogramChunkAppendable(t *testing.T) { h2 := h1.Copy() h2.CounterResetHint = histogram.CounterReset - assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { // Start new chunk explicitly, and append a new histogram that is considered appendable to the previous chunk. @@ -557,6 +563,7 @@ func TestFloatHistogramChunkAppendable(t *testing.T) { require.Equal(t, app, newApp) assertSampleCount(t, nextChunk, 1, ValFloatHistogram) require.Equal(t, NotCounterReset, nextChunk.GetCounterResetHeader()) + assertFirstFloatHistogramSampleHint(t, nextChunk, histogram.UnknownCounterReset) } { // Start new chunk explicitly, and append a new histogram that is not considered appendable to the previous chunk. @@ -574,6 +581,7 @@ func TestFloatHistogramChunkAppendable(t *testing.T) { require.Equal(t, app, newApp) assertSampleCount(t, nextChunk, 1, ValFloatHistogram) require.Equal(t, CounterReset, nextChunk.GetCounterResetHeader()) + assertFirstFloatHistogramSampleHint(t, nextChunk, histogram.UnknownCounterReset) } { // Start new chunk explicitly, and append a new histogram that would need recoding if we added it to the chunk. @@ -600,6 +608,7 @@ func TestFloatHistogramChunkAppendable(t *testing.T) { require.Equal(t, app, newApp) assertSampleCount(t, nextChunk, 1, ValFloatHistogram) require.Equal(t, NotCounterReset, nextChunk.GetCounterResetHeader()) + assertFirstFloatHistogramSampleHint(t, nextChunk, histogram.UnknownCounterReset) } { @@ -664,7 +673,7 @@ func TestFloatHistogramChunkAppendable(t *testing.T) { _, _, _, _, ok, _ := hApp.appendable(h2) require.False(t, ok) - assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { // Custom buckets, change only in custom bounds. @@ -674,7 +683,7 @@ func TestFloatHistogramChunkAppendable(t *testing.T) { _, _, _, _, ok, _ := hApp.appendable(h2) require.False(t, ok) - assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { // Custom buckets, with more buckets. 
@@ -705,7 +714,7 @@ func TestFloatHistogramChunkAppendable(t *testing.T) { } } -func assertNewFloatHistogramChunkOnAppend(t *testing.T, oldChunk Chunk, hApp *FloatHistogramAppender, ts int64, h *histogram.FloatHistogram, expectHeader CounterResetHeader) { +func assertNewFloatHistogramChunkOnAppend(t *testing.T, oldChunk Chunk, hApp *FloatHistogramAppender, ts int64, h *histogram.FloatHistogram, expectHeader CounterResetHeader, expectHint histogram.CounterResetHint) { oldChunkBytes := oldChunk.Bytes() newChunk, recoded, newAppender, err := hApp.AppendFloatHistogram(nil, ts, h, false) require.Equal(t, oldChunkBytes, oldChunk.Bytes()) // Sanity check that previous chunk is untouched. @@ -717,6 +726,7 @@ func assertNewFloatHistogramChunkOnAppend(t *testing.T, oldChunk Chunk, hApp *Fl require.NotNil(t, newAppender) require.NotEqual(t, hApp, newAppender) assertSampleCount(t, newChunk, 1, ValFloatHistogram) + assertFirstFloatHistogramSampleHint(t, newChunk, expectHint) } func assertNoNewFloatHistogramChunkOnAppend(t *testing.T, oldChunk Chunk, hApp *FloatHistogramAppender, ts int64, h *histogram.FloatHistogram, expectHeader CounterResetHeader) { @@ -1023,7 +1033,7 @@ func TestFloatHistogramChunkAppendableGauge(t *testing.T) { _, _, _, _, _, _, ok := hApp.appendableGauge(h2) require.False(t, ok) - assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, GaugeType) + assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, GaugeType, histogram.GaugeType) } { // Zero threshold change. @@ -1033,7 +1043,7 @@ func TestFloatHistogramChunkAppendableGauge(t *testing.T) { _, _, _, _, _, _, ok := hApp.appendableGauge(h2) require.False(t, ok) - assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, GaugeType) + assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, GaugeType, histogram.GaugeType) } { // New histogram that has more buckets. @@ -1208,7 +1218,7 @@ func TestFloatHistogramChunkAppendableGauge(t *testing.T) { _, _, _, _, _, _, ok := hApp.appendableGauge(h2) require.False(t, ok) - assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, GaugeType) + assertNewFloatHistogramChunkOnAppend(t, c, hApp, ts+1, h2, GaugeType, histogram.GaugeType) } { // Custom buckets, with more buckets. @@ -1357,3 +1367,56 @@ func TestFloatHistogramUniqueSpansAfterNext(t *testing.T) { require.NotSame(t, &rh1.PositiveSpans[0], &rh2.PositiveSpans[0], "PositiveSpans should be unique between histograms") require.NotSame(t, &rh1.NegativeSpans[0], &rh2.NegativeSpans[0], "NegativeSpans should be unique between histograms") } + +func TestFloatHistogramUniqueCustomValuesAfterNext(t *testing.T) { + // Create two histograms with the same schema and custom values. + h1 := &histogram.FloatHistogram{ + Schema: -53, + Count: 10, + Sum: 15.0, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []float64{1, 2, 3, 4}, + CustomValues: []float64{10, 11, 12, 13}, + } + + h2 := h1.Copy() + + // Create a chunk and append both histograms. + c := NewFloatHistogramChunk() + app, err := c.Appender() + require.NoError(t, err) + + _, _, _, err = app.AppendFloatHistogram(nil, 0, h1, false) + require.NoError(t, err) + + _, _, _, err = app.AppendFloatHistogram(nil, 1, h2, false) + require.NoError(t, err) + + // Create an iterator and advance to the first histogram. + it := c.Iterator(nil) + require.Equal(t, ValFloatHistogram, it.Next()) + _, rh1 := it.AtFloatHistogram(nil) + + // Advance to the second histogram and retrieve it. 
+ require.Equal(t, ValFloatHistogram, it.Next()) + _, rh2 := it.AtFloatHistogram(nil) + + require.Equal(t, rh1.PositiveSpans, h1.PositiveSpans, "Returned positive spans are as expected") + require.Equal(t, rh1.CustomValues, h1.CustomValues, "Returned custom values are as expected") + require.Equal(t, rh2.PositiveSpans, h1.PositiveSpans, "Returned positive spans are as expected") + require.Equal(t, rh2.CustomValues, h1.CustomValues, "Returned custom values are as expected") + + // Check that the spans and custom values for h1 and h2 are unique slices. + require.NotSame(t, &rh1.PositiveSpans[0], &rh2.PositiveSpans[0], "PositiveSpans should be unique between histograms") + require.NotSame(t, &rh1.CustomValues[0], &rh2.CustomValues[0], "CustomValues should be unique between histograms") +} + +func assertFirstFloatHistogramSampleHint(t *testing.T, chunk Chunk, expected histogram.CounterResetHint) { + it := chunk.Iterator(nil) + require.Equal(t, ValFloatHistogram, it.Next()) + _, v := it.AtFloatHistogram(nil) + require.Equal(t, expected, v.CounterResetHint) +} diff --git a/tsdb/chunkenc/histogram.go b/tsdb/chunkenc/histogram.go index f8796d64ec..d2eec6b75a 100644 --- a/tsdb/chunkenc/histogram.go +++ b/tsdb/chunkenc/histogram.go @@ -15,6 +15,7 @@ package chunkenc import ( "encoding/binary" + "errors" "fmt" "math" @@ -795,9 +796,9 @@ func (a *HistogramAppender) AppendHistogram(prev *HistogramAppender, t int64, h if !okToAppend || counterReset { if appendOnly { if counterReset { - return nil, false, a, fmt.Errorf("histogram counter reset") + return nil, false, a, errors.New("histogram counter reset") } - return nil, false, a, fmt.Errorf("histogram schema change") + return nil, false, a, errors.New("histogram schema change") } newChunk := NewHistogramChunk() app, err := newChunk.Appender() @@ -846,7 +847,7 @@ func (a *HistogramAppender) AppendHistogram(prev *HistogramAppender, t int64, h pForwardInserts, nForwardInserts, pBackwardInserts, nBackwardInserts, pMergedSpans, nMergedSpans, okToAppend := a.appendableGauge(h) if !okToAppend { if appendOnly { - return nil, false, a, fmt.Errorf("gauge histogram schema change") + return nil, false, a, errors.New("gauge histogram schema change") } newChunk := NewHistogramChunk() app, err := newChunk.Appender() @@ -1074,6 +1075,7 @@ func (it *histogramIterator) Reset(b []byte) { it.atHistogramCalled = false it.pBuckets, it.nBuckets = nil, nil it.pSpans, it.nSpans = nil, nil + it.customValues = nil } else { it.pBuckets = it.pBuckets[:0] it.nBuckets = it.nBuckets[:0] @@ -1081,6 +1083,7 @@ func (it *histogramIterator) Reset(b []byte) { if it.atFloatHistogramCalled { it.atFloatHistogramCalled = false it.pFloatBuckets, it.nFloatBuckets = nil, nil + it.customValues = nil } else { it.pFloatBuckets = it.pFloatBuckets[:0] it.nFloatBuckets = it.nFloatBuckets[:0] @@ -1186,8 +1189,7 @@ func (it *histogramIterator) Next() ValueType { // The case for the 2nd sample with single deltas is implicitly handled correctly with the double delta code, // so we don't need a separate single delta logic for the 2nd sample. - // Recycle bucket and span slices that have not been returned yet. Otherwise, copy them. - // copy them. + // Recycle bucket, span and custom value slices that have not been returned yet. Otherwise, copy them. 
if it.atFloatHistogramCalled || it.atHistogramCalled { if len(it.pSpans) > 0 { newSpans := make([]histogram.Span, len(it.pSpans)) @@ -1203,6 +1205,13 @@ func (it *histogramIterator) Next() ValueType { } else { it.nSpans = nil } + if len(it.customValues) > 0 { + newCustomValues := make([]float64, len(it.customValues)) + copy(newCustomValues, it.customValues) + it.customValues = newCustomValues + } else { + it.customValues = nil + } } if it.atHistogramCalled { diff --git a/tsdb/chunkenc/histogram_meta.go b/tsdb/chunkenc/histogram_meta.go index 8d614b817f..7bb31acf00 100644 --- a/tsdb/chunkenc/histogram_meta.go +++ b/tsdb/chunkenc/histogram_meta.go @@ -558,26 +558,16 @@ func counterResetHint(crh CounterResetHeader, numRead uint16) histogram.CounterR // In a counter histogram chunk, there will not be any counter // resets after the first histogram. return histogram.NotCounterReset - case crh == CounterReset: - // If the chunk was started because of a counter reset, we can - // safely return that hint. This histogram always has to be - // treated as a counter reset. - return histogram.CounterReset default: // Sadly, we have to return "unknown" as the hint for all other - // cases, even if we know that the chunk was started without a + // cases, even if we know that the chunk was started with or without a // counter reset. But we cannot be sure that the previous chunk - // still exists in the TSDB, so we conservatively return - // "unknown". On the bright side, this case should be relatively - // rare. + // still exists in the TSDB, or if the previous chunk was added later + // by out of order or backfill, so we conservatively return "unknown". // - // TODO(beorn7): Nevertheless, if the current chunk is in the - // middle of a block (not the first chunk in the block for this - // series), it's probably safe to assume that the previous chunk - // will exist in the TSDB for as long as the current chunk - // exist, and we could safely return - // "histogram.NotCounterReset". This needs some more work and - // might not be worth the effort and/or risk. To be vetted... + // TODO: If we can detect whether the previous and current chunk are + // actually consecutive then we could trust its hint: + // https://github.com/prometheus/prometheus/issues/15346. return histogram.UnknownCounterReset } } diff --git a/tsdb/chunkenc/histogram_meta_test.go b/tsdb/chunkenc/histogram_meta_test.go index fdbd1825aa..1774dee867 100644 --- a/tsdb/chunkenc/histogram_meta_test.go +++ b/tsdb/chunkenc/histogram_meta_test.go @@ -14,7 +14,7 @@ // The code in this file was largely written by Damian Gryski as part of // https://github.com/dgryski/go-tsz and published under the license below. // It was modified to accommodate reading from byte slices without modifying -// the underlying bytes, which would panic when reading from mmap'd +// the underlying bytes, which would panic when reading from mmapped // read-only byte slices. 
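The counterResetHint change above means even a CounterReset chunk header is no longer propagated into the first sample's hint: the preceding chunk may be gone, or may have been written later by out-of-order ingestion or backfill, so only "unknown" is safe. A condensed model with local stand-in types (the real enums live in model/histogram and tsdb/chunkenc):

    package main

    import "fmt"

    type CounterResetHeader uint8

    const (
    	UnknownCounterReset CounterResetHeader = iota
    	CounterReset
    	NotCounterReset
    	GaugeType
    )

    type Hint string

    func counterResetHint(crh CounterResetHeader, numRead uint16) Hint {
    	switch {
    	case crh == GaugeType:
    		// Gauge histograms always carry the gauge hint.
    		return "GaugeType"
    	case numRead > 1:
    		// Within one counter chunk there is never a reset after sample 1.
    		return "NotCounterReset"
    	default:
    		// First sample of a chunk: even a CounterReset header is not
    		// trusted, since the previous chunk may no longer exist or may
    		// have been inserted later by OOO ingestion or backfill.
    		return "UnknownCounterReset"
    	}
    }

    func main() {
    	fmt.Println(counterResetHint(CounterReset, 1)) // UnknownCounterReset (was CounterReset before this change)
    	fmt.Println(counterResetHint(CounterReset, 5)) // NotCounterReset
    }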
package chunkenc diff --git a/tsdb/chunkenc/histogram_test.go b/tsdb/chunkenc/histogram_test.go index 29b77b1583..7e8807963b 100644 --- a/tsdb/chunkenc/histogram_test.go +++ b/tsdb/chunkenc/histogram_test.go @@ -32,22 +32,27 @@ func TestFirstHistogramExplicitCounterReset(t *testing.T) { tests := map[string]struct { hint histogram.CounterResetHint expHeader CounterResetHeader + expHint histogram.CounterResetHint }{ "CounterReset": { hint: histogram.CounterReset, expHeader: CounterReset, + expHint: histogram.UnknownCounterReset, }, "NotCounterReset": { hint: histogram.NotCounterReset, expHeader: UnknownCounterReset, + expHint: histogram.UnknownCounterReset, }, "UnknownCounterReset": { hint: histogram.UnknownCounterReset, expHeader: UnknownCounterReset, + expHint: histogram.UnknownCounterReset, }, "Gauge": { hint: histogram.GaugeType, expHeader: GaugeType, + expHint: histogram.GaugeType, }, } for name, test := range tests { @@ -64,6 +69,7 @@ func TestFirstHistogramExplicitCounterReset(t *testing.T) { require.False(t, recoded) require.Equal(t, app, newApp) require.Equal(t, test.expHeader, chk.GetCounterResetHeader()) + assertFirstIntHistogramSampleHint(t, chk, test.expHint) }) } } @@ -352,7 +358,7 @@ func TestHistogramChunkAppendable(t *testing.T) { _, _, _, _, ok, _ := hApp.appendable(h2) require.False(t, ok) - assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, UnknownCounterReset) + assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, UnknownCounterReset, histogram.UnknownCounterReset) } { // Zero threshold change. @@ -362,7 +368,7 @@ func TestHistogramChunkAppendable(t *testing.T) { _, _, _, _, ok, _ := hApp.appendable(h2) require.False(t, ok) - assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, UnknownCounterReset) + assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, UnknownCounterReset, histogram.UnknownCounterReset) } { // New histogram that has more buckets. @@ -413,7 +419,7 @@ func TestHistogramChunkAppendable(t *testing.T) { require.False(t, ok) // Need to cut a new chunk. require.True(t, cr) - assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { // New histogram that has buckets missing but the buckets missing were empty. @@ -498,7 +504,7 @@ func TestHistogramChunkAppendable(t *testing.T) { require.False(t, ok) // Need to cut a new chunk. require.True(t, cr) - assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { // New histogram that has a counter reset while new buckets were added. @@ -524,7 +530,7 @@ func TestHistogramChunkAppendable(t *testing.T) { require.False(t, ok) // Need to cut a new chunk. require.True(t, cr) - assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { @@ -556,7 +562,7 @@ func TestHistogramChunkAppendable(t *testing.T) { require.False(t, ok) // Need to cut a new chunk. require.True(t, cr) - assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { // New histogram that has an explicit counter reset. 
@@ -564,7 +570,7 @@ func TestHistogramChunkAppendable(t *testing.T) { h2 := h1.Copy() h2.CounterResetHint = histogram.CounterReset - assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { // Start new chunk explicitly, and append a new histogram that is considered appendable to the previous chunk. @@ -581,6 +587,7 @@ func TestHistogramChunkAppendable(t *testing.T) { require.Equal(t, app, newApp) assertSampleCount(t, nextChunk, 1, ValHistogram) require.Equal(t, NotCounterReset, nextChunk.GetCounterResetHeader()) + assertFirstIntHistogramSampleHint(t, nextChunk, histogram.UnknownCounterReset) } { // Start new chunk explicitly, and append a new histogram that is not considered appendable to the previous chunk. @@ -598,6 +605,7 @@ func TestHistogramChunkAppendable(t *testing.T) { require.Equal(t, app, newApp) assertSampleCount(t, nextChunk, 1, ValHistogram) require.Equal(t, CounterReset, nextChunk.GetCounterResetHeader()) + assertFirstIntHistogramSampleHint(t, nextChunk, histogram.UnknownCounterReset) } { // Start new chunk explicitly, and append a new histogram that would need recoding if we added it to the chunk. @@ -627,6 +635,7 @@ func TestHistogramChunkAppendable(t *testing.T) { require.Equal(t, app, newApp) assertSampleCount(t, nextChunk, 1, ValHistogram) require.Equal(t, NotCounterReset, nextChunk.GetCounterResetHeader()) + assertFirstIntHistogramSampleHint(t, nextChunk, histogram.UnknownCounterReset) } { @@ -691,7 +700,7 @@ func TestHistogramChunkAppendable(t *testing.T) { _, _, _, _, ok, _ := hApp.appendable(h2) require.False(t, ok) - assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { // Custom buckets, change only in custom bounds. @@ -701,7 +710,7 @@ func TestHistogramChunkAppendable(t *testing.T) { _, _, _, _, ok, _ := hApp.appendable(h2) require.False(t, ok) - assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset) + assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, CounterReset, histogram.UnknownCounterReset) } { // Custom buckets, with more buckets. @@ -732,7 +741,7 @@ func TestHistogramChunkAppendable(t *testing.T) { } } -func assertNewHistogramChunkOnAppend(t *testing.T, oldChunk Chunk, hApp *HistogramAppender, ts int64, h *histogram.Histogram, expectHeader CounterResetHeader) { +func assertNewHistogramChunkOnAppend(t *testing.T, oldChunk Chunk, hApp *HistogramAppender, ts int64, h *histogram.Histogram, expectHeader CounterResetHeader, expectHint histogram.CounterResetHint) { oldChunkBytes := oldChunk.Bytes() newChunk, recoded, newAppender, err := hApp.AppendHistogram(nil, ts, h, false) require.Equal(t, oldChunkBytes, oldChunk.Bytes()) // Sanity check that previous chunk is untouched. 
@@ -744,6 +753,7 @@ func assertNewHistogramChunkOnAppend(t *testing.T, oldChunk Chunk, hApp *Histogr require.NotNil(t, newAppender) require.NotEqual(t, hApp, newAppender) assertSampleCount(t, newChunk, 1, ValHistogram) + assertFirstIntHistogramSampleHint(t, newChunk, expectHint) } func assertNoNewHistogramChunkOnAppend(t *testing.T, currChunk Chunk, hApp *HistogramAppender, ts int64, h *histogram.Histogram, expectHeader CounterResetHeader) { @@ -1203,7 +1213,7 @@ func TestHistogramChunkAppendableGauge(t *testing.T) { _, _, _, _, _, _, ok := hApp.appendableGauge(h2) require.False(t, ok) - assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, GaugeType) + assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, GaugeType, histogram.GaugeType) } { // Zero threshold change. @@ -1213,7 +1223,7 @@ func TestHistogramChunkAppendableGauge(t *testing.T) { _, _, _, _, _, _, ok := hApp.appendableGauge(h2) require.False(t, ok) - assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, GaugeType) + assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, GaugeType, histogram.GaugeType) } { // New histogram that has more buckets. @@ -1388,7 +1398,7 @@ func TestHistogramChunkAppendableGauge(t *testing.T) { _, _, _, _, _, _, ok := hApp.appendableGauge(h2) require.False(t, ok) - assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, GaugeType) + assertNewHistogramChunkOnAppend(t, c, hApp, ts+1, h2, GaugeType, histogram.GaugeType) } { // Custom buckets, with more buckets. @@ -1487,7 +1497,7 @@ func TestHistogramAppendOnlyErrors(t *testing.T) { }) } -func TestHistogramUniqueSpansAfterNext(t *testing.T) { +func TestHistogramUniqueSpansAfterNextWithAtHistogram(t *testing.T) { // Create two histograms with the same schema and spans. h1 := &histogram.Histogram{ Schema: 1, @@ -1589,6 +1599,98 @@ func TestHistogramUniqueSpansAfterNextWithAtFloatHistogram(t *testing.T) { require.NotSame(t, &rh1.NegativeSpans[0], &rh2.NegativeSpans[0], "NegativeSpans should be unique between histograms") } +func TestHistogramUniqueCustomValuesAfterNextWithAtHistogram(t *testing.T) { + // Create two histograms with the same schema and custom values. + h1 := &histogram.Histogram{ + Schema: -53, + Count: 10, + Sum: 15.0, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{1, 2, 3, 4}, + CustomValues: []float64{10, 11, 12, 13}, + } + + h2 := h1.Copy() + + // Create a chunk and append both histograms. + c := NewHistogramChunk() + app, err := c.Appender() + require.NoError(t, err) + + _, _, _, err = app.AppendHistogram(nil, 0, h1, false) + require.NoError(t, err) + + _, _, _, err = app.AppendHistogram(nil, 1, h2, false) + require.NoError(t, err) + + // Create an iterator and advance to the first histogram. + it := c.Iterator(nil) + require.Equal(t, ValHistogram, it.Next()) + _, rh1 := it.AtHistogram(nil) + + // Advance to the second histogram and retrieve it. + require.Equal(t, ValHistogram, it.Next()) + _, rh2 := it.AtHistogram(nil) + + require.Equal(t, rh1.PositiveSpans, h1.PositiveSpans, "Returned positive spans are as expected") + require.Equal(t, rh1.CustomValues, h1.CustomValues, "Returned custom values are as expected") + require.Equal(t, rh2.PositiveSpans, h1.PositiveSpans, "Returned positive spans are as expected") + require.Equal(t, rh2.CustomValues, h1.CustomValues, "Returned custom values are as expected") + + // Check that the spans and custom values for h1 and h2 are unique slices. 
+ require.NotSame(t, &rh1.PositiveSpans[0], &rh2.PositiveSpans[0], "PositiveSpans should be unique between histograms") + require.NotSame(t, &rh1.CustomValues[0], &rh2.CustomValues[0], "CustomValues should be unique between histograms") +} + +func TestHistogramUniqueCustomValuesAfterNextWithAtFloatHistogram(t *testing.T) { + // Create two histograms with the same schema and custom values. + h1 := &histogram.Histogram{ + Schema: -53, + Count: 10, + Sum: 15.0, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{1, 2, 3, 4}, + CustomValues: []float64{10, 11, 12, 13}, + } + + h2 := h1.Copy() + + // Create a chunk and append both histograms. + c := NewHistogramChunk() + app, err := c.Appender() + require.NoError(t, err) + + _, _, _, err = app.AppendHistogram(nil, 0, h1, false) + require.NoError(t, err) + + _, _, _, err = app.AppendHistogram(nil, 1, h2, false) + require.NoError(t, err) + + // Create an iterator and advance to the first histogram. + it := c.Iterator(nil) + require.Equal(t, ValHistogram, it.Next()) + _, rh1 := it.AtFloatHistogram(nil) + + // Advance to the second histogram and retrieve it. + require.Equal(t, ValHistogram, it.Next()) + _, rh2 := it.AtFloatHistogram(nil) + + require.Equal(t, rh1.PositiveSpans, h1.PositiveSpans, "Returned positive spans are as expected") + require.Equal(t, rh1.CustomValues, h1.CustomValues, "Returned custom values are as expected") + require.Equal(t, rh2.PositiveSpans, h1.PositiveSpans, "Returned positive spans are as expected") + require.Equal(t, rh2.CustomValues, h1.CustomValues, "Returned custom values are as expected") + + // Check that the spans and custom values for h1 and h2 are unique slices. + require.NotSame(t, &rh1.PositiveSpans[0], &rh2.PositiveSpans[0], "PositiveSpans should be unique between histograms") + require.NotSame(t, &rh1.CustomValues[0], &rh2.CustomValues[0], "CustomValues should be unique between histograms") +} + func BenchmarkAppendable(b *testing.B) { // Create a histogram with a bunch of spans and buckets. const ( @@ -1635,3 +1737,10 @@ func BenchmarkAppendable(b *testing.B) { b.Fail() } } + +func assertFirstIntHistogramSampleHint(t *testing.T, chunk Chunk, expected histogram.CounterResetHint) { + it := chunk.Iterator(nil) + require.Equal(t, ValHistogram, it.Next()) + _, v := it.AtHistogram(nil) + require.Equal(t, expected, v.CounterResetHint) +} diff --git a/tsdb/chunkenc/xor.go b/tsdb/chunkenc/xor.go index 3177762f81..ac75a5994b 100644 --- a/tsdb/chunkenc/xor.go +++ b/tsdb/chunkenc/xor.go @@ -14,7 +14,7 @@ // The code in this file was largely written by Damian Gryski as part of // https://github.com/dgryski/go-tsz and published under the license below. // It was modified to accommodate reading from byte slices without modifying -// the underlying bytes, which would panic when reading from mmap'd +// the underlying bytes, which would panic when reading from mmapped // read-only byte slices. // Copyright (c) 2015,2016 Damian Gryski @@ -191,8 +191,8 @@ func (a *xorAppender) Append(t int64, v float64) { case dod == 0: a.b.writeBit(zero) case bitRange(dod, 14): - a.b.writeBits(0b10, 2) - a.b.writeBits(uint64(dod), 14) + a.b.writeByte(0b10<<6 | (uint8(dod>>8) & (1<<6 - 1))) // 0b10 size code combined with 6 bits of dod. + a.b.writeByte(uint8(dod)) // Bottom 8 bits of dod. 
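The xorAppender change above replaces writeBits(0b10, 2) plus writeBits(dod, 14) with two byte writes; 2 prefix bits and 14 value bits pack exactly into 16 bits, so the emitted bit pattern is unchanged while avoiding the generic bit loop. A quick self-contained check of that equivalence:

    package main

    import "fmt"

    func main() {
    	dod := int64(-1234) // any delta-of-delta that fits in 14 bits

    	// Old encoding: one 16-bit pattern, MSB first (2 prefix + 14 value bits).
    	old := uint16(0b10)<<14 | uint16(dod)&(1<<14-1)

    	// New encoding: two byte writes, as in the hunk above.
    	hi := byte(0b10<<6 | (uint8(dod>>8) & (1<<6 - 1))) // prefix + top 6 dod bits
    	lo := uint8(dod)                                   // bottom 8 dod bits

    	fmt.Printf("%016b\n%08b%08b\n", old, hi, lo) // identical bit patterns
    }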
case bitRange(dod, 17): a.b.writeBits(0b110, 3) a.b.writeBits(uint64(dod), 17) diff --git a/tsdb/chunks/chunk_write_queue.go b/tsdb/chunks/chunk_write_queue.go index 6d2dc743b0..ba9730d936 100644 --- a/tsdb/chunks/chunk_write_queue.go +++ b/tsdb/chunks/chunk_write_queue.go @@ -24,7 +24,7 @@ import ( ) const ( - // Minimum recorded peak since the last shrinking of chunkWriteQueue.chunkrefMap to shrink it again. + // Minimum recorded peak since the last shrinking of chunkWriteQueue.chunkRefMap to shrink it again. chunkRefMapShrinkThreshold = 1000 // Minimum interval between shrinking of chunkWriteQueue.chunkRefMap. diff --git a/tsdb/chunks/chunks.go b/tsdb/chunks/chunks.go index ec0f6d4036..f505d762bb 100644 --- a/tsdb/chunks/chunks.go +++ b/tsdb/chunks/chunks.go @@ -16,6 +16,7 @@ package chunks import ( "bufio" "encoding/binary" + "errors" "fmt" "hash" "hash/crc32" @@ -172,7 +173,7 @@ func ChunkFromSamplesGeneric(s Samples) (Meta, error) { return emptyChunk, err } if newChunk != nil { - return emptyChunk, fmt.Errorf("did not expect to start a second chunk") + return emptyChunk, errors.New("did not expect to start a second chunk") } case chunkenc.ValFloatHistogram: newChunk, _, ca, err = ca.AppendFloatHistogram(nil, s.Get(i).T(), s.Get(i).FH(), false) @@ -180,7 +181,7 @@ func ChunkFromSamplesGeneric(s Samples) (Meta, error) { return emptyChunk, err } if newChunk != nil { - return emptyChunk, fmt.Errorf("did not expect to start a second chunk") + return emptyChunk, errors.New("did not expect to start a second chunk") } default: panic(fmt.Sprintf("unknown sample type %s", sampleType.String())) @@ -250,7 +251,7 @@ func (cm *Meta) OverlapsClosedInterval(mint, maxt int64) bool { return cm.MinTime <= maxt && mint <= cm.MaxTime } -var errInvalidSize = fmt.Errorf("invalid size") +var errInvalidSize = errors.New("invalid size") var castagnoliTable *crc32.Table diff --git a/tsdb/chunks/samples.go b/tsdb/chunks/samples.go index 638660c70c..a5b16094df 100644 --- a/tsdb/chunks/samples.go +++ b/tsdb/chunks/samples.go @@ -29,6 +29,7 @@ type Sample interface { H() *histogram.Histogram FH() *histogram.FloatHistogram Type() chunkenc.ValueType + Copy() Sample // Returns a deep copy. } type SampleSlice []Sample @@ -70,6 +71,17 @@ func (s sample) Type() chunkenc.ValueType { } } +func (s sample) Copy() Sample { + c := sample{t: s.t, f: s.f} + if s.h != nil { + c.h = s.h.Copy() + } + if s.fh != nil { + c.fh = s.fh.Copy() + } + return c +} + // GenerateSamples starting at start and counting up numSamples. func GenerateSamples(start, numSamples int) []Sample { return generateSamples(start, numSamples, func(i int) Sample { diff --git a/tsdb/compact.go b/tsdb/compact.go index 9ef42b339b..31b445f227 100644 --- a/tsdb/compact.go +++ b/tsdb/compact.go @@ -19,15 +19,15 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "path/filepath" "slices" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" @@ -80,13 +80,14 @@ type Compactor interface { // LeveledCompactor implements the Compactor interface. 
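The Sample.Copy addition in tsdb/chunks/samples.go above returns a deep copy because a sample can hold *histogram pointers, and a plain struct copy would alias their span and bucket slices. A small sketch of why that matters (the sample struct is redeclared locally for illustration):

    package main

    import (
    	"fmt"

    	"github.com/prometheus/prometheus/model/histogram"
    )

    type sample struct {
    	t int64
    	f float64
    	h *histogram.Histogram
    }

    func (s sample) copySample() sample {
    	c := sample{t: s.t, f: s.f}
    	if s.h != nil {
    		c.h = s.h.Copy() // fresh spans/buckets slices
    	}
    	return c
    }

    func main() {
    	s := sample{t: 1, h: &histogram.Histogram{PositiveBuckets: []int64{1, 2}}}
    	shallow := s           // shares s.h and its slices
    	deep := s.copySample() // independent
    	s.h.PositiveBuckets[0] = 99
    	fmt.Println(shallow.h.PositiveBuckets[0], deep.h.PositiveBuckets[0]) // 99 1
    }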
type LeveledCompactor struct { metrics *CompactorMetrics - logger log.Logger + logger *slog.Logger ranges []int64 chunkPool chunkenc.Pool ctx context.Context maxBlockChunkSegmentSize int64 mergeFunc storage.VerticalChunkSeriesMergeFunc postingsEncoder index.PostingsEncoder + postingsDecoderFactory PostingsDecoderFactory enableOverlappingCompaction bool } @@ -158,6 +159,9 @@ type LeveledCompactorOptions struct { // PE specifies the postings encoder. It is called when compactor is writing out the postings for a label name/value pair during compaction. // If it is nil then the default encoder is used. At the moment that is the "raw" encoder. See index.EncodePostingsRaw for more. PE index.PostingsEncoder + // PD specifies the postings decoder factory to return different postings decoder based on BlockMeta. It is called when opening a block or opening the index file. + // If it is nil then a default decoder is used, compatible with Prometheus v2. + PD PostingsDecoderFactory // MaxBlockChunkSegmentSize is the max block chunk segment size. If it is 0 then the default chunks.DefaultChunkSegmentSize is used. MaxBlockChunkSegmentSize int64 // MergeFunc is used for merging series together in vertical compaction. By default storage.NewCompactingChunkSeriesMerger(storage.ChainedSeriesMerge) is used. @@ -167,7 +171,13 @@ type LeveledCompactorOptions struct { EnableOverlappingCompaction bool } -func NewLeveledCompactorWithChunkSize(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, maxBlockChunkSegmentSize int64, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { +type PostingsDecoderFactory func(meta *BlockMeta) index.PostingsDecoder + +func DefaultPostingsDecoderFactory(_ *BlockMeta) index.PostingsDecoder { + return index.DecodePostingsRaw +} + +func NewLeveledCompactorWithChunkSize(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, maxBlockChunkSegmentSize int64, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { return NewLeveledCompactorWithOptions(ctx, r, l, ranges, pool, LeveledCompactorOptions{ MaxBlockChunkSegmentSize: maxBlockChunkSegmentSize, MergeFunc: mergeFunc, @@ -175,22 +185,22 @@ func NewLeveledCompactorWithChunkSize(ctx context.Context, r prometheus.Register }) } -func NewLeveledCompactor(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { +func NewLeveledCompactor(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc) (*LeveledCompactor, error) { return NewLeveledCompactorWithOptions(ctx, r, l, ranges, pool, LeveledCompactorOptions{ MergeFunc: mergeFunc, EnableOverlappingCompaction: true, }) } -func NewLeveledCompactorWithOptions(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, opts LeveledCompactorOptions) (*LeveledCompactor, error) { +func NewLeveledCompactorWithOptions(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts LeveledCompactorOptions) (*LeveledCompactor, error) { if len(ranges) == 0 { - return nil, fmt.Errorf("at least one range must be provided") + return nil, errors.New("at least one range must be provided") } if pool == nil { pool = chunkenc.NewPool() } if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } 
mergeFunc := opts.MergeFunc if mergeFunc == nil { @@ -213,6 +223,7 @@ func NewLeveledCompactorWithOptions(ctx context.Context, r prometheus.Registerer maxBlockChunkSegmentSize: maxBlockChunkSegmentSize, mergeFunc: mergeFunc, postingsEncoder: pe, + postingsDecoderFactory: opts.PD, enableOverlappingCompaction: opts.EnableOverlappingCompaction, }, nil } @@ -477,7 +488,7 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string, if b == nil { var err error - b, err = OpenBlock(c.logger, d, c.chunkPool) + b, err = OpenBlock(c.logger, d, c.chunkPool, c.postingsDecoderFactory) if err != nil { return nil, err } @@ -500,15 +511,15 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string, b.meta.Compaction.Deletable = true n, err := writeMetaFile(c.logger, b.dir, &b.meta) if err != nil { - level.Error(c.logger).Log( - "msg", "Failed to write 'Deletable' to meta file after compaction", + c.logger.Error( + "Failed to write 'Deletable' to meta file after compaction", "ulid", b.meta.ULID, ) } b.numBytesMeta = n } - level.Info(c.logger).Log( - "msg", "compact blocks resulted in empty block", + c.logger.Info( + "compact blocks resulted in empty block", "count", len(blocks), "sources", fmt.Sprintf("%v", uids), "duration", time.Since(start), @@ -516,8 +527,8 @@ func (c *LeveledCompactor) CompactWithBlockPopulator(dest string, dirs []string, return nil, nil } - level.Info(c.logger).Log( - "msg", "compact blocks", + c.logger.Info( + "compact blocks", "count", len(blocks), "mint", meta.MinTime, "maxt", meta.MaxTime, @@ -568,8 +579,8 @@ func (c *LeveledCompactor) Write(dest string, b BlockReader, mint, maxt int64, b } if meta.Stats.NumSamples == 0 { - level.Info(c.logger).Log( - "msg", "write block resulted in empty block", + c.logger.Info( + "write block resulted in empty block", "mint", meta.MinTime, "maxt", meta.MaxTime, "duration", time.Since(start), @@ -577,8 +588,8 @@ func (c *LeveledCompactor) Write(dest string, b BlockReader, mint, maxt int64, b return nil, nil } - level.Info(c.logger).Log( - "msg", "write block", + c.logger.Info( + "write block", "mint", meta.MinTime, "maxt", meta.MaxTime, "ulid", meta.ULID, @@ -617,7 +628,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blockPopulator Bl // RemoveAll returns no error when tmp doesn't exist so it is safe to always run it. if err := os.RemoveAll(tmp); err != nil { - level.Error(c.logger).Log("msg", "removed tmp folder after failed compaction", "err", err.Error()) + c.logger.Error("removed tmp folder after failed compaction", "err", err.Error()) } c.metrics.Ran.Inc() c.metrics.Duration.Observe(time.Since(t).Seconds()) @@ -722,7 +733,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blockPopulator Bl } type BlockPopulator interface { - PopulateBlock(ctx context.Context, metrics *CompactorMetrics, logger log.Logger, chunkPool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc, blocks []BlockReader, meta *BlockMeta, indexw IndexWriter, chunkw ChunkWriter, postingsFunc IndexReaderPostingsFunc) error + PopulateBlock(ctx context.Context, metrics *CompactorMetrics, logger *slog.Logger, chunkPool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc, blocks []BlockReader, meta *BlockMeta, indexw IndexWriter, chunkw ChunkWriter, postingsFunc IndexReaderPostingsFunc) error } // IndexReaderPostingsFunc is a function to get a sorted posting iterator from a given index reader. 
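The PD/PostingsDecoderFactory option above lets callers pick a postings decoder per block based on its meta; leaving it nil (or using DefaultPostingsDecoderFactory) keeps the Prometheus v2 raw format. A hedged sketch of wiring a custom factory through Options (the meta inspection is hypothetical):

    package main

    import (
    	"github.com/prometheus/prometheus/tsdb"
    	"github.com/prometheus/prometheus/tsdb/index"
    )

    func main() {
    	opts := tsdb.DefaultOptions()
    	// DefaultOptions now wires DefaultPostingsDecoderFactory; a custom
    	// factory can switch decoders per block.
    	opts.PostingsDecoderFactory = func(meta *tsdb.BlockMeta) index.PostingsDecoder {
    		// e.g. inspect meta.Version or an out-of-band marker here.
    		return index.DecodePostingsRaw
    	}
    	_ = opts
    }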
@@ -743,7 +754,7 @@ type DefaultBlockPopulator struct{} // PopulateBlock fills the index and chunk writers with new data gathered as the union // of the provided blocks. It returns meta information for the new block. // It expects sorted blocks input by mint. -func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *CompactorMetrics, logger log.Logger, chunkPool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc, blocks []BlockReader, meta *BlockMeta, indexw IndexWriter, chunkw ChunkWriter, postingsFunc IndexReaderPostingsFunc) (err error) { +func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *CompactorMetrics, logger *slog.Logger, chunkPool chunkenc.Pool, mergeFunc storage.VerticalChunkSeriesMergeFunc, blocks []BlockReader, meta *BlockMeta, indexw IndexWriter, chunkw ChunkWriter, postingsFunc IndexReaderPostingsFunc) (err error) { if len(blocks) == 0 { return errors.New("cannot populate block from no readers") } @@ -776,7 +787,7 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa if i > 0 && b.Meta().MinTime < globalMaxt { metrics.OverlappingBlocks.Inc() overlapping = true - level.Info(logger).Log("msg", "Found overlapping blocks during compaction", "ulid", meta.ULID) + logger.Info("Found overlapping blocks during compaction", "ulid", meta.ULID) } if b.Meta().MaxTime > globalMaxt { globalMaxt = b.Meta().MaxTime @@ -831,7 +842,7 @@ func (c DefaultBlockPopulator) PopulateBlock(ctx context.Context, metrics *Compa if len(sets) > 1 { // Merge series using specified chunk series merger. // The default one is the compacting series merger. - set = storage.NewMergeChunkSeriesSet(sets, mergeFunc) + set = storage.NewMergeChunkSeriesSet(sets, 0, mergeFunc) } // Iterate over all sorted chunk series. 
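The same factory is threaded through OpenBlock as a new fourth parameter, which is why the call sites in the test diffs below now pass an extra nil. A sketch of opening a block directory directly, assuming a placeholder path (nil logger, chunk pool, and decoder factory all fall back to defaults):

    package main

    import (
    	"log"

    	"github.com/prometheus/prometheus/tsdb"
    )

    func main() {
    	// Hypothetical block directory; the three nils select the defaults,
    	// matching the updated test call sites.
    	block, err := tsdb.OpenBlock(nil, "/data/01BKGV7JBM69T2G1BGBGM6KB12", nil, nil)
    	if err != nil {
    		log.Fatal(err)
    	}
    	defer block.Close()
    	log.Println("block time range:", block.MinTime(), "-", block.MaxTime())
    }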
diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index e7998abf7d..310fe8f25a 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -28,9 +28,9 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/oklog/ulid" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/histogram" @@ -434,7 +434,7 @@ func TestRangeWithFailedCompactionWontGetSelected(t *testing.T) { } func TestCompactionFailWillCleanUpTempDir(t *testing.T) { - compactor, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{ + compactor, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{ 20, 60, 240, @@ -1045,8 +1045,7 @@ func TestCompaction_populateBlock(t *testing.T) { } err = blockPopulator.PopulateBlock(c.ctx, c.metrics, c.logger, c.chunkPool, c.mergeFunc, blocks, meta, iw, nopChunkWriter{}, irPostingsFunc) if tc.expErr != nil { - require.Error(t, err) - require.Equal(t, tc.expErr.Error(), err.Error()) + require.EqualError(t, err, tc.expErr.Error()) return } require.NoError(t, err) @@ -1154,7 +1153,7 @@ func BenchmarkCompaction(b *testing.B) { blockDirs := make([]string, 0, len(c.ranges)) var blocks []*Block for _, r := range c.ranges { - block, err := OpenBlock(nil, createBlock(b, dir, genSeries(nSeries, 10, r[0], r[1])), nil) + block, err := OpenBlock(nil, createBlock(b, dir, genSeries(nSeries, 10, r[0], r[1])), nil, nil) require.NoError(b, err) blocks = append(blocks, block) defer func() { @@ -1163,7 +1162,7 @@ func BenchmarkCompaction(b *testing.B) { blockDirs = append(blockDirs, block.Dir()) } - c, err := NewLeveledCompactor(context.Background(), nil, log.NewNopLogger(), []int64{0}, nil, nil) + c, err := NewLeveledCompactor(context.Background(), nil, promslog.NewNopLogger(), []int64{0}, nil, nil) require.NoError(b, err) b.ResetTimer() @@ -1319,7 +1318,7 @@ func TestCancelCompactions(t *testing.T) { // Measure the compaction time without interrupting it. var timeCompactionUninterrupted time.Duration { - db, err := open(tmpdir, log.NewNopLogger(), nil, DefaultOptions(), []int64{1, 2000}, nil) + db, err := open(tmpdir, promslog.NewNopLogger(), nil, DefaultOptions(), []int64{1, 2000}, nil) require.NoError(t, err) require.Len(t, db.Blocks(), 3, "initial block count mismatch") require.Equal(t, 0.0, prom_testutil.ToFloat64(db.compactor.(*LeveledCompactor).metrics.Ran), "initial compaction counter mismatch") @@ -1338,7 +1337,7 @@ func TestCancelCompactions(t *testing.T) { } // Measure the compaction time when closing the db in the middle of compaction. { - db, err := open(tmpdirCopy, log.NewNopLogger(), nil, DefaultOptions(), []int64{1, 2000}, nil) + db, err := open(tmpdirCopy, promslog.NewNopLogger(), nil, DefaultOptions(), []int64{1, 2000}, nil) require.NoError(t, err) require.Len(t, db.Blocks(), 3, "initial block count mismatch") require.Equal(t, 0.0, prom_testutil.ToFloat64(db.compactor.(*LeveledCompactor).metrics.Ran), "initial compaction counter mismatch") @@ -1359,7 +1358,7 @@ func TestCancelCompactions(t *testing.T) { // This checks that the `context.Canceled` error is properly checked at all levels: // - tsdb_errors.NewMulti() should have the Is() method implemented for correct checks. // - callers should check with errors.Is() instead of ==. 
- readOnlyDB, err := OpenDBReadOnly(tmpdirCopy, "", log.NewNopLogger()) + readOnlyDB, err := OpenDBReadOnly(tmpdirCopy, "", promslog.NewNopLogger()) require.NoError(t, err) blocks, err := readOnlyDB.Blocks() require.NoError(t, err) @@ -1371,7 +1370,7 @@ func TestCancelCompactions(t *testing.T) { } // TestDeleteCompactionBlockAfterFailedReload ensures that a failed reloadBlocks immediately after a compaction -// deletes the resulting block to avoid creatings blocks with the same time range. +// deletes the resulting block to avoid creating blocks with the same time range. func TestDeleteCompactionBlockAfterFailedReload(t *testing.T) { tests := map[string]func(*DB) int{ "Test Head Compaction": func(db *DB) int { @@ -1550,7 +1549,7 @@ func TestHeadCompactionWithHistograms(t *testing.T) { require.Len(t, ids, 1) // Open the block and query it and check the histograms. - block, err := OpenBlock(nil, path.Join(head.opts.ChunkDirRoot, ids[0].String()), nil) + block, err := OpenBlock(nil, path.Join(head.opts.ChunkDirRoot, ids[0].String()), nil, nil) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, block.Close()) @@ -1912,13 +1911,13 @@ func TestCompactEmptyResultBlockWithTombstone(t *testing.T) { ctx := context.Background() tmpdir := t.TempDir() blockDir := createBlock(t, tmpdir, genSeries(1, 1, 0, 10)) - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) // Write tombstone covering the whole block. err = block.Delete(ctx, 0, 10, labels.MustNewMatcher(labels.MatchEqual, defaultLabelName, "0")) require.NoError(t, err) - c, err := NewLeveledCompactor(ctx, nil, log.NewNopLogger(), []int64{0}, nil, nil) + c, err := NewLeveledCompactor(ctx, nil, promslog.NewNopLogger(), []int64{0}, nil, nil) require.NoError(t, err) ulids, err := c.Compact(tmpdir, []string{blockDir}, []*Block{block}) @@ -2114,7 +2113,7 @@ func TestDelayedCompactionDoesNotBlockUnrelatedOps(t *testing.T) { t.Parallel() tmpdir := t.TempDir() - // Some blocks that need compation are present. + // Some blocks that need compaction are present. createBlock(t, tmpdir, genSeries(1, 1, 0, 100)) createBlock(t, tmpdir, genSeries(1, 1, 100, 200)) createBlock(t, tmpdir, genSeries(1, 1, 200, 300)) @@ -2122,7 +2121,7 @@ func TestDelayedCompactionDoesNotBlockUnrelatedOps(t *testing.T) { options := DefaultOptions() // This will make the test timeout if compaction really waits for it. options.CompactionDelay = time.Hour - db, err := open(tmpdir, log.NewNopLogger(), nil, options, []int64{10, 200}, nil) + db, err := open(tmpdir, promslog.NewNopLogger(), nil, options, []int64{10, 200}, nil) require.NoError(t, err) defer func() { require.NoError(t, db.Close()) diff --git a/tsdb/db.go b/tsdb/db.go index a5b3a5e602..2cfa4f3638 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -20,6 +20,7 @@ import ( "fmt" "io" "io/fs" + "log/slog" "math" "math/rand" "os" @@ -29,10 +30,9 @@ import ( "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "go.uber.org/atomic" "golang.org/x/sync/errgroup" @@ -52,6 +52,9 @@ const ( // DefaultBlockDuration in milliseconds. DefaultBlockDuration = int64(2 * time.Hour / time.Millisecond) + // DefaultCompactionDelayMaxPercent in percentage. + DefaultCompactionDelayMaxPercent = 10 + // Block dir suffixes to make deletion and creation operations atomic. 
// We decided to do suffixes instead of creating meta.json as last (or delete as first) one, // because in error case you still can recover meta.json from the block content within local TSDB dir. @@ -86,7 +89,9 @@ func DefaultOptions() *Options { EnableOverlappingCompaction: true, EnableSharding: false, EnableDelayedCompaction: false, + CompactionDelayMaxPercent: DefaultCompactionDelayMaxPercent, CompactionDelay: time.Duration(0), + PostingsDecoderFactory: DefaultPostingsDecoderFactory, } } @@ -173,6 +178,12 @@ type Options struct { // EnableNativeHistograms enables the ingestion of native histograms. EnableNativeHistograms bool + // EnableOOONativeHistograms enables the ingestion of OOO native histograms. + // It will only take effect if EnableNativeHistograms is set to true and the + // OutOfOrderTimeWindow is > 0. This flag will be removed after testing of + // OOO Native Histogram ingestion is complete. + EnableOOONativeHistograms bool + // OutOfOrderTimeWindow specifies how much out of order is allowed, if any. // This can change during run-time, so this value from here should only be used // while initialising. @@ -198,6 +209,8 @@ type Options struct { // CompactionDelay delays the start time of auto compactions. // It can be increased by up to one minute if the DB does not commit too often. CompactionDelay time.Duration + // CompactionDelayMaxPercent is the upper limit for CompactionDelay, specified as a percentage of the head chunk range. + CompactionDelayMaxPercent int // NewCompactorFunc is a function that returns a TSDB compactor. NewCompactorFunc NewCompactorFunc @@ -207,9 +220,13 @@ type Options struct { // BlockChunkQuerierFunc is a function to return storage.ChunkQuerier from a BlockReader. BlockChunkQuerierFunc BlockChunkQuerierFunc + + // PostingsDecoderFactory allows users to customize postings decoders based on BlockMeta. + // By default, DefaultPostingsDecoderFactory will be used to create raw posting decoder. + PostingsDecoderFactory PostingsDecoderFactory } -type NewCompactorFunc func(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) +type NewCompactorFunc func(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) type BlocksToDeleteFunc func(blocks []*Block) map[ulid.ULID]struct{} @@ -223,7 +240,7 @@ type DB struct { dir string locker *tsdbutil.DirLocker - logger log.Logger + logger *slog.Logger metrics *dbMetrics opts *Options chunkPool chunkenc.Pool @@ -414,7 +431,7 @@ var ErrClosed = errors.New("db already closed") // Current implementation doesn't support concurrency so // all API calls should happen in the same go routine. type DBReadOnly struct { - logger log.Logger + logger *slog.Logger dir string sandboxDir string closers []io.Closer @@ -422,7 +439,7 @@ type DBReadOnly struct { } // OpenDBReadOnly opens DB in the given directory for read only operations. 
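The new EnableOOONativeHistograms option above is deliberately gated: per its comment, it only takes effect together with EnableNativeHistograms and a non-zero OutOfOrderTimeWindow. A minimal sketch of an Options setup that satisfies both conditions:

    package main

    import (
    	"time"

    	"github.com/prometheus/prometheus/tsdb"
    )

    func main() {
    	opts := tsdb.DefaultOptions()
    	opts.EnableNativeHistograms = true
    	// No-op unless native histograms are enabled and the OOO window is > 0.
    	opts.EnableOOONativeHistograms = true
    	opts.OutOfOrderTimeWindow = (30 * time.Minute).Milliseconds()
    	_ = opts
    }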
-func OpenDBReadOnly(dir, sandboxDirRoot string, l log.Logger) (*DBReadOnly, error) { +func OpenDBReadOnly(dir, sandboxDirRoot string, l *slog.Logger) (*DBReadOnly, error) { if _, err := os.Stat(dir); err != nil { return nil, fmt.Errorf("opening the db dir: %w", err) } @@ -436,7 +453,7 @@ func OpenDBReadOnly(dir, sandboxDirRoot string, l log.Logger) (*DBReadOnly, erro } if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } return &DBReadOnly{ @@ -621,7 +638,7 @@ func (db *DBReadOnly) Blocks() ([]BlockReader, error) { return nil, ErrClosed default: } - loadable, corrupted, err := openBlocks(db.logger, db.dir, nil, nil) + loadable, corrupted, err := openBlocks(db.logger, db.dir, nil, nil, DefaultPostingsDecoderFactory) if err != nil { return nil, err } @@ -635,7 +652,7 @@ func (db *DBReadOnly) Blocks() ([]BlockReader, error) { if len(corrupted) > 0 { for _, b := range loadable { if err := b.Close(); err != nil { - level.Warn(db.logger).Log("msg", "Closing block failed", "err", err, "block", b) + db.logger.Warn("Closing block failed", "err", err, "block", b) } } errs := tsdb_errors.NewMulti() @@ -667,7 +684,7 @@ func (db *DBReadOnly) Blocks() ([]BlockReader, error) { blockMetas = append(blockMetas, b.Meta()) } if overlaps := OverlappingBlocks(blockMetas); len(overlaps) > 0 { - level.Warn(db.logger).Log("msg", "Overlapping blocks found during opening", "detail", overlaps.String()) + db.logger.Warn("Overlapping blocks found during opening", "detail", overlaps.String()) } // Close all previously open readers and add the new ones to the cache. @@ -719,7 +736,7 @@ func (db *DBReadOnly) LastBlockID() (string, error) { } // Block returns a block reader by given block id. -func (db *DBReadOnly) Block(blockID string) (BlockReader, error) { +func (db *DBReadOnly) Block(blockID string, postingsDecoderFactory PostingsDecoderFactory) (BlockReader, error) { select { case <-db.closed: return nil, ErrClosed @@ -731,7 +748,7 @@ func (db *DBReadOnly) Block(blockID string) (BlockReader, error) { return nil, fmt.Errorf("invalid block ID %s", blockID) } - block, err := OpenBlock(db.logger, filepath.Join(db.dir, blockID), nil) + block, err := OpenBlock(db.logger, filepath.Join(db.dir, blockID), nil, postingsDecoderFactory) if err != nil { return nil, err } @@ -745,7 +762,7 @@ func (db *DBReadOnly) Close() error { defer func() { // Delete the temporary sandbox directory that was created when opening the DB. if err := os.RemoveAll(db.sandboxDir); err != nil { - level.Error(db.logger).Log("msg", "delete sandbox dir", "err", err) + db.logger.Error("delete sandbox dir", "err", err) } }() select { @@ -759,7 +776,7 @@ func (db *DBReadOnly) Close() error { } // Open returns a new DB in the given directory. If options are empty, DefaultOptions will be used. -func Open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, stats *DBStats) (db *DB, err error) { +func Open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, stats *DBStats) (db *DB, err error) { var rngs []int64 opts, rngs = validateOpts(opts, nil) @@ -809,12 +826,12 @@ func validateOpts(opts *Options, rngs []int64) (*Options, []int64) { // open returns a new DB in the given directory. // It initializes the lockfile, WAL, compactor, and Head (by replaying the WAL), and runs the database. // It is not safe to open more than one DB in the same directory. 
-func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs []int64, stats *DBStats) (_ *DB, returnedErr error) { +func open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, rngs []int64, stats *DBStats) (_ *DB, returnedErr error) { if err := os.MkdirAll(dir, 0o777); err != nil { return nil, err } if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } if stats == nil { stats = NewDBStats() @@ -890,6 +907,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs db.compactor, err = NewLeveledCompactorWithOptions(ctx, r, l, rngs, db.chunkPool, LeveledCompactorOptions{ MaxBlockChunkSegmentSize: opts.MaxBlockChunkSegmentSize, EnableOverlappingCompaction: opts.EnableOverlappingCompaction, + PD: opts.PostingsDecoderFactory, }) } if err != nil { @@ -948,6 +966,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs headOpts.MaxExemplars.Store(opts.MaxExemplars) headOpts.EnableMemorySnapshotOnShutdown = opts.EnableMemorySnapshotOnShutdown headOpts.EnableNativeHistograms.Store(opts.EnableNativeHistograms) + headOpts.EnableOOONativeHistograms.Store(opts.EnableOOONativeHistograms) headOpts.OutOfOrderTimeWindow.Store(opts.OutOfOrderTimeWindow) headOpts.OutOfOrderCapMax.Store(opts.OutOfOrderCapMax) headOpts.EnableSharding = opts.EnableSharding @@ -991,17 +1010,17 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs db.head.metrics.walCorruptionsTotal.Inc() var e *errLoadWbl if errors.As(initErr, &e) { - level.Warn(db.logger).Log("msg", "Encountered WBL read error, attempting repair", "err", initErr) + db.logger.Warn("Encountered WBL read error, attempting repair", "err", initErr) if err := wbl.Repair(e.err); err != nil { return nil, fmt.Errorf("repair corrupted WBL: %w", err) } - level.Info(db.logger).Log("msg", "Successfully repaired WBL") + db.logger.Info("Successfully repaired WBL") } else { - level.Warn(db.logger).Log("msg", "Encountered WAL read error, attempting repair", "err", initErr) + db.logger.Warn("Encountered WAL read error, attempting repair", "err", initErr) if err := wal.Repair(initErr); err != nil { return nil, fmt.Errorf("repair corrupted WAL: %w", err) } - level.Info(db.logger).Log("msg", "Successfully repaired WAL") + db.logger.Info("Successfully repaired WAL") } } @@ -1019,7 +1038,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs return db, nil } -func removeBestEffortTmpDirs(l log.Logger, dir string) error { +func removeBestEffortTmpDirs(l *slog.Logger, dir string) error { files, err := os.ReadDir(dir) if os.IsNotExist(err) { return nil @@ -1030,10 +1049,10 @@ func removeBestEffortTmpDirs(l log.Logger, dir string) error { for _, f := range files { if isTmpDir(f) { if err := os.RemoveAll(filepath.Join(dir, f.Name())); err != nil { - level.Error(l).Log("msg", "failed to delete tmp block dir", "dir", filepath.Join(dir, f.Name()), "err", err) + l.Error("failed to delete tmp block dir", "dir", filepath.Join(dir, f.Name()), "err", err) continue } - level.Info(l).Log("msg", "Found and deleted tmp block dir", "dir", filepath.Join(dir, f.Name())) + l.Info("Found and deleted tmp block dir", "dir", filepath.Join(dir, f.Name())) } } return nil @@ -1071,7 +1090,7 @@ func (db *DB) run(ctx context.Context) { case <-time.After(1 * time.Minute): db.cmtx.Lock() if err := db.reloadBlocks(); err != nil { - level.Error(db.logger).Log("msg", "reloadBlocks", "err", err) + db.logger.Error("reloadBlocks", "err", err) } 
db.cmtx.Unlock() @@ -1087,7 +1106,7 @@ func (db *DB) run(ctx context.Context) { db.autoCompactMtx.Lock() if db.autoCompact { if err := db.Compact(ctx); err != nil { - level.Error(db.logger).Log("msg", "compaction failed", "err", err) + db.logger.Error("compaction failed", "err", err) backoff = exponential(backoff, 1*time.Second, 1*time.Minute) } else { backoff = 0 @@ -1172,6 +1191,16 @@ func (db *DB) DisableNativeHistograms() { db.head.DisableNativeHistograms() } +// EnableOOONativeHistograms enables the ingestion of out-of-order native histograms. +func (db *DB) EnableOOONativeHistograms() { + db.head.EnableOOONativeHistograms() +} + +// DisableOOONativeHistograms disables the ingestion of out-of-order native histograms. +func (db *DB) DisableOOONativeHistograms() { + db.head.DisableOOONativeHistograms() +} + // dbAppender wraps the DB's head appender and triggers compactions on commit // if necessary. type dbAppender struct { @@ -1291,8 +1320,8 @@ func (db *DB) Compact(ctx context.Context) (returnErr error) { compactionDuration := time.Since(start) if compactionDuration.Milliseconds() > db.head.chunkRange.Load() { - level.Warn(db.logger).Log( - "msg", "Head compaction took longer than the block time range, compactions are falling behind and won't be able to catch up", + db.logger.Warn( + "Head compaction took longer than the block time range, compactions are falling behind and won't be able to catch up", "duration", compactionDuration.String(), "block_range", db.head.chunkRange.Load(), ) @@ -1416,15 +1445,15 @@ func (db *DB) compactOOO(dest string, oooHead *OOOCompactionHead) (_ []ulid.ULID } if len(ulids) == 0 { - level.Info(db.logger).Log( - "msg", "compact ooo head resulted in no blocks", + db.logger.Info( + "compact ooo head resulted in no blocks", "duration", time.Since(start), ) return nil, nil } - level.Info(db.logger).Log( - "msg", "out-of-order compaction completed", + db.logger.Info( + "out-of-order compaction completed", "duration", time.Since(start), "ulids", fmt.Sprintf("%v", ulids), ) @@ -1466,7 +1495,7 @@ func (db *DB) compactBlocks() (err error) { // long enough that we end up with a HEAD block that needs to be written. // Check if that's the case and stop compactions early. if db.head.compactable() && !db.waitingForCompactionDelay() { - level.Warn(db.logger).Log("msg", "aborting block compactions to persit the head block") + db.logger.Warn("aborting block compactions to persist the head block") return nil } @@ -1545,7 +1574,7 @@ func (db *DB) reloadBlocks() (err error) { db.mtx.Lock() defer db.mtx.Unlock() - loadable, corrupted, err := openBlocks(db.logger, db.dir, db.blocks, db.chunkPool) + loadable, corrupted, err := openBlocks(db.logger, db.dir, db.blocks, db.chunkPool, db.opts.PostingsDecoderFactory) if err != nil { return err } @@ -1562,7 +1591,7 @@ func (db *DB) reloadBlocks() (err error) { for _, b := range block.Meta().Compaction.Parents { if _, ok := corrupted[b.ULID]; ok { delete(corrupted, b.ULID) - level.Warn(db.logger).Log("msg", "Found corrupted block, but replaced by compacted one so it's safe to delete. This should not happen with atomic deletes.", "block", b.ULID) + db.logger.Warn("Found corrupted block, but replaced by compacted one so it's safe to delete. 
This should not happen with atomic deletes.", "block", b.ULID) } deletable[b.ULID] = nil } @@ -1624,7 +1653,7 @@ func (db *DB) reloadBlocks() (err error) { blockMetas = append(blockMetas, b.Meta()) } if overlaps := OverlappingBlocks(blockMetas); len(overlaps) > 0 { - level.Warn(db.logger).Log("msg", "Overlapping blocks found during reloadBlocks", "detail", overlaps.String()) + db.logger.Warn("Overlapping blocks found during reloadBlocks", "detail", overlaps.String()) } } @@ -1640,7 +1669,7 @@ func (db *DB) reloadBlocks() (err error) { return nil } -func openBlocks(l log.Logger, dir string, loaded []*Block, chunkPool chunkenc.Pool) (blocks []*Block, corrupted map[ulid.ULID]error, err error) { +func openBlocks(l *slog.Logger, dir string, loaded []*Block, chunkPool chunkenc.Pool, postingsDecoderFactory PostingsDecoderFactory) (blocks []*Block, corrupted map[ulid.ULID]error, err error) { bDirs, err := blockDirs(dir) if err != nil { return nil, nil, fmt.Errorf("find blocks: %w", err) @@ -1650,14 +1679,14 @@ func openBlocks(l log.Logger, dir string, loaded []*Block, chunkPool chunkenc.Po for _, bDir := range bDirs { meta, _, err := readMetaFile(bDir) if err != nil { - level.Error(l).Log("msg", "Failed to read meta.json for a block during reloadBlocks. Skipping", "dir", bDir, "err", err) + l.Error("Failed to read meta.json for a block during reloadBlocks. Skipping", "dir", bDir, "err", err) continue } // See if we already have the block in memory or open it otherwise. block, open := getBlock(loaded, meta.ULID) if !open { - block, err = OpenBlock(l, bDir, chunkPool) + block, err = OpenBlock(l, bDir, chunkPool, postingsDecoderFactory) if err != nil { corrupted[meta.ULID] = err continue @@ -1767,7 +1796,7 @@ func (db *DB) deleteBlocks(blocks map[ulid.ULID]*Block) error { for ulid, block := range blocks { if block != nil { if err := block.Close(); err != nil { - level.Warn(db.logger).Log("msg", "Closing block failed", "err", err, "block", ulid) + db.logger.Warn("Closing block failed", "err", err, "block", ulid) } } @@ -1788,7 +1817,7 @@ func (db *DB) deleteBlocks(blocks map[ulid.ULID]*Block) error { if err := os.RemoveAll(tmpToDelete); err != nil { return fmt.Errorf("delete obsolete block %s: %w", ulid, err) } - level.Info(db.logger).Log("msg", "Deleting obsolete block", "block", ulid) + db.logger.Info("Deleting obsolete block", "block", ulid) } return nil @@ -1956,7 +1985,7 @@ func (db *DB) DisableCompactions() { defer db.autoCompactMtx.Unlock() db.autoCompact = false - level.Info(db.logger).Log("msg", "Compactions disabled") + db.logger.Info("Compactions disabled") } // EnableCompactions enables auto compactions. @@ -1965,12 +1994,11 @@ func (db *DB) EnableCompactions() { defer db.autoCompactMtx.Unlock() db.autoCompact = true - level.Info(db.logger).Log("msg", "Compactions enabled") + db.logger.Info("Compactions enabled") } func (db *DB) generateCompactionDelay() time.Duration { - // Up to 10% of the head's chunkRange. - return time.Duration(rand.Int63n(db.head.chunkRange.Load()/10)) * time.Millisecond + return time.Duration(rand.Int63n(db.head.chunkRange.Load()*int64(db.opts.CompactionDelayMaxPercent)/100)) * time.Millisecond } // ForceHeadMMap is intended for use only in tests and benchmarks. @@ -1982,10 +2010,10 @@ func (db *DB) ForceHeadMMap() { // will create a new block containing all data that's currently in the memory buffer/WAL. 
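Note on the generateCompactionDelay change above: the hard-coded "up to 10% of the head's chunkRange" becomes configurable through CompactionDelayMaxPercent. A worked sketch of the arithmetic, assuming a default 2h chunk range (stored in milliseconds) and a percentage of 10, which reproduces the old behaviour; both values are assumptions for illustration:

package main

import (
	"fmt"
	"math/rand"
	"time"
)

func main() {
	chunkRangeMs := (2 * time.Hour).Milliseconds() // 7,200,000 ms (assumed default)
	compactionDelayMaxPercent := int64(10)         // reproduces the old hard-coded 10%
	// Same formula as the new generateCompactionDelay: a uniformly random
	// delay in [0, chunkRange*percent/100) milliseconds, i.e. up to 12m here.
	delay := time.Duration(rand.Int63n(chunkRangeMs*compactionDelayMaxPercent/100)) * time.Millisecond
	fmt.Println(delay)
}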
func (db *DB) Snapshot(dir string, withHead bool) error { if dir == db.dir { - return fmt.Errorf("cannot snapshot into base directory") + return errors.New("cannot snapshot into base directory") } if _, err := ulid.ParseStrict(dir); err == nil { - return fmt.Errorf("dir must not be a valid ULID") + return errors.New("dir must not be a valid ULID") } db.cmtx.Lock() @@ -1995,7 +2023,7 @@ func (db *DB) Snapshot(dir string, withHead bool) error { defer db.mtx.RUnlock() for _, b := range db.blocks { - level.Info(db.logger).Log("msg", "Snapshotting block", "block", b) + db.logger.Info("Snapshotting block", "block", b) if err := b.Snapshot(dir); err != nil { return fmt.Errorf("error snapshotting block: %s: %w", b.Dir(), err) @@ -2043,6 +2071,7 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) { overlapsOOO := overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) var headQuerier storage.Querier + inoMint := max(db.head.MinTime(), mint) if maxt >= db.head.MinTime() || overlapsOOO { rh := NewRangeHead(db.head, mint, maxt) var err error @@ -2067,13 +2096,14 @@ func (db *DB) Querier(mint, maxt int64) (_ storage.Querier, err error) { if err != nil { return nil, fmt.Errorf("open block querier for head while getting new querier %s: %w", rh, err) } + inoMint = newMint } } if overlapsOOO { // We need to fetch from in-order and out-of-order chunks: wrap the headQuerier. isoState := db.head.oooIso.TrackReadAfter(db.lastGarbageCollectedMmapRef) - headQuerier = NewHeadAndOOOQuerier(mint, maxt, db.head, isoState, headQuerier) + headQuerier = NewHeadAndOOOQuerier(inoMint, mint, maxt, db.head, isoState, headQuerier) } if headQuerier != nil { @@ -2119,6 +2149,7 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer overlapsOOO := overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) var headQuerier storage.ChunkQuerier + inoMint := max(db.head.MinTime(), mint) if maxt >= db.head.MinTime() || overlapsOOO { rh := NewRangeHead(db.head, mint, maxt) headQuerier, err = db.blockChunkQuerierFunc(rh, mint, maxt) @@ -2142,13 +2173,14 @@ func (db *DB) blockChunkQuerierForRange(mint, maxt int64) (_ []storage.ChunkQuer if err != nil { return nil, fmt.Errorf("open querier for head while getting new querier %s: %w", rh, err) } + inoMint = newMint } } if overlapsOOO { // We need to fetch from in-order and out-of-order chunks: wrap the headQuerier. 
isoState := db.head.oooIso.TrackReadAfter(db.lastGarbageCollectedMmapRef) - headQuerier = NewHeadAndOOOChunkQuerier(mint, maxt, db.head, isoState, headQuerier) + headQuerier = NewHeadAndOOOChunkQuerier(inoMint, mint, maxt, db.head, isoState, headQuerier) } if headQuerier != nil { @@ -2252,7 +2284,7 @@ func (db *DB) CleanTombstones() (err error) { for _, uid := range uids { dir := filepath.Join(db.Dir(), uid.String()) if err := os.RemoveAll(dir); err != nil { - level.Error(db.logger).Log("msg", "failed to delete block after failed `CleanTombstones`", "dir", dir, "err", err) + db.logger.Error("failed to delete block after failed `CleanTombstones`", "dir", dir, "err", err) } } if err != nil { diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 4e3a077f6a..306dc4579e 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -15,13 +15,18 @@ package tsdb import ( "bufio" + "bytes" "context" "encoding/binary" + "errors" "flag" "fmt" "hash/crc32" + "log/slog" "math" "math/rand" + "net/http" + "net/http/httptest" "os" "path" "path/filepath" @@ -32,14 +37,20 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/atomic" "go.uber.org/goleak" + "github.com/prometheus/prometheus/prompb" + "github.com/prometheus/prometheus/storage/remote" + + "github.com/gogo/protobuf/proto" + "github.com/golang/snappy" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -102,23 +113,9 @@ func query(t testing.TB, q storage.Querier, matchers ...*labels.Matcher) map[str for ss.Next() { series := ss.At() - samples := []chunks.Sample{} it = series.Iterator(it) - for typ := it.Next(); typ != chunkenc.ValNone; typ = it.Next() { - switch typ { - case chunkenc.ValFloat: - ts, v := it.At() - samples = append(samples, sample{t: ts, f: v}) - case chunkenc.ValHistogram: - ts, h := it.AtHistogram(nil) - samples = append(samples, sample{t: ts, h: h}) - case chunkenc.ValFloatHistogram: - ts, fh := it.AtFloatHistogram(nil) - samples = append(samples, sample{t: ts, fh: fh}) - default: - t.Fatalf("unknown sample type in query %s", typ.String()) - } - } + samples, err := storage.ExpandSamples(it, newSample) + require.NoError(t, err) require.NoError(t, it.Err()) if len(samples) == 0 { @@ -245,8 +242,8 @@ func TestDataAvailableOnlyAfterCommit(t *testing.T) { func TestNoPanicAfterWALCorruption(t *testing.T) { db := openTestDB(t, &Options{WALSegmentSize: 32 * 1024}, nil) - // Append until the first mmaped head chunk. - // This is to ensure that all samples can be read from the mmaped chunks when the WAL is corrupted. + // Append until the first mmapped head chunk. + // This is to ensure that all samples can be read from the mmapped chunks when the WAL is corrupted. var expSamples []chunks.Sample var maxt int64 ctx := context.Background() @@ -265,7 +262,7 @@ func TestNoPanicAfterWALCorruption(t *testing.T) { // Corrupt the WAL after the first sample of the series so that it has at least one sample and // it is not garbage collected. - // The repair deletes all WAL records after the corrupted record and these are read from the mmaped chunk. + // The repair deletes all WAL records after the corrupted record and these are read from the mmapped chunk. 
{ walFiles, err := os.ReadDir(path.Join(db.Dir(), "wal")) require.NoError(t, err) @@ -1140,7 +1137,7 @@ func testWALReplayRaceOnSamplesLoggedBeforeSeries(t *testing.T, numSamplesBefore require.NoError(t, db.Close()) // Reopen the DB, replaying the WAL. - reopenDB, err := Open(db.Dir(), log.NewLogfmtLogger(os.Stderr), nil, nil, nil) + reopenDB, err := Open(db.Dir(), promslog.New(&promslog.Config{}), nil, nil, nil) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, reopenDB.Close()) @@ -1323,7 +1320,7 @@ func TestTombstoneCleanFail(t *testing.T) { totalBlocks := 2 for i := 0; i < totalBlocks; i++ { blockDir := createBlock(t, db.Dir(), genSeries(1, 1, int64(i), int64(i)+1)) - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) // Add some fake tombstones to trigger the compaction. tomb := tombstones.NewMemTombstones() @@ -1378,7 +1375,7 @@ func TestTombstoneCleanRetentionLimitsRace(t *testing.T) { // Generate some blocks with old mint (near epoch). for j := 0; j < totalBlocks; j++ { blockDir := createBlock(t, dbDir, genSeries(10, 1, int64(j), int64(j)+1)) - block, err := OpenBlock(nil, blockDir, nil) + block, err := OpenBlock(nil, blockDir, nil, nil) require.NoError(t, err) // Cover block with tombstones so it can be deleted with CleanTombstones() as well. tomb := tombstones.NewMemTombstones() @@ -1436,10 +1433,10 @@ func (*mockCompactorFailing) Plan(string) ([]string, error) { func (c *mockCompactorFailing) Write(dest string, _ BlockReader, _, _ int64, _ *BlockMeta) ([]ulid.ULID, error) { if len(c.blocks) >= c.max { - return []ulid.ULID{}, fmt.Errorf("the compactor already did the maximum allowed blocks so it is time to fail") + return []ulid.ULID{}, errors.New("the compactor already did the maximum allowed blocks so it is time to fail") } - block, err := OpenBlock(nil, createBlock(c.t, dest, genSeries(1, 1, 0, 1)), nil) + block, err := OpenBlock(nil, createBlock(c.t, dest, genSeries(1, 1, 0, 1)), nil, nil) require.NoError(c.t, err) require.NoError(c.t, block.Close()) // Close block as we won't be using anywhere. c.blocks = append(c.blocks, block) @@ -1463,7 +1460,7 @@ func (*mockCompactorFailing) Compact(string, []string, []*Block) ([]ulid.ULID, e } func (*mockCompactorFailing) CompactOOO(string, *OOOCompactionHead) (result []ulid.ULID, err error) { - return nil, fmt.Errorf("mock compaction failing CompactOOO") + return nil, errors.New("mock compaction failing CompactOOO") } func TestTimeRetention(t *testing.T) { @@ -1609,7 +1606,7 @@ func TestSizeRetention(t *testing.T) { // Create a WAL checkpoint, and compare sizes. first, last, err := wlog.Segments(db.Head().wal.Dir()) require.NoError(t, err) - _, err = wlog.Checkpoint(log.NewNopLogger(), db.Head().wal, first, last-1, func(x chunks.HeadSeriesRef) bool { return false }, 0) + _, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(x chunks.HeadSeriesRef) bool { return false }, 0) require.NoError(t, err) blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics. walSize, err = db.Head().wal.Size() @@ -2350,7 +2347,7 @@ func TestCorrectNumTombstones(t *testing.T) { // This ensures that a snapshot that includes the head and creates a block with a custom time range // will not overlap with the first block created by the next compaction. 
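Note on the widened OpenBlock calls above: the function now takes a trailing PostingsDecoderFactory, and the updated tests pass nil to fall back to the default decoder, while db.go passes DefaultPostingsDecoderFactory or the value from Options. A hedged call-site sketch; the block directory path is hypothetical:

package main

import (
	"log"

	"github.com/prometheus/prometheus/tsdb"
)

func main() {
	// nil logger and nil chunk pool select defaults, mirroring the test
	// call sites; a nil decoder factory would be equivalent to passing
	// tsdb.DefaultPostingsDecoderFactory explicitly.
	blockDir := "data/01GTDVZZF52NSWB5SXQF0P2PGF" // hypothetical block dir
	block, err := tsdb.OpenBlock(nil, blockDir, nil, tsdb.DefaultPostingsDecoderFactory)
	if err != nil {
		log.Fatal(err)
	}
	defer block.Close()
	log.Println("opened block:", block.Meta().ULID)
}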
func TestBlockRanges(t *testing.T) { - logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) + logger := promslog.New(&promslog.Config{}) ctx := context.Background() dir := t.TempDir() @@ -2435,7 +2432,7 @@ func TestBlockRanges(t *testing.T) { func TestDBReadOnly(t *testing.T) { var ( dbDir string - logger = log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) + logger = promslog.New(&promslog.Config{}) expBlocks []*Block expBlock *Block expSeries map[string][]chunks.Sample @@ -2512,13 +2509,13 @@ func TestDBReadOnly(t *testing.T) { }) t.Run("block", func(t *testing.T) { blockID := expBlock.meta.ULID.String() - block, err := dbReadOnly.Block(blockID) + block, err := dbReadOnly.Block(blockID, nil) require.NoError(t, err) require.Equal(t, expBlock.Meta(), block.Meta(), "block meta mismatch") }) t.Run("invalid block ID", func(t *testing.T) { blockID := "01GTDVZZF52NSWB5SXQF0P2PGF" - _, err := dbReadOnly.Block(blockID) + _, err := dbReadOnly.Block(blockID, nil) require.Error(t, err) }) t.Run("last block ID", func(t *testing.T) { @@ -2553,7 +2550,7 @@ func TestDBReadOnly(t *testing.T) { // all api methods return an ErrClosed. func TestDBReadOnlyClosing(t *testing.T) { sandboxDir := t.TempDir() - db, err := OpenDBReadOnly(t.TempDir(), sandboxDir, log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))) + db, err := OpenDBReadOnly(t.TempDir(), sandboxDir, promslog.New(&promslog.Config{})) require.NoError(t, err) // The sandboxDir was there. require.DirExists(t, db.sandboxDir) @@ -2570,7 +2567,7 @@ func TestDBReadOnlyClosing(t *testing.T) { func TestDBReadOnly_FlushWAL(t *testing.T) { var ( dbDir string - logger = log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)) + logger = promslog.New(&promslog.Config{}) err error maxt int ctx = context.Background() @@ -2650,7 +2647,7 @@ func TestDBReadOnly_Querier_NoAlteration(t *testing.T) { spinUpQuerierAndCheck := func(dir, sandboxDir string, chunksCount int) { dBDirHash := dirHash(dir) - // Bootsrap a RO db from the same dir and set up a querier. + // Bootstrap a RO db from the same dir and set up a querier. dbReadOnly, err := OpenDBReadOnly(dir, sandboxDir, nil) require.NoError(t, err) require.Equal(t, chunksCount, countChunks(dir)) @@ -2669,7 +2666,7 @@ func TestDBReadOnly_Querier_NoAlteration(t *testing.T) { require.NoError(t, db.Close()) }() - // Append until the first mmaped head chunk. + // Append until the first mmapped head chunk. for i := 0; i < 121; i++ { app := db.Appender(context.Background()) _, err := app.Append(0, labels.FromStrings("foo", "bar"), int64(i), 0) @@ -3115,7 +3112,7 @@ func TestCompactHead(t *testing.T) { WALCompression: wlog.CompressionSnappy, } - db, err := Open(dbDir, log.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) + db, err := Open(dbDir, promslog.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) require.NoError(t, err) ctx := context.Background() app := db.Appender(ctx) @@ -3136,7 +3133,7 @@ func TestCompactHead(t *testing.T) { // Delete everything but the new block and // reopen the db to query it to ensure it includes the head data. require.NoError(t, deleteNonBlocks(db.Dir())) - db, err = Open(dbDir, log.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) + db, err = Open(dbDir, promslog.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) require.NoError(t, err) require.Len(t, db.Blocks(), 1) require.Equal(t, int64(maxt), db.Head().MinTime()) @@ -3163,7 +3160,7 @@ func TestCompactHead(t *testing.T) { // TestCompactHeadWithDeletion tests https://github.com/prometheus/prometheus/issues/11585. 
func TestCompactHeadWithDeletion(t *testing.T) { - db, err := Open(t.TempDir(), log.NewNopLogger(), prometheus.NewRegistry(), nil, nil) + db, err := Open(t.TempDir(), promslog.NewNopLogger(), prometheus.NewRegistry(), nil, nil) require.NoError(t, err) ctx := context.Background() @@ -3276,7 +3273,7 @@ func TestOpen_VariousBlockStates(t *testing.T) { // Regression test: Already removed parent can be still in list, which was causing Open errors. m.Compaction.Parents = append(m.Compaction.Parents, BlockDesc{ULID: ulid.MustParse(filepath.Base(compacted))}) m.Compaction.Parents = append(m.Compaction.Parents, BlockDesc{ULID: ulid.MustParse(filepath.Base(compacted))}) - _, err = writeMetaFile(log.NewLogfmtLogger(os.Stderr), dir, m) + _, err = writeMetaFile(promslog.New(&promslog.Config{}), dir, m) require.NoError(t, err) } tmpCheckpointDir := path.Join(tmpDir, "wal/checkpoint.00000001.tmp") @@ -3288,7 +3285,7 @@ func TestOpen_VariousBlockStates(t *testing.T) { opts := DefaultOptions() opts.RetentionDuration = 0 - db, err := Open(tmpDir, log.NewLogfmtLogger(os.Stderr), nil, opts, nil) + db, err := Open(tmpDir, promslog.New(&promslog.Config{}), nil, opts, nil) require.NoError(t, err) loadedBlocks := db.Blocks() @@ -3332,7 +3329,7 @@ func TestOneCheckpointPerCompactCall(t *testing.T) { tmpDir := t.TempDir() ctx := context.Background() - db, err := Open(tmpDir, log.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) + db, err := Open(tmpDir, promslog.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, db.Close()) @@ -3394,7 +3391,7 @@ func TestOneCheckpointPerCompactCall(t *testing.T) { createBlock(t, db.dir, genSeries(1, 1, newBlockMint, newBlockMaxt)) - db, err = Open(db.dir, log.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) + db, err = Open(db.dir, promslog.NewNopLogger(), prometheus.NewRegistry(), tsdbCfg, nil) require.NoError(t, err) db.DisableCompactions() @@ -3443,7 +3440,7 @@ func TestNoPanicOnTSDBOpenError(t *testing.T) { tmpdir := t.TempDir() // Taking the lock will cause a TSDB startup error. - l, err := tsdbutil.NewDirLocker(tmpdir, "tsdb", log.NewNopLogger(), nil) + l, err := tsdbutil.NewDirLocker(tmpdir, "tsdb", promslog.NewNopLogger(), nil) require.NoError(t, err) require.NoError(t, l.Lock()) @@ -3996,6 +3993,307 @@ func newTestDB(t *testing.T) *DB { } func TestOOOWALWrite(t *testing.T) { + minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } + + s := labels.NewSymbolTable() + scratchBuilder1 := labels.NewScratchBuilderWithSymbolTable(s, 1) + scratchBuilder1.Add("l", "v1") + s1 := scratchBuilder1.Labels() + scratchBuilder2 := labels.NewScratchBuilderWithSymbolTable(s, 1) + scratchBuilder2.Add("l", "v2") + s2 := scratchBuilder2.Labels() + + scenarios := map[string]struct { + appendSample func(app storage.Appender, l labels.Labels, mins int64) (storage.SeriesRef, error) + expectedOOORecords []interface{} + expectedInORecords []interface{} + }{ + "float": { + appendSample: func(app storage.Appender, l labels.Labels, mins int64) (storage.SeriesRef, error) { + seriesRef, err := app.Append(0, l, minutes(mins), float64(mins)) + require.NoError(t, err) + return seriesRef, nil + }, + expectedOOORecords: []interface{}{ + // The MmapRef in this are not hand calculated, and instead taken from the test run. + // What is important here is the order of records, and that MmapRef increases for each record. 
+ []record.RefMmapMarker{ + {Ref: 1}, + }, + []record.RefSample{ + {Ref: 1, T: minutes(40), V: 40}, + }, + + []record.RefMmapMarker{ + {Ref: 2}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(42), V: 42}, + }, + + []record.RefSample{ + {Ref: 2, T: minutes(45), V: 45}, + {Ref: 1, T: minutes(35), V: 35}, + }, + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 0x100000000 + 8}, + }, + []record.RefSample{ + {Ref: 1, T: minutes(36), V: 36}, + {Ref: 1, T: minutes(37), V: 37}, + }, + + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 0x100000000 + 58}, + }, + []record.RefSample{ // Does not contain the in-order sample here. + {Ref: 1, T: minutes(50), V: 50}, + }, + + // Single commit but multiple OOO records. + []record.RefMmapMarker{ + {Ref: 2, MmapRef: 0x100000000 + 107}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(50), V: 50}, + {Ref: 2, T: minutes(51), V: 51}, + }, + []record.RefMmapMarker{ + {Ref: 2, MmapRef: 0x100000000 + 156}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(52), V: 52}, + {Ref: 2, T: minutes(53), V: 53}, + }, + }, + expectedInORecords: []interface{}{ + []record.RefSeries{ + {Ref: 1, Labels: s1}, + {Ref: 2, Labels: s2}, + }, + []record.RefSample{ + {Ref: 1, T: minutes(60), V: 60}, + {Ref: 2, T: minutes(60), V: 60}, + }, + []record.RefSample{ + {Ref: 1, T: minutes(40), V: 40}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(42), V: 42}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(45), V: 45}, + {Ref: 1, T: minutes(35), V: 35}, + {Ref: 1, T: minutes(36), V: 36}, + {Ref: 1, T: minutes(37), V: 37}, + }, + []record.RefSample{ // Contains both in-order and ooo sample. + {Ref: 1, T: minutes(50), V: 50}, + {Ref: 2, T: minutes(65), V: 65}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(50), V: 50}, + {Ref: 2, T: minutes(51), V: 51}, + {Ref: 2, T: minutes(52), V: 52}, + {Ref: 2, T: minutes(53), V: 53}, + }, + }, + }, + "integer histogram": { + appendSample: func(app storage.Appender, l labels.Labels, mins int64) (storage.SeriesRef, error) { + seriesRef, err := app.AppendHistogram(0, l, minutes(mins), tsdbutil.GenerateTestHistogram(int(mins)), nil) + require.NoError(t, err) + return seriesRef, nil + }, + expectedOOORecords: []interface{}{ + // The MmapRef in this are not hand calculated, and instead taken from the test run. + // What is important here is the order of records, and that MmapRef increases for each record. + []record.RefMmapMarker{ + {Ref: 1}, + }, + []record.RefHistogramSample{ + {Ref: 1, T: minutes(40), H: tsdbutil.GenerateTestHistogram(40)}, + }, + + []record.RefMmapMarker{ + {Ref: 2}, + }, + []record.RefHistogramSample{ + {Ref: 2, T: minutes(42), H: tsdbutil.GenerateTestHistogram(42)}, + }, + + []record.RefHistogramSample{ + {Ref: 2, T: minutes(45), H: tsdbutil.GenerateTestHistogram(45)}, + {Ref: 1, T: minutes(35), H: tsdbutil.GenerateTestHistogram(35)}, + }, + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 0x100000000 + 8}, + }, + []record.RefHistogramSample{ + {Ref: 1, T: minutes(36), H: tsdbutil.GenerateTestHistogram(36)}, + {Ref: 1, T: minutes(37), H: tsdbutil.GenerateTestHistogram(37)}, + }, + + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 0x100000000 + 89}, + }, + []record.RefHistogramSample{ // Does not contain the in-order sample here. + {Ref: 1, T: minutes(50), H: tsdbutil.GenerateTestHistogram(50)}, + }, + + // Single commit but multiple OOO records. 
+ []record.RefMmapMarker{ + {Ref: 2, MmapRef: 0x100000000 + 172}, + }, + []record.RefHistogramSample{ + {Ref: 2, T: minutes(50), H: tsdbutil.GenerateTestHistogram(50)}, + {Ref: 2, T: minutes(51), H: tsdbutil.GenerateTestHistogram(51)}, + }, + []record.RefMmapMarker{ + {Ref: 2, MmapRef: 0x100000000 + 257}, + }, + []record.RefHistogramSample{ + {Ref: 2, T: minutes(52), H: tsdbutil.GenerateTestHistogram(52)}, + {Ref: 2, T: minutes(53), H: tsdbutil.GenerateTestHistogram(53)}, + }, + }, + expectedInORecords: []interface{}{ + []record.RefSeries{ + {Ref: 1, Labels: s1}, + {Ref: 2, Labels: s2}, + }, + []record.RefHistogramSample{ + {Ref: 1, T: minutes(60), H: tsdbutil.GenerateTestHistogram(60)}, + {Ref: 2, T: minutes(60), H: tsdbutil.GenerateTestHistogram(60)}, + }, + []record.RefHistogramSample{ + {Ref: 1, T: minutes(40), H: tsdbutil.GenerateTestHistogram(40)}, + }, + []record.RefHistogramSample{ + {Ref: 2, T: minutes(42), H: tsdbutil.GenerateTestHistogram(42)}, + }, + []record.RefHistogramSample{ + {Ref: 2, T: minutes(45), H: tsdbutil.GenerateTestHistogram(45)}, + {Ref: 1, T: minutes(35), H: tsdbutil.GenerateTestHistogram(35)}, + {Ref: 1, T: minutes(36), H: tsdbutil.GenerateTestHistogram(36)}, + {Ref: 1, T: minutes(37), H: tsdbutil.GenerateTestHistogram(37)}, + }, + []record.RefHistogramSample{ // Contains both in-order and ooo sample. + {Ref: 1, T: minutes(50), H: tsdbutil.GenerateTestHistogram(50)}, + {Ref: 2, T: minutes(65), H: tsdbutil.GenerateTestHistogram(65)}, + }, + []record.RefHistogramSample{ + {Ref: 2, T: minutes(50), H: tsdbutil.GenerateTestHistogram(50)}, + {Ref: 2, T: minutes(51), H: tsdbutil.GenerateTestHistogram(51)}, + {Ref: 2, T: minutes(52), H: tsdbutil.GenerateTestHistogram(52)}, + {Ref: 2, T: minutes(53), H: tsdbutil.GenerateTestHistogram(53)}, + }, + }, + }, + "float histogram": { + appendSample: func(app storage.Appender, l labels.Labels, mins int64) (storage.SeriesRef, error) { + seriesRef, err := app.AppendHistogram(0, l, minutes(mins), nil, tsdbutil.GenerateTestFloatHistogram(int(mins))) + require.NoError(t, err) + return seriesRef, nil + }, + expectedOOORecords: []interface{}{ + // The MmapRef in this are not hand calculated, and instead taken from the test run. + // What is important here is the order of records, and that MmapRef increases for each record. + []record.RefMmapMarker{ + {Ref: 1}, + }, + []record.RefFloatHistogramSample{ + {Ref: 1, T: minutes(40), FH: tsdbutil.GenerateTestFloatHistogram(40)}, + }, + + []record.RefMmapMarker{ + {Ref: 2}, + }, + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(42), FH: tsdbutil.GenerateTestFloatHistogram(42)}, + }, + + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(45), FH: tsdbutil.GenerateTestFloatHistogram(45)}, + {Ref: 1, T: minutes(35), FH: tsdbutil.GenerateTestFloatHistogram(35)}, + }, + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 0x100000000 + 8}, + }, + []record.RefFloatHistogramSample{ + {Ref: 1, T: minutes(36), FH: tsdbutil.GenerateTestFloatHistogram(36)}, + {Ref: 1, T: minutes(37), FH: tsdbutil.GenerateTestFloatHistogram(37)}, + }, + + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 0x100000000 + 177}, + }, + []record.RefFloatHistogramSample{ // Does not contain the in-order sample here. + {Ref: 1, T: minutes(50), FH: tsdbutil.GenerateTestFloatHistogram(50)}, + }, + + // Single commit but multiple OOO records. 
+ []record.RefMmapMarker{ + {Ref: 2, MmapRef: 0x100000000 + 348}, + }, + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(50), FH: tsdbutil.GenerateTestFloatHistogram(50)}, + {Ref: 2, T: minutes(51), FH: tsdbutil.GenerateTestFloatHistogram(51)}, + }, + []record.RefMmapMarker{ + {Ref: 2, MmapRef: 0x100000000 + 521}, + }, + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(52), FH: tsdbutil.GenerateTestFloatHistogram(52)}, + {Ref: 2, T: minutes(53), FH: tsdbutil.GenerateTestFloatHistogram(53)}, + }, + }, + expectedInORecords: []interface{}{ + []record.RefSeries{ + {Ref: 1, Labels: s1}, + {Ref: 2, Labels: s2}, + }, + []record.RefFloatHistogramSample{ + {Ref: 1, T: minutes(60), FH: tsdbutil.GenerateTestFloatHistogram(60)}, + {Ref: 2, T: minutes(60), FH: tsdbutil.GenerateTestFloatHistogram(60)}, + }, + []record.RefFloatHistogramSample{ + {Ref: 1, T: minutes(40), FH: tsdbutil.GenerateTestFloatHistogram(40)}, + }, + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(42), FH: tsdbutil.GenerateTestFloatHistogram(42)}, + }, + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(45), FH: tsdbutil.GenerateTestFloatHistogram(45)}, + {Ref: 1, T: minutes(35), FH: tsdbutil.GenerateTestFloatHistogram(35)}, + {Ref: 1, T: minutes(36), FH: tsdbutil.GenerateTestFloatHistogram(36)}, + {Ref: 1, T: minutes(37), FH: tsdbutil.GenerateTestFloatHistogram(37)}, + }, + []record.RefFloatHistogramSample{ // Contains both in-order and ooo sample. + {Ref: 1, T: minutes(50), FH: tsdbutil.GenerateTestFloatHistogram(50)}, + {Ref: 2, T: minutes(65), FH: tsdbutil.GenerateTestFloatHistogram(65)}, + }, + []record.RefFloatHistogramSample{ + {Ref: 2, T: minutes(50), FH: tsdbutil.GenerateTestFloatHistogram(50)}, + {Ref: 2, T: minutes(51), FH: tsdbutil.GenerateTestFloatHistogram(51)}, + {Ref: 2, T: minutes(52), FH: tsdbutil.GenerateTestFloatHistogram(52)}, + {Ref: 2, T: minutes(53), FH: tsdbutil.GenerateTestFloatHistogram(53)}, + }, + }, + }, + } + for name, scenario := range scenarios { + t.Run(name, func(t *testing.T) { + testOOOWALWrite(t, scenario.appendSample, scenario.expectedOOORecords, scenario.expectedInORecords) + }) + } +} + +func testOOOWALWrite(t *testing.T, + appendSample func(app storage.Appender, l labels.Labels, mins int64) (storage.SeriesRef, error), + expectedOOORecords []interface{}, + expectedInORecords []interface{}, +) { dir := t.TempDir() opts := DefaultOptions() @@ -4004,18 +4302,14 @@ func TestOOOWALWrite(t *testing.T) { db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() t.Cleanup(func() { require.NoError(t, db.Close()) }) s1, s2 := labels.FromStrings("l", "v1"), labels.FromStrings("l", "v2") - minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } - - appendSample := func(app storage.Appender, l labels.Labels, mins int64) { - _, err = app.Append(0, l, minutes(mins), float64(mins)) - require.NoError(t, err) - } // Ingest sample at 1h. app := db.Appender(context.Background()) @@ -4055,92 +4349,6 @@ func TestOOOWALWrite(t *testing.T) { appendSample(app, s2, 53) require.NoError(t, app.Commit()) - // The MmapRef in this are not hand calculated, and instead taken from the test run. - // What is important here is the order of records, and that MmapRef increases for each record. 
- oooRecords := []interface{}{ - []record.RefMmapMarker{ - {Ref: 1}, - }, - []record.RefSample{ - {Ref: 1, T: minutes(40), V: 40}, - }, - - []record.RefMmapMarker{ - {Ref: 2}, - }, - []record.RefSample{ - {Ref: 2, T: minutes(42), V: 42}, - }, - - []record.RefSample{ - {Ref: 2, T: minutes(45), V: 45}, - {Ref: 1, T: minutes(35), V: 35}, - }, - []record.RefMmapMarker{ // 3rd sample, hence m-mapped. - {Ref: 1, MmapRef: 4294967304}, - }, - []record.RefSample{ - {Ref: 1, T: minutes(36), V: 36}, - {Ref: 1, T: minutes(37), V: 37}, - }, - - []record.RefMmapMarker{ // 3rd sample, hence m-mapped. - {Ref: 1, MmapRef: 4294967354}, - }, - []record.RefSample{ // Does not contain the in-order sample here. - {Ref: 1, T: minutes(50), V: 50}, - }, - - // Single commit but multiple OOO records. - []record.RefMmapMarker{ - {Ref: 2, MmapRef: 4294967403}, - }, - []record.RefSample{ - {Ref: 2, T: minutes(50), V: 50}, - {Ref: 2, T: minutes(51), V: 51}, - }, - []record.RefMmapMarker{ - {Ref: 2, MmapRef: 4294967452}, - }, - []record.RefSample{ - {Ref: 2, T: minutes(52), V: 52}, - {Ref: 2, T: minutes(53), V: 53}, - }, - } - - inOrderRecords := []interface{}{ - []record.RefSeries{ - {Ref: 1, Labels: s1}, - {Ref: 2, Labels: s2}, - }, - []record.RefSample{ - {Ref: 1, T: minutes(60), V: 60}, - {Ref: 2, T: minutes(60), V: 60}, - }, - []record.RefSample{ - {Ref: 1, T: minutes(40), V: 40}, - }, - []record.RefSample{ - {Ref: 2, T: minutes(42), V: 42}, - }, - []record.RefSample{ - {Ref: 2, T: minutes(45), V: 45}, - {Ref: 1, T: minutes(35), V: 35}, - {Ref: 1, T: minutes(36), V: 36}, - {Ref: 1, T: minutes(37), V: 37}, - }, - []record.RefSample{ // Contains both in-order and ooo sample. - {Ref: 1, T: minutes(50), V: 50}, - {Ref: 2, T: minutes(65), V: 65}, - }, - []record.RefSample{ - {Ref: 2, T: minutes(50), V: 50}, - {Ref: 2, T: minutes(51), V: 51}, - {Ref: 2, T: minutes(52), V: 52}, - {Ref: 2, T: minutes(53), V: 53}, - }, - } - getRecords := func(walDir string) []interface{} { sr, err := wlog.NewSegmentsReader(walDir) require.NoError(t, err) @@ -4149,10 +4357,8 @@ func TestOOOWALWrite(t *testing.T) { require.NoError(t, sr.Close()) }() - var ( - records []interface{} - dec record.Decoder = record.NewDecoder(labels.NewSymbolTable()) - ) + var records []interface{} + dec := record.NewDecoder(nil) for r.Next() { rec := r.Record() switch typ := dec.Type(rec); typ { @@ -4168,6 +4374,14 @@ func TestOOOWALWrite(t *testing.T) { markers, err := dec.MmapMarkers(rec, nil) require.NoError(t, err) records = append(records, markers) + case record.HistogramSamples: + histogramSamples, err := dec.HistogramSamples(rec, nil) + require.NoError(t, err) + records = append(records, histogramSamples) + case record.FloatHistogramSamples: + floatHistogramSamples, err := dec.FloatHistogramSamples(rec, nil) + require.NoError(t, err) + records = append(records, floatHistogramSamples) default: t.Fatalf("got a WAL record that is not series or samples: %v", typ) } @@ -4178,11 +4392,11 @@ func TestOOOWALWrite(t *testing.T) { // The normal WAL. actRecs := getRecords(path.Join(dir, "wal")) - testutil.RequireEqual(t, inOrderRecords, actRecs) + require.Equal(t, expectedInORecords, actRecs) // The WBL. actRecs = getRecords(path.Join(dir, wlog.WblDirName)) - testutil.RequireEqual(t, oooRecords, actRecs) + require.Equal(t, expectedOOORecords, actRecs) } // Tests https://github.com/prometheus/prometheus/issues/10291#issuecomment-1044373110. 
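Note on the getRecords helper above: its decode switch now recognizes the two histogram record types, so the WAL and the WBL can be compared for all three sample encodings. A trimmed, self-contained sketch of the same read loop under the same record/wlog APIs; the directory path is hypothetical and error handling is condensed:

package main

import (
	"log"

	"github.com/prometheus/prometheus/tsdb/record"
	"github.com/prometheus/prometheus/tsdb/wlog"
)

func main() {
	sr, err := wlog.NewSegmentsReader("data/wal") // hypothetical WAL (or WBL) dir
	if err != nil {
		log.Fatal(err)
	}
	defer sr.Close()

	r := wlog.NewReader(sr)
	dec := record.NewDecoder(nil) // nil symbol table, as in the test
	for r.Next() {
		switch typ := dec.Type(r.Record()); typ {
		case record.Series, record.Samples, record.MmapMarkers,
			record.HistogramSamples, record.FloatHistogramSamples:
			log.Println("record type:", typ)
		default:
			log.Println("unexpected record type:", typ)
		}
	}
	if err := r.Err(); err != nil {
		log.Fatal(err)
	}
}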
@@ -4381,7 +4595,7 @@ func TestMetadataCheckpointingOnlyKeepsLatestEntry(t *testing.T) { keep := func(id chunks.HeadSeriesRef) bool { return id != 3 } - _, err = wlog.Checkpoint(log.NewNopLogger(), w, first, last-1, keep, 0) + _, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0) require.NoError(t, err) // Confirm there's been a checkpoint. @@ -4495,6 +4709,160 @@ func TestMetadataAssertInMemoryData(t *testing.T) { require.Equal(t, *reopenDB.head.series.getByHash(s4.Hash(), s4).meta, m4) } +// TestMultipleEncodingsCommitOrder mainly serves to demonstrate what happens when committing a batch of samples for the +// same series when there are multiple encodings. Commit() will process all float samples before histogram samples. This +// means that if histograms are appended before floats, the histograms could be marked as OOO when they are committed. +// While possible, this shouldn't happen very often - you need the same series to be ingested as both a float and a +// histogram in a single write request. +func TestMultipleEncodingsCommitOrder(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderCapMax = 30 + opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds() + + series1 := labels.FromStrings("foo", "bar1") + + db := openTestDB(t, opts, nil) + db.DisableCompactions() + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() + defer func() { + require.NoError(t, db.Close()) + }() + + addSample := func(app storage.Appender, ts int64, valType chunkenc.ValueType) chunks.Sample { + if valType == chunkenc.ValFloat { + _, err := app.Append(0, labels.FromStrings("foo", "bar1"), ts, float64(ts)) + require.NoError(t, err) + return sample{t: ts, f: float64(ts)} + } + if valType == chunkenc.ValHistogram { + h := tsdbutil.GenerateTestHistogram(int(ts)) + _, err := app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, h, nil) + require.NoError(t, err) + return sample{t: ts, h: h} + } + fh := tsdbutil.GenerateTestFloatHistogram(int(ts)) + _, err := app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, nil, fh) + require.NoError(t, err) + return sample{t: ts, fh: fh} + } + + verifySamples := func(minT, maxT int64, expSamples []chunks.Sample, oooCount int) { + requireEqualOOOSamples(t, oooCount, db) + + // Verify samples querier. + querier, err := db.Querier(minT, maxT) + require.NoError(t, err) + defer querier.Close() + + seriesSet := query(t, querier, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")) + require.Len(t, seriesSet, 1) + gotSamples := seriesSet[series1.String()] + requireEqualSamples(t, series1.String(), expSamples, gotSamples, requireEqualSamplesIgnoreCounterResets) + + // Verify chunks querier. + chunkQuerier, err := db.ChunkQuerier(minT, maxT) + require.NoError(t, err) + defer chunkQuerier.Close() + + chks := queryChunks(t, chunkQuerier, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")) + require.NotNil(t, chks[series1.String()]) + require.Len(t, chks, 1) + var gotChunkSamples []chunks.Sample + for _, chunk := range chks[series1.String()] { + it := chunk.Chunk.Iterator(nil) + smpls, err := storage.ExpandSamples(it, newSample) + require.NoError(t, err) + gotChunkSamples = append(gotChunkSamples, smpls...) + require.NoError(t, it.Err()) + } + requireEqualSamples(t, series1.String(), expSamples, gotChunkSamples, requireEqualSamplesIgnoreCounterResets) + } + + var expSamples []chunks.Sample + + // Append samples with different encoding types and then commit them at once. 
+ app := db.Appender(context.Background()) + + for i := 100; i < 105; i++ { + s := addSample(app, int64(i), chunkenc.ValFloat) + expSamples = append(expSamples, s) + } + // These samples will be marked as OOO as their timestamps are less than the max timestamp for float samples in the + // same batch. + for i := 110; i < 120; i++ { + s := addSample(app, int64(i), chunkenc.ValHistogram) + expSamples = append(expSamples, s) + } + // These samples will be marked as OOO as their timestamps are less than the max timestamp for float samples in the + // same batch. + for i := 120; i < 130; i++ { + s := addSample(app, int64(i), chunkenc.ValFloatHistogram) + expSamples = append(expSamples, s) + } + // These samples will be marked as in-order as their timestamps are greater than the max timestamp for float + // samples in the same batch. + for i := 140; i < 150; i++ { + s := addSample(app, int64(i), chunkenc.ValFloatHistogram) + expSamples = append(expSamples, s) + } + // These samples will be marked as in-order, even though they're appended after the float histograms from ts 140-150 + // because float samples are processed first and these samples are in-order wrt the float samples in the batch. + for i := 130; i < 135; i++ { + s := addSample(app, int64(i), chunkenc.ValFloat) + expSamples = append(expSamples, s) + } + + require.NoError(t, app.Commit()) + + sort.Slice(expSamples, func(i, j int) bool { + return expSamples[i].T() < expSamples[j].T() + }) + + // oooCount = 20 because the histograms from 110 - 120 and float histograms from 120 - 130 are detected as OOO. + verifySamples(100, 150, expSamples, 20) + + // Append and commit some in-order histograms by themselves. + app = db.Appender(context.Background()) + for i := 150; i < 160; i++ { + s := addSample(app, int64(i), chunkenc.ValHistogram) + expSamples = append(expSamples, s) + } + require.NoError(t, app.Commit()) + + // oooCount remains at 20 as no new OOO samples have been added. + verifySamples(100, 160, expSamples, 20) + + // Append and commit samples for all encoding types. This time all samples will be treated as OOO because samples + // with newer timestamps have already been committed. + app = db.Appender(context.Background()) + for i := 50; i < 55; i++ { + s := addSample(app, int64(i), chunkenc.ValFloat) + expSamples = append(expSamples, s) + } + for i := 60; i < 70; i++ { + s := addSample(app, int64(i), chunkenc.ValHistogram) + expSamples = append(expSamples, s) + } + for i := 70; i < 75; i++ { + s := addSample(app, int64(i), chunkenc.ValFloat) + expSamples = append(expSamples, s) + } + for i := 80; i < 90; i++ { + s := addSample(app, int64(i), chunkenc.ValFloatHistogram) + expSamples = append(expSamples, s) + } + require.NoError(t, app.Commit()) + + // Sort samples again because OOO samples have been added. + sort.Slice(expSamples, func(i, j int) bool { + return expSamples[i].T() < expSamples[j].T() + }) + + // oooCount = 50 as we've added 30 more OOO samples. + verifySamples(50, 160, expSamples, 50) +} + // TODO(codesome): test more samples incoming once compaction has started. To verify new samples after the start // // are not included in this compaction. 
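Note on TestMultipleEncodingsCommitOrder above: a minimal standalone sketch of the ordering pitfall it demonstrates, assuming only the option names introduced in this diff; the data directory and timestamps are illustrative:

package main

import (
	"context"
	"log"
	"time"

	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/tsdb"
	"github.com/prometheus/prometheus/tsdb/tsdbutil"
)

func main() {
	opts := tsdb.DefaultOptions()
	opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds()
	opts.EnableNativeHistograms = true
	opts.EnableOOONativeHistograms = true

	db, err := tsdb.Open("data", nil, nil, opts, nil) // hypothetical dir
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	series := labels.FromStrings("foo", "bar1")
	app := db.Appender(context.Background())
	// One batch, two encodings: the histogram is appended first...
	if _, err := app.AppendHistogram(0, series, 110, tsdbutil.GenerateTestHistogram(110), nil); err != nil {
		log.Fatal(err)
	}
	if _, err := app.Append(0, series, 134, 134); err != nil {
		log.Fatal(err)
	}
	// ...but Commit() processes float samples first, so by the time the
	// histogram at t=110 is handled the series head is already at t=134
	// and the histogram is stored out of order.
	if err := app.Commit(); err != nil {
		log.Fatal(err)
	}
}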
@@ -4516,6 +4884,8 @@ func testOOOCompaction(t *testing.T, scenario sampleTypeScenario, addExtraSample opts := DefaultOptions() opts.OutOfOrderCapMax = 30 opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds() + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) @@ -4721,6 +5091,8 @@ func testOOOCompactionWithNormalCompaction(t *testing.T, scenario sampleTypeScen db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) db.DisableCompactions() // We want to manually call it. + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() t.Cleanup(func() { require.NoError(t, db.Close()) }) @@ -4826,10 +5198,14 @@ func testOOOCompactionWithDisabledWriteLog(t *testing.T, scenario sampleTypeScen opts.OutOfOrderCapMax = 30 opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds() opts.WALSegmentSize = -1 // disabled WAL and WBL + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) db.DisableCompactions() // We want to manually call it. + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() t.Cleanup(func() { require.NoError(t, db.Close()) }) @@ -4935,6 +5311,8 @@ func testOOOQueryAfterRestartWithSnapshotAndRemovedWBL(t *testing.T, scenario sa opts.OutOfOrderCapMax = 10 opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds() opts.EnableMemorySnapshotOnShutdown = true + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) @@ -5034,7 +5412,67 @@ func testOOOQueryAfterRestartWithSnapshotAndRemovedWBL(t *testing.T, scenario sa verifySamples(90, 109) } -func Test_Querier_OOOQuery(t *testing.T) { +func TestQuerierOOOQuery(t *testing.T) { + scenarios := map[string]struct { + appendFunc func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) + sampleFunc func(ts int64) chunks.Sample + }{ + "float": { + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + return app.Append(0, labels.FromStrings("foo", "bar1"), ts, float64(ts)) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, f: float64(ts)} + }, + }, + "integer histogram": { + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + h := tsdbutil.GenerateTestHistogram(int(ts)) + if counterReset { + h.CounterResetHint = histogram.CounterReset + } + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, h, nil) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(ts))} + }, + }, + "float histogram": { + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + fh := tsdbutil.GenerateTestFloatHistogram(int(ts)) + if counterReset { + fh.CounterResetHint = histogram.CounterReset + } + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, nil, fh) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(ts))} + }, + }, + "integer histogram counter resets": { + // Adding counter reset to all histograms means each histogram will have its own chunk. 
+ appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + h := tsdbutil.GenerateTestHistogram(int(ts)) + h.CounterResetHint = histogram.CounterReset // For this scenario, ignore the counterReset argument. + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, h, nil) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(ts))} + }, + }, + } + + for name, scenario := range scenarios { + t.Run(name, func(t *testing.T) { + testQuerierOOOQuery(t, scenario.appendFunc, scenario.sampleFunc) + }) + } +} + +func testQuerierOOOQuery(t *testing.T, + appendFunc func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error), + sampleFunc func(ts int64) chunks.Sample, +) { opts := DefaultOptions() opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds() @@ -5044,16 +5482,16 @@ func Test_Querier_OOOQuery(t *testing.T) { defaultFilterFunc := func(t int64) bool { return true } minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } - addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc) ([]chunks.Sample, int) { + addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc, counterReset bool) ([]chunks.Sample, int) { app := db.Appender(context.Background()) totalAppended := 0 for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() { if !filter(m / time.Minute.Milliseconds()) { continue } - _, err := app.Append(0, series1, m, float64(m)) + _, err := appendFunc(app, m, counterReset) if m >= queryMinT && m <= queryMaxT { - expSamples = append(expSamples, sample{t: m, f: float64(m)}) + expSamples = append(expSamples, sampleFunc(m)) } require.NoError(t, err) totalAppended++ @@ -5064,10 +5502,11 @@ func Test_Querier_OOOQuery(t *testing.T) { } type sampleBatch struct { - minT int64 - maxT int64 - filter filterFunc - isOOO bool + minT int64 + maxT int64 + filter filterFunc + counterReset bool + isOOO bool } tests := []struct { @@ -5115,6 +5554,31 @@ func Test_Querier_OOOQuery(t *testing.T) { }, }, }, + { + name: "alternating OOO batches", // In order: 100-200 normal. out of order first pass: 0, 2, 4, ... 98 (no counter reset), second pass: 1, 3, 5, ... 99 (with counter reset). + queryMinT: minutes(0), + queryMaxT: minutes(200), + batches: []sampleBatch{ + { + minT: minutes(100), + maxT: minutes(200), + filter: defaultFilterFunc, + }, + { + minT: minutes(0), + maxT: minutes(99), + filter: func(t int64) bool { return t%2 == 0 }, + isOOO: true, + }, + { + minT: minutes(0), + maxT: minutes(99), + filter: func(t int64) bool { return t%2 == 1 }, + counterReset: true, + isOOO: true, + }, + }, + }, { name: "query overlapping inorder and ooo samples returns all ingested samples at the end of the interval", oooCap: 30, @@ -5156,7 +5620,7 @@ func Test_Querier_OOOQuery(t *testing.T) { }, }, { - name: "query inorder contain ooo mmaped samples returns all ingested samples at the beginning of the interval", + name: "query inorder contain ooo mmapped samples returns all ingested samples at the beginning of the interval", oooCap: 5, queryMinT: minutes(0), queryMaxT: minutes(200), @@ -5169,7 +5633,7 @@ func Test_Querier_OOOQuery(t *testing.T) { }, { minT: minutes(101), - maxT: minutes(101 + (5-1)*2), // Append samples to fit in a single mmmaped OOO chunk and fit inside the first in-order mmaped chunk. 
+ maxT: minutes(101 + (5-1)*2), // Append samples to fit in a single mmapped OOO chunk and fit inside the first in-order mmapped chunk. filter: func(t int64) bool { return t%2 == 1 }, isOOO: true, }, @@ -5182,7 +5646,7 @@ func Test_Querier_OOOQuery(t *testing.T) { }, }, { - name: "query overlapping inorder and ooo mmaped samples returns all ingested samples at the beginning of the interval", + name: "query overlapping inorder and ooo mmapped samples returns all ingested samples at the beginning of the interval", oooCap: 30, queryMinT: minutes(0), queryMaxT: minutes(200), @@ -5195,7 +5659,7 @@ func Test_Querier_OOOQuery(t *testing.T) { }, { minT: minutes(101), - maxT: minutes(101 + (30-1)*2), // Append samples to fit in a single mmmaped OOO chunk and overlap the first in-order mmaped chunk. + maxT: minutes(101 + (30-1)*2), // Append samples to fit in a single mmapped OOO chunk and overlap the first in-order mmapped chunk. filter: func(t int64) bool { return t%2 == 1 }, isOOO: true, }, @@ -5213,6 +5677,8 @@ func Test_Querier_OOOQuery(t *testing.T) { opts.OutOfOrderCapMax = tc.oooCap db := openTestDB(t, opts, nil) db.DisableCompactions() + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() defer func() { require.NoError(t, db.Close()) }() @@ -5221,7 +5687,7 @@ func Test_Querier_OOOQuery(t *testing.T) { var oooSamples, appendedCount int for _, batch := range tc.batches { - expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter) + expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter, batch.counterReset) if batch.isOOO { oooSamples += appendedCount } @@ -5236,15 +5702,107 @@ func Test_Querier_OOOQuery(t *testing.T) { defer querier.Close() seriesSet := query(t, querier, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")) - require.NotNil(t, seriesSet[series1.String()]) + gotSamples := seriesSet[series1.String()] + require.NotNil(t, gotSamples) require.Len(t, seriesSet, 1) - require.Equal(t, expSamples, seriesSet[series1.String()]) + requireEqualSamples(t, series1.String(), expSamples, gotSamples, requireEqualSamplesIgnoreCounterResets) requireEqualOOOSamples(t, oooSamples, db) }) } } -func Test_ChunkQuerier_OOOQuery(t *testing.T) { +func TestChunkQuerierOOOQuery(t *testing.T) { + nBucketHistogram := func(n int64) *histogram.Histogram { + h := &histogram.Histogram{ + Count: uint64(n), + Sum: float64(n), + } + if n == 0 { + h.PositiveSpans = []histogram.Span{} + h.PositiveBuckets = []int64{} + return h + } + h.PositiveSpans = []histogram.Span{{Offset: 0, Length: uint32(n)}} + h.PositiveBuckets = make([]int64, n) + h.PositiveBuckets[0] = 1 + return h + } + + scenarios := map[string]struct { + appendFunc func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) + sampleFunc func(ts int64) chunks.Sample + checkInUseBucket bool + }{ + "float": { + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + return app.Append(0, labels.FromStrings("foo", "bar1"), ts, float64(ts)) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, f: float64(ts)} + }, + }, + "integer histogram": { + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + h := tsdbutil.GenerateTestHistogram(int(ts)) + if counterReset { + h.CounterResetHint = histogram.CounterReset + } + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, h, nil) + }, + 
sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(ts))} + }, + }, + "float histogram": { + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + fh := tsdbutil.GenerateTestFloatHistogram(int(ts)) + if counterReset { + fh.CounterResetHint = histogram.CounterReset + } + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, nil, fh) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(ts))} + }, + }, + "integer histogram counter resets": { + // Adding counter reset to all histograms means each histogram will have its own chunk. + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + h := tsdbutil.GenerateTestHistogram(int(ts)) + h.CounterResetHint = histogram.CounterReset // For this scenario, ignore the counterReset argument. + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, h, nil) + }, + sampleFunc: func(ts int64) chunks.Sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(ts))} + }, + }, + "integer histogram with recode": { + // Histograms have increasing number of buckets so their chunks are recoded. + appendFunc: func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error) { + n := ts / time.Minute.Milliseconds() + return app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, nBucketHistogram(n), nil) + }, + sampleFunc: func(ts int64) chunks.Sample { + n := ts / time.Minute.Milliseconds() + return sample{t: ts, h: nBucketHistogram(n)} + }, + // Only check in-use buckets for this scenario. + // Recoding adds empty buckets. + checkInUseBucket: true, + }, + } + for name, scenario := range scenarios { + t.Run(name, func(t *testing.T) { + testChunkQuerierOOOQuery(t, scenario.appendFunc, scenario.sampleFunc, scenario.checkInUseBucket) + }) + } +} + +func testChunkQuerierOOOQuery(t *testing.T, + appendFunc func(app storage.Appender, ts int64, counterReset bool) (storage.SeriesRef, error), + sampleFunc func(ts int64) chunks.Sample, + checkInUseBuckets bool, +) { opts := DefaultOptions() opts.OutOfOrderCapMax = 30 opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds() @@ -5255,16 +5813,16 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { defaultFilterFunc := func(t int64) bool { return true } minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } - addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc) ([]chunks.Sample, int) { + addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc, counterReset bool) ([]chunks.Sample, int) { app := db.Appender(context.Background()) totalAppended := 0 for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() { if !filter(m / time.Minute.Milliseconds()) { continue } - _, err := app.Append(0, series1, m, float64(m)) + _, err := appendFunc(app, m, counterReset) if m >= queryMinT && m <= queryMaxT { - expSamples = append(expSamples, sample{t: m, f: float64(m)}) + expSamples = append(expSamples, sampleFunc(m)) } require.NoError(t, err) totalAppended++ @@ -5275,10 +5833,11 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) { } type sampleBatch struct { - minT int64 - maxT int64 - filter filterFunc - isOOO bool + minT int64 + maxT int64 + filter filterFunc + counterReset bool + isOOO bool } tests := []struct { @@ -5326,6 +5885,31 @@ 
func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 			},
 		},
 	},
+		{
+			name:      "alternating OOO batches", // In-order: 100-200. Out-of-order first pass: 0, 2, 4, ... 98 (no counter reset); second pass: 1, 3, 5, ... 99 (with counter reset).
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: defaultFilterFunc,
+				},
+				{
+					minT:   minutes(0),
+					maxT:   minutes(99),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  true,
+				},
+				{
+					minT:         minutes(0),
+					maxT:         minutes(99),
+					filter:       func(t int64) bool { return t%2 == 1 },
+					counterReset: true,
+					isOOO:        true,
+				},
+			},
+		},
 		{
 			name:      "query overlapping inorder and ooo samples returns all ingested samples at the end of the interval",
 			oooCap:    30,
@@ -5367,7 +5951,7 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 			},
 		},
 		{
-			name:      "query inorder contain ooo mmaped samples returns all ingested samples at the beginning of the interval",
+			name:      "query inorder contain ooo mmapped samples returns all ingested samples at the beginning of the interval",
 			oooCap:    5,
 			queryMinT: minutes(0),
 			queryMaxT: minutes(200),
@@ -5380,7 +5964,7 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 			},
 			{
 				minT:   minutes(101),
-				maxT:   minutes(101 + (5-1)*2), // Append samples to fit in a single mmmaped OOO chunk and fit inside the first in-order mmaped chunk.
+				maxT:   minutes(101 + (5-1)*2), // Append samples to fit in a single mmapped OOO chunk and fit inside the first in-order mmapped chunk.
 				filter: func(t int64) bool { return t%2 == 1 },
 				isOOO:  true,
 			},
@@ -5393,7 +5977,7 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 			},
 		},
 		{
-			name:      "query overlapping inorder and ooo mmaped samples returns all ingested samples at the beginning of the interval",
+			name:      "query overlapping inorder and ooo mmapped samples returns all ingested samples at the beginning of the interval",
 			oooCap:    30,
 			queryMinT: minutes(0),
 			queryMaxT: minutes(200),
@@ -5406,7 +5990,7 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 			},
 			{
 				minT:   minutes(101),
-				maxT:   minutes(101 + (30-1)*2), // Append samples to fit in a single mmmaped OOO chunk and overlap the first in-order mmaped chunk.
+				maxT:   minutes(101 + (30-1)*2), // Append samples to fit in a single mmapped OOO chunk and overlap the first in-order mmapped chunk.
 				filter: func(t int64) bool { return t%2 == 1 },
 				isOOO:  true,
 			},
@@ -5424,6 +6008,8 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 			opts.OutOfOrderCapMax = tc.oooCap
 			db := openTestDB(t, opts, nil)
 			db.DisableCompactions()
+			db.EnableNativeHistograms()
+			db.EnableOOONativeHistograms()
 			defer func() {
 				require.NoError(t, db.Close())
 			}()
@@ -5432,7 +6018,7 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 			var oooSamples, appendedCount int
 			for _, batch := range tc.batches {
-				expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter)
+				expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter, batch.counterReset)
 				if batch.isOOO {
 					oooSamples += appendedCount
 				}
@@ -5453,12 +6039,440 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 			var gotSamples []chunks.Sample
 			for _, chunk := range chks[series1.String()] {
 				it := chunk.Chunk.Iterator(nil)
-				for it.Next() == chunkenc.ValFloat {
-					ts, v := it.At()
-					gotSamples = append(gotSamples, sample{t: ts, f: v})
+				smpls, err := storage.ExpandSamples(it, newSample)
+				require.NoError(t, err)
+
+				// Verify that no sample is outside the chunk's time range.
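+				// The bounds invariant being checked: a chunk's MinTime and MaxTime
+				// are derived from its samples, so the first and last samples must
+				// sit exactly on them and everything else must fall in between.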
+				for i, s := range smpls {
+					switch i {
+					case 0:
+						require.Equal(t, chunk.MinTime, s.T(), "first sample %v not at chunk min time %v", s, chunk.MinTime)
+					case len(smpls) - 1:
+						require.Equal(t, chunk.MaxTime, s.T(), "last sample %v not at chunk max time %v", s, chunk.MaxTime)
+					default:
+						require.GreaterOrEqual(t, s.T(), chunk.MinTime, "sample %v before chunk min time %v", s, chunk.MinTime)
+						require.LessOrEqual(t, s.T(), chunk.MaxTime, "sample %v after chunk max time %v", s, chunk.MaxTime)
+					}
+				}
+
+				gotSamples = append(gotSamples, smpls...)
+				require.NoError(t, it.Err())
+			}
+			if checkInUseBuckets {
+				requireEqualSamples(t, series1.String(), expSamples, gotSamples, requireEqualSamplesIgnoreCounterResets, requireEqualSamplesInUseBucketCompare)
+			} else {
+				requireEqualSamples(t, series1.String(), expSamples, gotSamples, requireEqualSamplesIgnoreCounterResets)
+			}
+		})
+	}
+}
+
+// TestOOONativeHistogramsWithCounterResets verifies the counter reset headers for in-order and out-of-order samples
+// upon ingestion. Note that when counter resets occur in OOO samples, the header is set to UnknownCounterReset
+// rather than CounterReset. This is because with OOO native histogram samples, it cannot be definitively
+// determined whether a counter reset occurred, since the samples are not consecutive, and another sample
+// could still come in that would change the status of the header. In that case, the UnknownCounterReset
+// headers would be re-checked at query time and updated as needed. However, this test is checking the counter
+// reset headers at the time of storage.
+func TestOOONativeHistogramsWithCounterResets(t *testing.T) {
+	for name, scenario := range sampleTypeScenarios {
+		t.Run(name, func(t *testing.T) {
+			if name == intHistogram || name == floatHistogram {
+				testOOONativeHistogramsWithCounterResets(t, scenario)
+			}
+		})
+	}
+}
+
+func testOOONativeHistogramsWithCounterResets(t *testing.T, scenario sampleTypeScenario) {
+	opts := DefaultOptions()
+	opts.OutOfOrderCapMax = 30
+	opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds()
+
+	type resetFunc func(v int64) bool
+	defaultResetFunc := func(v int64) bool { return false }
+
+	lbls := labels.FromStrings("foo", "bar1")
+	minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() }
+
+	type sampleBatch struct {
+		from                 int64
+		until                int64
+		shouldReset          resetFunc
+		expCounterResetHints []histogram.CounterResetHint
+	}
+
+	tests := []struct {
+		name            string
+		queryMin        int64
+		queryMax        int64
+		batches         []sampleBatch
+		expectedSamples []chunks.Sample
+	}{
+		{
+			name:     "Counter reset within in-order samples",
+			queryMin: minutes(40),
+			queryMax: minutes(55),
+			batches: []sampleBatch{
+				// In-order samples
+				{
+					from:  40,
+					until: 50,
+					shouldReset: func(v int64) bool {
+						return v == 45
+					},
+					expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset},
+				},
+			},
+		},
+		{
+			name:     "Counter reset right at beginning of OOO samples",
+			queryMin: minutes(40),
+			queryMax: minutes(55),
+			batches: []sampleBatch{
+				// In-order samples
+				{
+					from:        40,
+					until:       45,
+					shouldReset: defaultResetFunc,
+					expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset,
histogram.NotCounterReset}, + }, + { + from: 50, + until: 55, + shouldReset: defaultResetFunc, + expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset}, + }, + // OOO samples + { + from: 45, + until: 50, + shouldReset: func(v int64) bool { + return v == 45 + }, + expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset}, + }, + }, + }, + { + name: "Counter resets in both in-order and OOO samples", + queryMin: minutes(40), + queryMax: minutes(55), + batches: []sampleBatch{ + // In-order samples + { + from: 40, + until: 45, + shouldReset: func(v int64) bool { + return v == 44 + }, + expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.UnknownCounterReset}, + }, + { + from: 50, + until: 55, + shouldReset: defaultResetFunc, + expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset}, + }, + // OOO samples + { + from: 45, + until: 50, + shouldReset: func(v int64) bool { + return v == 49 + }, + expCounterResetHints: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.NotCounterReset, histogram.UnknownCounterReset}, + }, + }, + }, + } + for _, tc := range tests { + t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) { + db := openTestDB(t, opts, nil) + db.DisableCompactions() + db.EnableOOONativeHistograms() + defer func() { + require.NoError(t, db.Close()) + }() + + app := db.Appender(context.Background()) + + expSamples := make(map[string][]chunks.Sample) + + for _, batch := range tc.batches { + j := batch.from + smplIdx := 0 + for i := batch.from; i < batch.until; i++ { + resetCount := batch.shouldReset(i) + if resetCount { + j = 0 + } + _, s, err := scenario.appendFunc(app, lbls, minutes(i), j) + require.NoError(t, err) + if s.Type() == chunkenc.ValHistogram { + s.H().CounterResetHint = batch.expCounterResetHints[smplIdx] + } else if s.Type() == chunkenc.ValFloatHistogram { + s.FH().CounterResetHint = batch.expCounterResetHints[smplIdx] + } + expSamples[lbls.String()] = append(expSamples[lbls.String()], s) + j++ + smplIdx++ } } - require.Equal(t, expSamples, gotSamples) + + require.NoError(t, app.Commit()) + + for k, v := range expSamples { + sort.Slice(v, func(i, j int) bool { + return v[i].T() < v[j].T() + }) + expSamples[k] = v + } + + querier, err := db.Querier(tc.queryMin, tc.queryMax) + require.NoError(t, err) + defer querier.Close() + + seriesSet := query(t, querier, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")) + require.NotNil(t, seriesSet[lbls.String()]) + require.Len(t, seriesSet, 1) + requireEqualSeries(t, expSamples, seriesSet, false) + }) + } +} + +func TestOOOInterleavedImplicitCounterResets(t *testing.T) { + for name, scenario := range sampleTypeScenarios { + t.Run(name, func(t *testing.T) { + testOOOInterleavedImplicitCounterResets(t, name, scenario) + }) + } +} + +func testOOOInterleavedImplicitCounterResets(t *testing.T, name string, scenario sampleTypeScenario) { + var appendFunc func(app storage.Appender, ts, v int64) error + + if scenario.sampleType != 
sampleMetricTypeHistogram {
+		return
+	}
+
+	switch name {
+	case intHistogram:
+		appendFunc = func(app storage.Appender, ts, v int64) error {
+			h := &histogram.Histogram{
+				Count:           uint64(v),
+				Sum:             float64(v),
+				PositiveSpans:   []histogram.Span{{Offset: 0, Length: 1}},
+				PositiveBuckets: []int64{v},
+			}
+			_, err := app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, h, nil)
+			return err
+		}
+	case floatHistogram:
+		appendFunc = func(app storage.Appender, ts, v int64) error {
+			fh := &histogram.FloatHistogram{
+				Count:           float64(v),
+				Sum:             float64(v),
+				PositiveSpans:   []histogram.Span{{Offset: 0, Length: 1}},
+				PositiveBuckets: []float64{float64(v)},
+			}
+			_, err := app.AppendHistogram(0, labels.FromStrings("foo", "bar1"), ts, nil, fh)
+			return err
+		}
+	case gaugeIntHistogram, gaugeFloatHistogram:
+		return
+	}
+
+	// Not a sample type: we encode an integer counter that we convert to a
+	// histogram with a single bucket.
+	type tsValue struct {
+		ts int64
+		v  int64
+	}
+
+	type expectedTsValue struct {
+		ts   int64
+		v    int64
+		hint histogram.CounterResetHint
+	}
+
+	type expectedChunk struct {
+		hint histogram.CounterResetHint
+		size int
+	}
+
+	cases := map[string]struct {
+		samples []tsValue
+		oooCap  int64
+		// The expected samples with counter reset.
+		expectedSamples []expectedTsValue
+		// The expected counter reset hint for each chunk.
+		expectedChunks []expectedChunk
+	}{
+		"counter reset in-order cleared by in-memory OOO chunk": {
+			samples: []tsValue{
+				{1, 40}, // New in In-order. I1.
+				{4, 30}, // In-order counter reset. I2.
+				{2, 40}, // New in OOO. O1.
+				{3, 10}, // OOO counter reset. O2.
+			},
+			oooCap: 30,
+			// Expect all to be set to UnknownCounterReset because we switch between
+			// in-order and out-of-order samples.
+			expectedSamples: []expectedTsValue{
+				{1, 40, histogram.UnknownCounterReset}, // I1.
+				{2, 40, histogram.UnknownCounterReset}, // O1.
+				{3, 10, histogram.UnknownCounterReset}, // O2.
+				{4, 30, histogram.UnknownCounterReset}, // I2. Counter reset cleared by iterator change.
+			},
+			expectedChunks: []expectedChunk{
+				{histogram.UnknownCounterReset, 1}, // I1.
+				{histogram.UnknownCounterReset, 1}, // O1.
+				{histogram.UnknownCounterReset, 1}, // O2.
+				{histogram.UnknownCounterReset, 1}, // I2.
+			},
+		},
+		"counter reset in OOO mmapped chunk cleared by in-memory OOO chunk": {
+			samples: []tsValue{
+				{8, 30}, // In-order, new chunk. I1.
+				{1, 10}, // OOO, new chunk (will be mmapped). MO1.
+				{2, 20}, // OOO, no reset (will be mmapped). MO1.
+				{3, 30}, // OOO, no reset (will be mmapped). MO1.
+				{5, 20}, // OOO, reset (will be mmapped). MO2.
+				{6, 10}, // OOO, reset (will be mmapped). MO3.
+				{7, 20}, // OOO, no reset (will be mmapped). MO3.
+				{4, 10}, // OOO, inserted into memory, triggers mmap. O1.
+			},
+			oooCap: 6,
+			expectedSamples: []expectedTsValue{
+				{1, 10, histogram.UnknownCounterReset}, // MO1.
+				{2, 20, histogram.NotCounterReset},     // MO1.
+				{3, 30, histogram.NotCounterReset},     // MO1.
+				{4, 10, histogram.UnknownCounterReset}, // O1. Counter reset cleared by iterator change.
+				{5, 20, histogram.UnknownCounterReset}, // MO2.
+				{6, 10, histogram.UnknownCounterReset}, // MO3.
+				{7, 20, histogram.NotCounterReset},     // MO3.
+				{8, 30, histogram.UnknownCounterReset}, // I1.
+			},
+			expectedChunks: []expectedChunk{
+				{histogram.UnknownCounterReset, 3}, // MO1.
+				{histogram.UnknownCounterReset, 1}, // O1.
+				{histogram.UnknownCounterReset, 1}, // MO2.
+				{histogram.UnknownCounterReset, 2}, // MO3.
+				{histogram.UnknownCounterReset, 1}, // I1.
+			},
+		},
+		"counter reset in OOO mmapped chunk cleared by another OOO mmapped chunk": {
+			samples: []tsValue{
+				{8, 100}, // In-order, new chunk. I1.
+				{1, 50},  // OOO, new chunk (will be mmapped). MO1.
+				{5, 40},  // OOO, reset (will be mmapped). MO2.
+				{6, 50},  // OOO, no reset (will be mmapped). MO2.
+				{2, 10},  // OOO, new chunk, no reset (will be mmapped). MO3.
+				{3, 20},  // OOO, no reset (will be mmapped). MO3.
+				{4, 30},  // OOO, no reset (will be mmapped). MO3.
+				{7, 60},  // OOO, no reset in memory. O1.
+			},
+			oooCap: 3,
+			expectedSamples: []expectedTsValue{
+				{1, 50, histogram.UnknownCounterReset},  // MO1.
+				{2, 10, histogram.UnknownCounterReset},  // MO3.
+				{3, 20, histogram.NotCounterReset},      // MO3.
+				{4, 30, histogram.NotCounterReset},      // MO3.
+				{5, 40, histogram.UnknownCounterReset},  // MO2.
+				{6, 50, histogram.NotCounterReset},      // MO2.
+				{7, 60, histogram.UnknownCounterReset},  // O1.
+				{8, 100, histogram.UnknownCounterReset}, // I1.
+			},
+			expectedChunks: []expectedChunk{
+				{histogram.UnknownCounterReset, 1}, // MO1.
+				{histogram.UnknownCounterReset, 3}, // MO3.
+				{histogram.UnknownCounterReset, 2}, // MO2.
+				{histogram.UnknownCounterReset, 1}, // O1.
+				{histogram.UnknownCounterReset, 1}, // I1.
+			},
+		},
+	}
+
+	for tcName, tc := range cases {
+		t.Run(tcName, func(t *testing.T) {
+			opts := DefaultOptions()
+			opts.OutOfOrderCapMax = tc.oooCap
+			opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds()
+
+			db := openTestDB(t, opts, nil)
+			db.DisableCompactions()
+			db.EnableOOONativeHistograms()
+			defer func() {
+				require.NoError(t, db.Close())
+			}()
+
+			app := db.Appender(context.Background())
+			for _, s := range tc.samples {
+				require.NoError(t, appendFunc(app, s.ts, s.v))
+			}
+			require.NoError(t, app.Commit())
+
+			t.Run("querier", func(t *testing.T) {
+				querier, err := db.Querier(0, 10)
+				require.NoError(t, err)
+				defer querier.Close()
+
+				seriesSet := query(t, querier, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1"))
+				require.Len(t, seriesSet, 1)
+				samples, ok := seriesSet["{foo=\"bar1\"}"]
+				require.True(t, ok)
+				require.Len(t, samples, len(tc.samples))
+				require.Len(t, samples, len(tc.expectedSamples))
+
+				// We expect an UnknownCounterReset hint at every iterator switch,
+				// because the counter reset hint is cleared whenever we move between
+				// in-order and out-of-order samples.
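+				// For example, in the first case above the merged order is
+				// I1 (ts 1), O1 (ts 2), O2 (ts 3), I2 (ts 4): every sample comes
+				// from a different chunk than its predecessor, so every hint reads
+				// back as UnknownCounterReset, even where a reset actually happened.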
+ for i, s := range samples { + switch name { + case intHistogram: + require.Equal(t, tc.expectedSamples[i].hint, s.H().CounterResetHint, "sample %d", i) + require.Equal(t, tc.expectedSamples[i].v, int64(s.H().Count), "sample %d", i) + case floatHistogram: + require.Equal(t, tc.expectedSamples[i].hint, s.FH().CounterResetHint, "sample %d", i) + require.Equal(t, tc.expectedSamples[i].v, int64(s.FH().Count), "sample %d", i) + default: + t.Fatalf("unexpected sample type %s", name) + } + } + }) + + t.Run("chunk-querier", func(t *testing.T) { + querier, err := db.ChunkQuerier(0, 10) + require.NoError(t, err) + defer querier.Close() + + chunkSet := queryAndExpandChunks(t, querier, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")) + require.Len(t, chunkSet, 1) + chunks, ok := chunkSet["{foo=\"bar1\"}"] + require.True(t, ok) + require.Len(t, chunks, len(tc.expectedChunks)) + idx := 0 + for i, samples := range chunks { + require.Len(t, samples, tc.expectedChunks[i].size) + for j, s := range samples { + expectHint := tc.expectedChunks[i].hint + if j > 0 { + expectHint = histogram.NotCounterReset + } + switch name { + case intHistogram: + require.Equal(t, expectHint, s.H().CounterResetHint, "sample %d", idx) + require.Equal(t, tc.expectedSamples[idx].v, int64(s.H().Count), "sample %d", idx) + case floatHistogram: + require.Equal(t, expectHint, s.FH().CounterResetHint, "sample %d", idx) + require.Equal(t, tc.expectedSamples[idx].v, int64(s.FH().Count), "sample %d", idx) + default: + t.Fatalf("unexpected sample type %s", name) + } + idx++ + } + } + }) }) } } @@ -5478,6 +6492,8 @@ func testOOOAppendAndQuery(t *testing.T, scenario sampleTypeScenario) { db := openTestDB(t, opts, nil) db.DisableCompactions() + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() t.Cleanup(func() { require.NoError(t, db.Close()) }) @@ -5555,9 +6571,9 @@ func testOOOAppendAndQuery(t *testing.T, scenario sampleTypeScenario) { addSample(s2, 255, 265, false) verifyOOOMinMaxTimes(250, 265) testQuery(math.MinInt64, math.MaxInt64) - testQuery(minutes(250), minutes(265)) // Test querying ono data time range - testQuery(minutes(290), minutes(300)) // Test querying in-order data time range - testQuery(minutes(250), minutes(300)) // Test querying the entire range + testQuery(minutes(250), minutes(265)) // Test querying ooo data time range. + testQuery(minutes(290), minutes(300)) // Test querying in-order data time range. + testQuery(minutes(250), minutes(300)) // Test querying the entire range. // Out of time window. addSample(s1, 59, 59, true) @@ -5609,6 +6625,8 @@ func testOOODisabled(t *testing.T, scenario sampleTypeScenario) { opts.OutOfOrderTimeWindow = 0 db := openTestDB(t, opts, nil) db.DisableCompactions() + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() t.Cleanup(func() { require.NoError(t, db.Close()) }) @@ -5681,6 +6699,8 @@ func testWBLAndMmapReplay(t *testing.T, scenario sampleTypeScenario) { opts := DefaultOptions() opts.OutOfOrderCapMax = 30 opts.OutOfOrderTimeWindow = 4 * time.Hour.Milliseconds() + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db := openTestDB(t, opts, nil) db.DisableCompactions() @@ -5830,7 +6850,7 @@ func testWBLAndMmapReplay(t *testing.T, scenario sampleTypeScenario) { resetMmapToOriginal() // We neet to reset because new duplicate chunks can be written above. // Removing m-map markers in WBL by rewriting it. 
- newWbl, err := wlog.New(log.NewNopLogger(), nil, filepath.Join(t.TempDir(), "new_wbl"), wlog.CompressionNone) + newWbl, err := wlog.New(promslog.NewNopLogger(), nil, filepath.Join(t.TempDir(), "new_wbl"), wlog.CompressionNone) require.NoError(t, err) sr, err := wlog.NewSegmentsReader(originalWblDir) require.NoError(t, err) @@ -5861,6 +6881,477 @@ func testWBLAndMmapReplay(t *testing.T, scenario sampleTypeScenario) { }) } +func TestOOOHistogramCompactionWithCounterResets(t *testing.T) { + for _, floatHistogram := range []bool{false, true} { + dir := t.TempDir() + ctx := context.Background() + + opts := DefaultOptions() + opts.OutOfOrderCapMax = 30 + opts.OutOfOrderTimeWindow = 500 * time.Minute.Milliseconds() + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + db.DisableCompactions() // We want to manually call it. + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + series1 := labels.FromStrings("foo", "bar1") + series2 := labels.FromStrings("foo", "bar2") + + var series1ExpSamplesPreCompact, series2ExpSamplesPreCompact, series1ExpSamplesPostCompact, series2ExpSamplesPostCompact []chunks.Sample + + addSample := func(ts int64, l labels.Labels, val int, hint histogram.CounterResetHint) sample { + app := db.Appender(context.Background()) + tsMs := ts * time.Minute.Milliseconds() + if floatHistogram { + h := tsdbutil.GenerateTestFloatHistogram(val) + h.CounterResetHint = hint + _, err = app.AppendHistogram(0, l, tsMs, nil, h) + require.NoError(t, err) + require.NoError(t, app.Commit()) + return sample{t: tsMs, fh: h.Copy()} + } + + h := tsdbutil.GenerateTestHistogram(val) + h.CounterResetHint = hint + _, err = app.AppendHistogram(0, l, tsMs, h, nil) + require.NoError(t, err) + require.NoError(t, app.Commit()) + return sample{t: tsMs, h: h.Copy()} + } + + // Add an in-order sample to each series. + s := addSample(520, series1, 1000000, histogram.UnknownCounterReset) + series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s) + series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s) + + s = addSample(520, series2, 1000000, histogram.UnknownCounterReset) + series2ExpSamplesPreCompact = append(series2ExpSamplesPreCompact, s) + series2ExpSamplesPostCompact = append(series2ExpSamplesPostCompact, s) + + // Verify that the in-memory ooo chunk is empty. + checkEmptyOOOChunk := func(lbls labels.Labels) { + ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) + require.NoError(t, err) + require.False(t, created) + require.Nil(t, ms.ooo) + } + + checkEmptyOOOChunk(series1) + checkEmptyOOOChunk(series2) + + // Add samples for series1. There are three head chunks that will be created: + // Chunk 1 - Samples between 100 - 440. One explicit counter reset at ts 250. + // Chunk 2 - Samples between 105 - 395. Overlaps with Chunk 1. One detected counter reset at ts 165. + // Chunk 3 - Samples between 480 - 509. All within one block boundary. One detected counter reset at 490. + + // Chunk 1. + // First add 10 samples. + for i := 100; i < 200; i += 10 { + s = addSample(int64(i), series1, 100000+i, histogram.UnknownCounterReset) + // Before compaction, all the samples have UnknownCounterReset even though they've been added to the same + // chunk. This is because they overlap with the samples from chunk two and when merging two chunks on read, + // the header is set as unknown when the next sample is not in the same chunk as the previous one. 
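+	// For instance, ts 100 comes from chunk 1 but ts 105 comes from chunk 2,
+	// then ts 110 from chunk 1 again, and so on: the merged iterator switches
+	// chunks on every step in the 100-195 range, clearing each hint.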
+		series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s)
+		// After compaction, samples from multiple mmapped chunks will be merged, so there won't be any overlapping
+		// chunks. Therefore, most samples will have the NotCounterReset header.
+		// 100 is the first sample in the first chunk in the blocks, so is still set to UnknownCounterReset.
+		// 120 is a block boundary - after compaction, 120 will be the first sample in a chunk, so is still set to
+		// UnknownCounterReset.
+		if i > 100 && i != 120 {
+			s = copyWithCounterReset(s, histogram.NotCounterReset)
+		}
+		series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s)
+	}
+	// Explicit counter reset - the counter reset header is set to CounterReset but the value is higher
+	// than for the previous timestamp. Explicit counter reset headers are actually ignored though, so when reading
+	// the sample back you actually get unknown/not counter reset. This is because the chainSampleIterator ignores
+	// existing headers and sets the header as UnknownCounterReset if the next sample is not in the same chunk as
+	// the previous one, and counter resets always create a new chunk.
+	// This case has been added to document what's happening, though it might not be the ideal behavior.
+	s = addSample(250, series1, 100000+250, histogram.CounterReset)
+	series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, copyWithCounterReset(s, histogram.UnknownCounterReset))
+	series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, copyWithCounterReset(s, histogram.NotCounterReset))
+
+	// Add 19 more samples to complete a chunk.
+	for i := 260; i < 450; i += 10 {
+		s = addSample(int64(i), series1, 100000+i, histogram.UnknownCounterReset)
+		// The samples with timestamp less than 410 overlap with the samples from chunk 2, so before compaction,
+		// they're all UnknownCounterReset. Samples greater than or equal to 410 don't overlap with other chunks
+		// so they're always detected as NotCounterReset pre and post compaction.
+		if i >= 410 {
+			s = copyWithCounterReset(s, histogram.NotCounterReset)
+		}
+		series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s)
+		// 360 is a block boundary, so after compaction its header is still UnknownCounterReset.
+		if i != 360 {
+			s = copyWithCounterReset(s, histogram.NotCounterReset)
+		}
+		series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s)
+	}
+
+	// Chunk 2.
+	// Add six OOO samples.
+	for i := 105; i < 165; i += 10 {
+		s = addSample(int64(i), series1, 100000+i, histogram.UnknownCounterReset)
+		// Samples overlap with chunk 1 so before compaction all headers are UnknownCounterReset.
+		series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s)
+		series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, copyWithCounterReset(s, histogram.NotCounterReset))
+	}
+
+	// Add sample that will be detected as a counter reset.
+	s = addSample(165, series1, 100000, histogram.UnknownCounterReset)
+	// Before compaction, the sample has an UnknownCounterReset header due to the chainSampleIterator.
+	series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s)
+	// After compaction, the sample's counter reset is still UnknownCounterReset as we cannot trust CounterReset
+	// headers in chunks at the moment, so when reading the first sample in a chunk, its hint is set to
+	// UnknownCounterReset.
+	series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s)
+
+	// Add 23 more samples to complete a chunk.
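+	// Chunk 2 then holds 6 + 1 + 23 = 30 samples, i.e. exactly OutOfOrderCapMax,
+	// so it is full after this loop.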
+	for i := 175; i < 405; i += 10 {
+		s = addSample(int64(i), series1, 100000+i, histogram.UnknownCounterReset)
+		// Samples between 205 and 245 don't interleave with chunk 1 samples, so they keep the NotCounterReset
+		// header before compaction; the surrounding samples overlap with chunk 1 and stay UnknownCounterReset.
+		if i >= 205 && i < 255 {
+			s = copyWithCounterReset(s, histogram.NotCounterReset)
+		}
+		series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s)
+		// 245 is the first sample >= the block boundary at 240, so it's still UnknownCounterReset after compaction.
+		if i != 245 {
+			s = copyWithCounterReset(s, histogram.NotCounterReset)
+		} else {
+			s = copyWithCounterReset(s, histogram.UnknownCounterReset)
+		}
+		series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s)
+	}
+
+	// Chunk 3.
+	for i := 480; i < 490; i++ {
+		s = addSample(int64(i), series1, 100000+i, histogram.UnknownCounterReset)
+		// No overlapping samples in other chunks, so all samples after the first will already be detected as
+		// NotCounterReset before compaction.
+		if i > 480 {
+			s = copyWithCounterReset(s, histogram.NotCounterReset)
+		}
+		series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s)
+		// 480 is a block boundary.
+		if i == 480 {
+			s = copyWithCounterReset(s, histogram.UnknownCounterReset)
+		}
+		series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s)
+	}
+	// Counter reset.
+	s = addSample(int64(490), series1, 100000, histogram.UnknownCounterReset)
+	series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s)
+	series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s)
+	// Add some more samples after the counter reset.
+	for i := 491; i < 510; i++ {
+		s = addSample(int64(i), series1, 100000+i, histogram.UnknownCounterReset)
+		s = copyWithCounterReset(s, histogram.NotCounterReset)
+		series1ExpSamplesPreCompact = append(series1ExpSamplesPreCompact, s)
+		series1ExpSamplesPostCompact = append(series1ExpSamplesPostCompact, s)
+	}
+
+	// Add samples for series2 - one chunk with one detected counter reset at 300.
+	for i := 200; i < 300; i += 10 {
+		s = addSample(int64(i), series2, 100000+i, histogram.UnknownCounterReset)
+		if i > 200 {
+			s = copyWithCounterReset(s, histogram.NotCounterReset)
+		}
+		series2ExpSamplesPreCompact = append(series2ExpSamplesPreCompact, s)
+		if i == 240 {
+			s = copyWithCounterReset(s, histogram.UnknownCounterReset)
+		}
+		series2ExpSamplesPostCompact = append(series2ExpSamplesPostCompact, s)
+	}
+	// Counter reset.
+	s = addSample(int64(300), series2, 100000, histogram.UnknownCounterReset)
+	series2ExpSamplesPreCompact = append(series2ExpSamplesPreCompact, s)
+	series2ExpSamplesPostCompact = append(series2ExpSamplesPostCompact, s)
+	// Add some more samples after the counter reset.
+	for i := 310; i < 500; i += 10 {
+		s := addSample(int64(i), series2, 100000+i, histogram.UnknownCounterReset)
+		s = copyWithCounterReset(s, histogram.NotCounterReset)
+		series2ExpSamplesPreCompact = append(series2ExpSamplesPreCompact, s)
+		// 360 and 480 are block boundaries.
+		if i == 360 || i == 480 {
+			s = copyWithCounterReset(s, histogram.UnknownCounterReset)
+		}
+		series2ExpSamplesPostCompact = append(series2ExpSamplesPostCompact, s)
+	}
+
+	// Sort samples (OOO samples are not added in time order).
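+	// The expected slices were built per chunk (chunk 1, then chunk 2, then
+	// chunk 3) rather than by timestamp, so order them before comparing against
+	// query results, which return samples sorted by time.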
+	sort.Slice(series1ExpSamplesPreCompact, func(i, j int) bool {
+		return series1ExpSamplesPreCompact[i].T() < series1ExpSamplesPreCompact[j].T()
+	})
+	sort.Slice(series1ExpSamplesPostCompact, func(i, j int) bool {
+		return series1ExpSamplesPostCompact[i].T() < series1ExpSamplesPostCompact[j].T()
+	})
+	sort.Slice(series2ExpSamplesPreCompact, func(i, j int) bool {
+		return series2ExpSamplesPreCompact[i].T() < series2ExpSamplesPreCompact[j].T()
+	})
+	sort.Slice(series2ExpSamplesPostCompact, func(i, j int) bool {
+		return series2ExpSamplesPostCompact[i].T() < series2ExpSamplesPostCompact[j].T()
+	})
+
+	verifyDBSamples := func(s1Samples, s2Samples []chunks.Sample) {
+		expRes := map[string][]chunks.Sample{
+			series1.String(): s1Samples,
+			series2.String(): s2Samples,
+		}
+
+		q, err := db.Querier(math.MinInt64, math.MaxInt64)
+		require.NoError(t, err)
+		actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*"))
+		requireEqualSeries(t, expRes, actRes, false)
+	}
+
+	// Verify DB samples before compaction.
+	verifyDBSamples(series1ExpSamplesPreCompact, series2ExpSamplesPreCompact)
+
+	// Verify that the in-memory ooo chunk is not empty.
+	checkNonEmptyOOOChunk := func(lbls labels.Labels) {
+		ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls)
+		require.NoError(t, err)
+		require.False(t, created)
+		require.Positive(t, ms.ooo.oooHeadChunk.chunk.NumSamples())
+	}
+
+	checkNonEmptyOOOChunk(series1)
+	checkNonEmptyOOOChunk(series2)
+
+	// No blocks before compaction.
+	require.Empty(t, db.Blocks())
+
+	// There is a 0th WBL file.
+	require.NoError(t, db.head.wbl.Sync()) // Syncing to make sure the WBL is flushed on Windows.
+	files, err := os.ReadDir(db.head.wbl.Dir())
+	require.NoError(t, err)
+	require.Len(t, files, 1)
+	require.Equal(t, "00000000", files[0].Name())
+	f, err := files[0].Info()
+	require.NoError(t, err)
+	require.Greater(t, f.Size(), int64(100))
+
+	// OOO compaction happens here.
+	require.NoError(t, db.CompactOOOHead(ctx))
+
+	// Check that blocks are created after compaction.
+	require.Len(t, db.Blocks(), 5)
+
+	// Check samples after compaction.
+	verifyDBSamples(series1ExpSamplesPostCompact, series2ExpSamplesPostCompact)
+
+	// The 0th WBL file will be deleted and the 1st will be the only one present.
+	files, err = os.ReadDir(db.head.wbl.Dir())
+	require.NoError(t, err)
+	require.Len(t, files, 1)
+	require.Equal(t, "00000001", files[0].Name())
+	f, err = files[0].Info()
+	require.NoError(t, err)
+	require.Equal(t, int64(0), f.Size())
+
+	// No OOO data should be present in the Head now.
+	checkEmptyOOOChunk(series1)
+	checkEmptyOOOChunk(series2)
+
+	verifyBlockSamples := func(block *Block, fromMins, toMins int64) {
+		var series1Samples, series2Samples []chunks.Sample
+
+		for _, s := range series1ExpSamplesPostCompact {
+			if s.T() >= fromMins*time.Minute.Milliseconds() {
+				// Samples should be sorted, so break out of loop when we reach a timestamp that's too big.
+				if s.T() > toMins*time.Minute.Milliseconds() {
+					break
+				}
+				series1Samples = append(series1Samples, s)
+			}
+		}
+		for _, s := range series2ExpSamplesPostCompact {
+			if s.T() >= fromMins*time.Minute.Milliseconds() {
+				// Samples should be sorted, so break out of loop when we reach a timestamp that's too big.
+				if s.T() > toMins*time.Minute.Milliseconds() {
+					break
+				}
+				series2Samples = append(series2Samples, s)
+			}
+		}
+
+		expRes := map[string][]chunks.Sample{}
+		if len(series1Samples) != 0 {
+			expRes[series1.String()] = series1Samples
+		}
+		if len(series2Samples) != 0 {
+			expRes[series2.String()] = series2Samples
+		}
+
+		q, err := NewBlockQuerier(block, math.MinInt64, math.MaxInt64)
+		require.NoError(t, err)
+
+		actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*"))
+		requireEqualSeries(t, expRes, actRes, false)
+	}
+
+	// Checking for expected data in the blocks.
+	verifyBlockSamples(db.Blocks()[0], 100, 119)
+	verifyBlockSamples(db.Blocks()[1], 120, 239)
+	verifyBlockSamples(db.Blocks()[2], 240, 359)
+	verifyBlockSamples(db.Blocks()[3], 360, 479)
+	verifyBlockSamples(db.Blocks()[4], 480, 509)
+
+	// There should be a single m-map file.
+	mmapDir := mmappedChunksDir(db.head.opts.ChunkDirRoot)
+	files, err = os.ReadDir(mmapDir)
+	require.NoError(t, err)
+	require.Len(t, files, 1)
+
+	// Compact the in-order head and expect another block.
+	// Since this is a forced compaction, this block is not aligned with 2h.
+	err = db.CompactHead(NewRangeHead(db.head, 500*time.Minute.Milliseconds(), 550*time.Minute.Milliseconds()))
+	require.NoError(t, err)
+	require.Len(t, db.Blocks(), 6)
+	verifyBlockSamples(db.Blocks()[5], 520, 520)
+
+	// Blocks created out of normal and OOO head now, but not yet merged.
+	verifyDBSamples(series1ExpSamplesPostCompact, series2ExpSamplesPostCompact)
+
+	// The compaction also clears out the old m-map files, including
+	// the file that has ooo chunks.
+	files, err = os.ReadDir(mmapDir)
+	require.NoError(t, err)
+	require.Len(t, files, 1)
+	require.Equal(t, "000001", files[0].Name())
+
+	// This will merge the overlapping blocks.
+	require.NoError(t, db.Compact(ctx))
+
+	require.Len(t, db.Blocks(), 5)
+	verifyBlockSamples(db.Blocks()[0], 100, 119)
+	verifyBlockSamples(db.Blocks()[1], 120, 239)
+	verifyBlockSamples(db.Blocks()[2], 240, 359)
+	verifyBlockSamples(db.Blocks()[3], 360, 479)
+	verifyBlockSamples(db.Blocks()[4], 480, 520) // Merged block.
+
+	// Final state. Blocks from normal and OOO head are merged.
+	verifyDBSamples(series1ExpSamplesPostCompact, series2ExpSamplesPostCompact)
+	}
+}
+
+func TestInterleavedInOrderAndOOOHistogramCompactionWithCounterResets(t *testing.T) {
+	for _, floatHistogram := range []bool{false, true} {
+		dir := t.TempDir()
+		ctx := context.Background()
+
+		opts := DefaultOptions()
+		opts.OutOfOrderCapMax = 30
+		opts.OutOfOrderTimeWindow = 500 * time.Minute.Milliseconds()
+
+		db, err := Open(dir, nil, nil, opts, nil)
+		require.NoError(t, err)
+		db.DisableCompactions() // We want to manually call it.
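+		// Both toggles are needed: EnableNativeHistograms gates native histogram
+		// ingestion as a whole, while EnableOOONativeHistograms additionally gates
+		// out-of-order histogram ingestion (see TestOOONativeHistogramFlag below).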
+		db.EnableNativeHistograms()
+		db.EnableOOONativeHistograms()
+		t.Cleanup(func() {
+			require.NoError(t, db.Close())
+		})
+
+		series1 := labels.FromStrings("foo", "bar1")
+
+		addSample := func(ts int64, l labels.Labels, val int) sample {
+			app := db.Appender(context.Background())
+			tsMs := ts
+			if floatHistogram {
+				h := tsdbutil.GenerateTestFloatHistogram(val)
+				_, err = app.AppendHistogram(0, l, tsMs, nil, h)
+				require.NoError(t, err)
+				require.NoError(t, app.Commit())
+				return sample{t: tsMs, fh: h.Copy()}
+			}
+
+			h := tsdbutil.GenerateTestHistogram(val)
+			_, err = app.AppendHistogram(0, l, tsMs, h, nil)
+			require.NoError(t, err)
+			require.NoError(t, app.Commit())
+			return sample{t: tsMs, h: h.Copy()}
+		}
+
+		var expSamples []chunks.Sample
+
+		s := addSample(0, series1, 0)
+		expSamples = append(expSamples, s)
+		s = addSample(1, series1, 10)
+		expSamples = append(expSamples, copyWithCounterReset(s, histogram.NotCounterReset))
+		s = addSample(3, series1, 3)
+		expSamples = append(expSamples, copyWithCounterReset(s, histogram.UnknownCounterReset))
+		s = addSample(2, series1, 0)
+		expSamples = append(expSamples, copyWithCounterReset(s, histogram.UnknownCounterReset))
+
+		// Sort samples (OOO samples are not added in time order).
+		sort.Slice(expSamples, func(i, j int) bool {
+			return expSamples[i].T() < expSamples[j].T()
+		})
+
+		verifyDBSamples := func(s1Samples []chunks.Sample) {
+			t.Helper()
+			expRes := map[string][]chunks.Sample{
+				series1.String(): s1Samples,
+			}
+
+			q, err := db.Querier(math.MinInt64, math.MaxInt64)
+			require.NoError(t, err)
+			actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*"))
+			requireEqualSeries(t, expRes, actRes, false)
+		}
+
+		// Verify DB samples before compaction.
+		verifyDBSamples(expSamples)
+
+		require.NoError(t, db.CompactOOOHead(ctx))
+
+		// Check samples after OOO compaction.
+		verifyDBSamples(expSamples)
+
+		// Check that a block is created after compaction.
+		require.Len(t, db.Blocks(), 1)
+
+		// Compact the in-order head and expect another block.
+		// Since this is a forced compaction, this block is not aligned with 2h.
+		err = db.CompactHead(NewRangeHead(db.head, 0, 3))
+		require.NoError(t, err)
+		require.Len(t, db.Blocks(), 2)
+
+		// Blocks created out of normal and OOO head now, but not yet merged.
+		verifyDBSamples(expSamples)
+
+		// This will merge the overlapping blocks.
+		require.NoError(t, db.Compact(ctx))
+
+		require.Len(t, db.Blocks(), 1)
+
+		// Final state. Blocks from normal and OOO head are merged.
+		verifyDBSamples(expSamples)
+	}
+}
+
+func copyWithCounterReset(s sample, hint histogram.CounterResetHint) sample {
+	if s.h != nil {
+		h := s.h.Copy()
+		h.CounterResetHint = hint
+		return sample{t: s.t, h: h}
+	}
+
+	h := s.fh.Copy()
+	h.CounterResetHint = hint
+	return sample{t: s.t, fh: h}
+}
+
 func TestOOOCompactionFailure(t *testing.T) {
 	for name, scenario := range sampleTypeScenarios {
 		t.Run(name, func(t *testing.T) {
@@ -5880,6 +7371,8 @@ func testOOOCompactionFailure(t *testing.T, scenario sampleTypeScenario) {
 	db, err := Open(dir, nil, nil, opts, nil)
 	require.NoError(t, err)
 	db.DisableCompactions() // We want to manually call it.
+	db.EnableNativeHistograms()
+	db.EnableOOONativeHistograms()
 	t.Cleanup(func() {
 		require.NoError(t, db.Close())
 	})
@@ -5907,7 +7400,7 @@ func testOOOCompactionFailure(t *testing.T, scenario sampleTypeScenario) {
 	// There is a 0th WBL file.
 	verifyFirstWBLFileIs0 := func(count int) {
-		require.NoError(t, db.head.wbl.Sync()) // syncing to make sure wbl is flushed in windows
+		require.NoError(t, db.head.wbl.Sync()) // Syncing to make sure the WBL is flushed on Windows.
 		files, err := os.ReadDir(db.head.wbl.Dir())
 		require.NoError(t, err)
 		require.Len(t, files, count)
@@ -5961,7 +7454,7 @@ func testOOOCompactionFailure(t *testing.T, scenario sampleTypeScenario) {
 	require.Len(t, db.Blocks(), 3)
 	require.Equal(t, oldBlocks, db.Blocks())
 
-	// There should be a single m-map file
+	// There should be a single m-map file.
 	verifyMmapFiles("000001")
 
 	// All but last WBL file will be deleted.
@@ -6057,7 +7550,7 @@ func TestWBLCorruption(t *testing.T) {
 	// should be deleted after replay.
 
 	// Checking where we corrupt it.
-	require.NoError(t, db.head.wbl.Sync()) // syncing to make sure wbl is flushed in windows
+	require.NoError(t, db.head.wbl.Sync()) // Syncing to make sure the WBL is flushed on Windows.
 	files, err := os.ReadDir(db.head.wbl.Dir())
 	require.NoError(t, err)
 	require.Len(t, files, 2)
@@ -6080,7 +7573,7 @@ func TestWBLCorruption(t *testing.T) {
 	addSamples(310, 320, false)
 
 	// Verifying that we have data after corruption point.
-	require.NoError(t, db.head.wbl.Sync()) // syncing to make sure wbl is flushed in windows
+	require.NoError(t, db.head.wbl.Sync()) // Syncing to make sure the WBL is flushed on Windows.
 	files, err = os.ReadDir(db.head.wbl.Dir())
 	require.NoError(t, err)
 	require.Len(t, files, 3)
@@ -6167,6 +7660,8 @@ func testOOOMmapCorruption(t *testing.T, scenario sampleTypeScenario) {
 	opts := DefaultOptions()
 	opts.OutOfOrderCapMax = 10
 	opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds()
+	opts.EnableNativeHistograms = true
+	opts.EnableOOONativeHistograms = true
 
 	db, err := Open(dir, nil, nil, opts, nil)
 	require.NoError(t, err)
@@ -6300,6 +7795,8 @@ func testOutOfOrderRuntimeConfig(t *testing.T, scenario sampleTypeScenario) {
 
 	opts := DefaultOptions()
 	opts.OutOfOrderTimeWindow = oooTimeWindow
+	opts.EnableNativeHistograms = true
+	opts.EnableOOONativeHistograms = true
 
 	db, err := Open(dir, nil, nil, opts, nil)
 	require.NoError(t, err)
@@ -6593,6 +8090,8 @@ func testNoGapAfterRestartWithOOO(t *testing.T, scenario sampleTypeScenario) {
 
 	opts := DefaultOptions()
 	opts.OutOfOrderTimeWindow = 30 * time.Minute.Milliseconds()
+	opts.EnableNativeHistograms = true
+	opts.EnableOOONativeHistograms = true
 
 	db, err := Open(dir, nil, nil, opts, nil)
 	require.NoError(t, err)
@@ -6651,6 +8150,8 @@ func testWblReplayAfterOOODisableAndRestart(t *testing.T, scenario sampleTypeSce
 
 	opts := DefaultOptions()
 	opts.OutOfOrderTimeWindow = 60 * time.Minute.Milliseconds()
+	opts.EnableNativeHistograms = true
+	opts.EnableOOONativeHistograms = true
 
 	db, err := Open(dir, nil, nil, opts, nil)
 	require.NoError(t, err)
@@ -6718,6 +8219,8 @@ func testPanicOnApplyConfig(t *testing.T, scenario sampleTypeScenario) {
 
 	opts := DefaultOptions()
 	opts.OutOfOrderTimeWindow = 60 * time.Minute.Milliseconds()
+	opts.EnableNativeHistograms = true
+	opts.EnableOOONativeHistograms = true
 
 	db, err := Open(dir, nil, nil, opts, nil)
 	require.NoError(t, err)
@@ -6775,6 +8278,8 @@ func testDiskFillingUpAfterDisablingOOO(t *testing.T, scenario sampleTypeScenari
 
 	opts := DefaultOptions()
 	opts.OutOfOrderTimeWindow = 60 * time.Minute.Milliseconds()
+	opts.EnableNativeHistograms = true
+	opts.EnableOOONativeHistograms = true
 
 	db, err := Open(dir, nil, nil, opts, nil)
 	require.NoError(t, err)
@@ -6873,7 +8378,7 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) {
 	})
 	ctx :=
context.Background() - appendHistogram := func( + appendHistogram := func(t *testing.T, lbls labels.Labels, tsMinute int, h *histogram.Histogram, exp *[]chunks.Sample, expCRH histogram.CounterResetHint, ) { @@ -6894,7 +8399,7 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) { require.NoError(t, err) require.NoError(t, app.Commit()) } - appendFloat := func(lbls labels.Labels, tsMinute int, val float64, exp *[]chunks.Sample) { + appendFloat := func(t *testing.T, lbls labels.Labels, tsMinute int, val float64, exp *[]chunks.Sample) { t.Helper() app := db.Appender(ctx) _, err := app.Append(0, lbls, minute(tsMinute), val) @@ -6903,7 +8408,7 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) { *exp = append(*exp, sample{t: minute(tsMinute), f: val}) } - testQuery := func(name, value string, exp map[string][]chunks.Sample) { + testQuery := func(t *testing.T, name, value string, exp map[string][]chunks.Sample) { t.Helper() q, err := db.Querier(math.MinInt64, math.MaxInt64) require.NoError(t, err) @@ -6941,24 +8446,24 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) { t.Run("series with only histograms", func(t *testing.T) { h := baseH.Copy() // This is shared across all sub tests. - appendHistogram(series1, 100, h, &exp1, histogram.UnknownCounterReset) - testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) + appendHistogram(t, series1, 100, h, &exp1, histogram.UnknownCounterReset) + testQuery(t, "foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) h.PositiveBuckets[0]++ h.NegativeBuckets[0] += 2 h.Count += 10 - appendHistogram(series1, 101, h, &exp1, histogram.NotCounterReset) - testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) + appendHistogram(t, series1, 101, h, &exp1, histogram.NotCounterReset) + testQuery(t, "foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) t.Run("changing schema", func(t *testing.T) { h.Schema = 2 - appendHistogram(series1, 102, h, &exp1, histogram.UnknownCounterReset) - testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) + appendHistogram(t, series1, 102, h, &exp1, histogram.UnknownCounterReset) + testQuery(t, "foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) // Schema back to old. 
h.Schema = 1 - appendHistogram(series1, 103, h, &exp1, histogram.UnknownCounterReset) - testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) + appendHistogram(t, series1, 103, h, &exp1, histogram.UnknownCounterReset) + testQuery(t, "foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) }) t.Run("new buckets incoming", func(t *testing.T) { @@ -6986,8 +8491,8 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) { h.PositiveSpans[1].Length++ h.PositiveBuckets = append(h.PositiveBuckets, 1) h.Count += 3 - appendHistogram(series1, 104, h, &exp1, histogram.NotCounterReset) - testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) + appendHistogram(t, series1, 104, h, &exp1, histogram.NotCounterReset) + testQuery(t, "foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) // Because of the previous two histograms being on the active chunk, // and the next append is only adding a new bucket, the active chunk @@ -7024,14 +8529,14 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) { h.Count += 3 // {2, 1, -1, 0, 1} -> {2, 1, 0, -1, 0, 1} h.PositiveBuckets = append(h.PositiveBuckets[:2], append([]int64{0}, h.PositiveBuckets[2:]...)...) - appendHistogram(series1, 105, h, &exp1, histogram.NotCounterReset) - testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) + appendHistogram(t, series1, 105, h, &exp1, histogram.NotCounterReset) + testQuery(t, "foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) // We add 4 more histograms to clear out the buffer and see the re-encoded histograms. - appendHistogram(series1, 106, h, &exp1, histogram.NotCounterReset) - appendHistogram(series1, 107, h, &exp1, histogram.NotCounterReset) - appendHistogram(series1, 108, h, &exp1, histogram.NotCounterReset) - appendHistogram(series1, 109, h, &exp1, histogram.NotCounterReset) + appendHistogram(t, series1, 106, h, &exp1, histogram.NotCounterReset) + appendHistogram(t, series1, 107, h, &exp1, histogram.NotCounterReset) + appendHistogram(t, series1, 108, h, &exp1, histogram.NotCounterReset) + appendHistogram(t, series1, 109, h, &exp1, histogram.NotCounterReset) // Update the expected histograms to reflect the re-encoding. 
if floatHistogram { @@ -7058,69 +8563,69 @@ func testHistogramAppendAndQueryHelper(t *testing.T, floatHistogram bool) { exp1[l-6] = sample{t: exp1[l-6].T(), h: h6} } - testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) + testQuery(t, "foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) }) t.Run("buckets disappearing", func(t *testing.T) { h.PositiveSpans[1].Length-- h.PositiveBuckets = h.PositiveBuckets[:len(h.PositiveBuckets)-1] h.Count -= 3 - appendHistogram(series1, 110, h, &exp1, histogram.CounterReset) - testQuery("foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) + appendHistogram(t, series1, 110, h, &exp1, histogram.UnknownCounterReset) + testQuery(t, "foo", "bar1", map[string][]chunks.Sample{series1.String(): exp1}) }) }) t.Run("series starting with float and then getting histograms", func(t *testing.T) { - appendFloat(series2, 100, 100, &exp2) - appendFloat(series2, 101, 101, &exp2) - appendFloat(series2, 102, 102, &exp2) - testQuery("foo", "bar2", map[string][]chunks.Sample{series2.String(): exp2}) + appendFloat(t, series2, 100, 100, &exp2) + appendFloat(t, series2, 101, 101, &exp2) + appendFloat(t, series2, 102, 102, &exp2) + testQuery(t, "foo", "bar2", map[string][]chunks.Sample{series2.String(): exp2}) h := baseH.Copy() - appendHistogram(series2, 103, h, &exp2, histogram.UnknownCounterReset) - appendHistogram(series2, 104, h, &exp2, histogram.NotCounterReset) - appendHistogram(series2, 105, h, &exp2, histogram.NotCounterReset) - testQuery("foo", "bar2", map[string][]chunks.Sample{series2.String(): exp2}) + appendHistogram(t, series2, 103, h, &exp2, histogram.UnknownCounterReset) + appendHistogram(t, series2, 104, h, &exp2, histogram.NotCounterReset) + appendHistogram(t, series2, 105, h, &exp2, histogram.NotCounterReset) + testQuery(t, "foo", "bar2", map[string][]chunks.Sample{series2.String(): exp2}) // Switching between float and histograms again. 
- appendFloat(series2, 106, 106, &exp2) - appendFloat(series2, 107, 107, &exp2) - testQuery("foo", "bar2", map[string][]chunks.Sample{series2.String(): exp2}) + appendFloat(t, series2, 106, 106, &exp2) + appendFloat(t, series2, 107, 107, &exp2) + testQuery(t, "foo", "bar2", map[string][]chunks.Sample{series2.String(): exp2}) - appendHistogram(series2, 108, h, &exp2, histogram.UnknownCounterReset) - appendHistogram(series2, 109, h, &exp2, histogram.NotCounterReset) - testQuery("foo", "bar2", map[string][]chunks.Sample{series2.String(): exp2}) + appendHistogram(t, series2, 108, h, &exp2, histogram.UnknownCounterReset) + appendHistogram(t, series2, 109, h, &exp2, histogram.NotCounterReset) + testQuery(t, "foo", "bar2", map[string][]chunks.Sample{series2.String(): exp2}) }) t.Run("series starting with histogram and then getting float", func(t *testing.T) { h := baseH.Copy() - appendHistogram(series3, 101, h, &exp3, histogram.UnknownCounterReset) - appendHistogram(series3, 102, h, &exp3, histogram.NotCounterReset) - appendHistogram(series3, 103, h, &exp3, histogram.NotCounterReset) - testQuery("foo", "bar3", map[string][]chunks.Sample{series3.String(): exp3}) + appendHistogram(t, series3, 101, h, &exp3, histogram.UnknownCounterReset) + appendHistogram(t, series3, 102, h, &exp3, histogram.NotCounterReset) + appendHistogram(t, series3, 103, h, &exp3, histogram.NotCounterReset) + testQuery(t, "foo", "bar3", map[string][]chunks.Sample{series3.String(): exp3}) - appendFloat(series3, 104, 100, &exp3) - appendFloat(series3, 105, 101, &exp3) - appendFloat(series3, 106, 102, &exp3) - testQuery("foo", "bar3", map[string][]chunks.Sample{series3.String(): exp3}) + appendFloat(t, series3, 104, 100, &exp3) + appendFloat(t, series3, 105, 101, &exp3) + appendFloat(t, series3, 106, 102, &exp3) + testQuery(t, "foo", "bar3", map[string][]chunks.Sample{series3.String(): exp3}) // Switching between histogram and float again. - appendHistogram(series3, 107, h, &exp3, histogram.UnknownCounterReset) - appendHistogram(series3, 108, h, &exp3, histogram.NotCounterReset) - testQuery("foo", "bar3", map[string][]chunks.Sample{series3.String(): exp3}) + appendHistogram(t, series3, 107, h, &exp3, histogram.UnknownCounterReset) + appendHistogram(t, series3, 108, h, &exp3, histogram.NotCounterReset) + testQuery(t, "foo", "bar3", map[string][]chunks.Sample{series3.String(): exp3}) - appendFloat(series3, 109, 106, &exp3) - appendFloat(series3, 110, 107, &exp3) - testQuery("foo", "bar3", map[string][]chunks.Sample{series3.String(): exp3}) + appendFloat(t, series3, 109, 106, &exp3) + appendFloat(t, series3, 110, 107, &exp3) + testQuery(t, "foo", "bar3", map[string][]chunks.Sample{series3.String(): exp3}) }) t.Run("query mix of histogram and float series", func(t *testing.T) { // A float only series. 
- appendFloat(series4, 100, 100, &exp4) - appendFloat(series4, 101, 101, &exp4) - appendFloat(series4, 102, 102, &exp4) + appendFloat(t, series4, 100, 100, &exp4) + appendFloat(t, series4, 101, 101, &exp4) + appendFloat(t, series4, 102, 102, &exp4) - testQuery("foo", "bar.*", map[string][]chunks.Sample{ + testQuery(t, "foo", "bar.*", map[string][]chunks.Sample{ series1.String(): exp1, series2.String(): exp2, series3.String(): exp3, @@ -7147,28 +8652,16 @@ func TestQueryHistogramFromBlocksWithCompaction(t *testing.T) { createBlock(t, db.Dir(), series) for _, s := range series { - key := s.Labels().String() + lbls := s.Labels().String() + slice := exp[lbls] it = s.Iterator(it) - slice := exp[key] - for typ := it.Next(); typ != chunkenc.ValNone; typ = it.Next() { - switch typ { - case chunkenc.ValFloat: - ts, v := it.At() - slice = append(slice, sample{t: ts, f: v}) - case chunkenc.ValHistogram: - ts, h := it.AtHistogram(nil) - slice = append(slice, sample{t: ts, h: h}) - case chunkenc.ValFloatHistogram: - ts, h := it.AtFloatHistogram(nil) - slice = append(slice, sample{t: ts, fh: h}) - default: - t.Fatalf("unexpected sample value type %d", typ) - } - } + smpls, err := storage.ExpandSamples(it, nil) + require.NoError(t, err) + slice = append(slice, smpls...) sort.Slice(slice, func(i, j int) bool { return slice[i].T() < slice[j].T() }) - exp[key] = slice + exp[lbls] = slice } } @@ -7201,10 +8694,10 @@ func TestQueryHistogramFromBlocksWithCompaction(t *testing.T) { // due to origin from different overlapping chunks anymore. for _, ss := range exp { for i, s := range ss[1:] { - if s.H() != nil && ss[i].H() != nil && s.H().CounterResetHint == histogram.UnknownCounterReset { + if s.Type() == chunkenc.ValHistogram && ss[i].Type() == chunkenc.ValHistogram && s.H().CounterResetHint == histogram.UnknownCounterReset { s.H().CounterResetHint = histogram.NotCounterReset } - if s.FH() != nil && ss[i].FH() != nil && s.FH().CounterResetHint == histogram.UnknownCounterReset { + if s.Type() == chunkenc.ValFloatHistogram && ss[i].Type() == chunkenc.ValFloatHistogram && s.FH().CounterResetHint == histogram.UnknownCounterReset { s.FH().CounterResetHint = histogram.NotCounterReset } } @@ -7328,6 +8821,112 @@ func TestNativeHistogramFlag(t *testing.T) { }, act) } +func TestOOONativeHistogramFlag(t *testing.T) { + h := &histogram.Histogram{ + Count: 9, + ZeroCount: 4, + ZeroThreshold: 0.001, + Sum: 35.5, + Schema: 1, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 2, Length: 2}, + }, + PositiveBuckets: []int64{1, 1, -1, 0}, + } + + l := labels.FromStrings("foo", "bar") + + t.Run("Test OOO native histograms if OOO is disabled", func(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 0 + db := openTestDB(t, opts, []int64{100}) + defer func() { + require.NoError(t, db.Close()) + }() + + // Enable Native Histograms and OOO Native Histogram ingestion + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() + + app := db.Appender(context.Background()) + _, err := app.AppendHistogram(0, l, 100, h, nil) + require.NoError(t, err) + + _, err = app.AppendHistogram(0, l, 50, h, nil) + require.NoError(t, err) // The OOO sample is not detected until it is committed, so no error is returned + + require.NoError(t, app.Commit()) + + q, err := db.Querier(math.MinInt, math.MaxInt64) + require.NoError(t, err) + act := query(t, q, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")) + require.Equal(t, map[string][]chunks.Sample{ + l.String(): {sample{t: 100, h: h}}, + }, act) + }) 
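+	// The inverse case: native histograms as a whole are disabled, so even with
+	// OOO native histogram ingestion enabled, both the in-order and the OOO
+	// append below must fail with storage.ErrNativeHistogramsDisabled.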
+ t.Run("Test OOO Native Histograms if Native Histograms are disabled", func(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 100 + db := openTestDB(t, opts, []int64{100}) + defer func() { + require.NoError(t, db.Close()) + }() + + // Disable Native Histograms and enable OOO Native Histogram ingestion + db.DisableNativeHistograms() + db.EnableOOONativeHistograms() + + // Attempt to add an in-order sample + app := db.Appender(context.Background()) + _, err := app.AppendHistogram(0, l, 200, h, nil) + require.Equal(t, storage.ErrNativeHistogramsDisabled, err) + + // Attempt to add an OOO sample + _, err = app.AppendHistogram(0, l, 100, h, nil) + require.Equal(t, storage.ErrNativeHistogramsDisabled, err) + + require.NoError(t, app.Commit()) + + q, err := db.Querier(math.MinInt, math.MaxInt64) + require.NoError(t, err) + act := query(t, q, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")) + require.Equal(t, map[string][]chunks.Sample{}, act) + }) + t.Run("Test OOO native histograms when flag is enabled", func(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 100 + db := openTestDB(t, opts, []int64{100}) + defer func() { + require.NoError(t, db.Close()) + }() + + // Enable Native Histograms and OOO Native Histogram ingestion + db.EnableNativeHistograms() + db.EnableOOONativeHistograms() + + // Add in-order samples + app := db.Appender(context.Background()) + _, err := app.AppendHistogram(0, l, 200, h, nil) + require.NoError(t, err) + + // Add OOO samples + _, err = app.AppendHistogram(0, l, 100, h, nil) + require.NoError(t, err) + _, err = app.AppendHistogram(0, l, 150, h, nil) + require.NoError(t, err) + + require.NoError(t, app.Commit()) + + q, err := db.Querier(math.MinInt, math.MaxInt64) + require.NoError(t, err) + act := query(t, q, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")) + requireEqualSeries(t, map[string][]chunks.Sample{ + l.String(): {sample{t: 100, h: h}, sample{t: 150, h: h}, sample{t: 200, h: h}}, + }, act, true) + }) +} + // compareSeries essentially replaces `require.Equal(t, expected, actual) in // situations where the actual series might contain more counter reset hints // "unknown" than the expected series. This can easily happen for long series @@ -7343,29 +8942,47 @@ func compareSeries(t require.TestingT, expected, actual map[string][]chunks.Samp // package. 
 		require.Equal(t, expected, actual, "number of series differs")
 	}
-	for key, eSamples := range expected {
-		aSamples, ok := actual[key]
+	for key, expSamples := range expected {
+		actSamples, ok := actual[key]
 		if !ok {
 			require.Equal(t, expected, actual, "expected series %q not found", key)
 		}
-		if len(eSamples) != len(aSamples) {
-			require.Equal(t, eSamples, aSamples, "number of samples for series %q differs", key)
+		if len(expSamples) != len(actSamples) {
+			require.Equal(t, expSamples, actSamples, "number of samples for series %q differs", key)
 		}
-		for i, eS := range eSamples {
-			aS := aSamples[i]
-			aH, eH := aS.H(), eS.H()
-			aFH, eFH := aS.FH(), eS.FH()
-			switch {
-			case aH != nil && eH != nil && aH.CounterResetHint == histogram.UnknownCounterReset && eH.CounterResetHint != histogram.GaugeType:
-				eH = eH.Copy()
-				eH.CounterResetHint = histogram.UnknownCounterReset
-				eS = sample{t: eS.T(), h: eH}
-			case aFH != nil && eFH != nil && aFH.CounterResetHint == histogram.UnknownCounterReset && eFH.CounterResetHint != histogram.GaugeType:
-				eFH = eFH.Copy()
-				eFH.CounterResetHint = histogram.UnknownCounterReset
-				eS = sample{t: eS.T(), fh: eFH}
+
+		for i, eS := range expSamples {
+			aS := actSamples[i]
+
+			// Must compare via the chunks.Sample interface methods, as require.Equal does not
+			// work when the concrete types differ; on top of that, chunks.Sample.FH() may
+			// auto-convert from chunks.Sample.H()!
+			require.Equal(t, eS.T(), aS.T(), "timestamp of sample %d in series %q differs", i, key)
+
+			require.Equal(t, eS.Type(), aS.Type(), "type of sample %d in series %q differs", i, key)
+
+			switch eS.Type() {
+			case chunkenc.ValFloat:
+				require.Equal(t, eS.F(), aS.F(), "sample %d in series %q differs", i, key)
+			case chunkenc.ValHistogram:
+				eH, aH := eS.H(), aS.H()
+				if aH.CounterResetHint == histogram.UnknownCounterReset && eH.CounterResetHint != histogram.GaugeType {
+					eH = eH.Copy()
+					// It is always safe to set the counter reset hint to UnknownCounterReset.
+					eH.CounterResetHint = histogram.UnknownCounterReset
+					eS = sample{t: eS.T(), h: eH}
+				}
+				require.Equal(t, eH, aH, "histogram sample %d in series %q differs", i, key)
+
+			case chunkenc.ValFloatHistogram:
+				eFH, aFH := eS.FH(), aS.FH()
+				if aFH.CounterResetHint == histogram.UnknownCounterReset && eFH.CounterResetHint != histogram.GaugeType {
+					eFH = eFH.Copy()
+					// It is always safe to set the counter reset hint to UnknownCounterReset.
+					eFH.CounterResetHint = histogram.UnknownCounterReset
+					eS = sample{t: eS.T(), fh: eFH}
+				}
+				require.Equal(t, eFH, aFH, "float histogram sample %d in series %q differs", i, key)
 			}
-			require.Equal(t, eS, aS, "sample %d in series %q differs", i, key)
 		}
 	}
 }
@@ -7468,7 +9085,7 @@ func TestAbortBlockCompactions(t *testing.T) {
 	defer func() {
 		require.NoError(t, db.Close())
 	}()
-	// It should NOT be compactible at the beginning of the test
+	// It should NOT be compactable at the beginning of the test
 	require.False(t, db.head.compactable(), "head should NOT be compactable")
 
 	// Track the number of compactions run inside db.compactBlocks()
@@ -7478,7 +9095,7 @@ func TestAbortBlockCompactions(t *testing.T) {
 	db.compactor = &mockCompactorFn{
 		planFn: func() ([]string, error) {
 			// On every Plan() run increment compactions. After 4 compactions
-			// update HEAD to make it compactible to force an exit from db.compactBlocks() loop.
+			// update HEAD to make it compactable to force an exit from db.compactBlocks() loop.
 			compactions++
 			if compactions > 3 {
 				chunkRange := db.head.chunkRange.Load()
@@ -7507,7 +9124,7 @@ func TestNewCompactorFunc(t *testing.T) {
 	opts := DefaultOptions()
 	block1 := ulid.MustNew(1, nil)
 	block2 := ulid.MustNew(2, nil)
-	opts.NewCompactorFunc = func(ctx context.Context, r prometheus.Registerer, l log.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) {
+	opts.NewCompactorFunc = func(ctx context.Context, r prometheus.Registerer, l *slog.Logger, ranges []int64, pool chunkenc.Pool, opts *Options) (Compactor, error) {
 		return &mockCompactorFn{
 			planFn: func() ([]string, error) {
 				return []string{block1.String(), block2.String()}, nil
@@ -7567,7 +9184,7 @@ func TestBlockQuerierAndBlockChunkQuerier(t *testing.T) {
 		// Include blockID into series to identify which block got touched.
 		serieses := []storage.Series{storage.NewListSeries(labels.FromMap(map[string]string{"block": fmt.Sprintf("block-%d", i), labels.MetricName: "test_metric"}), []chunks.Sample{sample{t: 0, f: 1}})}
 		blockDir := createBlock(t, db.Dir(), serieses)
-		b, err := OpenBlock(db.logger, blockDir, db.chunkPool)
+		b, err := OpenBlock(db.logger, blockDir, db.chunkPool, nil)
 		require.NoError(t, err)
 
 		// Overwrite meta.json with compaction section for testing purpose.
@@ -7613,23 +9230,151 @@
 }
 
 func TestGenerateCompactionDelay(t *testing.T) {
-	assertDelay := func(delay time.Duration) {
+	assertDelay := func(delay time.Duration, expectedMaxPercentDelay int) {
 		t.Helper()
 		require.GreaterOrEqual(t, delay, time.Duration(0))
-		// Less than 10% of the chunkRange.
-		require.LessOrEqual(t, delay, 6000*time.Millisecond)
+		// Expect to generate a delay up to MaxPercentDelay of the head chunk range
+		require.LessOrEqual(t, delay, (time.Duration(60000*expectedMaxPercentDelay/100) * time.Millisecond))
 	}
 
 	opts := DefaultOptions()
-	opts.EnableDelayedCompaction = true
-	db := openTestDB(t, opts, []int64{60000})
-	defer func() {
-		require.NoError(t, db.Close())
-	}()
-	// The offset is generated and changed while opening.
-	assertDelay(db.opts.CompactionDelay)
+	cases := []struct {
+		compactionDelayPercent int
+	}{
+		{
+			compactionDelayPercent: 1,
+		},
+		{
+			compactionDelayPercent: 10,
+		},
+		{
+			compactionDelayPercent: 60,
+		},
+		{
+			compactionDelayPercent: 100,
+		},
+	}
 
-	for i := 0; i < 1000; i++ {
-		assertDelay(db.generateCompactionDelay())
+	opts.EnableDelayedCompaction = true
+
+	for _, c := range cases {
+		opts.CompactionDelayMaxPercent = c.compactionDelayPercent
+		db := openTestDB(t, opts, []int64{60000})
+		defer func() {
+			require.NoError(t, db.Close())
+		}()
+		// The offset is generated and changed while opening.
+		assertDelay(db.opts.CompactionDelay, c.compactionDelayPercent)
+
+		for i := 0; i < 1000; i++ {
+			assertDelay(db.generateCompactionDelay(), c.compactionDelayPercent)
+		}
+	}
+}
+
+type blockedResponseRecorder struct {
+	r *httptest.ResponseRecorder
+
+	// writeBlocked is used to block writing until the test wants it to resume.
+	writeBlocked chan struct{}
+	// writeStarted is closed by blockedResponseRecorder to signal that writing has started.
+	writeStarted chan struct{}
+}
+
+func (br *blockedResponseRecorder) Write(buf []byte) (int, error) {
+	select {
+	case <-br.writeStarted:
+	default:
+		close(br.writeStarted)
+	}
+
+	<-br.writeBlocked
+	return br.r.Write(buf)
+}
+
+func (br *blockedResponseRecorder) Header() http.Header { return br.r.Header() }
+
+func (br *blockedResponseRecorder) WriteHeader(code int) { br.r.WriteHeader(code) }
+
+func (br *blockedResponseRecorder) Flush() { br.r.Flush() }
+
+// TestBlockClosingBlockedDuringRemoteRead ensures that a TSDB Block is not closed while it is being queried
+// through remote read. This is a regression test for https://github.com/prometheus/prometheus/issues/14422.
+// TODO: Ideally, this should reside in storage/remote/read_handler_test.go once the necessary TSDB utils are accessible there.
+func TestBlockClosingBlockedDuringRemoteRead(t *testing.T) {
+	dir := t.TempDir()
+
+	createBlock(t, dir, genSeries(2, 1, 0, 10))
+	db, err := Open(dir, nil, nil, nil, nil)
+	require.NoError(t, err)
+	// No error checking as manually closing the block is supposed to make this fail.
+	defer db.Close()
+
+	readAPI := remote.NewReadHandler(nil, nil, db, func() config.Config {
+		return config.Config{}
+	},
+		0, 1, 0,
+	)
+
+	matcher, err := labels.NewMatcher(labels.MatchRegexp, "__name__", ".*")
+	require.NoError(t, err)
+
+	query, err := remote.ToQuery(0, 10, []*labels.Matcher{matcher}, nil)
+	require.NoError(t, err)
+
+	req := &prompb.ReadRequest{
+		Queries:               []*prompb.Query{query},
+		AcceptedResponseTypes: []prompb.ReadRequest_ResponseType{prompb.ReadRequest_STREAMED_XOR_CHUNKS},
+	}
+	data, err := proto.Marshal(req)
+	require.NoError(t, err)
+
+	request, err := http.NewRequest(http.MethodPost, "", bytes.NewBuffer(snappy.Encode(nil, data)))
+	require.NoError(t, err)
+
+	blockedRecorder := &blockedResponseRecorder{
+		r:            httptest.NewRecorder(),
+		writeBlocked: make(chan struct{}),
+		writeStarted: make(chan struct{}),
+	}
+
+	readDone := make(chan struct{})
+	go func() {
+		readAPI.ServeHTTP(blockedRecorder, request)
+		require.Equal(t, http.StatusOK, blockedRecorder.r.Code)
+		close(readDone)
+	}()
+
+	// Wait for the read API to start streaming data.
+	<-blockedRecorder.writeStarted
+
+	// Try to close the queried block.
+	blockClosed := make(chan struct{})
+	go func() {
+		for _, block := range db.Blocks() {
+			block.Close()
+		}
+		close(blockClosed)
+	}()
+
+	// Closing the queried block should block.
+	// Wait a little bit to make sure of that.
+	select {
+	case <-time.After(100 * time.Millisecond):
+	case <-readDone:
+		require.Fail(t, "read API should still be streaming data.")
+	case <-blockClosed:
+		require.Fail(t, "Block shouldn't get closed while being queried.")
+	}
+
+	// Resume the read API data streaming.
+	close(blockedRecorder.writeBlocked)
+	<-readDone
+
+	// The block should no longer be needed and closing it should end.
+	select {
+	case <-time.After(10 * time.Millisecond):
+		require.Fail(t, "Closing the block timed out.")
+	case <-blockClosed:
	}
 }
diff --git a/tsdb/docs/format/chunks.md b/tsdb/docs/format/chunks.md
index d4f78dfd3d..837db020e3 100644
--- a/tsdb/docs/format/chunks.md
+++ b/tsdb/docs/format/chunks.md
@@ -25,18 +25,18 @@ in-file offset (lower 4 bytes) and segment sequence number (upper 4 bytes).
 └──────────────────────────────┘
 ```
 
-
 # Chunk
 
 ```
-┌───────────────┬───────────────────┬──────────────┬───────────────────┐
-│ len <uvarint> │ encoding <1 byte> │ data <bytes> │ checksum <4 byte> │
-└───────────────┴───────────────────┴──────────────┴───────────────────┘
+┌───────────────┬───────────────────┬─────────────┬───────────────────┐
+│ len <uvarint> │ encoding <1 byte> │ data <bytes>│ checksum <4 byte> │
+└───────────────┴───────────────────┴─────────────┴───────────────────┘
 ```
 
 Notes:
 
-* `len`: Chunk size in bytes. Encoded using 1 to 10 bytes, using the [`<uvarint>` encoding](https://go.dev/src/encoding/binary/varint.go).
-* `encoding`: Currently either `XOR` (1), `histogram` (2), or `float histogram` (3).
+
+* `len`: Chunk size in bytes. 1 to 10 bytes long using the [`<uvarint>` encoding](https://go.dev/src/encoding/binary/varint.go).
+* `encoding`: Currently either `XOR`, `histogram`, or `floathistogram`, see [code for numerical values](https://github.com/prometheus/prometheus/blob/02d0de9987ad99dee5de21853715954fadb3239f/tsdb/chunkenc/chunk.go#L28-L47).
 * `data`: See below for each encoding.
 * `checksum`: Checksum of `encoding` and `data`. It's a [cyclic redundancy check](https://en.wikipedia.org/wiki/Cyclic_redundancy_check) with the Castagnoli polynomial, serialised as an unsigned 32-bit big-endian number. Can be referred to as `CRC-32C`.
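The checksum rule above is easy to get wrong in clients, so here is a minimal Go sketch of a `CRC-32C` over the `encoding` byte and `data`, serialised big-endian. `chunkChecksum` is a hypothetical helper name, not part of the TSDB API; the real implementation lives in `tsdb/chunks`.

```go
package main

import (
	"encoding/binary"
	"fmt"
	"hash/crc32"
)

// Castagnoli polynomial table, i.e. CRC-32C.
var castagnoli = crc32.MakeTable(crc32.Castagnoli)

// chunkChecksum computes the checksum described above: CRC-32C over the
// encoding byte followed by the chunk data, serialised as a big-endian uint32.
func chunkChecksum(encoding byte, data []byte) []byte {
	h := crc32.New(castagnoli)
	h.Write([]byte{encoding})
	h.Write(data)
	out := make([]byte, 4)
	binary.BigEndian.PutUint32(out, h.Sum32())
	return out
}

func main() {
	// Encoding value 1 (XOR) is an assumption here; see chunkenc for the
	// authoritative numerical values.
	fmt.Printf("%x\n", chunkChecksum(1, []byte{0x00, 0x01}))
}
```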
@@ -48,7 +48,7 @@ Notes:
 └──────────────────────┴───────────────┴───────────────┴──────────────────────┴──────────────────────┴──────────────────────┴──────────────────────┴─────┴──────────────────────┴──────────────────────┴──────────────────┘
 ```
 
-### Notes:
+### Notes
 
 * `ts` is the timestamp, `v` is the value.
 * `...` means to repeat the previous two fields as needed, with `n` starting at 2 and going up to `num_samples` – 1.
@@ -68,12 +68,12 @@ Notes:
 ## Histogram chunk data
 
 ```
-┌──────────────────────┬──────────────────────────┬───────────────────────────────┬─────────────────────┬──────────────────┬──────────────────┬────────────────┬──────────────────┐
-│ num_samples <uint16> │ histogram_flags <1 byte> │ zero_threshold <1 or 9 bytes> │ schema <varbit_int> │ pos_spans <data> │ neg_spans <data> │ samples <data> │ padding <x bits> │
-└──────────────────────┴──────────────────────────┴───────────────────────────────┴─────────────────────┴──────────────────┴──────────────────┴────────────────┴──────────────────┘
+┌──────────────────────┬──────────────────────────┬───────────────────────────────┬─────────────────────┬──────────────────┬──────────────────┬──────────────────────┬────────────────┬──────────────────┐
+│ num_samples <uint16> │ histogram_flags <1 byte> │ zero_threshold <1 or 9 bytes> │ schema <varbit_int> │ pos_spans <data> │ neg_spans <data> │ custom_values <data> │ samples <data> │ padding <x bits> │
+└──────────────────────┴──────────────────────────┴───────────────────────────────┴─────────────────────┴──────────────────┴──────────────────┴──────────────────────┴────────────────┴──────────────────┘
 ```
 
-### Positive and negative spans data:
+### Positive and negative spans data
 
 ```
 ┌─────────────────────────┬────────────────────────┬───────────────────────┬────────────────────────┬───────────────────────┬─────┬────────────────────────┬───────────────────────┐
@@ -81,7 +81,17 @@ Notes:
 └─────────────────────────┴────────────────────────┴───────────────────────┴────────────────────────┴───────────────────────┴─────┴────────────────────────┴───────────────────────┘
 ```
 
-### Samples data:
+### Custom values data
+
+The `custom_values` data is currently only used for schema -53 (custom bucket boundaries). For other schemas, it is empty (length of zero).
+
+```
+┌──────────────────────────┬──────────────────┬──────────────────┬─────┬──────────────────┐
+│ num_values <varbit_uint> │ value_0 <custom> │ value_1 <custom> │ ... │ value_n <custom> │
+└──────────────────────────┴──────────────────┴──────────────────┴─────┴──────────────────┘
+```
+
+### Samples data
 
 ```
 ┌──────────────────────────┐
 │ sample_0                 │
@@ -93,11 +103,11 @@ Notes:
 ├──────────────────────────┤
 │ sample_1                 │
 ├──────────────────────────┤
 │ sample_2                 │
 ├──────────────────────────┤
 │ ...                      │
 ├──────────────────────────┤
-│ Sample_n                 │
+│ sample_n                 │
 └──────────────────────────┘
 ```
 
-#### Sample 0 data:
+#### Sample 0 data
 
 ```
 ┌─────────────────┬─────────────────────┬──────────────────────────┬───────────────┬───────────────────────────┬─────┬───────────────────────────┬───────────────────────────┬─────┬───────────────────────────┐
 │ ts <varbit_ts>  │ count <varbit_uint> │ zero_count <varbit_uint> │ sum <float64> │ pos_bucket_0 <varbit_int> │ ... │ pos_bucket_n <varbit_int> │ neg_bucket_0 <varbit_int> │ ... │ neg_bucket_n <varbit_int> │
 └─────────────────┴─────────────────────┴──────────────────────────┴───────────────┴───────────────────────────┴─────┴───────────────────────────┴───────────────────────────┴─────┴───────────────────────────┘
 ```
 
@@ -105,15 +115,15 @@ Notes:
-#### Sample 1 data:
+#### Sample 1 data
 
 ```
-┌────────────────────────┬───────────────────────────┬────────────────────────────────┬──────────────────────┬─────────────────────────────────┬─────┬─────────────────────────────────┬─────────────────────────────────┬─────┬─────────────────────────────────┐
-│ ts_delta <varbit_uint> │ count_delta <varbit_uint> │ zero_count_delta <varbit_uint> │ sum_xor <varbit_xor> │ pos_bucket_0_delta <varbit_int> │ ... │ pos_bucket_n_delta <varbit_int> │ neg_bucket_0_delta <varbit_int> │ ... │ neg_bucket_n_delta <varbit_int> │
-└────────────────────────┴───────────────────────────┴────────────────────────────────┴──────────────────────┴─────────────────────────────────┴─────┴─────────────────────────────────┴─────────────────────────────────┴─────┴─────────────────────────────────┘
+┌───────────────────────┬──────────────────────────┬───────────────────────────────┬──────────────────────┬─────────────────────────────────┬─────┬─────────────────────────────────┬─────────────────────────────────┬─────┬─────────────────────────────────┐
+│ ts_delta <varbit_int> │ count_delta <varbit_int> │ zero_count_delta <varbit_int> │ sum_xor <varbit_xor> │ pos_bucket_0_delta <varbit_int> │ ... │ pos_bucket_n_delta <varbit_int> │ neg_bucket_0_delta <varbit_int> │ ... │ neg_bucket_n_delta <varbit_int> │
+└───────────────────────┴──────────────────────────┴───────────────────────────────┴──────────────────────┴─────────────────────────────────┴─────┴─────────────────────────────────┴─────────────────────────────────┴─────┴─────────────────────────────────┘
 ```
 
-#### Sample 2 data and following:
+#### Sample 2 data and following
 
 ```
 ┌─────────────────────┬────────────────────────┬─────────────────────────────┬──────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┐
@@ -121,7 +131,7 @@ Notes:
 └─────────────────────┴────────────────────────┴─────────────────────────────┴──────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┘
 ```
 
-### Notes:
+### Notes
 
 * `histogram_flags` is a byte of which currently only the first two bits are used:
   * `10`: Counter reset between the previous chunk and this one.
@@ -132,7 +142,9 @@ Notes:
   * If 0, it is a single zero byte.
   * If a power of two between 2^-243 and 2^10, it is a single byte between 1 and 254.
   * Otherwise, it is a byte with all bits set (255), followed by a float64, resulting in 9 bytes length (see the sketch after this list).
-* `schema` is a specific value defined by the exposition format. Currently valid values are -4 <= n <= 8.
+* `schema` is a specific value defined by the exposition format. Currently
+  valid values are either -4 <= n <= 8 (standard exponential schemas) or -53
+  (custom bucket boundaries).
 * `<varbit_int>` is a variable bitwidth encoding for signed integers, optimized for “delta of deltas” of bucket deltas. It has between 1 bit and 9 bytes. See [code for details](https://github.com/prometheus/prometheus/blob/8c1507ebaa4ca552958ffb60c2d1b21afb7150e4/tsdb/chunkenc/varbit.go#L31-L60).
 * `<varbit_uint>` is a variable bitwidth encoding for unsigned integers with the same bit-bucketing as `<varbit_int>`.
@@ -143,3 +155,68 @@ Notes:
 * Note that buckets are inherently deltas between the current bucket and the previous bucket. Only `bucket_0` is an absolute count.
 * The chunk can have as few as one sample, i.e. sample 1 and following are optional.
 * Similarly, there could be down to zero spans and down to zero buckets.
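To make the `zero_threshold` cases above concrete, here is a small Go sketch of the 1-or-9-byte scheme. The linear byte mapping for powers of two (2^-243 maps to 1, ..., 2^10 maps to 254) is inferred from the stated range, and `encodeZeroThreshold` is a hypothetical name; `tsdb/chunkenc` holds the authoritative encoder.

```go
package main

import (
	"encoding/binary"
	"fmt"
	"math"
)

// encodeZeroThreshold sketches the variable-length zero_threshold encoding:
// 0 becomes a single zero byte, powers of two in [2^-243, 2^10] become a
// single byte in [1, 254], everything else becomes 255 plus a raw float64.
func encodeZeroThreshold(t float64) []byte {
	if t == 0 {
		return []byte{0}
	}
	// For a power of two t = 2^k, Frexp returns frac == 0.5 and exp == k+1.
	frac, exp := math.Frexp(t)
	if k := exp - 1; frac == 0.5 && k >= -243 && k <= 10 {
		return []byte{byte(k + 244)} // assumed linear mapping onto 1..254
	}
	// Marker byte 255 followed by the raw float64 (endianness assumed here).
	buf := make([]byte, 9)
	buf[0] = 255
	binary.BigEndian.PutUint64(buf[1:], math.Float64bits(t))
	return buf
}

func main() {
	fmt.Println(encodeZeroThreshold(0))          // [0]
	fmt.Println(encodeZeroThreshold(0.25))       // 2^-2 -> [242]
	fmt.Println(len(encodeZeroThreshold(0.001))) // not a power of two -> 9
}
```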
+
+The `<custom>` encoding within the custom values data depends on the schema.
+For schema -53 (custom bucket boundaries, currently the only use case for
+custom values), the values to encode are bucket boundaries in the form of
+floats. The encoding of a given float value _x_ works as follows:
+
+1. Create an intermediate value _y_ = _x_ * 1000.
+2. If 0 ≤ _y_ ≤ 33554430 _and_ if the decimal value of _y_ is integer, store
+   _y_ + 1 as `<varbit_uint>`.
+3. Otherwise, store a 0 bit, followed by the 64 bit of the original _x_
+   encoded as plain `<float64>`.
+
+Note that values stored as per (2) will always start with a 1 bit, which allows
+decoders to recognize this case, in contrast to values stored as per (3), which
+always start with a 0 bit.
+
+The rationale behind this encoding is that most custom bucket boundaries are set
+by humans as decimal numbers with not very many decimal places. In most cases,
+the encoding will therefore result in a short varbit representation. The upper
+bound of 33554430 is picked so that the varbit encoded value will take at most
+4 bytes.
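A hypothetical Go helper that only decides which of the cases above applies to a boundary; it does not implement the varbit bit packing itself, which lives in `tsdb/chunkenc`:

```go
package main

import (
	"fmt"
	"math"
)

// classifyCustomValue reports which branch of the custom-values encoding a
// bucket boundary x takes. Purely illustrative.
func classifyCustomValue(x float64) string {
	y := x * 1000
	if y >= 0 && y <= 33554430 && y == math.Trunc(y) {
		// Case (2): store y+1, which always starts with a 1 bit.
		return fmt.Sprintf("case (2): store %d as <varbit_uint>", uint64(y)+1)
	}
	// Case (3): a 0 bit followed by the original float64.
	return "case (3): store a 0 bit, then x as a plain <float64>"
}

func main() {
	fmt.Println(classifyCustomValue(0.5))    // y = 500, integer -> case (2), stores 501
	fmt.Println(classifyCustomValue(2.5))    // y = 2500, integer -> case (2), stores 2501
	fmt.Println(classifyCustomValue(1e9))    // y = 1e12, too large -> case (3)
	fmt.Println(classifyCustomValue(0.0005)) // y = 0.5, not integer -> case (3)
}
```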
+
+## Float histogram chunk data
+
+Float histograms have the same layout as histograms apart from the encoding of samples.
+
+### Samples data
+
+```
+┌──────────────────────────┐
+│ sample_0                 │
+├──────────────────────────┤
+│ sample_1                 │
+├──────────────────────────┤
+│ sample_2                 │
+├──────────────────────────┤
+│ ...                      │
+├──────────────────────────┤
+│ sample_n                 │
+└──────────────────────────┘
+```
+
+#### Sample 0 data
+
+```
+┌─────────────────┬─────────────────┬──────────────────────┬───────────────┬────────────────────────┬─────┬────────────────────────┬────────────────────────┬─────┬────────────────────────┐
+│ ts <varbit_ts>  │ count <float64> │ zero_count <float64> │ sum <float64> │ pos_bucket_0 <float64> │ ... │ pos_bucket_n <float64> │ neg_bucket_0 <float64> │ ... │ neg_bucket_n <float64> │
+└─────────────────┴─────────────────┴──────────────────────┴───────────────┴────────────────────────┴─────┴────────────────────────┴────────────────────────┴─────┴────────────────────────┘
+```
+
+#### Sample 1 data
+
+```
+┌───────────────────────┬────────────────────────┬─────────────────────────────┬──────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┐
+│ ts_delta <varbit_ts>  │ count_xor <varbit_xor> │ zero_count_xor <varbit_xor> │ sum_xor <varbit_xor> │ pos_bucket_0_xor <varbit_xor> │ ... │ pos_bucket_n_xor <varbit_xor> │ neg_bucket_0_xor <varbit_xor> │ ... │ neg_bucket_n_xor <varbit_xor> │
+└───────────────────────┴────────────────────────┴─────────────────────────────┴──────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┘
+```
+
+#### Sample 2 data and following
+
+```
+┌─────────────────────┬────────────────────────┬─────────────────────────────┬──────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┬───────────────────────────────┬─────┬───────────────────────────────┐
+│ ts_dod <varbit_ts>  │ count_xor <varbit_xor> │ zero_count_xor <varbit_xor> │ sum_xor <varbit_xor> │ pos_bucket_0_xor <varbit_xor> │ ... │ pos_bucket_n_xor <varbit_xor> │ neg_bucket_0_xor <varbit_xor> │ ... │ neg_bucket_n_xor <varbit_xor> │
+└─────────────────────┴────────────────────────┴─────────────────────────────┴──────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┴───────────────────────────────┴─────┴───────────────────────────────┘
+```
diff --git a/tsdb/docs/format/head_chunks.md b/tsdb/docs/format/head_chunks.md
index 5737f42058..7040dcf41a 100644
--- a/tsdb/docs/format/head_chunks.md
+++ b/tsdb/docs/format/head_chunks.md
@@ -37,3 +37,7 @@ is used while replaying the chunks.
 │ series ref <8 byte> │ mint <8 byte, uint64> │ maxt <8 byte, uint64> │ encoding <1 byte> │ len <uvarint> │ data <bytes> │ CRC32 <4 byte> │
 └─────────────────────┴───────────────────────┴───────────────────────┴───────────────────┴───────────────┴──────────────┴────────────────┘
 ```
+
+## OOO encoding
+
+Head chunks use the highest bit of the `encoding` field to indicate whether the chunk is out-of-order (1) or not (0). This bit is not set for chunks in the on-disk blocks.
diff --git a/tsdb/docs/format/wal.md b/tsdb/docs/format/wal.md
index 39be2a808c..db1ce97a8b 100644
--- a/tsdb/docs/format/wal.md
+++ b/tsdb/docs/format/wal.md
@@ -17,7 +17,15 @@ Notable deviations are that the record fragment is encoded as:
 └───────────┴──────────┴────────────┴──────────────┘
 ```
 
-The type flag has the following states:
+The initial type byte is made up of four components: a 3-bit reserved field, a 1-bit zstd compression flag, a 1-bit snappy compression flag, and a 3-bit type flag.
+
+```
+┌─────────────────┬──────────────────┬────────────────────┬──────────────────┐
+│ reserved <3bit> │ zstd_flag <1bit> │ snappy_flag <1bit> │ type_flag <3bit> │
+└─────────────────┴──────────────────┴────────────────────┴──────────────────┘
+```
+
+The lowest 3 bits within this byte represent the record type as follows:
 
 * `0`: rest of page will be empty
 * `1`: a full record encoded in a single fragment
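A minimal sketch of splitting this initial byte into its four components. The mask names mirror the layout above and, to my understanding, match the constants in `tsdb/wlog`; the helper itself is hypothetical:

```go
package main

import "fmt"

const (
	snappyMask  byte = 1 << 3         // snappy_flag
	zstdMask    byte = 1 << 4         // zstd_flag
	recTypeMask byte = snappyMask - 1 // lowest 3 bits: type_flag
)

// decodeTypeByte splits a record fragment's initial byte into the record
// type and the two compression flags described above. The top 3 bits are
// reserved and ignored here.
func decodeTypeByte(b byte) (recType byte, snappy, zstd bool) {
	return b & recTypeMask, b&snappyMask != 0, b&zstdMask != 0
}

func main() {
	// 0b00001001: a snappy-compressed full record (type 1).
	recType, snappy, zstd := decodeTypeByte(0b00001001)
	fmt.Println(recType, snappy, zstd) // 1 true false
}
```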
diff --git a/tsdb/encoding/encoding.go b/tsdb/encoding/encoding.go
index 88fdd30c85..cc7d0990f6 100644
--- a/tsdb/encoding/encoding.go
+++ b/tsdb/encoding/encoding.go
@@ -20,7 +20,6 @@ import (
 	"hash"
 	"hash/crc32"
 	"math"
-	"unsafe"
 
 	"github.com/dennwc/varint"
 )
@@ -75,8 +74,7 @@ func (e *Encbuf) PutVarint64(x int64) {
 
 // PutUvarintStr writes a string to the buffer prefixed by its varint length (in bytes!).
 func (e *Encbuf) PutUvarintStr(s string) {
-	b := *(*[]byte)(unsafe.Pointer(&s))
-	e.PutUvarint(len(b))
+	e.PutUvarint(len(s))
 	e.PutString(s)
 }
 
@@ -201,8 +199,9 @@ func (d *Decbuf) UvarintStr() string {
 	return string(d.UvarintBytes())
 }
 
-// UvarintBytes returns invalid values if the byte slice goes away.
-// Compared to UvarintStr, it avoid allocations.
+// UvarintBytes returns a pointer to internal data;
+// the return value becomes invalid if the byte slice goes away.
+// Compared to UvarintStr, this avoids allocations.
 func (d *Decbuf) UvarintBytes() []byte {
 	l := d.Uvarint64()
 	if d.E != nil {
diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go
index 7545ab9a60..31d461bed9 100644
--- a/tsdb/exemplar.go
+++ b/tsdb/exemplar.go
@@ -29,7 +29,7 @@ import (
 )
 
 const (
-	// Indicates that there is no index entry for an exmplar.
+	// Indicates that there is no index entry for an exemplar.
 	noExemplar = -1
 	// Estimated number of exemplars per series, for sizing the index.
 	estimatedExemplarsPerSeries = 16
@@ -152,13 +152,13 @@ func (ce *CircularExemplarStorage) Querier(_ context.Context) (storage.ExemplarQ
 func (ce *CircularExemplarStorage) Select(start, end int64, matchers ...[]*labels.Matcher) ([]exemplar.QueryResult, error) {
 	ret := make([]exemplar.QueryResult, 0)
 
+	ce.lock.RLock()
+	defer ce.lock.RUnlock()
+
 	if len(ce.exemplars) == 0 {
 		return ret, nil
 	}
 
-	ce.lock.RLock()
-	defer ce.lock.RUnlock()
-
 	// Loop through each index entry, which will point us to first/last exemplar for each series.
 	for _, idx := range ce.index {
 		var se exemplar.QueryResult
@@ -281,13 +281,13 @@ func (ce *CircularExemplarStorage) Resize(l int64) int {
 		l = 0
 	}
 
+	ce.lock.Lock()
+	defer ce.lock.Unlock()
+
 	if l == int64(len(ce.exemplars)) {
 		return 0
 	}
 
-	ce.lock.Lock()
-	defer ce.lock.Unlock()
-
 	oldBuffer := ce.exemplars
 	oldNextIndex := int64(ce.nextIndex)
 
@@ -349,6 +349,11 @@ func (ce *CircularExemplarStorage) migrate(entry *circularBufferEntry, buf []byt
 }
 
 func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemplar) error {
+	// TODO(bwplotka): This lock can lock all scrapers, there might be high contention on this at scale.
+	// Optimize by moving the lock to be per series (& benchmark it).
+	ce.lock.Lock()
+	defer ce.lock.Unlock()
+
 	if len(ce.exemplars) == 0 {
 		return storage.ErrExemplarsDisabled
 	}
@@ -356,11 +361,6 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp
 	var buf [1024]byte
 	seriesLabels := l.Bytes(buf[:])
 
-	// TODO(bwplotka): This lock can lock all scrapers, there might high contention on this on scale.
-	// Optimize by moving the lock to be per series (& benchmark it).
-	ce.lock.Lock()
-	defer ce.lock.Unlock()
-
 	idx, ok := ce.index[string(seriesLabels)]
 	err := ce.validateExemplar(idx, e, true)
 	if err != nil {
diff --git a/tsdb/exemplar_test.go b/tsdb/exemplar_test.go
index 7723ec3894..dbd34cc48c 100644
--- a/tsdb/exemplar_test.go
+++ b/tsdb/exemplar_test.go
@@ -20,6 +20,7 @@ import (
 	"reflect"
 	"strconv"
 	"strings"
+	"sync"
 	"testing"
 
 	"github.com/prometheus/client_golang/prometheus"
@@ -499,3 +500,40 @@ func BenchmarkResizeExemplars(b *testing.B) {
 		})
 	}
 }
+
+// TestCircularExemplarStorage_Concurrent_AddExemplar_Resize tries to provoke a data race between AddExemplar and Resize.
+// Run with race detection enabled.
+func TestCircularExemplarStorage_Concurrent_AddExemplar_Resize(t *testing.T) { + exs, err := NewCircularExemplarStorage(0, eMetrics) + require.NoError(t, err) + es := exs.(*CircularExemplarStorage) + + l := labels.FromStrings("service", "asdf") + e := exemplar.Exemplar{ + Labels: labels.FromStrings("trace_id", "qwerty"), + Value: 0.1, + Ts: 1, + } + + var wg sync.WaitGroup + wg.Add(1) + t.Cleanup(wg.Wait) + + started := make(chan struct{}) + + go func() { + defer wg.Done() + + <-started + for i := 0; i < 100; i++ { + require.NoError(t, es.AddExemplar(l, e)) + } + }() + + for i := 0; i < 100; i++ { + es.Resize(int64(i + 1)) + if i == 0 { + close(started) + } + } +} diff --git a/tsdb/head.go b/tsdb/head.go index b7bfaa0fda..c67c438e52 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math" "path/filepath" "runtime" @@ -25,12 +26,11 @@ import ( "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/oklog/ulid" "go.uber.org/atomic" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/exemplar" @@ -84,7 +84,7 @@ type Head struct { wal, wbl *wlog.WL exemplarMetrics *ExemplarMetrics exemplars ExemplarStorage - logger log.Logger + logger *slog.Logger appendPool zeropool.Pool[[]record.RefSample] exemplarsPool zeropool.Pool[[]exemplarWithSeriesRef] histogramsPool zeropool.Pool[[]record.RefHistogramSample] @@ -128,6 +128,7 @@ type Head struct { writeNotified wlog.WriteNotified memTruncationInProcess atomic.Bool + memTruncationCallBack func() // For testing purposes. } type ExemplarStorage interface { @@ -149,9 +150,10 @@ type HeadOptions struct { // EnableNativeHistograms enables the ingestion of native histograms. EnableNativeHistograms atomic.Bool - // EnableCreatedTimestampZeroIngestion enables the ingestion of the created timestamp as a synthetic zero sample. - // See: https://github.com/prometheus/proposals/blob/main/proposals/2023-06-13_created-timestamp.md - EnableCreatedTimestampZeroIngestion bool + // EnableOOONativeHistograms enables the ingestion of OOO native histograms. + // It will only take effect if EnableNativeHistograms is set to true and the + // OutOfOrderTimeWindow is > 0 + EnableOOONativeHistograms atomic.Bool ChunkRange int64 // ChunkDirRoot is the parent directory of the chunks directory. @@ -221,10 +223,10 @@ type SeriesLifecycleCallback interface { } // NewHead opens the head block in dir. 
-func NewHead(r prometheus.Registerer, l log.Logger, wal, wbl *wlog.WL, opts *HeadOptions, stats *HeadStats) (*Head, error) { +func NewHead(r prometheus.Registerer, l *slog.Logger, wal, wbl *wlog.WL, opts *HeadOptions, stats *HeadStats) (*Head, error) { var err error if l == nil { - l = log.NewNopLogger() + l = promslog.NewNopLogger() } if opts.OutOfOrderTimeWindow.Load() < 0 { @@ -560,7 +562,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { }, func() float64 { val, err := h.chunkDiskMapper.Size() if err != nil { - level.Error(h.logger).Log("msg", "Failed to calculate size of \"chunks_head\" dir", + h.logger.Error("Failed to calculate size of \"chunks_head\" dir", "err", err.Error()) } return float64(val) @@ -623,7 +625,7 @@ func (h *Head) Init(minValidTime int64) error { } }() - level.Info(h.logger).Log("msg", "Replaying on-disk memory mappable chunks if any") + h.logger.Info("Replaying on-disk memory mappable chunks if any") start := time.Now() snapIdx, snapOffset := -1, 0 @@ -632,7 +634,7 @@ func (h *Head) Init(minValidTime int64) error { snapshotLoaded := false var chunkSnapshotLoadDuration time.Duration if h.opts.EnableMemorySnapshotOnShutdown { - level.Info(h.logger).Log("msg", "Chunk snapshot is enabled, replaying from the snapshot") + h.logger.Info("Chunk snapshot is enabled, replaying from the snapshot") // If there are any WAL files, there should be at least one WAL file with an index that is current or newer // than the snapshot index. If the WAL index is behind the snapshot index somehow, the snapshot is assumed // to be outdated. @@ -645,14 +647,14 @@ func (h *Head) Init(minValidTime int64) error { _, idx, _, err := LastChunkSnapshot(h.opts.ChunkDirRoot) if err != nil && !errors.Is(err, record.ErrNotFound) { - level.Error(h.logger).Log("msg", "Could not find last snapshot", "err", err) + h.logger.Error("Could not find last snapshot", "err", err) } if err == nil && endAt < idx { loadSnapshot = false - level.Warn(h.logger).Log("msg", "Last WAL file is behind snapshot, removing snapshots") + h.logger.Warn("Last WAL file is behind snapshot, removing snapshots") if err := DeleteChunkSnapshots(h.opts.ChunkDirRoot, math.MaxInt, math.MaxInt); err != nil { - level.Error(h.logger).Log("msg", "Error while deleting snapshot directories", "err", err) + h.logger.Error("Error while deleting snapshot directories", "err", err) } } } @@ -662,14 +664,14 @@ func (h *Head) Init(minValidTime int64) error { if err == nil { snapshotLoaded = true chunkSnapshotLoadDuration = time.Since(start) - level.Info(h.logger).Log("msg", "Chunk snapshot loading time", "duration", chunkSnapshotLoadDuration.String()) + h.logger.Info("Chunk snapshot loading time", "duration", chunkSnapshotLoadDuration.String()) } if err != nil { snapIdx, snapOffset = -1, 0 refSeries = make(map[chunks.HeadSeriesRef]*memSeries) h.metrics.snapshotReplayErrorTotal.Inc() - level.Error(h.logger).Log("msg", "Failed to load chunk snapshot", "err", err) + h.logger.Error("Failed to load chunk snapshot", "err", err) // We clear the partially loaded data to replay fresh from the WAL. if err := h.resetInMemoryState(); err != nil { return err @@ -693,7 +695,7 @@ func (h *Head) Init(minValidTime int64) error { mmappedChunks, oooMmappedChunks, lastMmapRef, err = h.loadMmappedChunks(refSeries) if err != nil { // TODO(codesome): clear out all m-map chunks here for refSeries. 
- level.Error(h.logger).Log("msg", "Loading on-disk chunks failed", "err", err) + h.logger.Error("Loading on-disk chunks failed", "err", err) var cerr *chunks.CorruptionErr if errors.As(err, &cerr) { h.metrics.mmapChunkCorruptionTotal.Inc() @@ -710,15 +712,15 @@ func (h *Head) Init(minValidTime int64) error { } } mmapChunkReplayDuration = time.Since(mmapChunkReplayStart) - level.Info(h.logger).Log("msg", "On-disk memory mappable chunks replay completed", "duration", mmapChunkReplayDuration.String()) + h.logger.Info("On-disk memory mappable chunks replay completed", "duration", mmapChunkReplayDuration.String()) } if h.wal == nil { - level.Info(h.logger).Log("msg", "WAL not found") + h.logger.Info("WAL not found") return nil } - level.Info(h.logger).Log("msg", "Replaying WAL, this may take a while") + h.logger.Info("Replaying WAL, this may take a while") checkpointReplayStart := time.Now() // Backfill the checkpoint first if it exists. @@ -744,7 +746,7 @@ func (h *Head) Init(minValidTime int64) error { } defer func() { if err := sr.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error while closing the wal segments reader", "err", err) + h.logger.Warn("Error while closing the wal segments reader", "err", err) } }() @@ -755,7 +757,7 @@ func (h *Head) Init(minValidTime int64) error { } h.updateWALReplayStatusRead(startFrom) startFrom++ - level.Info(h.logger).Log("msg", "WAL checkpoint loaded") + h.logger.Info("WAL checkpoint loaded") } checkpointReplayDuration := time.Since(checkpointReplayStart) @@ -785,12 +787,12 @@ func (h *Head) Init(minValidTime int64) error { } err = h.loadWAL(wlog.NewReader(sr), syms, multiRef, mmappedChunks, oooMmappedChunks) if err := sr.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error while closing the wal segments reader", "err", err) + h.logger.Warn("Error while closing the wal segments reader", "err", err) } if err != nil { return err } - level.Info(h.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", endAt) + h.logger.Info("WAL segment loaded", "segment", i, "maxSegment", endAt) h.updateWALReplayStatusRead(i) } walReplayDuration := time.Since(walReplayStart) @@ -813,12 +815,12 @@ func (h *Head) Init(minValidTime int64) error { sr := wlog.NewSegmentBufReader(s) err = h.loadWBL(wlog.NewReader(sr), syms, multiRef, lastMmapRef) if err := sr.Close(); err != nil { - level.Warn(h.logger).Log("msg", "Error while closing the wbl segments reader", "err", err) + h.logger.Warn("Error while closing the wbl segments reader", "err", err) } if err != nil { return &errLoadWbl{err} } - level.Info(h.logger).Log("msg", "WBL segment loaded", "segment", i, "maxSegment", endAt) + h.logger.Info("WBL segment loaded", "segment", i, "maxSegment", endAt) h.updateWALReplayStatusRead(i) } } @@ -827,8 +829,8 @@ func (h *Head) Init(minValidTime int64) error { totalReplayDuration := time.Since(start) h.metrics.dataTotalReplayDuration.Set(totalReplayDuration.Seconds()) - level.Info(h.logger).Log( - "msg", "WAL replay completed", + h.logger.Info( + "WAL replay completed", "checkpoint_replay_duration", checkpointReplayDuration.String(), "wal_replay_duration", walReplayDuration.String(), "wbl_replay_duration", wblReplayDuration.String(), @@ -938,28 +940,28 @@ func (h *Head) loadMmappedChunks(refSeries map[chunks.HeadSeriesRef]*memSeries) // removeCorruptedMmappedChunks attempts to delete the corrupted mmapped chunks and if it fails, it clears all the previously // loaded mmapped chunks. 
func (h *Head) removeCorruptedMmappedChunks(err error) (map[chunks.HeadSeriesRef][]*mmappedChunk, map[chunks.HeadSeriesRef][]*mmappedChunk, chunks.ChunkDiskMapperRef, error) { - level.Info(h.logger).Log("msg", "Deleting mmapped chunk files") + h.logger.Info("Deleting mmapped chunk files") // We never want to preserve the in-memory series from snapshots if we are repairing m-map chunks. if err := h.resetInMemoryState(); err != nil { return map[chunks.HeadSeriesRef][]*mmappedChunk{}, map[chunks.HeadSeriesRef][]*mmappedChunk{}, 0, err } - level.Info(h.logger).Log("msg", "Deleting mmapped chunk files") + h.logger.Info("Deleting mmapped chunk files") if err := h.chunkDiskMapper.DeleteCorrupted(err); err != nil { - level.Info(h.logger).Log("msg", "Deletion of corrupted mmap chunk files failed, discarding chunk files completely", "err", err) + h.logger.Info("Deletion of corrupted mmap chunk files failed, discarding chunk files completely", "err", err) if err := h.chunkDiskMapper.Truncate(math.MaxUint32); err != nil { - level.Error(h.logger).Log("msg", "Deletion of all mmap chunk files failed", "err", err) + h.logger.Error("Deletion of all mmap chunk files failed", "err", err) } return map[chunks.HeadSeriesRef][]*mmappedChunk{}, map[chunks.HeadSeriesRef][]*mmappedChunk{}, 0, nil } - level.Info(h.logger).Log("msg", "Deletion of mmap chunk files successful, reattempting m-mapping the on-disk chunks") + h.logger.Info("Deletion of mmap chunk files successful, reattempting m-mapping the on-disk chunks") mmappedChunks, oooMmappedChunks, lastRef, err := h.loadMmappedChunks(make(map[chunks.HeadSeriesRef]*memSeries)) if err != nil { - level.Error(h.logger).Log("msg", "Loading on-disk chunks failed, discarding chunk files completely", "err", err) + h.logger.Error("Loading on-disk chunks failed, discarding chunk files completely", "err", err) if err := h.chunkDiskMapper.Truncate(math.MaxUint32); err != nil { - level.Error(h.logger).Log("msg", "Deletion of all mmap chunk files failed after failed loading", "err", err) + h.logger.Error("Deletion of all mmap chunk files failed after failed loading", "err", err) } mmappedChunks = map[chunks.HeadSeriesRef][]*mmappedChunk{} } @@ -994,7 +996,7 @@ func (h *Head) ApplyConfig(cfg *config.Config, wbl *wlog.WL) { } migrated := h.exemplars.(*CircularExemplarStorage).Resize(newSize) - level.Info(h.logger).Log("msg", "Exemplar storage resized", "from", prevSize, "to", newSize, "migrated", migrated) + h.logger.Info("Exemplar storage resized", "from", prevSize, "to", newSize, "migrated", migrated) } // SetOutOfOrderTimeWindow updates the out of order related parameters. @@ -1017,6 +1019,16 @@ func (h *Head) DisableNativeHistograms() { h.opts.EnableNativeHistograms.Store(false) } +// EnableOOONativeHistograms enables the ingestion of out-of-order native histograms. +func (h *Head) EnableOOONativeHistograms() { + h.opts.EnableOOONativeHistograms.Store(true) +} + +// DisableOOONativeHistograms disables the ingestion of out-of-order native histograms. +func (h *Head) DisableOOONativeHistograms() { + h.opts.EnableOOONativeHistograms.Store(false) +} + // PostingsCardinalityStats returns highest cardinality stats by label and value names. 
func (h *Head) PostingsCardinalityStats(statsByLabelName string, limit int) *index.PostingsStats { cacheKey := statsByLabelName + ";" + strconv.Itoa(limit) @@ -1129,6 +1141,10 @@ func (h *Head) truncateMemory(mint int64) (err error) { h.memTruncationInProcess.Store(true) defer h.memTruncationInProcess.Store(false) + if h.memTruncationCallBack != nil { + h.memTruncationCallBack() + } + // We wait for pending queries to end that overlap with this truncation. if initialized { h.WaitForPendingReadersInTimeRange(h.MinTime(), mint) @@ -1291,7 +1307,7 @@ func (h *Head) truncateWAL(mint int64) error { // If truncating fails, we'll just try again at the next checkpoint. // Leftover segments will just be ignored in the future if there's a checkpoint // that supersedes them. - level.Error(h.logger).Log("msg", "truncating segments failed", "err", err) + h.logger.Error("truncating segments failed", "err", err) } // The checkpoint is written and segments before it is truncated, so we no @@ -1309,12 +1325,12 @@ func (h *Head) truncateWAL(mint int64) error { // Leftover old checkpoints do not cause problems down the line beyond // occupying disk space. // They will just be ignored since a higher checkpoint exists. - level.Error(h.logger).Log("msg", "delete old checkpoints", "err", err) + h.logger.Error("delete old checkpoints", "err", err) h.metrics.checkpointDeleteFail.Inc() } h.metrics.walTruncateDuration.Observe(time.Since(start).Seconds()) - level.Info(h.logger).Log("msg", "WAL checkpoint complete", + h.logger.Info("WAL checkpoint complete", "first", first, "last", last, "duration", time.Since(start)) return nil @@ -1352,7 +1368,7 @@ func (h *Head) truncateSeriesAndChunkDiskMapper(caller string) error { start := time.Now() headMaxt := h.MaxTime() actualMint, minOOOTime, minMmapFile := h.gc() - level.Info(h.logger).Log("msg", "Head GC completed", "caller", caller, "duration", time.Since(start)) + h.logger.Info("Head GC completed", "caller", caller, "duration", time.Since(start)) h.metrics.gcDuration.Observe(time.Since(start).Seconds()) if actualMint > h.minTime.Load() { @@ -1504,7 +1520,7 @@ func (h *Head) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Match series := h.series.getByID(chunks.HeadSeriesRef(p.At())) if series == nil { - level.Debug(h.logger).Log("msg", "Series not found in Head.Delete") + h.logger.Debug("Series not found in Head.Delete") continue } @@ -2061,6 +2077,17 @@ func (s sample) Type() chunkenc.ValueType { } } +func (s sample) Copy() chunks.Sample { + c := sample{t: s.t, f: s.f} + if s.h != nil { + c.h = s.h.Copy() + } + if s.fh != nil { + c.fh = s.fh.Copy() + } + return c +} + // memSeries is the in-memory representation of a series. None of its methods // are goroutine safe and it is the caller's responsibility to lock it. type memSeries struct { @@ -2085,7 +2112,7 @@ type memSeries struct { // before compaction: mmappedChunks=[p5,p6,p7,p8,p9] firstChunkID=5 // after compaction: mmappedChunks=[p7,p8,p9] firstChunkID=7 // - // pN is the pointer to the mmappedChunk referered to by HeadChunkID=N + // pN is the pointer to the mmappedChunk referred to by HeadChunkID=N mmappedChunks []*mmappedChunk // Most recent chunks in memory that are still being built or waiting to be mmapped. 
// This is a linked list, headChunks points to the most recent chunk, headChunks.next points diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 988ce9397e..ea2a163f26 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -17,11 +17,9 @@ import ( "context" "errors" "fmt" + "log/slog" "math" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -42,6 +40,12 @@ type initAppender struct { var _ storage.GetRef = &initAppender{} +func (a *initAppender) SetOptions(opts *storage.AppendOptions) { + if a.app != nil { + a.app.SetOptions(opts) + } +} + func (a *initAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { if a.app != nil { return a.app.Append(ref, lset, t, v) @@ -79,6 +83,16 @@ func (a *initAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels, t return a.app.AppendHistogram(ref, l, t, h, fh) } +func (a *initAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) { + if a.app != nil { + return a.app.AppendHistogramCTZeroSample(ref, l, t, ct, h, fh) + } + a.head.initTime(t) + a.app = a.head.appender() + + return a.app.AppendHistogramCTZeroSample(ref, l, t, ct, h, fh) +} + func (a *initAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) { if a.app != nil { return a.app.UpdateMetadata(ref, l, m) @@ -318,11 +332,16 @@ type headAppender struct { appendID, cleanupAppendIDsBelow uint64 closed bool + hints *storage.AppendOptions +} + +func (a *headAppender) SetOptions(opts *storage.AppendOptions) { + a.hints = opts } func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) { - // For OOO inserts, this restriction is irrelevant and will be checked later once we confirm the sample is an in-order append. - // If OOO inserts are disabled, we may as well as check this as early as we can and avoid more work. + // Fail fast if OOO is disabled and the sample is out of bounds. + // Otherwise a full check will be done later to decide if the sample is in-order or out-of-order. if a.oooTimeWindow == 0 && t < a.minValidTime { a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Inc() return 0, storage.ErrOutOfBounds @@ -331,29 +350,38 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 s := a.head.series.getByID(chunks.HeadSeriesRef(ref)) if s == nil { var err error - s, err = a.getOrCreate(lset) + s, _, err = a.getOrCreate(lset) if err != nil { return 0, err } } + s.Lock() if value.IsStaleNaN(v) { + // TODO(krajorama): reorganize Commit() to handle samples in append order + // not floats first and then histograms. Then we could do this conversion + // in commit. This code should move into Commit(). switch { case s.lastHistogramValue != nil: + s.Unlock() return a.AppendHistogram(ref, lset, t, &histogram.Histogram{Sum: v}, nil) case s.lastFloatHistogramValue != nil: + s.Unlock() return a.AppendHistogram(ref, lset, t, nil, &histogram.FloatHistogram{Sum: v}) } } - s.Lock() + defer s.Unlock() // TODO(codesome): If we definitely know at this point that the sample is ooo, then optimise // to skip that sample from the WAL and write only in the WBL. 
- _, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow) + isOOO, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow) if err == nil { + if isOOO && a.hints != nil && a.hints.DiscardOutOfOrder { + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Inc() + return 0, storage.ErrOutOfOrderSample + } s.pendingCommit = true } - s.Unlock() if delta > 0 { a.head.metrics.oooHistogram.Observe(float64(delta) / 1000) } @@ -388,13 +416,13 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64 // storage.CreatedTimestampAppender.AppendCTZeroSample for further documentation. func (a *headAppender) AppendCTZeroSample(ref storage.SeriesRef, lset labels.Labels, t, ct int64) (storage.SeriesRef, error) { if ct >= t { - return 0, fmt.Errorf("CT is newer or the same as sample's timestamp, ignoring") + return 0, storage.ErrCTNewerThanSample } s := a.head.series.getByID(chunks.HeadSeriesRef(ref)) if s == nil { var err error - s, err = a.getOrCreate(lset) + s, _, err = a.getOrCreate(lset) if err != nil { return 0, err } @@ -424,20 +452,18 @@ func (a *headAppender) AppendCTZeroSample(ref storage.SeriesRef, lset labels.Lab return storage.SeriesRef(s.ref), nil } -func (a *headAppender) getOrCreate(lset labels.Labels) (*memSeries, error) { +func (a *headAppender) getOrCreate(lset labels.Labels) (s *memSeries, created bool, err error) { // Ensure no empty labels have gotten through. lset = lset.WithoutEmpty() if lset.IsEmpty() { - return nil, fmt.Errorf("empty labelset: %w", ErrInvalidSample) + return nil, false, fmt.Errorf("empty labelset: %w", ErrInvalidSample) } if l, dup := lset.HasDuplicateLabelNames(); dup { - return nil, fmt.Errorf(`label name "%s" is not unique: %w`, l, ErrInvalidSample) + return nil, false, fmt.Errorf(`label name "%s" is not unique: %w`, l, ErrInvalidSample) } - var created bool - var err error - s, created, err := a.head.getOrCreate(lset.Hash(), lset) + s, created, err = a.head.getOrCreate(lset.Hash(), lset) if err != nil { - return nil, err + return nil, false, err } if created { a.series = append(a.series, record.RefSeries{ @@ -445,12 +471,13 @@ func (a *headAppender) getOrCreate(lset labels.Labels) (*memSeries, error) { Labels: lset, }) } - return s, nil + return s, created, nil } -// appendable checks whether the given sample is valid for appending to the series. (if we return false and no error) -// The sample belongs to the out of order chunk if we return true and no error. -// An error signifies the sample cannot be handled. +// appendable checks whether the given sample is valid for appending to the series. +// If the sample is valid and in-order, it returns false with no error. +// If the sample belongs to the out-of-order chunk, it returns true with no error. +// If the sample cannot be handled, it returns an error. func (s *memSeries) appendable(t int64, v float64, headMaxt, minValidTime, oooTimeWindow int64) (isOOO bool, oooDelta int64, err error) { // Check if we can append in the in-order chunk. if t >= minValidTime { @@ -493,46 +520,94 @@ func (s *memSeries) appendable(t int64, v float64, headMaxt, minValidTime, oooTi return false, headMaxt - t, storage.ErrOutOfOrderSample } -// appendableHistogram checks whether the given histogram is valid for appending to the series. 
-func (s *memSeries) appendableHistogram(t int64, h *histogram.Histogram) error {
-	if s.headChunks == nil {
-		return nil
+// appendableHistogram checks whether the given histogram sample is valid for appending to the series.
+// If the sample is valid and in-order, it returns false with no error.
+// If the sample belongs to the out-of-order chunk, it returns true with no error.
+// If the sample cannot be handled, it returns an error.
+func (s *memSeries) appendableHistogram(t int64, h *histogram.Histogram, headMaxt, minValidTime, oooTimeWindow int64, oooHistogramsEnabled bool) (isOOO bool, oooDelta int64, err error) {
+	// Check if we can append in the in-order chunk.
+	if t >= minValidTime {
+		if s.headChunks == nil {
+			// The series has no sample and was freshly created.
+			return false, 0, nil
+		}
+		msMaxt := s.maxTime()
+		if t > msMaxt {
+			return false, 0, nil
+		}
+		if t == msMaxt {
+			// We are allowing exact duplicates as we can encounter them in valid cases
+			// like federation and erroring out at that time would be extremely noisy.
+			// This only checks against the latest in-order sample.
+			// The OOO headchunk has its own method to detect these duplicates.
+			if !h.Equals(s.lastHistogramValue) {
+				return false, 0, storage.ErrDuplicateSampleForTimestamp
+			}
+			// Sample is identical (ts + value) with most current (highest ts) sample in sampleBuf.
+			return false, 0, nil
+		}
 	}
-	if t > s.headChunks.maxTime {
-		return nil
-	}
-	if t < s.headChunks.maxTime {
-		return storage.ErrOutOfOrderSample
+	// The sample cannot go in the in-order chunk. Check if it can go in the out-of-order chunk.
+	if oooTimeWindow > 0 && t >= headMaxt-oooTimeWindow {
+		if !oooHistogramsEnabled {
+			return true, headMaxt - t, storage.ErrOOONativeHistogramsDisabled
+		}
+		return true, headMaxt - t, nil
 	}
-	// We are allowing exact duplicates as we can encounter them in valid cases
-	// like federation and erroring out at that time would be extremely noisy.
-	if !h.Equals(s.lastHistogramValue) {
-		return storage.ErrDuplicateSampleForTimestamp
+	// The sample cannot go in both in-order and out-of-order chunk.
+	if oooTimeWindow > 0 {
+		return true, headMaxt - t, storage.ErrTooOldSample
 	}
-	return nil
+	if t < minValidTime {
+		return false, headMaxt - t, storage.ErrOutOfBounds
+	}
+	return false, headMaxt - t, storage.ErrOutOfOrderSample
 }
 
-// appendableFloatHistogram checks whether the given float histogram is valid for appending to the series.
-func (s *memSeries) appendableFloatHistogram(t int64, fh *histogram.FloatHistogram) error {
-	if s.headChunks == nil {
-		return nil
+// appendableFloatHistogram checks whether the given float histogram sample is valid for appending to the series.
+// If the sample is valid and in-order, it returns false with no error.
+// If the sample belongs to the out-of-order chunk, it returns true with no error.
+// If the sample cannot be handled, it returns an error.
+func (s *memSeries) appendableFloatHistogram(t int64, fh *histogram.FloatHistogram, headMaxt, minValidTime, oooTimeWindow int64, oooHistogramsEnabled bool) (isOOO bool, oooDelta int64, err error) {
+	// Check if we can append in the in-order chunk.
+	if t >= minValidTime {
+		if s.headChunks == nil {
+			// The series has no sample and was freshly created.
+			return false, 0, nil
+		}
+		msMaxt := s.maxTime()
+		if t > msMaxt {
+			return false, 0, nil
+		}
+		if t == msMaxt {
+			// We are allowing exact duplicates as we can encounter them in valid cases
+			// like federation and erroring out at that time would be extremely noisy.
+			// This only checks against the latest in-order sample.
+ // The OOO headchunk has its own method to detect these duplicates. + if !fh.Equals(s.lastFloatHistogramValue) { + return false, 0, storage.ErrDuplicateSampleForTimestamp + } + // Sample is identical (ts + value) with most current (highest ts) sample in sampleBuf. + return false, 0, nil + } } - if t > s.headChunks.maxTime { - return nil - } - if t < s.headChunks.maxTime { - return storage.ErrOutOfOrderSample + // The sample cannot go in the in-order chunk. Check if it can go in the out-of-order chunk. + if oooTimeWindow > 0 && t >= headMaxt-oooTimeWindow { + if !oooHistogramsEnabled { + return true, headMaxt - t, storage.ErrOOONativeHistogramsDisabled + } + return true, headMaxt - t, nil } - // We are allowing exact duplicates as we can encounter them in valid cases - // like federation and erroring out at that time would be extremely noisy. - if !fh.Equals(s.lastFloatHistogramValue) { - return storage.ErrDuplicateSampleForTimestamp + // The sample cannot go in both in-order and out-of-order chunk. + if oooTimeWindow > 0 { + return true, headMaxt - t, storage.ErrTooOldSample } - return nil + if t < minValidTime { + return false, headMaxt - t, storage.ErrOutOfBounds + } + return false, headMaxt - t, storage.ErrOutOfOrderSample } // AppendExemplar for headAppender assumes the series ref already exists, and so it doesn't @@ -577,7 +652,9 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels return 0, storage.ErrNativeHistogramsDisabled } - if t < a.minValidTime { + // Fail fast if OOO is disabled and the sample is out of bounds. + // Otherwise a full check will be done later to decide if the sample is in-order or out-of-order. + if (a.oooTimeWindow == 0 || !a.head.opts.EnableOOONativeHistograms.Load()) && t < a.minValidTime { a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeHistogram).Inc() return 0, storage.ErrOutOfBounds } @@ -594,50 +671,48 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels } } + var created bool s := a.head.series.getByID(chunks.HeadSeriesRef(ref)) if s == nil { - // Ensure no empty labels have gotten through. - lset = lset.WithoutEmpty() - if lset.IsEmpty() { - return 0, fmt.Errorf("empty labelset: %w", ErrInvalidSample) - } - - if l, dup := lset.HasDuplicateLabelNames(); dup { - return 0, fmt.Errorf(`label name "%s" is not unique: %w`, l, ErrInvalidSample) - } - - var created bool var err error - s, created, err = a.head.getOrCreate(lset.Hash(), lset) + s, created, err = a.getOrCreate(lset) if err != nil { return 0, err } - if created { - switch { - case h != nil: - s.lastHistogramValue = &histogram.Histogram{} - case fh != nil: - s.lastFloatHistogramValue = &histogram.FloatHistogram{} - } - a.series = append(a.series, record.RefSeries{ - Ref: s.ref, - Labels: lset, - }) - } } switch { case h != nil: s.Lock() - if err := s.appendableHistogram(t, h); err != nil { - s.Unlock() - if errors.Is(err, storage.ErrOutOfOrderSample) { + + // TODO(krajorama): reorganize Commit() to handle samples in append order + // not floats first and then histograms. Then we would not need to do this. + // This whole "if" should be removed. + if created && s.lastHistogramValue == nil && s.lastFloatHistogramValue == nil { + s.lastHistogramValue = &histogram.Histogram{} + } + + // TODO(codesome): If we definitely know at this point that the sample is ooo, then optimise + // to skip that sample from the WAL and write only in the WBL. 
+		_, delta, err := s.appendableHistogram(t, h, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load())
+		if err == nil {
+			s.pendingCommit = true
+		}
+		s.Unlock()
+		if delta > 0 {
+			a.head.metrics.oooHistogram.Observe(float64(delta) / 1000)
+		}
+		if err != nil {
+			switch {
+			case errors.Is(err, storage.ErrOutOfOrderSample):
+				fallthrough
+			case errors.Is(err, storage.ErrOOONativeHistogramsDisabled):
 				a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Inc()
+			case errors.Is(err, storage.ErrTooOldSample):
+				a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeHistogram).Inc()
 			}
 			return 0, err
 		}
-		s.pendingCommit = true
-		s.Unlock()
 		a.histograms = append(a.histograms, record.RefHistogramSample{
 			Ref: s.ref,
 			T:   t,
@@ -646,15 +721,35 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels
 		a.histogramSeries = append(a.histogramSeries, s)
 	case fh != nil:
 		s.Lock()
-		if err := s.appendableFloatHistogram(t, fh); err != nil {
-			s.Unlock()
-			if errors.Is(err, storage.ErrOutOfOrderSample) {
+
+		// TODO(krajorama): reorganize Commit() to handle samples in append order
+		// not floats first and then histograms. Then we would not need to do this.
+		// This whole "if" should be removed.
+		if created && s.lastHistogramValue == nil && s.lastFloatHistogramValue == nil {
+			s.lastFloatHistogramValue = &histogram.FloatHistogram{}
+		}
+
+		// TODO(codesome): If we definitely know at this point that the sample is ooo, then optimise
+		// to skip that sample from the WAL and write only in the WBL.
+		_, delta, err := s.appendableFloatHistogram(t, fh, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load())
+		if err == nil {
+			s.pendingCommit = true
+		}
+		s.Unlock()
+		if delta > 0 {
+			a.head.metrics.oooHistogram.Observe(float64(delta) / 1000)
+		}
+		if err != nil {
+			switch {
+			case errors.Is(err, storage.ErrOutOfOrderSample):
+				fallthrough
+			case errors.Is(err, storage.ErrOOONativeHistogramsDisabled):
 				a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Inc()
+			case errors.Is(err, storage.ErrTooOldSample):
+				a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeHistogram).Inc()
 			}
 			return 0, err
 		}
-		s.pendingCommit = true
-		s.Unlock()
 		a.floatHistograms = append(a.floatHistograms, record.RefFloatHistogramSample{
 			Ref: s.ref,
 			T:   t,
@@ -673,6 +768,102 @@ func (a *headAppender) AppendHistogram(ref storage.SeriesRef, lset labels.Labels
 	return storage.SeriesRef(s.ref), nil
 }
 
+func (a *headAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, lset labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) {
+	if !a.head.opts.EnableNativeHistograms.Load() {
+		return 0, storage.ErrNativeHistogramsDisabled
+	}
+
+	if ct >= t {
+		return 0, storage.ErrCTNewerThanSample
+	}
+
+	var created bool
+	s := a.head.series.getByID(chunks.HeadSeriesRef(ref))
+	if s == nil {
+		var err error
+		s, created, err = a.getOrCreate(lset)
+		if err != nil {
+			return 0, err
+		}
+	}
+
+	switch {
+	case h != nil:
+		zeroHistogram := &histogram.Histogram{}
+		s.Lock()
+
+		// TODO(krajorama): reorganize Commit() to handle samples in append order
+		// not floats first and then histograms. Then we would not need to do this.
+		// This whole "if" should be removed.
+ if created && s.lastHistogramValue == nil && s.lastFloatHistogramValue == nil {
+ s.lastHistogramValue = zeroHistogram
+ }
+
+ // Although we call `appendableHistogram` with oooHistogramsEnabled=true, for CTZeroSamples OOO is not allowed.
+ // We set it to true to make this implementation as close as possible to the float implementation.
+ isOOO, _, err := s.appendableHistogram(ct, zeroHistogram, a.headMaxt, a.minValidTime, a.oooTimeWindow, true)
+ if err != nil {
+ s.Unlock()
+ if errors.Is(err, storage.ErrOutOfOrderSample) {
+ return 0, storage.ErrOutOfOrderCT
+ }
+ return 0, err
+ }
+ // OOO is not allowed because after the first scrape, CT will be the same for most (if not all) future samples.
+ // This is to prevent the injected zero from being marked as OOO forever.
+ if isOOO {
+ s.Unlock()
+ return 0, storage.ErrOutOfOrderCT
+ }
+ s.pendingCommit = true
+ s.Unlock()
+ a.histograms = append(a.histograms, record.RefHistogramSample{
+ Ref: s.ref,
+ T: ct,
+ H: zeroHistogram,
+ })
+ a.histogramSeries = append(a.histogramSeries, s)
+ case fh != nil:
+ zeroFloatHistogram := &histogram.FloatHistogram{}
+ s.Lock()
+
+ // TODO(krajorama): reorganize Commit() to handle samples in append order
+ // not floats first and then histograms. Then we would not need to do this.
+ // This whole "if" should be removed.
+ if created && s.lastHistogramValue == nil && s.lastFloatHistogramValue == nil {
+ s.lastFloatHistogramValue = zeroFloatHistogram
+ }
+
+ // Although we call `appendableFloatHistogram` with oooHistogramsEnabled=true, for CTZeroSamples OOO is not allowed.
+ // We set it to true to make this implementation as close as possible to the float implementation.
+ isOOO, _, err := s.appendableFloatHistogram(ct, zeroFloatHistogram, a.headMaxt, a.minValidTime, a.oooTimeWindow, true)
+ if err != nil {
+ s.Unlock()
+ if errors.Is(err, storage.ErrOutOfOrderSample) {
+ return 0, storage.ErrOutOfOrderCT
+ }
+ return 0, err
+ }
+ // OOO is not allowed because after the first scrape, CT will be the same for most (if not all) future samples.
+ // This is to prevent the injected zero from being marked as OOO forever.
+ if isOOO {
+ s.Unlock()
+ return 0, storage.ErrOutOfOrderCT
+ }
+ s.pendingCommit = true
+ s.Unlock()
+ a.floatHistograms = append(a.floatHistograms, record.RefFloatHistogramSample{
+ Ref: s.ref,
+ T: ct,
+ FH: zeroFloatHistogram,
+ })
+ a.floatHistogramSeries = append(a.floatHistogramSeries, s)
+ }
+
+ if ct > a.maxt {
+ a.maxt = ct
+ }
+ return storage.SeriesRef(s.ref), nil
+}
+
 // UpdateMetadata for headAppender assumes the series ref already exists, and so it doesn't
 // use getOrCreate or make any of the lset sanity checks that Append does.
 func (a *headAppender) UpdateMetadata(ref storage.SeriesRef, lset labels.Labels, meta metadata.Metadata) (storage.SeriesRef, error) {
@@ -793,6 +984,427 @@ func exemplarsForEncoding(es []exemplarWithSeriesRef) []record.RefExemplar {
 return ret
 }
 
+type appenderCommitContext struct {
+ floatsAppended int
+ histogramsAppended int
+ // Number of samples out of order but accepted: with ooo enabled and within time window.
+ oooFloatsAccepted int
+ oooHistogramAccepted int
+ // Number of samples rejected due to: out of order but OOO support disabled.
+ floatOOORejected int
+ histoOOORejected int
+ // Number of samples rejected due to: out of order but too old (OOO support enabled, but outside time window).
+ floatTooOldRejected int + histoTooOldRejected int + // Number of samples rejected due to: out of bounds: with t < minValidTime (OOO support disabled). + floatOOBRejected int + histoOOBRejected int + inOrderMint int64 + inOrderMaxt int64 + oooMinT int64 + oooMaxT int64 + wblSamples []record.RefSample + wblHistograms []record.RefHistogramSample + wblFloatHistograms []record.RefFloatHistogramSample + oooMmapMarkers map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef + oooMmapMarkersCount int + oooRecords [][]byte + oooCapMax int64 + appendChunkOpts chunkOpts + enc record.Encoder +} + +// commitExemplars adds all exemplars from headAppender to the head's exemplar storage. +func (a *headAppender) commitExemplars() { + // No errors logging to WAL, so pass the exemplars along to the in memory storage. + for _, e := range a.exemplars { + s := a.head.series.getByID(chunks.HeadSeriesRef(e.ref)) + if s == nil { + // This is very unlikely to happen, but we have seen it in the wild. + // It means that the series was truncated between AppendExemplar and Commit. + // See TestHeadCompactionWhileAppendAndCommitExemplar. + continue + } + // We don't instrument exemplar appends here, all is instrumented by storage. + if err := a.head.exemplars.AddExemplar(s.labels(), e.exemplar); err != nil { + if errors.Is(err, storage.ErrOutOfOrderExemplar) { + continue + } + a.head.logger.Debug("Unknown error while adding exemplar", "err", err) + } + } +} + +func (acc *appenderCommitContext) collectOOORecords(a *headAppender) { + if a.head.wbl == nil { + // WBL is not enabled. So no need to collect. + acc.wblSamples = nil + acc.wblHistograms = nil + acc.wblFloatHistograms = nil + acc.oooMmapMarkers = nil + acc.oooMmapMarkersCount = 0 + return + } + + // The m-map happens before adding a new sample. So we collect + // the m-map markers first, and then samples. + // WBL Graphically: + // WBL Before this Commit(): [old samples before this commit for chunk 1] + // WBL After this Commit(): [old samples before this commit for chunk 1][new samples in this commit for chunk 1]mmapmarker1[samples for chunk 2]mmapmarker2[samples for chunk 3] + if acc.oooMmapMarkers != nil { + markers := make([]record.RefMmapMarker, 0, acc.oooMmapMarkersCount) + for ref, mmapRefs := range acc.oooMmapMarkers { + for _, mmapRef := range mmapRefs { + markers = append(markers, record.RefMmapMarker{ + Ref: ref, + MmapRef: mmapRef, + }) + } + } + r := acc.enc.MmapMarkers(markers, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + + if len(acc.wblSamples) > 0 { + r := acc.enc.Samples(acc.wblSamples, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + if len(acc.wblHistograms) > 0 { + r := acc.enc.HistogramSamples(acc.wblHistograms, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + if len(acc.wblFloatHistograms) > 0 { + r := acc.enc.FloatHistogramSamples(acc.wblFloatHistograms, a.head.getBytesBuffer()) + acc.oooRecords = append(acc.oooRecords, r) + } + + acc.wblSamples = nil + acc.wblHistograms = nil + acc.wblFloatHistograms = nil + acc.oooMmapMarkers = nil +} + +// handleAppendableError processes errors encountered during sample appending and updates +// the provided counters accordingly. +// +// Parameters: +// - err: The error encountered during appending. +// - appended: Pointer to the counter tracking the number of successfully appended samples. +// - oooRejected: Pointer to the counter tracking the number of out-of-order samples rejected. 
+// - oobRejected: Pointer to the counter tracking the number of out-of-bounds samples rejected. +// - tooOldRejected: Pointer to the counter tracking the number of too-old samples rejected. +func handleAppendableError(err error, appended, oooRejected, oobRejected, tooOldRejected *int) { + switch { + case errors.Is(err, storage.ErrOutOfOrderSample): + *appended-- + *oooRejected++ + case errors.Is(err, storage.ErrOutOfBounds): + *appended-- + *oobRejected++ + case errors.Is(err, storage.ErrTooOldSample): + *appended-- + *tooOldRejected++ + default: + *appended-- + } +} + +// commitSamples processes and commits the samples in the headAppender to the series. +// It handles both in-order and out-of-order samples, updating the appenderCommitContext +// with the results of the append operations. +// +// The function iterates over the samples in the headAppender and attempts to append each sample +// to its corresponding series. It handles various error cases such as out-of-order samples, +// out-of-bounds samples, and too-old samples, updating the appenderCommitContext accordingly. +// +// For out-of-order samples, it checks if the sample can be inserted into the series and updates +// the out-of-order mmap markers if necessary. It also updates the write-ahead log (WBL) samples +// and the minimum and maximum timestamps for out-of-order samples. +// +// For in-order samples, it attempts to append the sample to the series and updates the minimum +// and maximum timestamps for in-order samples. +// +// The function also increments the chunk metrics if a new chunk is created and performs cleanup +// operations on the series after appending the samples. +// +// There are also specific functions to commit histograms and float histograms. +func (a *headAppender) commitSamples(acc *appenderCommitContext) { + var ok, chunkCreated bool + var series *memSeries + + for i, s := range a.samples { + series = a.sampleSeries[i] + series.Lock() + + oooSample, _, err := series.appendable(s.T, s.V, a.headMaxt, a.minValidTime, a.oooTimeWindow) + if err != nil { + handleAppendableError(err, &acc.floatsAppended, &acc.floatOOORejected, &acc.floatOOBRejected, &acc.floatTooOldRejected) + } + + switch { + case err != nil: + // Do nothing here. + case oooSample: + // Sample is OOO and OOO handling is enabled + // and the delta is within the OOO tolerance. + var mmapRefs []chunks.ChunkDiskMapperRef + ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger) + if chunkCreated { + r, ok := acc.oooMmapMarkers[series.ref] + if !ok || r != nil { + // !ok means there are no markers collected for these samples yet. So we first flush the samples + // before setting this m-map marker. + + // r != nil means we have already m-mapped a chunk for this series in the same Commit(). + // Hence, before we m-map again, we should add the samples and m-map markers + // seen till now to the WBL records. + acc.collectOOORecords(a) + } + + if acc.oooMmapMarkers == nil { + acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) + } + if len(mmapRefs) > 0 { + acc.oooMmapMarkers[series.ref] = mmapRefs + acc.oooMmapMarkersCount += len(mmapRefs) + } else { + // No chunk was written to disk, so we need to set an initial marker for this series. 
+ acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0}
+ acc.oooMmapMarkersCount++
+ }
+ }
+ if ok {
+ acc.wblSamples = append(acc.wblSamples, s)
+ if s.T < acc.oooMinT {
+ acc.oooMinT = s.T
+ }
+ if s.T > acc.oooMaxT {
+ acc.oooMaxT = s.T
+ }
+ acc.oooFloatsAccepted++
+ } else {
+ // Sample is an exact duplicate of the last sample.
+ // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk,
+ // not with samples in already flushed OOO chunks.
+ // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305.
+ acc.floatsAppended--
+ }
+ default:
+ ok, chunkCreated = series.append(s.T, s.V, a.appendID, acc.appendChunkOpts)
+ if ok {
+ if s.T < acc.inOrderMint {
+ acc.inOrderMint = s.T
+ }
+ if s.T > acc.inOrderMaxt {
+ acc.inOrderMaxt = s.T
+ }
+ } else {
+ // The sample is an exact duplicate, and should be silently dropped.
+ acc.floatsAppended--
+ }
+ }
+
+ if chunkCreated {
+ a.head.metrics.chunks.Inc()
+ a.head.metrics.chunksCreated.Inc()
+ }
+
+ series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow)
+ series.pendingCommit = false
+ series.Unlock()
+ }
+}
+
+// For details on the commitHistograms function, see the commitSamples docs.
+func (a *headAppender) commitHistograms(acc *appenderCommitContext) {
+ var ok, chunkCreated bool
+ var series *memSeries
+
+ for i, s := range a.histograms {
+ series = a.histogramSeries[i]
+ series.Lock()
+
+ oooSample, _, err := series.appendableHistogram(s.T, s.H, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load())
+ if err != nil {
+ handleAppendableError(err, &acc.histogramsAppended, &acc.histoOOORejected, &acc.histoOOBRejected, &acc.histoTooOldRejected)
+ }
+
+ switch {
+ case err != nil:
+ // Do nothing here.
+ case oooSample:
+ // Sample is OOO and OOO handling is enabled
+ // and the delta is within the OOO tolerance.
+ var mmapRefs []chunks.ChunkDiskMapperRef
+ ok, chunkCreated, mmapRefs = series.insert(s.T, 0, s.H, nil, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger)
+ if chunkCreated {
+ r, ok := acc.oooMmapMarkers[series.ref]
+ if !ok || r != nil {
+ // !ok means there are no markers collected for these samples yet. So we first flush the samples
+ // before setting this m-map marker.
+
+ // r != nil means we have already m-mapped a chunk for this series in the same Commit().
+ // Hence, before we m-map again, we should add the samples and m-map markers
+ // seen till now to the WBL records.
+ acc.collectOOORecords(a)
+ }
+
+ if acc.oooMmapMarkers == nil {
+ acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef)
+ }
+ if len(mmapRefs) > 0 {
+ acc.oooMmapMarkers[series.ref] = mmapRefs
+ acc.oooMmapMarkersCount += len(mmapRefs)
+ } else {
+ // No chunk was written to disk, so we need to set an initial marker for this series.
+ acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0}
+ acc.oooMmapMarkersCount++
+ }
+ }
+ if ok {
+ acc.wblHistograms = append(acc.wblHistograms, s)
+ if s.T < acc.oooMinT {
+ acc.oooMinT = s.T
+ }
+ if s.T > acc.oooMaxT {
+ acc.oooMaxT = s.T
+ }
+ acc.oooHistogramAccepted++
+ } else {
+ // Sample is an exact duplicate of the last sample.
+ // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk,
+ // not with samples in already flushed OOO chunks.
+ // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305.
+ acc.histogramsAppended--
+ }
+ default:
+ ok, chunkCreated = series.appendHistogram(s.T, s.H, a.appendID, acc.appendChunkOpts)
+ if ok {
+ if s.T < acc.inOrderMint {
+ acc.inOrderMint = s.T
+ }
+ if s.T > acc.inOrderMaxt {
+ acc.inOrderMaxt = s.T
+ }
+ } else {
+ acc.histogramsAppended--
+ acc.histoOOORejected++
+ }
+ }
+
+ if chunkCreated {
+ a.head.metrics.chunks.Inc()
+ a.head.metrics.chunksCreated.Inc()
+ }
+
+ series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow)
+ series.pendingCommit = false
+ series.Unlock()
+ }
+}
+
+// For details on the commitFloatHistograms function, see the commitSamples docs.
+func (a *headAppender) commitFloatHistograms(acc *appenderCommitContext) {
+ var ok, chunkCreated bool
+ var series *memSeries
+
+ for i, s := range a.floatHistograms {
+ series = a.floatHistogramSeries[i]
+ series.Lock()
+
+ oooSample, _, err := series.appendableFloatHistogram(s.T, s.FH, a.headMaxt, a.minValidTime, a.oooTimeWindow, a.head.opts.EnableOOONativeHistograms.Load())
+ if err != nil {
+ handleAppendableError(err, &acc.histogramsAppended, &acc.histoOOORejected, &acc.histoOOBRejected, &acc.histoTooOldRejected)
+ }
+
+ switch {
+ case err != nil:
+ // Do nothing here.
+ case oooSample:
+ // Sample is OOO and OOO handling is enabled
+ // and the delta is within the OOO tolerance.
+ var mmapRefs []chunks.ChunkDiskMapperRef
+ ok, chunkCreated, mmapRefs = series.insert(s.T, 0, nil, s.FH, a.head.chunkDiskMapper, acc.oooCapMax, a.head.logger)
+ if chunkCreated {
+ r, ok := acc.oooMmapMarkers[series.ref]
+ if !ok || r != nil {
+ // !ok means there are no markers collected for these samples yet. So we first flush the samples
+ // before setting this m-map marker.
+
+ // r != nil means we have already m-mapped a chunk for this series in the same Commit().
+ // Hence, before we m-map again, we should add the samples and m-map markers
+ // seen till now to the WBL records.
+ acc.collectOOORecords(a)
+ }
+
+ if acc.oooMmapMarkers == nil {
+ acc.oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef)
+ }
+ if len(mmapRefs) > 0 {
+ acc.oooMmapMarkers[series.ref] = mmapRefs
+ acc.oooMmapMarkersCount += len(mmapRefs)
+ } else {
+ // No chunk was written to disk, so we need to set an initial marker for this series.
+ acc.oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0}
+ acc.oooMmapMarkersCount++
+ }
+ }
+ if ok {
+ acc.wblFloatHistograms = append(acc.wblFloatHistograms, s)
+ if s.T < acc.oooMinT {
+ acc.oooMinT = s.T
+ }
+ if s.T > acc.oooMaxT {
+ acc.oooMaxT = s.T
+ }
+ acc.oooHistogramAccepted++
+ } else {
+ // Sample is an exact duplicate of the last sample.
+ // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk,
+ // not with samples in already flushed OOO chunks.
+ // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305.
+ acc.histogramsAppended--
+ }
+ default:
+ ok, chunkCreated = series.appendFloatHistogram(s.T, s.FH, a.appendID, acc.appendChunkOpts)
+ if ok {
+ if s.T < acc.inOrderMint {
+ acc.inOrderMint = s.T
+ }
+ if s.T > acc.inOrderMaxt {
+ acc.inOrderMaxt = s.T
+ }
+ } else {
+ acc.histogramsAppended--
+ acc.histoOOORejected++
+ }
+ }
+
+ if chunkCreated {
+ a.head.metrics.chunks.Inc()
+ a.head.metrics.chunksCreated.Inc()
+ }
+
+ series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow)
+ series.pendingCommit = false
+ series.Unlock()
+ }
+}
+
+// commitMetadata commits the metadata for each series in the headAppender.
+// It iterates over the metadata slice and updates the corresponding series +// with the new metadata information. The series is locked during the update +// to ensure thread safety. +func (a *headAppender) commitMetadata() { + var series *memSeries + for i, m := range a.metadata { + series = a.metadataSeries[i] + series.Lock() + series.meta = &metadata.Metadata{Type: record.ToMetricType(m.Type), Unit: m.Unit, Help: m.Help} + series.Unlock() + } +} + // Commit writes to the WAL and adds the data to the Head. // TODO(codesome): Refactor this method to reduce indentation and make it more readable. func (a *headAppender) Commit() (err error) { @@ -810,23 +1422,7 @@ func (a *headAppender) Commit() (err error) { a.head.writeNotified.Notify() } - // No errors logging to WAL, so pass the exemplars along to the in memory storage. - for _, e := range a.exemplars { - s := a.head.series.getByID(chunks.HeadSeriesRef(e.ref)) - if s == nil { - // This is very unlikely to happen, but we have seen it in the wild. - // It means that the series was truncated between AppendExemplar and Commit. - // See TestHeadCompactionWhileAppendAndCommitExemplar. - continue - } - // We don't instrument exemplar appends here, all is instrumented by storage. - if err := a.head.exemplars.AddExemplar(s.labels(), e.exemplar); err != nil { - if errors.Is(err, storage.ErrOutOfOrderExemplar) { - continue - } - level.Debug(a.head.logger).Log("msg", "Unknown error while adding exemplar", "err", err) - } - } + a.commitExemplars() defer a.head.metrics.activeAppenders.Dec() defer a.head.putAppendBuffer(a.samples) @@ -837,254 +1433,58 @@ func (a *headAppender) Commit() (err error) { defer a.head.putMetadataBuffer(a.metadata) defer a.head.iso.closeAppend(a.appendID) - var ( - floatsAppended = len(a.samples) - histogramsAppended = len(a.histograms) + len(a.floatHistograms) - // number of samples out of order but accepted: with ooo enabled and within time window - oooFloatsAccepted int - // number of samples rejected due to: out of order but OOO support disabled. - floatOOORejected int - histoOOORejected int - // number of samples rejected due to: that are out of order but too old (OOO support enabled, but outside time window) - floatTooOldRejected int - // number of samples rejected due to: out of bounds: with t < minValidTime (OOO support disabled) - floatOOBRejected int - - inOrderMint int64 = math.MaxInt64 - inOrderMaxt int64 = math.MinInt64 - oooMinT int64 = math.MaxInt64 - oooMaxT int64 = math.MinInt64 - wblSamples []record.RefSample - oooMmapMarkers map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef - oooMmapMarkersCount int - oooRecords [][]byte - oooCapMax = a.head.opts.OutOfOrderCapMax.Load() - series *memSeries - appendChunkOpts = chunkOpts{ + acc := &appenderCommitContext{ + floatsAppended: len(a.samples), + histogramsAppended: len(a.histograms) + len(a.floatHistograms), + inOrderMint: math.MaxInt64, + inOrderMaxt: math.MinInt64, + oooMinT: math.MaxInt64, + oooMaxT: math.MinInt64, + oooCapMax: a.head.opts.OutOfOrderCapMax.Load(), + appendChunkOpts: chunkOpts{ chunkDiskMapper: a.head.chunkDiskMapper, chunkRange: a.head.chunkRange.Load(), samplesPerChunk: a.head.opts.SamplesPerChunk, - } - enc record.Encoder - ) + }, + } + defer func() { - for i := range oooRecords { - a.head.putBytesBuffer(oooRecords[i][:0]) + for i := range acc.oooRecords { + a.head.putBytesBuffer(acc.oooRecords[i][:0]) } }() - collectOOORecords := func() { - if a.head.wbl == nil { - // WBL is not enabled. So no need to collect. 
- wblSamples = nil - oooMmapMarkers = nil - oooMmapMarkersCount = 0 - return - } - // The m-map happens before adding a new sample. So we collect - // the m-map markers first, and then samples. - // WBL Graphically: - // WBL Before this Commit(): [old samples before this commit for chunk 1] - // WBL After this Commit(): [old samples before this commit for chunk 1][new samples in this commit for chunk 1]mmapmarker1[samples for chunk 2]mmapmarker2[samples for chunk 3] - if oooMmapMarkers != nil { - markers := make([]record.RefMmapMarker, 0, oooMmapMarkersCount) - for ref, mmapRefs := range oooMmapMarkers { - for _, mmapRef := range mmapRefs { - markers = append(markers, record.RefMmapMarker{ - Ref: ref, - MmapRef: mmapRef, - }) - } - } - r := enc.MmapMarkers(markers, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } - if len(wblSamples) > 0 { - r := enc.Samples(wblSamples, a.head.getBytesBuffer()) - oooRecords = append(oooRecords, r) - } + a.commitSamples(acc) + a.commitHistograms(acc) + a.commitFloatHistograms(acc) + a.commitMetadata() - wblSamples = nil - oooMmapMarkers = nil - } - for i, s := range a.samples { - series = a.sampleSeries[i] - series.Lock() + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOORejected)) + a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histoOOORejected)) + a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatOOBRejected)) + a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatTooOldRejected)) + a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.floatsAppended)) + a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.histogramsAppended)) + a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(acc.oooFloatsAccepted)) + a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(acc.oooHistogramAccepted)) + a.head.updateMinMaxTime(acc.inOrderMint, acc.inOrderMaxt) + a.head.updateMinOOOMaxOOOTime(acc.oooMinT, acc.oooMaxT) - oooSample, _, err := series.appendable(s.T, s.V, a.headMaxt, a.minValidTime, a.oooTimeWindow) - switch { - case err == nil: - // Do nothing. - case errors.Is(err, storage.ErrOutOfOrderSample): - floatsAppended-- - floatOOORejected++ - case errors.Is(err, storage.ErrOutOfBounds): - floatsAppended-- - floatOOBRejected++ - case errors.Is(err, storage.ErrTooOldSample): - floatsAppended-- - floatTooOldRejected++ - default: - floatsAppended-- - } - - var ok, chunkCreated bool - - switch { - case err != nil: - // Do nothing here. - case oooSample: - // Sample is OOO and OOO handling is enabled - // and the delta is within the OOO tolerance. - var mmapRefs []chunks.ChunkDiskMapperRef - ok, chunkCreated, mmapRefs = series.insert(s.T, s.V, nil, nil, a.head.chunkDiskMapper, oooCapMax, a.head.logger) - if chunkCreated { - r, ok := oooMmapMarkers[series.ref] - if !ok || r != nil { - // !ok means there are no markers collected for these samples yet. So we first flush the samples - // before setting this m-map marker. - - // r != nil means we have already m-mapped a chunk for this series in the same Commit(). - // Hence, before we m-map again, we should add the samples and m-map markers - // seen till now to the WBL records. 
- collectOOORecords() - } - - if oooMmapMarkers == nil { - oooMmapMarkers = make(map[chunks.HeadSeriesRef][]chunks.ChunkDiskMapperRef) - } - if len(mmapRefs) > 0 { - oooMmapMarkers[series.ref] = mmapRefs - oooMmapMarkersCount += len(mmapRefs) - } else { - // No chunk was written to disk, so we need to set an initial marker for this series. - oooMmapMarkers[series.ref] = []chunks.ChunkDiskMapperRef{0} - oooMmapMarkersCount++ - } - } - if ok { - wblSamples = append(wblSamples, s) - if s.T < oooMinT { - oooMinT = s.T - } - if s.T > oooMaxT { - oooMaxT = s.T - } - oooFloatsAccepted++ - } else { - // Sample is an exact duplicate of the last sample. - // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, - // not with samples in already flushed OOO chunks. - // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. - floatsAppended-- - } - default: - ok, chunkCreated = series.append(s.T, s.V, a.appendID, appendChunkOpts) - if ok { - if s.T < inOrderMint { - inOrderMint = s.T - } - if s.T > inOrderMaxt { - inOrderMaxt = s.T - } - } else { - // The sample is an exact duplicate, and should be silently dropped. - floatsAppended-- - } - } - - if chunkCreated { - a.head.metrics.chunks.Inc() - a.head.metrics.chunksCreated.Inc() - } - - series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) - series.pendingCommit = false - series.Unlock() - } - - for i, s := range a.histograms { - series = a.histogramSeries[i] - series.Lock() - ok, chunkCreated := series.appendHistogram(s.T, s.H, a.appendID, appendChunkOpts) - series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) - series.pendingCommit = false - series.Unlock() - - if ok { - if s.T < inOrderMint { - inOrderMint = s.T - } - if s.T > inOrderMaxt { - inOrderMaxt = s.T - } - } else { - histogramsAppended-- - histoOOORejected++ - } - if chunkCreated { - a.head.metrics.chunks.Inc() - a.head.metrics.chunksCreated.Inc() - } - } - - for i, s := range a.floatHistograms { - series = a.floatHistogramSeries[i] - series.Lock() - ok, chunkCreated := series.appendFloatHistogram(s.T, s.FH, a.appendID, appendChunkOpts) - series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) - series.pendingCommit = false - series.Unlock() - - if ok { - if s.T < inOrderMint { - inOrderMint = s.T - } - if s.T > inOrderMaxt { - inOrderMaxt = s.T - } - } else { - histogramsAppended-- - histoOOORejected++ - } - if chunkCreated { - a.head.metrics.chunks.Inc() - a.head.metrics.chunksCreated.Inc() - } - } - - for i, m := range a.metadata { - series = a.metadataSeries[i] - series.Lock() - series.meta = &metadata.Metadata{Type: record.ToMetricType(m.Type), Unit: m.Unit, Help: m.Help} - series.Unlock() - } - - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatOOORejected)) - a.head.metrics.outOfOrderSamples.WithLabelValues(sampleMetricTypeHistogram).Add(float64(histoOOORejected)) - a.head.metrics.outOfBoundSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatOOBRejected)) - a.head.metrics.tooOldSamples.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatTooOldRejected)) - a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(floatsAppended)) - a.head.metrics.samplesAppended.WithLabelValues(sampleMetricTypeHistogram).Add(float64(histogramsAppended)) - a.head.metrics.outOfOrderSamplesAppended.WithLabelValues(sampleMetricTypeFloat).Add(float64(oooFloatsAccepted)) - a.head.updateMinMaxTime(inOrderMint, inOrderMaxt) - 
a.head.updateMinOOOMaxOOOTime(oooMinT, oooMaxT) - - collectOOORecords() + acc.collectOOORecords(a) if a.head.wbl != nil { - if err := a.head.wbl.Log(oooRecords...); err != nil { + if err := a.head.wbl.Log(acc.oooRecords...); err != nil { // TODO(codesome): Currently WBL logging of ooo samples is best effort here since we cannot try logging // until we have found what samples become OOO. We can try having a metric for this failure. // Returning the error here is not correct because we have already put the samples into the memory, // hence the append/insert was a success. - level.Error(a.head.logger).Log("msg", "Failed to log out of order samples into the WAL", "err", err) + a.head.logger.Error("Failed to log out of order samples into the WAL", "err", err) } } return nil } // insert is like append, except it inserts. Used for OOO samples. -func (s *memSeries) insert(t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, chunkDiskMapper *chunks.ChunkDiskMapper, oooCapMax int64, logger log.Logger) (inserted, chunkCreated bool, mmapRefs []chunks.ChunkDiskMapperRef) { +func (s *memSeries) insert(t int64, v float64, h *histogram.Histogram, fh *histogram.FloatHistogram, chunkDiskMapper *chunks.ChunkDiskMapper, oooCapMax int64, logger *slog.Logger) (inserted, chunkCreated bool, mmapRefs []chunks.ChunkDiskMapperRef) { if s.ooo == nil { s.ooo = &memSeriesOOOFields{} } @@ -1117,7 +1517,7 @@ type chunkOpts struct { // append adds the sample (t, v) to the series. The caller also has to provide // the appendID for isolation. (The appendID can be zero, which results in no // isolation for this append.) -// It is unsafe to call this concurrently with s.iterator(...) without holding the series lock. +// Series lock must be held when calling. func (s *memSeries) append(t int64, v float64, appendID uint64, o chunkOpts) (sampleInOrder, chunkCreated bool) { c, sampleInOrder, chunkCreated := s.appendPreprocessor(t, chunkenc.EncXOR, o) if !sampleInOrder { @@ -1446,7 +1846,7 @@ func (s *memSeries) cutNewHeadChunk(mint int64, e chunkenc.Encoding, chunkRange // cutNewOOOHeadChunk cuts a new OOO chunk and m-maps the old chunk. // The caller must ensure that s is locked and s.ooo is not nil. -func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.ChunkDiskMapper, logger log.Logger) (*oooHeadChunk, []chunks.ChunkDiskMapperRef) { +func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.ChunkDiskMapper, logger *slog.Logger) (*oooHeadChunk, []chunks.ChunkDiskMapperRef) { ref := s.mmapCurrentOOOHeadChunk(chunkDiskMapper, logger) s.ooo.oooHeadChunk = &oooHeadChunk{ @@ -1459,7 +1859,7 @@ func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.Chunk } // s must be locked when calling. -func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMapper, logger log.Logger) []chunks.ChunkDiskMapperRef { +func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMapper, logger *slog.Logger) []chunks.ChunkDiskMapperRef { if s.ooo == nil || s.ooo.oooHeadChunk == nil { // OOO is not enabled or there is no head chunk, so nothing to m-map here. 
return nil @@ -1469,13 +1869,13 @@ func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMap handleChunkWriteError(err) return nil } - chunkRefs := make([]chunks.ChunkDiskMapperRef, 0, 1) + chunkRefs := make([]chunks.ChunkDiskMapperRef, 0, len(chks)) for _, memchunk := range chks { if len(s.ooo.oooMmappedChunks) >= (oooChunkIDMask - 1) { - level.Error(logger).Log("msg", "Too many OOO chunks, dropping data", "series", s.lset.String()) + logger.Error("Too many OOO chunks, dropping data", "series", s.lset.String()) break } - chunkRef := chunkDiskMapper.WriteChunk(s.ref, s.ooo.oooHeadChunk.minTime, s.ooo.oooHeadChunk.maxTime, memchunk.chunk, true, handleChunkWriteError) + chunkRef := chunkDiskMapper.WriteChunk(s.ref, memchunk.minTime, memchunk.maxTime, memchunk.chunk, true, handleChunkWriteError) chunkRefs = append(chunkRefs, chunkRef) s.ooo.oooMmappedChunks = append(s.ooo.oooMmappedChunks, &mmappedChunk{ ref: chunkRef, diff --git a/tsdb/head_bench_test.go b/tsdb/head_bench_test.go index a037948100..dc682602b1 100644 --- a/tsdb/head_bench_test.go +++ b/tsdb/head_bench_test.go @@ -14,15 +14,22 @@ package tsdb import ( + "context" "errors" + "fmt" + "math/rand" "strconv" "testing" "github.com/stretchr/testify/require" "go.uber.org/atomic" + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunks" + "github.com/prometheus/prometheus/tsdb/wlog" ) func BenchmarkHeadStripeSeriesCreate(b *testing.B) { @@ -79,6 +86,86 @@ func BenchmarkHeadStripeSeriesCreate_PreCreationFailure(b *testing.B) { } } +func BenchmarkHead_WalCommit(b *testing.B) { + seriesCounts := []int{100, 1000, 10000} + series := genSeries(10000, 10, 0, 0) // Only using the generated labels. + + appendSamples := func(b *testing.B, app storage.Appender, seriesCount int, ts int64) { + var err error + for i, s := range series[:seriesCount] { + var ref storage.SeriesRef + // if i is even, append a sample, else append a histogram. + if i%2 == 0 { + ref, err = app.Append(ref, s.Labels(), ts, float64(ts)) + } else { + h := &histogram.Histogram{ + Count: 7 + uint64(ts*5), + ZeroCount: 2 + uint64(ts), + ZeroThreshold: 0.001, + Sum: 18.4 * rand.Float64(), + Schema: 1, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{ts + 1, 1, -1, 0}, + } + ref, err = app.AppendHistogram(ref, s.Labels(), ts, h, nil) + } + require.NoError(b, err) + + _, err = app.AppendExemplar(ref, s.Labels(), exemplar.Exemplar{ + Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())), + Value: rand.Float64(), + Ts: ts, + }) + require.NoError(b, err) + } + } + + for _, seriesCount := range seriesCounts { + b.Run(fmt.Sprintf("%d series", seriesCount), func(b *testing.B) { + for _, commits := range []int64{1, 2} { // To test commits that create new series and when the series already exists. 
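+ // Only the Commit() calls below run with the timer started; head setup and
+ // the appends happen under b.StopTimer(), so the benchmark measures the
+ // cost of Commit() itself (WAL writes plus in-memory appends).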
+ b.Run(fmt.Sprintf("%d commits", commits), func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + b.StopTimer() + h, w := newTestHead(b, 10000, wlog.CompressionNone, false) + b.Cleanup(func() { + if h != nil { + h.Close() + } + if w != nil { + w.Close() + } + }) + app := h.Appender(context.Background()) + + appendSamples(b, app, seriesCount, 0) + + b.StartTimer() + require.NoError(b, app.Commit()) + if commits == 2 { + b.StopTimer() + app = h.Appender(context.Background()) + appendSamples(b, app, seriesCount, 1) + b.StartTimer() + require.NoError(b, app.Commit()) + } + b.StopTimer() + h.Close() + h = nil + w.Close() + w = nil + } + }) + } + }) + } +} + type failingSeriesLifecycleCallback struct{} func (failingSeriesLifecycleCallback) PreCreation(labels.Labels) error { return errors.New("failed") } diff --git a/tsdb/head_dedupelabels.go b/tsdb/head_dedupelabels.go index a16d907261..a75f337224 100644 --- a/tsdb/head_dedupelabels.go +++ b/tsdb/head_dedupelabels.go @@ -16,8 +16,7 @@ package tsdb import ( - "github.com/go-kit/log" - "github.com/go-kit/log/level" + "log/slog" "github.com/prometheus/prometheus/model/labels" ) @@ -31,8 +30,8 @@ func (s *memSeries) labels() labels.Labels { // RebuildSymbolTable goes through all the series in h, build a SymbolTable with all names and values, // replace each series' Labels with one using that SymbolTable. -func (h *Head) RebuildSymbolTable(logger log.Logger) *labels.SymbolTable { - level.Info(logger).Log("msg", "RebuildSymbolTable starting") +func (h *Head) RebuildSymbolTable(logger *slog.Logger) *labels.SymbolTable { + logger.Info("RebuildSymbolTable starting") st := labels.NewSymbolTable() builder := labels.NewScratchBuilderWithSymbolTable(st, 0) rebuildLabels := func(lbls labels.Labels) labels.Labels { @@ -66,7 +65,7 @@ func (h *Head) RebuildSymbolTable(logger log.Logger) *labels.SymbolTable { if e, ok := h.exemplars.(withReset); ok { e.ResetSymbolTable(st) } - level.Info(logger).Log("msg", "RebuildSymbolTable finished", "size", st.Len()) + logger.Info("RebuildSymbolTable finished", "size", st.Len()) return st } diff --git a/tsdb/head_other.go b/tsdb/head_other.go index fea91530dc..c73872c12e 100644 --- a/tsdb/head_other.go +++ b/tsdb/head_other.go @@ -16,7 +16,7 @@ package tsdb import ( - "github.com/go-kit/log" + "log/slog" "github.com/prometheus/prometheus/model/labels" ) @@ -27,6 +27,6 @@ func (s *memSeries) labels() labels.Labels { } // RebuildSymbolTable is a no-op when not using dedupelabels. 
-func (h *Head) RebuildSymbolTable(logger log.Logger) *labels.SymbolTable { +func (h *Head) RebuildSymbolTable(logger *slog.Logger) *labels.SymbolTable { return nil } diff --git a/tsdb/head_read.go b/tsdb/head_read.go index d81ffbb6a0..79ed0f0240 100644 --- a/tsdb/head_read.go +++ b/tsdb/head_read.go @@ -21,8 +21,6 @@ import ( "slices" "sync" - "github.com/go-kit/log/level" - "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" @@ -125,6 +123,10 @@ func (h *headIndexReader) PostingsForLabelMatching(ctx context.Context, name str return h.head.postings.PostingsForLabelMatching(ctx, name, match) } +func (h *headIndexReader) PostingsForAllLabelValues(ctx context.Context, name string) index.Postings { + return h.head.postings.PostingsForAllLabelValues(ctx, name) +} + func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { series := make([]*memSeries, 0, 128) @@ -132,7 +134,7 @@ func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { for p.Next() { s := h.head.series.getByID(chunks.HeadSeriesRef(p.At())) if s == nil { - level.Debug(h.head.logger).Log("msg", "Looked up series not found") + h.head.logger.Debug("Looked up series not found") } else { series = append(series, s) } @@ -165,7 +167,7 @@ func (h *headIndexReader) ShardedPostings(p index.Postings, shardIndex, shardCou for p.Next() { s := h.head.series.getByID(chunks.HeadSeriesRef(p.At())) if s == nil { - level.Debug(h.head.logger).Log("msg", "Looked up series not found") + h.head.logger.Debug("Looked up series not found") continue } diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 18ec4f0aca..2ca3aeffc7 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -23,6 +23,7 @@ import ( "path" "path/filepath" "reflect" + "slices" "sort" "strconv" "strings" @@ -1060,7 +1061,7 @@ func TestMemSeries_truncateChunks_scenarios(t *testing.T) { tests := []struct { name string - headChunks int // the number of head chubks to create on memSeries by appending enough samples + headChunks int // the number of head chunks to create on memSeries by appending enough samples mmappedChunks int // the number of mmapped chunks to create on memSeries by appending enough samples truncateBefore int64 // the mint to pass to truncateChunksBefore() expectedTruncated int // the number of chunks that we're expecting be truncated and returned by truncateChunksBefore() @@ -2101,6 +2102,36 @@ func TestHead_LogRollback(t *testing.T) { } } +func TestHead_ReturnsSortedLabelValues(t *testing.T) { + h, _ := newTestHead(t, 1000, wlog.CompressionNone, false) + defer func() { + require.NoError(t, h.Close()) + }() + + h.initTime(0) + + app := h.appender() + for i := 100; i > 0; i-- { + for j := 0; j < 10; j++ { + lset := labels.FromStrings( + "__name__", fmt.Sprintf("metric_%d", i), + "label", fmt.Sprintf("value_%d", j), + ) + _, err := app.Append(0, lset, 2100, 1) + require.NoError(t, err) + } + } + + q, err := NewBlockQuerier(h, 1500, 2500) + require.NoError(t, err) + + res, _, err := q.LabelValues(context.Background(), "__name__", nil) + require.NoError(t, err) + + require.True(t, slices.IsSorted(res)) + require.NoError(t, q.Close()) +} + // TestWalRepair_DecodingError ensures that a repair is run for an error // when decoding a record. 
func TestWalRepair_DecodingError(t *testing.T) { @@ -2383,8 +2414,7 @@ func TestAddDuplicateLabelName(t *testing.T) { add := func(labels labels.Labels, labelName string) { app := h.Appender(context.Background()) _, err := app.Append(0, labels, 0, 0) - require.Error(t, err) - require.Equal(t, fmt.Sprintf(`label name "%s" is not unique: invalid sample`, labelName), err.Error()) + require.EqualError(t, err, fmt.Sprintf(`label name "%s" is not unique: invalid sample`, labelName)) } add(labels.FromStrings("a", "c", "a", "b"), "a") @@ -2692,15 +2722,32 @@ func TestIsolationWithoutAdd(t *testing.T) { func TestOutOfOrderSamplesMetric(t *testing.T) { for name, scenario := range sampleTypeScenarios { t.Run(name, func(t *testing.T) { - testOutOfOrderSamplesMetric(t, scenario) + options := DefaultOptions() + options.EnableNativeHistograms = true + options.EnableOOONativeHistograms = true + testOutOfOrderSamplesMetric(t, scenario, options, storage.ErrOutOfOrderSample) }) } } -func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario) { - dir := t.TempDir() +func TestOutOfOrderSamplesMetricNativeHistogramOOODisabled(t *testing.T) { + for name, scenario := range sampleTypeScenarios { + if scenario.sampleType != "histogram" { + continue + } + t.Run(name, func(t *testing.T) { + options := DefaultOptions() + options.OutOfOrderTimeWindow = (1000 * time.Minute).Milliseconds() + options.EnableNativeHistograms = true + options.EnableOOONativeHistograms = false + testOutOfOrderSamplesMetric(t, scenario, options, storage.ErrOOONativeHistogramsDisabled) + }) + } +} - db, err := Open(dir, nil, nil, DefaultOptions(), nil) +func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario, options *Options, expectOutOfOrderError error) { + dir := t.TempDir() + db, err := Open(dir, nil, nil, options, nil) require.NoError(t, err) defer func() { require.NoError(t, db.Close()) @@ -2724,15 +2771,15 @@ func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario) { require.Equal(t, 0.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) app = db.Appender(ctx) _, err = appendSample(app, 2) - require.Equal(t, storage.ErrOutOfOrderSample, err) + require.Equal(t, expectOutOfOrderError, err) require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) _, err = appendSample(app, 3) - require.Equal(t, storage.ErrOutOfOrderSample, err) + require.Equal(t, expectOutOfOrderError, err) require.Equal(t, 2.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) _, err = appendSample(app, 4) - require.Equal(t, storage.ErrOutOfOrderSample, err) + require.Equal(t, expectOutOfOrderError, err) require.Equal(t, 3.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) require.NoError(t, app.Commit()) @@ -2767,15 +2814,15 @@ func testOutOfOrderSamplesMetric(t *testing.T, scenario sampleTypeScenario) { // Test out of order metric. 
app = db.Appender(ctx) _, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+2) - require.Equal(t, storage.ErrOutOfOrderSample, err) + require.Equal(t, expectOutOfOrderError, err) require.Equal(t, 4.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) _, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+3) - require.Equal(t, storage.ErrOutOfOrderSample, err) + require.Equal(t, expectOutOfOrderError, err) require.Equal(t, 5.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) _, err = appendSample(app, db.head.minValidTime.Load()+DefaultBlockDuration+4) - require.Equal(t, storage.ErrOutOfOrderSample, err) + require.Equal(t, expectOutOfOrderError, err) require.Equal(t, 6.0, prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples.WithLabelValues(scenario.sampleType))) require.NoError(t, app.Commit()) } @@ -3492,6 +3539,133 @@ func TestWaitForPendingReadersInTimeRange(t *testing.T) { } } +func TestQueryOOOHeadDuringTruncate(t *testing.T) { + testQueryOOOHeadDuringTruncate(t, + func(db *DB, minT, maxT int64) (storage.LabelQuerier, error) { + return db.Querier(minT, maxT) + }, + func(t *testing.T, lq storage.LabelQuerier, minT, _ int64) { + // Samples + q, ok := lq.(storage.Querier) + require.True(t, ok) + ss := q.Select(context.Background(), false, nil, labels.MustNewMatcher(labels.MatchEqual, "a", "b")) + require.True(t, ss.Next()) + s := ss.At() + require.False(t, ss.Next()) // One series. + it := s.Iterator(nil) + require.NotEqual(t, chunkenc.ValNone, it.Next()) // Has some data. + require.Equal(t, minT, it.AtT()) // It is an in-order sample. + require.NotEqual(t, chunkenc.ValNone, it.Next()) // Has some data. + require.Equal(t, minT+50, it.AtT()) // it is an out-of-order sample. + require.NoError(t, it.Err()) + }, + ) +} + +func TestChunkQueryOOOHeadDuringTruncate(t *testing.T) { + testQueryOOOHeadDuringTruncate(t, + func(db *DB, minT, maxT int64) (storage.LabelQuerier, error) { + return db.ChunkQuerier(minT, maxT) + }, + func(t *testing.T, lq storage.LabelQuerier, minT, _ int64) { + // Chunks + q, ok := lq.(storage.ChunkQuerier) + require.True(t, ok) + ss := q.Select(context.Background(), false, nil, labels.MustNewMatcher(labels.MatchEqual, "a", "b")) + require.True(t, ss.Next()) + s := ss.At() + require.False(t, ss.Next()) // One series. + metaIt := s.Iterator(nil) + require.True(t, metaIt.Next()) + meta := metaIt.At() + // Samples + it := meta.Chunk.Iterator(nil) + require.NotEqual(t, chunkenc.ValNone, it.Next()) // Has some data. + require.Equal(t, minT, it.AtT()) // It is an in-order sample. + require.NotEqual(t, chunkenc.ValNone, it.Next()) // Has some data. + require.Equal(t, minT+50, it.AtT()) // it is an out-of-order sample. + require.NoError(t, it.Err()) + }, + ) +} + +func testQueryOOOHeadDuringTruncate(t *testing.T, makeQuerier func(db *DB, minT, maxT int64) (storage.LabelQuerier, error), verify func(t *testing.T, q storage.LabelQuerier, minT, maxT int64)) { + const maxT int64 = 6000 + + dir := t.TempDir() + opts := DefaultOptions() + opts.EnableNativeHistograms = true + opts.OutOfOrderTimeWindow = maxT + opts.MinBlockDuration = maxT / 2 // So that head will compact up to 3000. 
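+ // With these settings (an illustrative reading): in-order samples span
+ // [0, 6000), the OOO window accepts samples up to 6000ms behind the head
+ // max time, and the first compaction cuts a block covering roughly the
+ // first half, [0, 3000).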
+ + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + db.DisableCompactions() + + var ( + ref = storage.SeriesRef(0) + app = db.Appender(context.Background()) + ) + // Add in-order samples at every 100ms starting at 0ms. + for i := int64(0); i < maxT; i += 100 { + _, err := app.Append(ref, labels.FromStrings("a", "b"), i, 0) + require.NoError(t, err) + } + // Add out-of-order samples at every 100ms starting at 50ms. + for i := int64(50); i < maxT; i += 100 { + _, err := app.Append(ref, labels.FromStrings("a", "b"), i, 0) + require.NoError(t, err) + } + require.NoError(t, app.Commit()) + + requireEqualOOOSamples(t, int(maxT/100-1), db) + + // Synchronization points. + allowQueryToStart := make(chan struct{}) + queryStarted := make(chan struct{}) + compactionFinished := make(chan struct{}) + + db.head.memTruncationCallBack = func() { + // Compaction has started, let the query start and wait for it to actually start to simulate race condition. + allowQueryToStart <- struct{}{} + <-queryStarted + } + + go func() { + db.Compact(context.Background()) // Compact and write blocks up to 3000 (maxtT/2). + compactionFinished <- struct{}{} + }() + + // Wait for the compaction to start. + <-allowQueryToStart + + q, err := makeQuerier(db, 1500, 2500) + require.NoError(t, err) + queryStarted <- struct{}{} // Unblock the compaction. + ctx := context.Background() + + // Label names. + res, annots, err := q.LabelNames(ctx, nil, labels.MustNewMatcher(labels.MatchEqual, "a", "b")) + require.NoError(t, err) + require.Empty(t, annots) + require.Equal(t, []string{"a"}, res) + + // Label values. + res, annots, err = q.LabelValues(ctx, "a", nil, labels.MustNewMatcher(labels.MatchEqual, "a", "b")) + require.NoError(t, err) + require.Empty(t, annots) + require.Equal(t, []string{"b"}, res) + + verify(t, q, 1500, 2500) + + require.NoError(t, q.Close()) // Cannot be deferred as the compaction waits for queries to close before finishing. + + <-compactionFinished // Wait for compaction otherwise Go test finds stray goroutines. +} + func TestAppendHistogram(t *testing.T) { l := labels.FromStrings("a", "b") for _, numHistograms := range []int{1, 10, 150, 200, 250, 300} { @@ -4499,10 +4673,172 @@ func TestHistogramCounterResetHeader(t *testing.T) { } } +func TestOOOHistogramCounterResetHeaders(t *testing.T) { + for _, floatHisto := range []bool{true, false} { + t.Run(fmt.Sprintf("floatHistogram=%t", floatHisto), func(t *testing.T) { + l := labels.FromStrings("a", "b") + head, _ := newTestHead(t, 1000, wlog.CompressionNone, true) + head.opts.OutOfOrderCapMax.Store(5) + head.opts.EnableOOONativeHistograms.Store(true) + + t.Cleanup(func() { + require.NoError(t, head.Close()) + }) + require.NoError(t, head.Init(0)) + + appendHistogram := func(ts int64, h *histogram.Histogram) { + app := head.Appender(context.Background()) + var err error + if floatHisto { + _, err = app.AppendHistogram(0, l, ts, nil, h.ToFloat(nil)) + } else { + _, err = app.AppendHistogram(0, l, ts, h.Copy(), nil) + } + require.NoError(t, err) + require.NoError(t, app.Commit()) + } + + type expOOOMmappedChunks struct { + header chunkenc.CounterResetHeader + mint, maxt int64 + numSamples uint16 + } + + var expChunks []expOOOMmappedChunks + checkOOOExpCounterResetHeader := func(newChunks ...expOOOMmappedChunks) { + expChunks = append(expChunks, newChunks...) 
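+ // getOrCreate is used here purely as a lookup: the series for l already
+ // exists from the appends above, so no new series should be created.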
+ + ms, _, err := head.getOrCreate(l.Hash(), l) + require.NoError(t, err) + + require.Len(t, ms.ooo.oooMmappedChunks, len(expChunks)) + + for i, mmapChunk := range ms.ooo.oooMmappedChunks { + chk, err := head.chunkDiskMapper.Chunk(mmapChunk.ref) + require.NoError(t, err) + if floatHisto { + require.Equal(t, expChunks[i].header, chk.(*chunkenc.FloatHistogramChunk).GetCounterResetHeader()) + } else { + require.Equal(t, expChunks[i].header, chk.(*chunkenc.HistogramChunk).GetCounterResetHeader()) + } + require.Equal(t, expChunks[i].mint, mmapChunk.minTime) + require.Equal(t, expChunks[i].maxt, mmapChunk.maxTime) + require.Equal(t, expChunks[i].numSamples, mmapChunk.numSamples) + } + } + + // Append an in-order histogram, so the rest of the samples can be detected as OOO. + appendHistogram(1000, tsdbutil.GenerateTestHistogram(1000)) + + // OOO histogram + for i := 1; i <= 5; i++ { + appendHistogram(100+int64(i), tsdbutil.GenerateTestHistogram(1000+i)) + } + // Nothing mmapped yet. + checkOOOExpCounterResetHeader() + + // 6th observation (which triggers a head chunk mmapping). + appendHistogram(int64(112), tsdbutil.GenerateTestHistogram(1002)) + + // One mmapped chunk with (ts, val) [(101, 1001), (102, 1002), (103, 1003), (104, 1004), (105, 1005)]. + checkOOOExpCounterResetHeader(expOOOMmappedChunks{ + header: chunkenc.UnknownCounterReset, + mint: 101, + maxt: 105, + numSamples: 5, + }) + + // Add more samples, there's a counter reset at ts 122. + appendHistogram(int64(110), tsdbutil.GenerateTestHistogram(1001)) + appendHistogram(int64(124), tsdbutil.GenerateTestHistogram(904)) + appendHistogram(int64(123), tsdbutil.GenerateTestHistogram(903)) + appendHistogram(int64(122), tsdbutil.GenerateTestHistogram(902)) + + // New samples not mmapped yet. + checkOOOExpCounterResetHeader() + + // 11th observation (which triggers another head chunk mmapping). + appendHistogram(int64(200), tsdbutil.GenerateTestHistogram(2000)) + + // Two new mmapped chunks [(110, 1001), (112, 1002)], [(122, 902), (123, 903), (124, 904)]. + checkOOOExpCounterResetHeader( + expOOOMmappedChunks{ + header: chunkenc.UnknownCounterReset, + mint: 110, + maxt: 112, + numSamples: 2, + }, + expOOOMmappedChunks{ + header: chunkenc.CounterReset, + mint: 122, + maxt: 124, + numSamples: 3, + }, + ) + + // Count is lower than previous sample at ts 200, and NotCounterReset is always ignored on append. + appendHistogram(int64(205), tsdbutil.SetHistogramNotCounterReset(tsdbutil.GenerateTestHistogram(1000))) + + appendHistogram(int64(210), tsdbutil.SetHistogramCounterReset(tsdbutil.GenerateTestHistogram(2010))) + + appendHistogram(int64(220), tsdbutil.GenerateTestHistogram(2020)) + + appendHistogram(int64(215), tsdbutil.GenerateTestHistogram(2005)) + + // 16th observation (which triggers another head chunk mmapping). + appendHistogram(int64(350), tsdbutil.GenerateTestHistogram(4000)) + + // Four new mmapped chunks: [(200, 2000)] [(205, 1000)], [(210, 2010)], [(215, 2015), (220, 2020)] + checkOOOExpCounterResetHeader( + expOOOMmappedChunks{ + header: chunkenc.UnknownCounterReset, + mint: 200, + maxt: 200, + numSamples: 1, + }, + expOOOMmappedChunks{ + header: chunkenc.CounterReset, + mint: 205, + maxt: 205, + numSamples: 1, + }, + expOOOMmappedChunks{ + header: chunkenc.CounterReset, + mint: 210, + maxt: 210, + numSamples: 1, + }, + expOOOMmappedChunks{ + header: chunkenc.CounterReset, + mint: 215, + maxt: 220, + numSamples: 2, + }, + ) + + // Adding five more samples (21 in total), so another mmapped chunk is created. 
+ appendHistogram(300, tsdbutil.SetHistogramCounterReset(tsdbutil.GenerateTestHistogram(3000))) + + for i := 1; i <= 4; i++ { + appendHistogram(300+int64(i), tsdbutil.GenerateTestHistogram(3000+i)) + } + + // One mmapped chunk with (ts, val) [(300, 3000), (301, 3001), (302, 3002), (303, 3003), (350, 4000)]. + checkOOOExpCounterResetHeader(expOOOMmappedChunks{ + header: chunkenc.CounterReset, + mint: 300, + maxt: 350, + numSamples: 5, + }) + }) + } +} + func TestAppendingDifferentEncodingToSameSeries(t *testing.T) { dir := t.TempDir() opts := DefaultOptions() opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true db, err := Open(dir, nil, nil, opts, nil) require.NoError(t, err) t.Cleanup(func() { @@ -4773,6 +5109,8 @@ func testWBLReplay(t *testing.T, scenario sampleTypeScenario) { opts.ChunkRange = 1000 opts.ChunkDirRoot = dir opts.OutOfOrderTimeWindow.Store(30 * time.Minute.Milliseconds()) + opts.EnableNativeHistograms.Store(true) + opts.EnableOOONativeHistograms.Store(true) h, err := NewHead(nil, nil, wal, oooWlog, opts, nil) require.NoError(t, err) @@ -4782,13 +5120,12 @@ func testWBLReplay(t *testing.T, scenario sampleTypeScenario) { l := labels.FromStrings("foo", "bar") appendSample := func(mins int64, val float64, isOOO bool) { app := h.Appender(context.Background()) - ts, v := mins*time.Minute.Milliseconds(), val - _, err := app.Append(0, l, ts, v) + _, s, err := scenario.appendFunc(app, l, mins*time.Minute.Milliseconds(), mins) require.NoError(t, err) require.NoError(t, app.Commit()) if isOOO { - expOOOSamples = append(expOOOSamples, sample{t: ts, f: v}) + expOOOSamples = append(expOOOSamples, s) } } @@ -4841,7 +5178,7 @@ func testWBLReplay(t *testing.T, scenario sampleTypeScenario) { // Passing in true for the 'ignoreCounterResets' parameter prevents differences in counter reset headers // from being factored in to the sample comparison // TODO(fionaliao): understand counter reset behaviour, might want to modify this later - requireEqualSamples(t, l.String(), expOOOSamples, actOOOSamples, true) + requireEqualSamples(t, l.String(), expOOOSamples, actOOOSamples, requireEqualSamplesIgnoreCounterResets) require.NoError(t, h.Close()) } @@ -4867,6 +5204,8 @@ func testOOOMmapReplay(t *testing.T, scenario sampleTypeScenario) { opts.ChunkDirRoot = dir opts.OutOfOrderCapMax.Store(30) opts.OutOfOrderTimeWindow.Store(1000 * time.Minute.Milliseconds()) + opts.EnableNativeHistograms.Store(true) + opts.EnableOOONativeHistograms.Store(true) h, err := NewHead(nil, nil, wal, oooWlog, opts, nil) require.NoError(t, err) @@ -5168,6 +5507,8 @@ func testOOOAppendWithNoSeries(t *testing.T, appendFunc func(appender storage.Ap opts.ChunkDirRoot = dir opts.OutOfOrderCapMax.Store(30) opts.OutOfOrderTimeWindow.Store(120 * time.Minute.Milliseconds()) + opts.EnableNativeHistograms.Store(true) + opts.EnableOOONativeHistograms.Store(true) h, err := NewHead(nil, nil, wal, oooWlog, opts, nil) require.NoError(t, err) @@ -5241,7 +5582,9 @@ func testOOOAppendWithNoSeries(t *testing.T, appendFunc func(appender storage.Ap func TestHeadMinOOOTimeUpdate(t *testing.T) { for name, scenario := range sampleTypeScenarios { t.Run(name, func(t *testing.T) { - testHeadMinOOOTimeUpdate(t, scenario) + if scenario.sampleType == sampleMetricTypeFloat { + testHeadMinOOOTimeUpdate(t, scenario) + } }) } } @@ -5256,6 +5599,8 @@ func testHeadMinOOOTimeUpdate(t *testing.T, scenario sampleTypeScenario) { opts := DefaultHeadOptions() opts.ChunkDirRoot = dir opts.OutOfOrderTimeWindow.Store(10 * time.Minute.Milliseconds()) + 
opts.EnableNativeHistograms.Store(true) + opts.EnableOOONativeHistograms.Store(true) h, err := NewHead(nil, nil, wal, oooWlog, opts, nil) require.NoError(t, err) @@ -5935,11 +6280,15 @@ func TestHeadAppender_AppendFloatWithSameTimestampAsPreviousHistogram(t *testing require.ErrorIs(t, err, storage.NewDuplicateHistogramToFloatErr(2_000, 10.0)) } -func TestHeadAppender_AppendCTZeroSample(t *testing.T) { +func TestHeadAppender_AppendCT(t *testing.T) { + testHistogram := tsdbutil.GenerateTestHistogram(1) + testFloatHistogram := tsdbutil.GenerateTestFloatHistogram(1) type appendableSamples struct { - ts int64 - val float64 - ct int64 + ts int64 + fSample float64 + h *histogram.Histogram + fh *histogram.FloatHistogram + ct int64 } for _, tc := range []struct { name string @@ -5947,20 +6296,10 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { expectedSamples []chunks.Sample }{ { - name: "In order ct+normal sample", + name: "In order ct+normal sample/floatSample", appendableSamples: []appendableSamples{ - {ts: 100, val: 10, ct: 1}, - }, - expectedSamples: []chunks.Sample{ - sample{t: 1, f: 0}, - sample{t: 100, f: 10}, - }, - }, - { - name: "Consecutive appends with same ct ignore ct", - appendableSamples: []appendableSamples{ - {ts: 100, val: 10, ct: 1}, - {ts: 101, val: 10, ct: 1}, + {ts: 100, fSample: 10, ct: 1}, + {ts: 101, fSample: 10, ct: 1}, }, expectedSamples: []chunks.Sample{ sample{t: 1, f: 0}, @@ -5969,10 +6308,86 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { }, }, { - name: "Consecutive appends with newer ct do not ignore ct", + name: "In order ct+normal sample/histogram", appendableSamples: []appendableSamples{ - {ts: 100, val: 10, ct: 1}, - {ts: 102, val: 10, ct: 101}, + {ts: 100, h: testHistogram, ct: 1}, + {ts: 101, h: testHistogram, ct: 1}, + }, + expectedSamples: func() []chunks.Sample { + hNoCounterReset := *testHistogram + hNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, h: &histogram.Histogram{}}, + sample{t: 100, h: testHistogram}, + sample{t: 101, h: &hNoCounterReset}, + } + }(), + }, + { + name: "In order ct+normal sample/floathistogram", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatHistogram, ct: 1}, + {ts: 101, fh: testFloatHistogram, ct: 1}, + }, + expectedSamples: func() []chunks.Sample { + fhNoCounterReset := *testFloatHistogram + fhNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, fh: &histogram.FloatHistogram{}}, + sample{t: 100, fh: testFloatHistogram}, + sample{t: 101, fh: &fhNoCounterReset}, + } + }(), + }, + { + name: "Consecutive appends with same ct ignore ct/floatSample", + appendableSamples: []appendableSamples{ + {ts: 100, fSample: 10, ct: 1}, + {ts: 101, fSample: 10, ct: 1}, + }, + expectedSamples: []chunks.Sample{ + sample{t: 1, f: 0}, + sample{t: 100, f: 10}, + sample{t: 101, f: 10}, + }, + }, + { + name: "Consecutive appends with same ct ignore ct/histogram", + appendableSamples: []appendableSamples{ + {ts: 100, h: testHistogram, ct: 1}, + {ts: 101, h: testHistogram, ct: 1}, + }, + expectedSamples: func() []chunks.Sample { + hNoCounterReset := *testHistogram + hNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, h: &histogram.Histogram{}}, + sample{t: 100, h: testHistogram}, + sample{t: 101, h: &hNoCounterReset}, + } + }(), + }, + { + name: "Consecutive appends with same ct ignore ct/floathistogram", + appendableSamples: []appendableSamples{ + {ts: 100, fh: 
testFloatHistogram, ct: 1}, + {ts: 101, fh: testFloatHistogram, ct: 1}, + }, + expectedSamples: func() []chunks.Sample { + fhNoCounterReset := *testFloatHistogram + fhNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, fh: &histogram.FloatHistogram{}}, + sample{t: 100, fh: testFloatHistogram}, + sample{t: 101, fh: &fhNoCounterReset}, + } + }(), + }, + { + name: "Consecutive appends with newer ct do not ignore ct/floatSample", + appendableSamples: []appendableSamples{ + {ts: 100, fSample: 10, ct: 1}, + {ts: 102, fSample: 10, ct: 101}, }, expectedSamples: []chunks.Sample{ sample{t: 1, f: 0}, @@ -5982,10 +6397,36 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { }, }, { - name: "CT equals to previous sample timestamp is ignored", + name: "Consecutive appends with newer ct do not ignore ct/histogram", appendableSamples: []appendableSamples{ - {ts: 100, val: 10, ct: 1}, - {ts: 101, val: 10, ct: 100}, + {ts: 100, h: testHistogram, ct: 1}, + {ts: 102, h: testHistogram, ct: 101}, + }, + expectedSamples: []chunks.Sample{ + sample{t: 1, h: &histogram.Histogram{}}, + sample{t: 100, h: testHistogram}, + sample{t: 101, h: &histogram.Histogram{CounterResetHint: histogram.UnknownCounterReset}}, + sample{t: 102, h: testHistogram}, + }, + }, + { + name: "Consecutive appends with newer ct do not ignore ct/floathistogram", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatHistogram, ct: 1}, + {ts: 102, fh: testFloatHistogram, ct: 101}, + }, + expectedSamples: []chunks.Sample{ + sample{t: 1, fh: &histogram.FloatHistogram{}}, + sample{t: 100, fh: testFloatHistogram}, + sample{t: 101, fh: &histogram.FloatHistogram{CounterResetHint: histogram.UnknownCounterReset}}, + sample{t: 102, fh: testFloatHistogram}, + }, + }, + { + name: "CT equals to previous sample timestamp is ignored/floatSample", + appendableSamples: []appendableSamples{ + {ts: 100, fSample: 10, ct: 1}, + {ts: 101, fSample: 10, ct: 100}, }, expectedSamples: []chunks.Sample{ sample{t: 1, f: 0}, @@ -5993,6 +6434,38 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { sample{t: 101, f: 10}, }, }, + { + name: "CT equals to previous sample timestamp is ignored/histogram", + appendableSamples: []appendableSamples{ + {ts: 100, h: testHistogram, ct: 1}, + {ts: 101, h: testHistogram, ct: 100}, + }, + expectedSamples: func() []chunks.Sample { + hNoCounterReset := *testHistogram + hNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, h: &histogram.Histogram{}}, + sample{t: 100, h: testHistogram}, + sample{t: 101, h: &hNoCounterReset}, + } + }(), + }, + { + name: "CT equals to previous sample timestamp is ignored/floathistogram", + appendableSamples: []appendableSamples{ + {ts: 100, fh: testFloatHistogram, ct: 1}, + {ts: 101, fh: testFloatHistogram, ct: 100}, + }, + expectedSamples: func() []chunks.Sample { + fhNoCounterReset := *testFloatHistogram + fhNoCounterReset.CounterResetHint = histogram.NotCounterReset + return []chunks.Sample{ + sample{t: 1, fh: &histogram.FloatHistogram{}}, + sample{t: 100, fh: testFloatHistogram}, + sample{t: 101, fh: &fhNoCounterReset}, + } + }(), + }, } { t.Run(tc.name, func(t *testing.T) { h, _ := newTestHead(t, DefaultBlockDuration, wlog.CompressionNone, false) @@ -6002,10 +6475,21 @@ func TestHeadAppender_AppendCTZeroSample(t *testing.T) { a := h.Appender(context.Background()) lbls := labels.FromStrings("foo", "bar") for _, sample := range tc.appendableSamples { - _, err := a.AppendCTZeroSample(0, 
lbls, sample.ts, sample.ct) - require.NoError(t, err) - _, err = a.Append(0, lbls, sample.ts, sample.val) - require.NoError(t, err) + // Append float if it's a float test case + if sample.fSample != 0 { + _, err := a.AppendCTZeroSample(0, lbls, sample.ts, sample.ct) + require.NoError(t, err) + _, err = a.Append(0, lbls, sample.ts, sample.fSample) + require.NoError(t, err) + } + + // Append histograms if it's a histogram test case + if sample.h != nil || sample.fh != nil { + ref, err := a.AppendHistogramCTZeroSample(0, lbls, sample.ts, sample.ct, sample.h, sample.fh) + require.NoError(t, err) + _, err = a.AppendHistogram(ref, lbls, sample.ts, sample.h, sample.fh) + require.NoError(t, err) + } } require.NoError(t, a.Commit()) @@ -6040,3 +6524,60 @@ func (c *countSeriesLifecycleCallback) PostCreation(labels.Labels) { c.crea func (c *countSeriesLifecycleCallback) PostDeletion(s map[chunks.HeadSeriesRef]labels.Labels) { c.deleted.Add(int64(len(s))) } + +// Regression test for data race https://github.com/prometheus/prometheus/issues/15139. +func TestHeadAppendHistogramAndCommitConcurrency(t *testing.T) { + h := tsdbutil.GenerateTestHistogram(1) + fh := tsdbutil.GenerateTestFloatHistogram(1) + + testCases := map[string]func(storage.Appender, int) error{ + "integer histogram": func(app storage.Appender, i int) error { + _, err := app.AppendHistogram(0, labels.FromStrings("foo", "bar", "serial", strconv.Itoa(i)), 1, h, nil) + return err + }, + "float histogram": func(app storage.Appender, i int) error { + _, err := app.AppendHistogram(0, labels.FromStrings("foo", "bar", "serial", strconv.Itoa(i)), 1, nil, fh) + return err + }, + } + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + testHeadAppendHistogramAndCommitConcurrency(t, tc) + }) + } +} + +func testHeadAppendHistogramAndCommitConcurrency(t *testing.T, appendFn func(storage.Appender, int) error) { + head, _ := newTestHead(t, 1000, wlog.CompressionNone, false) + defer func() { + require.NoError(t, head.Close()) + }() + + wg := sync.WaitGroup{} + wg.Add(2) + + // How this works: Commit() should be atomic, thus one of the commits will + // be first and the other second. The first commit will create a new series + // and write a sample. The second commit will see an exact duplicate sample + // which it should ignore. Unless there's a race that causes the + // memSeries.lastHistogram to be corrupt and fail the duplicate check. + go func() { + defer wg.Done() + for i := 0; i < 10000; i++ { + app := head.Appender(context.Background()) + require.NoError(t, appendFn(app, i)) + require.NoError(t, app.Commit()) + } + }() + + go func() { + defer wg.Done() + for i := 0; i < 10000; i++ { + app := head.Appender(context.Background()) + require.NoError(t, appendFn(app, i)) + require.NoError(t, app.Commit()) + } + }() + + wg.Wait() +} diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go index ef96b53305..6744d582ae 100644 --- a/tsdb/head_wal.go +++ b/tsdb/head_wal.go @@ -24,7 +24,6 @@ import ( "sync" "time" - "github.com/go-kit/log/level" "go.uber.org/atomic" "github.com/prometheus/prometheus/model/exemplar" @@ -128,7 +127,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch // replaying the WAL, so lets just log the error if it's not that type. 
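The `AppendCT` test cases above all drive the same ingestion shape: a synthetic zero sample at the created timestamp (CT), followed by the real sample at its own timestamp. Below is a minimal sketch of that flow against `storage.Appender`; `appendFloatWithCT` is a hypothetical helper name, and treating an out-of-order CT as non-fatal is an assumption of this sketch rather than something the diff prescribes.

```go
package example

import (
	"errors"

	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/storage"
)

// appendFloatWithCT appends a zero sample at the created timestamp ct and
// then the real sample at ts. The zero sample is best-effort here: if the
// CT is out of order we skip it and still append the sample itself.
func appendFloatWithCT(app storage.Appender, lset labels.Labels, ct, ts int64, v float64) error {
	if _, err := app.AppendCTZeroSample(0, lset, ts, ct); err != nil && !errors.Is(err, storage.ErrOutOfOrderCT) {
		return err
	}
	_, err := app.Append(0, lset, ts, v)
	return err
}
```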
err = h.exemplars.AddExemplar(ms.labels(), exemplar.Exemplar{Ts: e.T, Value: e.V, Labels: e.Labels}) if err != nil && errors.Is(err, storage.ErrOutOfOrderExemplar) { - level.Warn(h.logger).Log("msg", "Unexpected error when replaying WAL on exemplar record", "err", err) + h.logger.Warn("Unexpected error when replaying WAL on exemplar record", "err", err) } } }(exemplarsInput) @@ -421,8 +420,8 @@ Outer: } if unknownRefs.Load()+unknownExemplarRefs.Load()+unknownHistogramRefs.Load()+unknownMetadataRefs.Load() > 0 { - level.Warn(h.logger).Log( - "msg", "Unknown series references", + h.logger.Warn( + "Unknown series references", "samples", unknownRefs.Load(), "exemplars", unknownExemplarRefs.Load(), "histograms", unknownHistogramRefs.Load(), @@ -430,7 +429,7 @@ Outer: ) } if count := mmapOverlappingChunks.Load(); count > 0 { - level.Info(h.logger).Log("msg", "Overlapping m-map chunks on duplicate series records", "count", count) + h.logger.Info("Overlapping m-map chunks on duplicate series records", "count", count) } return nil } @@ -446,8 +445,8 @@ func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc, oooMmc []*m mmc[0].minTime, mmc[len(mmc)-1].maxTime, ) { - level.Debug(h.logger).Log( - "msg", "M-mapped chunks overlap on a duplicate series record", + h.logger.Debug( + "M-mapped chunks overlap on a duplicate series record", "series", mSeries.labels().String(), "oldref", mSeries.ref, "oldmint", mSeries.mmappedChunks[0].minTime, @@ -646,9 +645,9 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp } func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[chunks.HeadSeriesRef]chunks.HeadSeriesRef, lastMmapRef chunks.ChunkDiskMapperRef) (err error) { - // Track number of samples, m-map markers, that referenced a series we don't know about + // Track number of samples, histogram samples, m-map markers, that referenced a series we don't know about // for error reporting. - var unknownRefs, mmapMarkerUnknownRefs atomic.Uint64 + var unknownRefs, unknownHistogramRefs, mmapMarkerUnknownRefs atomic.Uint64 lastSeq, lastOff := lastMmapRef.Unpack() // Start workers that each process samples for a partition of the series ID space. 
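The logging rewrites running through `tsdb/head_wal.go` here are mechanical: Prometheus switched from go-kit's leveled logger to the standard library's `log/slog`, so the message moves from a `"msg"` key to the first positional argument. A runnable illustration (the go-kit form is shown only as a comment):

```go
package main

import (
	"log/slog"
	"os"
)

func main() {
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))

	// Old (go-kit): level.Warn(logger).Log("msg", "Unknown series references", "samples", 3)
	// New (slog): the message is positional; key/value pairs follow.
	logger.Warn("Unknown series references", "samples", 3, "exemplars", 0)
}
```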
@@ -657,21 +656,15 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch concurrency = h.opts.WALReplayConcurrency processors = make([]wblSubsetProcessor, concurrency) - dec = record.NewDecoder(syms) - shards = make([][]record.RefSample, concurrency) + shards = make([][]record.RefSample, concurrency) + histogramShards = make([][]histogramRecord, concurrency) - decodedCh = make(chan interface{}, 10) - decodeErr error - samplesPool = sync.Pool{ - New: func() interface{} { - return []record.RefSample{} - }, - } - markersPool = sync.Pool{ - New: func() interface{} { - return []record.RefMmapMarker{} - }, - } + decodedCh = make(chan interface{}, 10) + decodeErr error + samplesPool zeropool.Pool[[]record.RefSample] + markersPool zeropool.Pool[[]record.RefMmapMarker] + histogramSamplesPool zeropool.Pool[[]record.RefHistogramSample] + floatHistogramSamplesPool zeropool.Pool[[]record.RefFloatHistogramSample] ) defer func() { @@ -692,19 +685,22 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch processors[i].setup() go func(wp *wblSubsetProcessor) { - unknown := wp.processWBLSamples(h) + unknown, unknownHistograms := wp.processWBLSamples(h) unknownRefs.Add(unknown) + unknownHistogramRefs.Add(unknownHistograms) wg.Done() }(&processors[i]) } go func() { defer close(decodedCh) + var err error + dec := record.NewDecoder(syms) for r.Next() { rec := r.Record() switch dec.Type(rec) { case record.Samples: - samples := samplesPool.Get().([]record.RefSample)[:0] + samples := samplesPool.Get()[:0] samples, err = dec.Samples(rec, samples) if err != nil { decodeErr = &wlog.CorruptionErr{ @@ -716,7 +712,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } decodedCh <- samples case record.MmapMarkers: - markers := markersPool.Get().([]record.RefMmapMarker)[:0] + markers := markersPool.Get()[:0] markers, err = dec.MmapMarkers(rec, markers) if err != nil { decodeErr = &wlog.CorruptionErr{ @@ -727,6 +723,30 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch return } decodedCh <- markers + case record.HistogramSamples: + hists := histogramSamplesPool.Get()[:0] + hists, err = dec.HistogramSamples(rec, hists) + if err != nil { + decodeErr = &wlog.CorruptionErr{ + Err: fmt.Errorf("decode histograms: %w", err), + Segment: r.Segment(), + Offset: r.Offset(), + } + return + } + decodedCh <- hists + case record.FloatHistogramSamples: + hists := floatHistogramSamplesPool.Get()[:0] + hists, err = dec.FloatHistogramSamples(rec, hists) + if err != nil { + decodeErr = &wlog.CorruptionErr{ + Err: fmt.Errorf("decode float histograms: %w", err), + Segment: r.Segment(), + Offset: r.Offset(), + } + return + } + decodedCh <- hists default: // Noop. } @@ -767,7 +787,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } samples = samples[m:] } - samplesPool.Put(d) + samplesPool.Put(v) case []record.RefMmapMarker: markers := v for _, rm := range markers { @@ -791,6 +811,70 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch idx := uint64(ms.ref) % uint64(concurrency) processors[idx].input <- wblSubsetProcessorInputItem{mmappedSeries: ms} } + case []record.RefHistogramSample: + samples := v + // We split up the samples into chunks of 5000 samples or less. + // With O(300 * #cores) in-flight sample batches, large scrapes could otherwise + // cause thousands of very large in flight buffers occupying large amounts + // of unused memory. 
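The batching comment above, copied into the new histogram branches below, is doing real work: decoded records are cut into batches of at most 5000 and routed to workers by series reference modulo the worker count, which bounds in-flight memory during replay. The loop shape, distilled with simplified types (a sketch, not the PR's code):

```go
package example

// shardBatches routes refs to workers in batches of at most maxBatch,
// keyed by ref modulo the worker count, mirroring the WBL replay loop
// in simplified form.
func shardBatches(refs []uint64, workers int, send func(worker int, batch []uint64)) {
	const maxBatch = 5000
	shards := make([][]uint64, workers)
	for len(refs) > 0 {
		m := min(maxBatch, len(refs))
		for _, ref := range refs[:m] {
			w := int(ref % uint64(workers))
			shards[w] = append(shards[w], ref)
		}
		for w, batch := range shards {
			if len(batch) > 0 {
				send(w, batch)
				shards[w] = nil // worker owns the batch now
			}
		}
		refs = refs[m:]
	}
}
```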
+ for len(samples) > 0 { + m := 5000 + if len(samples) < m { + m = len(samples) + } + for i := 0; i < concurrency; i++ { + if histogramShards[i] == nil { + histogramShards[i] = processors[i].reuseHistogramBuf() + } + } + for _, sam := range samples[:m] { + if r, ok := multiRef[sam.Ref]; ok { + sam.Ref = r + } + mod := uint64(sam.Ref) % uint64(concurrency) + histogramShards[mod] = append(histogramShards[mod], histogramRecord{ref: sam.Ref, t: sam.T, h: sam.H}) + } + for i := 0; i < concurrency; i++ { + if len(histogramShards[i]) > 0 { + processors[i].input <- wblSubsetProcessorInputItem{histogramSamples: histogramShards[i]} + histogramShards[i] = nil + } + } + samples = samples[m:] + } + histogramSamplesPool.Put(v) + case []record.RefFloatHistogramSample: + samples := v + // We split up the samples into chunks of 5000 samples or less. + // With O(300 * #cores) in-flight sample batches, large scrapes could otherwise + // cause thousands of very large in flight buffers occupying large amounts + // of unused memory. + for len(samples) > 0 { + m := 5000 + if len(samples) < m { + m = len(samples) + } + for i := 0; i < concurrency; i++ { + if histogramShards[i] == nil { + histogramShards[i] = processors[i].reuseHistogramBuf() + } + } + for _, sam := range samples[:m] { + if r, ok := multiRef[sam.Ref]; ok { + sam.Ref = r + } + mod := uint64(sam.Ref) % uint64(concurrency) + histogramShards[mod] = append(histogramShards[mod], histogramRecord{ref: sam.Ref, t: sam.T, fh: sam.FH}) + } + for i := 0; i < concurrency; i++ { + if len(histogramShards[i]) > 0 { + processors[i].input <- wblSubsetProcessorInputItem{histogramSamples: histogramShards[i]} + histogramShards[i] = nil + } + } + samples = samples[m:] + } + floatHistogramSamplesPool.Put(v) default: panic(fmt.Errorf("unexpected decodedCh type: %T", d)) } @@ -811,7 +895,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch } if unknownRefs.Load() > 0 || mmapMarkerUnknownRefs.Load() > 0 { - level.Warn(h.logger).Log("msg", "Unknown series references for ooo WAL replay", "samples", unknownRefs.Load(), "mmap_markers", mmapMarkerUnknownRefs.Load()) + h.logger.Warn("Unknown series references for ooo WAL replay", "samples", unknownRefs.Load(), "mmap_markers", mmapMarkerUnknownRefs.Load()) } return nil } @@ -833,17 +917,20 @@ func (e errLoadWbl) Unwrap() error { } type wblSubsetProcessor struct { - input chan wblSubsetProcessorInputItem - output chan []record.RefSample + input chan wblSubsetProcessorInputItem + output chan []record.RefSample + histogramsOutput chan []histogramRecord } type wblSubsetProcessorInputItem struct { - mmappedSeries *memSeries - samples []record.RefSample + mmappedSeries *memSeries + samples []record.RefSample + histogramSamples []histogramRecord } func (wp *wblSubsetProcessor) setup() { wp.output = make(chan []record.RefSample, 300) + wp.histogramsOutput = make(chan []histogramRecord, 300) wp.input = make(chan wblSubsetProcessorInputItem, 300) } @@ -851,6 +938,8 @@ func (wp *wblSubsetProcessor) closeAndDrain() { close(wp.input) for range wp.output { } + for range wp.histogramsOutput { + } } // If there is a buffer in the output chan, return it for reuse, otherwise return nil. @@ -863,10 +952,21 @@ func (wp *wblSubsetProcessor) reuseBuf() []record.RefSample { return nil } +// If there is a buffer in the output chan, return it for reuse, otherwise return nil. 
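Two allocation details in this replay path are easy to miss: the untyped `sync.Pool`s became generic `zeropool.Pool[T]` values (storing a slice in a plain `sync.Pool` boxes the slice header into an `interface{}`, which allocates), and spent buffers travel back through a bounded channel drained with a non-blocking receive. A generic sketch of that recycling step, assuming it is acceptable to drop a buffer when the channel is empty:

```go
package example

// reuse returns a previously handed-out buffer if one is waiting on the
// output channel, truncated for refilling; otherwise nil, and the caller
// allocates. The default case keeps the replay path non-blocking.
func reuse[T any](output chan []T) []T {
	select {
	case buf := <-output:
		return buf[:0]
	default:
		return nil
	}
}
```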
+func (wp *wblSubsetProcessor) reuseHistogramBuf() []histogramRecord { + select { + case buf := <-wp.histogramsOutput: + return buf[:0] + default: + } + return nil +} + // processWBLSamples adds the samples it receives to the head and passes // the buffer received to an output channel for reuse. -func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs uint64) { +func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs, unknownHistogramRefs uint64) { defer close(wp.output) + defer close(wp.histogramsOutput) oooCapMax := h.opts.OutOfOrderCapMax.Load() // We don't check for minValidTime for ooo samples. @@ -905,11 +1005,41 @@ func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs uint64) { case wp.output <- in.samples: default: } + for _, s := range in.histogramSamples { + ms := h.series.getByID(s.ref) + if ms == nil { + unknownHistogramRefs++ + continue + } + var chunkCreated bool + var ok bool + if s.h != nil { + ok, chunkCreated, _ = ms.insert(s.t, 0, s.h, nil, h.chunkDiskMapper, oooCapMax, h.logger) + } else { + ok, chunkCreated, _ = ms.insert(s.t, 0, nil, s.fh, h.chunkDiskMapper, oooCapMax, h.logger) + } + if chunkCreated { + h.metrics.chunksCreated.Inc() + h.metrics.chunks.Inc() + } + if ok { + if s.t > maxt { + maxt = s.t + } + if s.t < mint { + mint = s.t + } + } + } + select { + case wp.histogramsOutput <- in.histogramSamples: + default: + } } h.updateMinOOOMaxOOOTime(mint, maxt) - return unknownRefs + return unknownRefs, unknownHistogramRefs } const ( @@ -1066,7 +1196,7 @@ const chunkSnapshotPrefix = "chunk_snapshot." func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { if h.wal == nil { // If we are not storing any WAL, does not make sense to take a snapshot too. - level.Warn(h.logger).Log("msg", "skipping chunk snapshotting as WAL is disabled") + h.logger.Warn("skipping chunk snapshotting as WAL is disabled") return &ChunkSnapshotStats{}, nil } h.chunkSnapshotMtx.Lock() @@ -1215,7 +1345,7 @@ func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { // Leftover old chunk snapshots do not cause problems down the line beyond // occupying disk space. // They will just be ignored since a higher chunk snapshot exists. 
- level.Error(h.logger).Log("msg", "delete old chunk snapshots", "err", err) + h.logger.Error("delete old chunk snapshots", "err", err) } return stats, nil } @@ -1225,12 +1355,12 @@ func chunkSnapshotDir(wlast, woffset int) string { } func (h *Head) performChunkSnapshot() error { - level.Info(h.logger).Log("msg", "creating chunk snapshot") + h.logger.Info("creating chunk snapshot") startTime := time.Now() stats, err := h.ChunkSnapshot() elapsed := time.Since(startTime) if err == nil { - level.Info(h.logger).Log("msg", "chunk snapshot complete", "duration", elapsed.String(), "num_series", stats.TotalSeries, "dir", stats.Dir) + h.logger.Info("chunk snapshot complete", "duration", elapsed.String(), "num_series", stats.TotalSeries, "dir", stats.Dir) } if err != nil { return fmt.Errorf("chunk snapshot: %w", err) @@ -1345,7 +1475,7 @@ func (h *Head) loadChunkSnapshot() (int, int, map[chunks.HeadSeriesRef]*memSerie } defer func() { if err := sr.Close(); err != nil { - level.Warn(h.logger).Log("msg", "error while closing the wal segments reader", "err", err) + h.logger.Warn("error while closing the wal segments reader", "err", err) } }() @@ -1534,9 +1664,9 @@ Outer: } elapsed := time.Since(start) - level.Info(h.logger).Log("msg", "chunk snapshot loaded", "dir", dir, "num_series", numSeries, "duration", elapsed.String()) + h.logger.Info("chunk snapshot loaded", "dir", dir, "num_series", numSeries, "duration", elapsed.String()) if unknownRefs > 0 { - level.Warn(h.logger).Log("msg", "unknown series references during chunk snapshot replay", "count", unknownRefs) + h.logger.Warn("unknown series references during chunk snapshot replay", "count", unknownRefs) } return snapIdx, snapOffset, refSeries, nil diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 0e0e353719..911b1a6ecc 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -43,10 +43,12 @@ const ( // HeaderLen represents number of bytes reserved of index for header. HeaderLen = 5 - // FormatV1 represents 1 version of index. + // FormatV1 represents version 1 of index. FormatV1 = 1 - // FormatV2 represents 2 version of index. + // FormatV2 represents version 2 of index. FormatV2 = 2 + // FormatV3 represents version 3 of index. + FormatV3 = 3 indexFilename = "index" @@ -108,14 +110,10 @@ func newCRC32() hash.Hash32 { return crc32.New(castagnoliTable) } -type symbolCacheEntry struct { - index uint32 - lastValueIndex uint32 - lastValue string -} - type PostingsEncoder func(*encoding.Encbuf, []uint32) error +type PostingsDecoder func(encoding.Decbuf) (int, Postings, error) + // Writer implements the IndexWriter interface for the standard // serialization format. type Writer struct { @@ -142,7 +140,7 @@ type Writer struct { symbols *Symbols symbolFile *fileutil.MmapFile lastSymbol string - symbolCache map[string]symbolCacheEntry + symbolCache map[string]uint32 // From symbol to index in table. labelIndexes []labelIndexHashEntry // Label index offsets. labelNames map[string]uint64 // Label names, and their usage. @@ -242,7 +240,7 @@ func NewWriterWithEncoder(ctx context.Context, fn string, encoder PostingsEncode buf1: encoding.Encbuf{B: make([]byte, 0, 1<<22)}, buf2: encoding.Encbuf{B: make([]byte, 0, 1<<22)}, - symbolCache: make(map[string]symbolCacheEntry, 1<<8), + symbolCache: make(map[string]uint32, 1<<16), labelNames: make(map[string]uint64, 1<<8), crc32: newCRC32(), postingsEncoder: encoder, @@ -436,7 +434,7 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... 
return err } if labels.Compare(lset, w.lastSeries) <= 0 { - return fmt.Errorf("out-of-order series added with label set %q", lset) + return fmt.Errorf("out-of-order series added with label set %q, last label set %q", lset, w.lastSeries) } if ref < w.lastSeriesRef && !w.lastSeries.IsEmpty() { @@ -474,29 +472,16 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... w.buf2.PutUvarint(lset.Len()) if err := lset.Validate(func(l labels.Label) error { - var err error - cacheEntry, ok := w.symbolCache[l.Name] - nameIndex := cacheEntry.index + nameIndex, ok := w.symbolCache[l.Name] if !ok { - nameIndex, err = w.symbols.ReverseLookup(l.Name) - if err != nil { - return fmt.Errorf("symbol entry for %q does not exist, %w", l.Name, err) - } + return fmt.Errorf("symbol entry for %q does not exist", l.Name) } w.labelNames[l.Name]++ w.buf2.PutUvarint32(nameIndex) - valueIndex := cacheEntry.lastValueIndex - if !ok || cacheEntry.lastValue != l.Value { - valueIndex, err = w.symbols.ReverseLookup(l.Value) - if err != nil { - return fmt.Errorf("symbol entry for %q does not exist, %w", l.Value, err) - } - w.symbolCache[l.Name] = symbolCacheEntry{ - index: nameIndex, - lastValueIndex: valueIndex, - lastValue: l.Value, - } + valueIndex, ok := w.symbolCache[l.Value] + if !ok { + return fmt.Errorf("symbol entry for %q does not exist", l.Value) } w.buf2.PutUvarint32(valueIndex) return nil @@ -555,6 +540,7 @@ func (w *Writer) AddSymbol(sym string) error { return fmt.Errorf("symbol %q out-of-order", sym) } w.lastSymbol = sym + w.symbolCache[sym] = uint32(w.numSymbols) w.numSymbols++ w.buf1.Reset() w.buf1.PutUvarintStr(sym) @@ -624,10 +610,10 @@ func (w *Writer) writeLabelIndices() error { values := []uint32{} for d.Err() == nil && cnt > 0 { cnt-- - d.Uvarint() // Keycount. - name := d.UvarintBytes() // Label name. - value := yoloString(d.UvarintBytes()) // Label value. - d.Uvarint64() // Offset. + d.Uvarint() // Keycount. + name := d.UvarintBytes() // Label name. + value := d.UvarintBytes() // Label value. + d.Uvarint64() // Offset. if len(name) == 0 { continue // All index is ignored. } @@ -640,9 +626,9 @@ func (w *Writer) writeLabelIndices() error { values = values[:0] } current = name - sid, err := w.symbols.ReverseLookup(value) - if err != nil { - return err + sid, ok := w.symbolCache[string(value)] + if !ok { + return fmt.Errorf("symbol entry for %q does not exist", string(value)) } values = append(values, sid) } @@ -914,9 +900,9 @@ func (w *Writer) writePostingsToTmpFiles() error { nameSymbols := map[uint32]string{} for _, name := range batchNames { - sid, err := w.symbols.ReverseLookup(name) - if err != nil { - return err + sid, ok := w.symbolCache[name] + if !ok { + return fmt.Errorf("symbol entry for %q does not exist", name) } nameSymbols[sid] = name } @@ -953,9 +939,9 @@ func (w *Writer) writePostingsToTmpFiles() error { for _, name := range batchNames { // Write out postings for this label name. - sid, err := w.symbols.ReverseLookup(name) - if err != nil { - return err + sid, ok := w.symbolCache[name] + if !ok { + return fmt.Errorf("symbol entry for %q does not exist", name) } values := make([]uint32, 0, len(postings[sid])) for v := range postings[sid] { @@ -1155,17 +1141,17 @@ func (b realByteSlice) Sub(start, end int) ByteSlice { // NewReader returns a new index reader on the given byte slice. It automatically // handles different format versions. 
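The writer-side change above replaces the `symbolCacheEntry` struct with a plain symbol-to-index map filled in `AddSymbol`: since every symbol must be registered, in order, before any series is added, `AddSeries` can resolve each label name and value with a map hit and fail fast on unregistered symbols, instead of binary-searching through `ReverseLookup`. A toy version of that invariant (illustrative types, not the writer's actual ones):

```go
package example

import "fmt"

// symbolIndex assigns dense indexes to symbols in registration order and
// answers lookups; unknown symbols are an error, as in the writer above.
type symbolIndex struct {
	cache map[string]uint32
	next  uint32
}

func newSymbolIndex() *symbolIndex {
	return &symbolIndex{cache: make(map[string]uint32)}
}

func (s *symbolIndex) add(sym string) {
	s.cache[sym] = s.next
	s.next++
}

func (s *symbolIndex) lookup(sym string) (uint32, error) {
	i, ok := s.cache[sym]
	if !ok {
		return 0, fmt.Errorf("symbol entry for %q does not exist", sym)
	}
	return i, nil
}
```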
-func NewReader(b ByteSlice) (*Reader, error) { - return newReader(b, io.NopCloser(nil)) +func NewReader(b ByteSlice, decoder PostingsDecoder) (*Reader, error) { + return newReader(b, io.NopCloser(nil), decoder) } // NewFileReader returns a new index reader against the given index file. -func NewFileReader(path string) (*Reader, error) { +func NewFileReader(path string, decoder PostingsDecoder) (*Reader, error) { f, err := fileutil.OpenMmapFile(path) if err != nil { return nil, err } - r, err := newReader(realByteSlice(f.Bytes()), f) + r, err := newReader(realByteSlice(f.Bytes()), f, decoder) if err != nil { return nil, tsdb_errors.NewMulti( err, @@ -1176,7 +1162,7 @@ func NewFileReader(path string) (*Reader, error) { return r, nil } -func newReader(b ByteSlice, c io.Closer) (*Reader, error) { +func newReader(b ByteSlice, c io.Closer, postingsDecoder PostingsDecoder) (*Reader, error) { r := &Reader{ b: b, c: c, @@ -1193,7 +1179,9 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { } r.version = int(r.b.Range(4, 5)[0]) - if r.version != FormatV1 && r.version != FormatV2 { + switch r.version { + case FormatV1, FormatV2, FormatV3: + default: return nil, fmt.Errorf("unknown index file version %d", r.version) } @@ -1273,7 +1261,7 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { r.nameSymbols[off] = k } - r.dec = &Decoder{LookupSymbol: r.lookupSymbol} + r.dec = &Decoder{LookupSymbol: r.lookupSymbol, DecodePostings: postingsDecoder} return r, nil } @@ -1351,7 +1339,9 @@ func (s Symbols) Lookup(o uint32) (string, error) { B: s.bs.Range(0, s.bs.Len()), } - if s.version == FormatV2 { + if s.version == FormatV1 { + d.Skip(int(o)) + } else { if int(o) >= s.seen { return "", fmt.Errorf("unknown symbol offset %d", o) } @@ -1360,8 +1350,6 @@ func (s Symbols) Lookup(o uint32) (string, error) { for i := o - (o / symbolFactor * symbolFactor); i > 0; i-- { d.UvarintBytes() } - } else { - d.Skip(int(o)) } sym := d.UvarintStr() if d.Err() != nil { @@ -1407,10 +1395,10 @@ func (s Symbols) ReverseLookup(sym string) (uint32, error) { if lastSymbol != sym { return 0, fmt.Errorf("unknown symbol %q", sym) } - if s.version == FormatV2 { - return uint32(res), nil + if s.version == FormatV1 { + return uint32(s.bs.Len() - lastLen), nil } - return uint32(s.bs.Len() - lastLen), nil + return uint32(res), nil } func (s Symbols) Size() int { @@ -1569,7 +1557,7 @@ func (r *Reader) LabelNamesFor(ctx context.Context, postings Postings) ([]string offset := id // In version 2 series IDs are no longer exact references but series are 16-byte padded // and the ID is the multiple of 16 of the actual position. - if r.version == FormatV2 { + if r.version != FormatV1 { offset = id * seriesByteAlign } @@ -1608,7 +1596,7 @@ func (r *Reader) LabelValueFor(ctx context.Context, id storage.SeriesRef, label offset := id // In version 2 series IDs are no longer exact references but series are 16-byte padded // and the ID is the multiple of 16 of the actual position. - if r.version == FormatV2 { + if r.version != FormatV1 { offset = id * seriesByteAlign } d := encoding.NewDecbufUvarintAt(r.b, int(offset), castagnoliTable) @@ -1634,7 +1622,7 @@ func (r *Reader) Series(id storage.SeriesRef, builder *labels.ScratchBuilder, ch offset := id // In version 2 series IDs are no longer exact references but series are 16-byte padded // and the ID is the multiple of 16 of the actual position. 
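A pattern worth calling out in the reader changes: comparisons written as `r.version == FormatV2` become `r.version != FormatV1`, so the new `FormatV3` inherits v2 semantics such as 16-byte series alignment by default. The offset translation in isolation, assuming `seriesByteAlign` is 16 as elsewhere in the package:

```go
package example

const seriesByteAlign = 16

// seriesOffset converts a series ID into a byte offset. In format v1 the
// ID is the exact offset; from v2 on (including v3), series are 16-byte
// aligned and the ID is the offset divided by 16.
func seriesOffset(version int, id uint64) uint64 {
	if version == 1 {
		return id
	}
	return id * seriesByteAlign
}
```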
- if r.version == FormatV2 { + if r.version != FormatV1 { offset = id * seriesByteAlign } d := encoding.NewDecbufUvarintAt(r.b, int(offset), castagnoliTable) @@ -1702,7 +1690,7 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P } // Read from the postings table. d := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable) - _, p, err := r.dec.Postings(d.Get()) + _, p, err := r.dec.DecodePostings(d) if err != nil { return nil, fmt.Errorf("decode postings: %w", err) } @@ -1745,7 +1733,7 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P if val == value { // Read from the postings table. d2 := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable) - _, p, err := r.dec.Postings(d2.Get()) + _, p, err := r.dec.DecodePostings(d2) if err != nil { return false, fmt.Errorf("decode postings: %w", err) } @@ -1771,6 +1759,15 @@ func (r *Reader) Postings(ctx context.Context, name string, values ...string) (P } func (r *Reader) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) Postings { + return r.postingsForLabelMatching(ctx, name, match) +} + +func (r *Reader) PostingsForAllLabelValues(ctx context.Context, name string) Postings { + return r.postingsForLabelMatching(ctx, name, nil) +} + +// postingsForLabelMatching implements PostingsForLabelMatching if match is non-nil, and PostingsForAllLabelValues otherwise. +func (r *Reader) postingsForLabelMatching(ctx context.Context, name string, match func(string) bool) Postings { if r.version == FormatV1 { return r.postingsForLabelMatchingV1(ctx, name, match) } @@ -1780,13 +1777,19 @@ func (r *Reader) PostingsForLabelMatching(ctx context.Context, name string, matc return EmptyPostings() } + postingsEstimate := 0 + if match == nil { + // The caller wants all postings for name. + postingsEstimate = len(e) * symbolFactor + } + lastVal := e[len(e)-1].value - var its []Postings + its := make([]Postings, 0, postingsEstimate) if err := r.traversePostingOffsets(ctx, e[0].off, func(val string, postingsOff uint64) (bool, error) { - if match(val) { - // We want this postings iterator since the value is a match + if match == nil || match(val) { + // We want this postings iterator since the value is a match. postingsDec := encoding.NewDecbufAt(r.b, int(postingsOff), castagnoliTable) - _, p, err := r.dec.PostingsFromDecbuf(postingsDec) + _, p, err := r.dec.DecodePostings(postingsDec) if err != nil { return false, fmt.Errorf("decode postings: %w", err) } @@ -1813,13 +1816,13 @@ func (r *Reader) postingsForLabelMatchingV1(ctx context.Context, name string, ma return ErrPostings(ctx.Err()) } count++ - if !match(val) { + if match != nil && !match(val) { continue } // Read from the postings table. d := encoding.NewDecbufAt(r.b, int(offset), castagnoliTable) - _, p, err := r.dec.PostingsFromDecbuf(d) + _, p, err := r.dec.DecodePostings(d) if err != nil { return ErrPostings(fmt.Errorf("decode postings: %w", err)) } @@ -1914,17 +1917,12 @@ func (s stringListIter) Err() error { return nil } // It currently does not contain decoding methods for all entry types but can be extended // by them if there's demand. type Decoder struct { - LookupSymbol func(context.Context, uint32) (string, error) + LookupSymbol func(context.Context, uint32) (string, error) + DecodePostings PostingsDecoder } -// Postings returns a postings list for b and its number of elements. 
-func (dec *Decoder) Postings(b []byte) (int, Postings, error) { - d := encoding.Decbuf{B: b} - return dec.PostingsFromDecbuf(d) -} - -// PostingsFromDecbuf returns a postings list for d and its number of elements. -func (dec *Decoder) PostingsFromDecbuf(d encoding.Decbuf) (int, Postings, error) { +// DecodePostingsRaw returns a postings list for d and its number of elements. +func DecodePostingsRaw(d encoding.Decbuf) (int, Postings, error) { n := d.Be32int() l := d.Get() if d.Err() != nil { @@ -2063,5 +2061,5 @@ func (dec *Decoder) Series(b []byte, builder *labels.ScratchBuilder, chks *[]chu } func yoloString(b []byte) string { - return *((*string)(unsafe.Pointer(&b))) + return unsafe.String(unsafe.SliceData(b), len(b)) } diff --git a/tsdb/index/index_test.go b/tsdb/index/index_test.go index d81dd8696c..ee186c1d95 100644 --- a/tsdb/index/index_test.go +++ b/tsdb/index/index_test.go @@ -146,7 +146,7 @@ func TestIndexRW_Create_Open(t *testing.T) { require.NoError(t, err) require.NoError(t, iw.Close()) - ir, err := NewFileReader(fn) + ir, err := NewFileReader(fn, DecodePostingsRaw) require.NoError(t, err) require.NoError(t, ir.Close()) @@ -157,7 +157,7 @@ func TestIndexRW_Create_Open(t *testing.T) { require.NoError(t, err) f.Close() - _, err = NewFileReader(dir) + _, err = NewFileReader(dir, DecodePostingsRaw) require.Error(t, err) } @@ -218,7 +218,7 @@ func TestIndexRW_Postings(t *testing.T) { }, labelIndices) t.Run("ShardedPostings()", func(t *testing.T) { - ir, err := NewFileReader(fn) + ir, err := NewFileReader(fn, DecodePostingsRaw) require.NoError(t, err) t.Cleanup(func() { require.NoError(t, ir.Close()) @@ -331,7 +331,7 @@ func TestPostingsMany(t *testing.T) { exp = append(exp, e) } } - require.Equal(t, exp, got, fmt.Sprintf("input: %v", c.in)) + require.Equalf(t, exp, got, "input: %v", c.in) } } @@ -469,7 +469,7 @@ func TestDecbufUvarintWithInvalidBuffer(t *testing.T) { func TestReaderWithInvalidBuffer(t *testing.T) { b := realByteSlice([]byte{0x81, 0x81, 0x81, 0x81, 0x81, 0x81}) - _, err := NewReader(b) + _, err := NewReader(b, DecodePostingsRaw) require.Error(t, err) } @@ -481,7 +481,7 @@ func TestNewFileReaderErrorNoOpenFiles(t *testing.T) { err := os.WriteFile(idxName, []byte("corrupted contents"), 0o666) require.NoError(t, err) - _, err = NewFileReader(idxName) + _, err = NewFileReader(idxName, DecodePostingsRaw) require.Error(t, err) // dir.Close will fail on Win if idxName fd is not closed on error path. 
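As the updated tests show, every reader constructor now takes a `PostingsDecoder`; `index.DecodePostingsRaw` is the stock implementation, and the indirection leaves room for format-specific decoders. A minimal open sketch:

```go
package example

import (
	"path/filepath"

	"github.com/prometheus/prometheus/tsdb/index"
)

// openIndex opens a TSDB index file with the standard raw postings decoder.
func openIndex(dir string) (*index.Reader, error) {
	return index.NewFileReader(filepath.Join(dir, "index"), index.DecodePostingsRaw)
}
```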
@@ -560,7 +560,8 @@ func BenchmarkReader_ShardedPostings(b *testing.B) { } func TestDecoder_Postings_WrongInput(t *testing.T) { - _, _, err := (&Decoder{}).Postings([]byte("the cake is a lie")) + d := encoding.Decbuf{B: []byte("the cake is a lie")} + _, _, err := (&Decoder{DecodePostings: DecodePostingsRaw}).DecodePostings(d) require.Error(t, err) } @@ -612,6 +613,52 @@ func TestChunksTimeOrdering(t *testing.T) { require.NoError(t, idx.Close()) } +func TestReader_PostingsForLabelMatching(t *testing.T) { + const seriesCount = 9 + var input indexWriterSeriesSlice + for i := 1; i <= seriesCount; i++ { + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("__name__", strconv.Itoa(i)), + chunks: []chunks.Meta{ + {Ref: 1, MinTime: 0, MaxTime: 10}, + }, + }) + } + ir, _, _ := createFileReader(context.Background(), t, input) + + p := ir.PostingsForLabelMatching(context.Background(), "__name__", func(v string) bool { + iv, err := strconv.Atoi(v) + if err != nil { + panic(err) + } + return iv%2 == 0 + }) + require.NoError(t, p.Err()) + refs, err := ExpandPostings(p) + require.NoError(t, err) + require.Equal(t, []storage.SeriesRef{4, 6, 8, 10}, refs) +} + +func TestReader_PostingsForAllLabelValues(t *testing.T) { + const seriesCount = 9 + var input indexWriterSeriesSlice + for i := 1; i <= seriesCount; i++ { + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("__name__", strconv.Itoa(i)), + chunks: []chunks.Meta{ + {Ref: 1, MinTime: 0, MaxTime: 10}, + }, + }) + } + ir, _, _ := createFileReader(context.Background(), t, input) + + p := ir.PostingsForAllLabelValues(context.Background(), "__name__") + require.NoError(t, p.Err()) + refs, err := ExpandPostings(p) + require.NoError(t, err) + require.Equal(t, []storage.SeriesRef{3, 4, 5, 6, 7, 8, 9, 10, 11}, refs) +} + func TestReader_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { const seriesCount = 1000 var input indexWriterSeriesSlice @@ -690,7 +737,7 @@ func createFileReader(ctx context.Context, tb testing.TB, input indexWriterSerie } require.NoError(tb, iw.Close()) - ir, err := NewFileReader(fn) + ir, err := NewFileReader(fn, DecodePostingsRaw) require.NoError(tb, err) tb.Cleanup(func() { require.NoError(tb, ir.Close()) diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index bfe74c323d..ea32ba5632 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -18,12 +18,14 @@ import ( "context" "encoding/binary" "fmt" + "maps" "math" "runtime" "slices" "sort" "strings" "sync" + "time" "github.com/bboreham/go-loser" @@ -31,6 +33,8 @@ import ( "github.com/prometheus/prometheus/storage" ) +const exponentialSliceGrowthFactor = 2 + var allPostingsKey = labels.Label{} // AllPostingsKey returns the label key that is used to store the postings list of all existing IDs. @@ -54,15 +58,33 @@ var ensureOrderBatchPool = sync.Pool{ // EnsureOrder() must be called once before any reads are done. This allows for quick // unordered batch fills on startup. type MemPostings struct { - mtx sync.RWMutex - m map[string]map[string][]storage.SeriesRef + mtx sync.RWMutex + + // m holds the postings lists for each label-value pair, indexed first by label name, and then by label value. + // + // mtx must be held when interacting with m (the appropriate one for reading or writing). + // It is safe to retain a reference to a postings list after releasing the lock. 
+	//
+	// BUG: There's currently a data race in addFor, which might modify the tail of the postings list:
+	// https://github.com/prometheus/prometheus/issues/15317
+	m map[string]map[string][]storage.SeriesRef
+
+	// lvs holds the label values for each label name.
+	// lvs[name] is essentially an unsorted append-only list of all keys in m[name].
+	// mtx must be held when interacting with lvs.
+	// Since it's append-only, it is safe to read the label values slice after releasing the lock.
+	lvs map[string][]string
+
 	ordered bool
 }
 
+const defaultLabelNamesMapSize = 512
+
 // NewMemPostings returns a memPostings that's ready for reads and writes.
 func NewMemPostings() *MemPostings {
 	return &MemPostings{
-		m:       make(map[string]map[string][]storage.SeriesRef, 512),
+		m:       make(map[string]map[string][]storage.SeriesRef, defaultLabelNamesMapSize),
+		lvs:     make(map[string][]string, defaultLabelNamesMapSize),
 		ordered: true,
 	}
 }
@@ -71,7 +93,8 @@ func NewMemPostings() *MemPostings {
 // until EnsureOrder() was called once.
 func NewUnorderedMemPostings() *MemPostings {
 	return &MemPostings{
-		m:       make(map[string]map[string][]storage.SeriesRef, 512),
+		m:       make(map[string]map[string][]storage.SeriesRef, defaultLabelNamesMapSize),
+		lvs:     make(map[string][]string, defaultLabelNamesMapSize),
 		ordered: false,
 	}
 }
@@ -79,16 +102,19 @@ func NewUnorderedMemPostings() *MemPostings {
 // Symbols returns an iterator over all unique name and value strings, in order.
 func (p *MemPostings) Symbols() StringIter {
 	p.mtx.RLock()
+	// Make a quick clone of the map to avoid holding the lock while iterating.
+	// It's safe to use the values of the map after releasing the lock, as they're append-only slices.
+	lvs := maps.Clone(p.lvs)
+	p.mtx.RUnlock()
 
 	// Add all the strings to a map to de-duplicate.
-	symbols := make(map[string]struct{}, 512)
-	for n, e := range p.m {
+	symbols := make(map[string]struct{}, defaultLabelNamesMapSize)
+	for n, labelValues := range lvs {
 		symbols[n] = struct{}{}
-		for v := range e {
+		for _, v := range labelValues {
 			symbols[v] = struct{}{}
 		}
 	}
-	p.mtx.RUnlock()
 
 	res := make([]string, 0, len(symbols))
 	for k := range symbols {
@@ -144,13 +170,14 @@ func (p *MemPostings) LabelNames() []string {
 // LabelValues returns label values for the given name.
 func (p *MemPostings) LabelValues(_ context.Context, name string) []string {
 	p.mtx.RLock()
-	defer p.mtx.RUnlock()
+	values := p.lvs[name]
+	p.mtx.RUnlock()
 
-	values := make([]string, 0, len(p.m[name]))
-	for v := range p.m[name] {
-		values = append(values, v)
-	}
-	return values
+	// The slice from p.lvs[name] is shared between all readers, and it is append-only.
+	// Since it's shared, we need to make a copy of it before returning it to make
+	// sure that no caller modifies the original one by sorting it or filtering it.
+	// Since it's append-only, we can do this while not holding the mutex anymore.
+	return slices.Clone(values)
}
 
 // PostingsStats contains cardinality based statistics for postings.
@@ -293,6 +320,7 @@ func (p *MemPostings) Delete(deleted map[storage.SeriesRef]struct{}, affected ma
 	p.mtx.Lock()
 	defer p.mtx.Unlock()
 
+	affectedLabelNames := map[string]struct{}{}
 	process := func(l labels.Label) {
 		orig := p.m[l.Name][l.Value]
 		repl := make([]storage.SeriesRef, 0, len(orig))
@@ -305,17 +333,66 @@ func (p *MemPostings) Delete(deleted map[storage.SeriesRef]struct{}, affected ma
 			p.m[l.Name][l.Value] = repl
 		} else {
 			delete(p.m[l.Name], l.Value)
-			// Delete the key if we removed all values.
-			if len(p.m[l.Name]) == 0 {
-				delete(p.m, l.Name)
-			}
+			affectedLabelNames[l.Name] = struct{}{}
 		}
 	}
 
+	i := 0
 	for l := range affected {
+		i++
 		process(l)
+
+		// From time to time we want some readers to go through and read their postings.
+		// It takes around 50ms to process a 1K series batch, and 120ms to process a 10K series batch (local benchmarks on an M3).
+		// Note that a read query will most likely want to read multiple postings lists, say 5, 10 or 20 (depending on the number of matchers),
+		// and that read query will most likely evaluate only one of those matchers before we unpause here, so we want to pause often.
+		if i%512 == 0 {
+			p.unlockWaitAndLockAgain()
+		}
 	}
 	process(allPostingsKey)
+
+	// Now we need to update the label values slices.
+	i = 0
+	for name := range affectedLabelNames {
+		i++
+		// From time to time we want some readers to go through and read their postings.
+		if i%512 == 0 {
+			p.unlockWaitAndLockAgain()
+		}
+
+		if len(p.m[name]) == 0 {
+			// Delete the label name key if we deleted all values.
+			delete(p.m, name)
+			delete(p.lvs, name)
+			continue
+		}
+
+		// Create the new slice with enough room to grow without reallocating.
+		// We have deleted values here, so there's definitely some churn, so be prepared for it.
+		lvs := make([]string, 0, exponentialSliceGrowthFactor*len(p.m[name]))
+		for v := range p.m[name] {
+			lvs = append(lvs, v)
+		}
+		p.lvs[name] = lvs
+	}
+}
+
+// unlockWaitAndLockAgain unlocks an already locked p.mtx and waits a little bit before locking it again,
+// letting the RLock()-waiting goroutines get the lock.
+func (p *MemPostings) unlockWaitAndLockAgain() {
+	p.mtx.Unlock()
+	// While it's tempting to just do a `time.Sleep(time.Millisecond)` here,
+	// it wouldn't ensure that readers were actually able to get the read lock,
+	// because if there are writers waiting on the same mutex, readers won't be able to get it.
+	// So we just grab one RLock ourselves.
+	p.mtx.RLock()
+	// We shouldn't wait here, because we would be blocking a potential write for no reason.
+	// Note that if there's a writer waiting for us to unlock, no reader will be able to get the read lock.
+	p.mtx.RUnlock() //nolint:staticcheck // SA2001: this is an intentionally empty critical section.
+	// Now we can wait a little bit just to increase the chance of a reader getting the lock.
+	time.Sleep(time.Millisecond)
+	p.mtx.Lock()
}
 
 // Iter calls f for each postings list. It aborts if f returns an error and returns it.
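The yield logic above deserves a restatement: `Delete` can hold the write lock for a long time, and a bare sleep would not let readers in, because Go's `sync.RWMutex` blocks new readers while a writer is waiting. Taking and releasing a read lock itself is how the code proves readers had a chance to run. The pattern in isolation:

```go
package example

import (
	"sync"
	"time"
)

// yieldWriteLock releases mtx (held by the caller for writing), lets at
// least one queued reader acquire and release it, then re-acquires the
// write lock after a short pause. Mirrors unlockWaitAndLockAgain above.
func yieldWriteLock(mtx *sync.RWMutex) {
	mtx.Unlock()
	mtx.RLock() // succeeds only once queued readers can also get in
	mtx.RUnlock()
	time.Sleep(time.Millisecond)
	mtx.Lock()
}
```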
@@ -345,13 +422,26 @@ func (p *MemPostings) Add(id storage.SeriesRef, lset labels.Labels) { p.mtx.Unlock() } +func appendWithExponentialGrowth[T any](a []T, v T) []T { + if cap(a) < len(a)+1 { + newList := make([]T, len(a), len(a)*exponentialSliceGrowthFactor+1) + copy(newList, a) + a = newList + } + return append(a, v) +} + func (p *MemPostings) addFor(id storage.SeriesRef, l labels.Label) { nm, ok := p.m[l.Name] if !ok { nm = map[string][]storage.SeriesRef{} p.m[l.Name] = nm } - list := append(nm[l.Value], id) + vm, ok := nm[l.Value] + if !ok { + p.lvs[l.Name] = appendWithExponentialGrowth(p.lvs[l.Name], l.Value) + } + list := appendWithExponentialGrowth(vm, id) nm[l.Value] = list if !p.ordered { @@ -370,25 +460,27 @@ func (p *MemPostings) addFor(id storage.SeriesRef, l labels.Label) { } func (p *MemPostings) PostingsForLabelMatching(ctx context.Context, name string, match func(string) bool) Postings { - // We'll copy the values into a slice and then match over that, + // We'll take the label values slice and then match over that, // this way we don't need to hold the mutex while we're matching, // which can be slow (seconds) if the match function is a huge regex. // Holding this lock prevents new series from being added (slows down the write path) // and blocks the compaction process. - vals := p.labelValues(name) - for i, count := 0, 1; i < len(vals); count++ { - if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + // + // We just need to make sure we don't modify the slice we took, + // so we'll append matching values to a different one. + p.mtx.RLock() + readOnlyLabelValues := p.lvs[name] + p.mtx.RUnlock() + + vals := make([]string, 0, len(readOnlyLabelValues)) + for i, v := range readOnlyLabelValues { + if i%checkContextEveryNIterations == 0 && ctx.Err() != nil { return ErrPostings(ctx.Err()) } - if match(vals[i]) { - i++ - continue + if match(v) { + vals = append(vals, v) } - - // Didn't match, bring the last value to this position, make the slice shorter and check again. - // The order of the slice doesn't matter as it comes from a map iteration. - vals[i], vals = vals[len(vals)-1], vals[:len(vals)-1] } // If none matched (or this label had no values), no need to grab the lock again. @@ -398,15 +490,17 @@ func (p *MemPostings) PostingsForLabelMatching(ctx context.Context, name string, // Now `vals` only contains the values that matched, get their postings. its := make([]Postings, 0, len(vals)) + lps := make([]ListPostings, len(vals)) p.mtx.RLock() e := p.m[name] - for _, v := range vals { + for i, v := range vals { if refs, ok := e[v]; ok { // Some of the values may have been garbage-collected in the meantime this is fine, we'll just skip them. // If we didn't let the mutex go, we'd have these postings here, but they would be pointing nowhere // because there would be a `MemPostings.Delete()` call waiting for the lock to delete these labels, // because the series were deleted already. - its = append(its, NewListPostings(refs)) + lps[i] = ListPostings{list: refs} + its = append(its, &lps[i]) } } // Let the mutex go before merging. @@ -415,25 +509,24 @@ func (p *MemPostings) PostingsForLabelMatching(ctx context.Context, name string, return Merge(ctx, its...) } -// labelValues returns a slice of label values for the given label name. -// It will take the read lock. 
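The rewritten `PostingsForLabelMatching` above snapshots the append-only values slice under a brief read lock and then runs the possibly expensive matcher with no lock held, collecting hits into a fresh slice instead of compacting in place. Its locking shape, distilled (the append-only safety argument is the one stated in the `MemPostings` comments):

```go
package example

import "sync"

// matchOutsideLock copies only the slice header under the read lock;
// because the underlying slice is append-only, scanning it after unlocking
// is safe (an invariant carried over from MemPostings.lvs).
func matchOutsideLock(mtx *sync.RWMutex, values *[]string, match func(string) bool) []string {
	mtx.RLock()
	snapshot := *values
	mtx.RUnlock()

	var hits []string
	for _, v := range snapshot {
		if match(v) {
			hits = append(hits, v)
		}
	}
	return hits
}
```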
-func (p *MemPostings) labelValues(name string) []string { +func (p *MemPostings) PostingsForAllLabelValues(ctx context.Context, name string) Postings { p.mtx.RLock() - defer p.mtx.RUnlock() e := p.m[name] - if len(e) == 0 { - return nil - } - - vals := make([]string, 0, len(e)) - for v, srs := range e { - if len(srs) > 0 { - vals = append(vals, v) + its := make([]Postings, 0, len(e)) + lps := make([]ListPostings, len(e)) + i := 0 + for _, refs := range e { + if len(refs) > 0 { + lps[i] = ListPostings{list: refs} + its = append(its, &lps[i]) } + i++ } - return vals + // Let the mutex go before merging. + p.mtx.RUnlock() + return Merge(ctx, its...) } // ExpandPostings returns the postings expanded as a slice. diff --git a/tsdb/index/postings_test.go b/tsdb/index/postings_test.go index 96c9ed124b..6ff5b9c060 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -1460,6 +1460,21 @@ func TestMemPostings_PostingsForLabelMatching(t *testing.T) { require.Equal(t, []storage.SeriesRef{2, 4}, refs) } +func TestMemPostings_PostingsForAllLabelValues(t *testing.T) { + mp := NewMemPostings() + mp.Add(1, labels.FromStrings("foo", "1")) + mp.Add(2, labels.FromStrings("foo", "2")) + mp.Add(3, labels.FromStrings("foo", "3")) + mp.Add(4, labels.FromStrings("foo", "4")) + + p := mp.PostingsForAllLabelValues(context.Background(), "foo") + require.NoError(t, p.Err()) + refs, err := ExpandPostings(p) + require.NoError(t, err) + // All postings for the label should be returned. + require.Equal(t, []storage.SeriesRef{1, 2, 3, 4}, refs) +} + func TestMemPostings_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { memP := NewMemPostings() seriesCount := 10 * checkContextEveryNIterations diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go index 7b58ec566f..2a1a44d18e 100644 --- a/tsdb/ooo_head_read.go +++ b/tsdb/ooo_head_read.go @@ -35,6 +35,7 @@ var _ IndexReader = &HeadAndOOOIndexReader{} type HeadAndOOOIndexReader struct { *headIndexReader // A reference to the headIndexReader so we can reuse as many interface implementation as possible. 
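`PostingsForLabelMatching` and the new `PostingsForAllLabelValues` share an allocation trick: a single `make([]ListPostings, n)` backs every postings iterator, and the iterator slice stores pointers into it, so n iterators cost one allocation instead of n. The idiom in generic form:

```go
package example

// pointersInto materializes n values from fill into one backing array and
// returns pointers to its elements, avoiding a heap allocation per element.
func pointersInto[T any](n int, fill func(i int) T) []*T {
	vals := make([]T, n) // single allocation backs every element
	out := make([]*T, 0, n)
	for i := range vals {
		vals[i] = fill(i)
		out = append(out, &vals[i])
	}
	return out
}
```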
+ inoMint int64 lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef } @@ -49,13 +50,13 @@ func (o mergedOOOChunks) Iterator(iterator chunkenc.Iterator) chunkenc.Iterator return storage.ChainSampleIteratorFromIterables(iterator, o.chunkIterables) } -func NewHeadAndOOOIndexReader(head *Head, mint, maxt int64, lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef) *HeadAndOOOIndexReader { +func NewHeadAndOOOIndexReader(head *Head, inoMint, mint, maxt int64, lastGarbageCollectedMmapRef chunks.ChunkDiskMapperRef) *HeadAndOOOIndexReader { hr := &headIndexReader{ head: head, mint: mint, maxt: maxt, } - return &HeadAndOOOIndexReader{hr, lastGarbageCollectedMmapRef} + return &HeadAndOOOIndexReader{hr, inoMint, lastGarbageCollectedMmapRef} } func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.ScratchBuilder, chks *[]chunks.Meta) error { @@ -76,9 +77,9 @@ func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.S *chks = (*chks)[:0] if s.ooo != nil { - return getOOOSeriesChunks(s, oh.mint, oh.maxt, oh.lastGarbageCollectedMmapRef, 0, true, chks) + return getOOOSeriesChunks(s, oh.mint, oh.maxt, oh.lastGarbageCollectedMmapRef, 0, true, oh.inoMint, chks) } - *chks = appendSeriesChunks(s, oh.mint, oh.maxt, *chks) + *chks = appendSeriesChunks(s, oh.inoMint, oh.maxt, *chks) return nil } @@ -87,7 +88,7 @@ func (oh *HeadAndOOOIndexReader) Series(ref storage.SeriesRef, builder *labels.S // // maxMmapRef tells upto what max m-map chunk that we can consider. If it is non-0, then // the oooHeadChunk will not be considered. -func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef, includeInOrder bool, chks *[]chunks.Meta) error { +func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmapRef, maxMmapRef chunks.ChunkDiskMapperRef, includeInOrder bool, inoMint int64, chks *[]chunks.Meta) error { tmpChks := make([]chunks.Meta, 0, len(s.ooo.oooMmappedChunks)) addChunk := func(minT, maxT int64, ref chunks.ChunkRef, chunk chunkenc.Chunk) { @@ -111,7 +112,7 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap return nil } for _, chk := range chks { - addChunk(c.minTime, c.maxTime, ref, chk.chunk) + addChunk(chk.minTime, chk.maxTime, ref, chk.chunk) } } else { var emptyChunk chunkenc.Chunk @@ -128,7 +129,7 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap } if includeInOrder { - tmpChks = appendSeriesChunks(s, mint, maxt, tmpChks) + tmpChks = appendSeriesChunks(s, inoMint, maxt, tmpChks) } // There is nothing to do if we did not collect any chunk. @@ -449,6 +450,10 @@ func (ir *OOOCompactionHeadIndexReader) PostingsForLabelMatching(context.Context return index.ErrPostings(errors.New("not supported")) } +func (ir *OOOCompactionHeadIndexReader) PostingsForAllLabelValues(context.Context, string) index.Postings { + return index.ErrPostings(errors.New("not supported")) +} + func (ir *OOOCompactionHeadIndexReader) SortedPostings(p index.Postings) index.Postings { // This will already be sorted from the Postings() call above. 
return p @@ -476,7 +481,7 @@ func (ir *OOOCompactionHeadIndexReader) Series(ref storage.SeriesRef, builder *l return nil } - return getOOOSeriesChunks(s, ir.ch.mint, ir.ch.maxt, 0, ir.ch.lastMmapRef, false, chks) + return getOOOSeriesChunks(s, ir.ch.mint, ir.ch.maxt, 0, ir.ch.lastMmapRef, false, 0, chks) } func (ir *OOOCompactionHeadIndexReader) SortedLabelValues(_ context.Context, name string, matchers ...*labels.Matcher) ([]string, error) { @@ -513,10 +518,10 @@ type HeadAndOOOQuerier struct { head *Head index IndexReader chunkr ChunkReader - querier storage.Querier + querier storage.Querier // Used for LabelNames, LabelValues, but may be nil if head was truncated in the mean time, in which case we ignore it and not close it in the end. } -func NewHeadAndOOOQuerier(mint, maxt int64, head *Head, oooIsoState *oooIsolationState, querier storage.Querier) storage.Querier { +func NewHeadAndOOOQuerier(inoMint, mint, maxt int64, head *Head, oooIsoState *oooIsolationState, querier storage.Querier) storage.Querier { cr := &headChunkReader{ head: head, mint: mint, @@ -527,22 +532,31 @@ func NewHeadAndOOOQuerier(mint, maxt int64, head *Head, oooIsoState *oooIsolatio mint: mint, maxt: maxt, head: head, - index: NewHeadAndOOOIndexReader(head, mint, maxt, oooIsoState.minRef), + index: NewHeadAndOOOIndexReader(head, inoMint, mint, maxt, oooIsoState.minRef), chunkr: NewHeadAndOOOChunkReader(head, mint, maxt, cr, oooIsoState, 0), querier: querier, } } func (q *HeadAndOOOQuerier) LabelValues(ctx context.Context, name string, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { + if q.querier == nil { + return nil, nil, nil + } return q.querier.LabelValues(ctx, name, hints, matchers...) } func (q *HeadAndOOOQuerier) LabelNames(ctx context.Context, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { + if q.querier == nil { + return nil, nil, nil + } return q.querier.LabelNames(ctx, hints, matchers...) } func (q *HeadAndOOOQuerier) Close() error { q.chunkr.Close() + if q.querier == nil { + return nil + } return q.querier.Close() } @@ -559,7 +573,7 @@ type HeadAndOOOChunkQuerier struct { querier storage.ChunkQuerier } -func NewHeadAndOOOChunkQuerier(mint, maxt int64, head *Head, oooIsoState *oooIsolationState, querier storage.ChunkQuerier) storage.ChunkQuerier { +func NewHeadAndOOOChunkQuerier(inoMint, mint, maxt int64, head *Head, oooIsoState *oooIsolationState, querier storage.ChunkQuerier) storage.ChunkQuerier { cr := &headChunkReader{ head: head, mint: mint, @@ -570,22 +584,31 @@ func NewHeadAndOOOChunkQuerier(mint, maxt int64, head *Head, oooIsoState *oooIso mint: mint, maxt: maxt, head: head, - index: NewHeadAndOOOIndexReader(head, mint, maxt, oooIsoState.minRef), + index: NewHeadAndOOOIndexReader(head, inoMint, mint, maxt, oooIsoState.minRef), chunkr: NewHeadAndOOOChunkReader(head, mint, maxt, cr, oooIsoState, 0), querier: querier, } } func (q *HeadAndOOOChunkQuerier) LabelValues(ctx context.Context, name string, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { + if q.querier == nil { + return nil, nil, nil + } return q.querier.LabelValues(ctx, name, hints, matchers...) } func (q *HeadAndOOOChunkQuerier) LabelNames(ctx context.Context, hints *storage.LabelHints, matchers ...*labels.Matcher) ([]string, annotations.Annotations, error) { + if q.querier == nil { + return nil, nil, nil + } return q.querier.LabelNames(ctx, hints, matchers...) 
} func (q *HeadAndOOOChunkQuerier) Close() error { q.chunkr.Close() + if q.querier == nil { + return nil + } return q.querier.Close() } diff --git a/tsdb/ooo_head_read_test.go b/tsdb/ooo_head_read_test.go index 40e37043b8..bc1cb67d1e 100644 --- a/tsdb/ooo_head_read_test.go +++ b/tsdb/ooo_head_read_test.go @@ -360,7 +360,7 @@ func TestOOOHeadIndexReader_Series(t *testing.T) { }) } - ir := NewHeadAndOOOIndexReader(h, tc.queryMinT, tc.queryMaxT, 0) + ir := NewHeadAndOOOIndexReader(h, tc.queryMinT, tc.queryMinT, tc.queryMaxT, 0) var chks []chunks.Meta var b labels.ScratchBuilder @@ -389,6 +389,7 @@ func TestOOOHeadChunkReader_LabelValues(t *testing.T) { func testOOOHeadChunkReader_LabelValues(t *testing.T, scenario sampleTypeScenario) { chunkRange := int64(2000) head, _ := newTestHead(t, chunkRange, wlog.CompressionNone, true) + head.opts.EnableOOONativeHistograms.Store(true) t.Cleanup(func() { require.NoError(t, head.Close()) }) ctx := context.Background() @@ -450,7 +451,7 @@ func testOOOHeadChunkReader_LabelValues(t *testing.T, scenario sampleTypeScenari for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { // We first want to test using a head index reader that covers the biggest query interval - oh := NewHeadAndOOOIndexReader(head, tc.queryMinT, tc.queryMaxT, 0) + oh := NewHeadAndOOOIndexReader(head, tc.queryMinT, tc.queryMinT, tc.queryMaxT, 0) matchers := []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")} values, err := oh.LabelValues(ctx, "foo", matchers...) sort.Strings(values) @@ -493,6 +494,8 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { opts := DefaultOptions() opts.OutOfOrderCapMax = 5 opts.OutOfOrderTimeWindow = 120 * time.Minute.Milliseconds() + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true s1 := labels.FromStrings("l", "v1") minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } @@ -506,7 +509,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { Ref: 0x1800000, Chunk: chunkenc.Chunk(nil), MinTime: 100, MaxTime: 300, }) require.Nil(t, iterable) - require.Equal(t, err, fmt.Errorf("not found")) + require.EqualError(t, err, "not found") require.Nil(t, c) }) @@ -854,7 +857,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { // The Series method populates the chunk metas, taking a copy of the // head OOO chunk if necessary. These are then used by the ChunkReader. 
- ir := NewHeadAndOOOIndexReader(db.head, tc.queryMinT, tc.queryMaxT, 0) + ir := NewHeadAndOOOIndexReader(db.head, tc.queryMinT, tc.queryMinT, tc.queryMaxT, 0) var chks []chunks.Meta var b labels.ScratchBuilder err = ir.Series(s1Ref, &b, &chks) @@ -875,7 +878,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) { } resultSamples, err := storage.ExpandSamples(it, nil) require.NoError(t, err) - requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, true) + requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, requireEqualSamplesIgnoreCounterResets) } }) } @@ -902,6 +905,8 @@ func testOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding( opts := DefaultOptions() opts.OutOfOrderCapMax = 5 opts.OutOfOrderTimeWindow = 120 * time.Minute.Milliseconds() + opts.EnableNativeHistograms = true + opts.EnableOOONativeHistograms = true s1 := labels.FromStrings("l", "v1") minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } @@ -1023,7 +1028,7 @@ func testOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding( // The Series method populates the chunk metas, taking a copy of the // head OOO chunk if necessary. These are then used by the ChunkReader. - ir := NewHeadAndOOOIndexReader(db.head, tc.queryMinT, tc.queryMaxT, 0) + ir := NewHeadAndOOOIndexReader(db.head, tc.queryMinT, tc.queryMinT, tc.queryMaxT, 0) var chks []chunks.Meta var b labels.ScratchBuilder err = ir.Series(s1Ref, &b, &chks) @@ -1049,7 +1054,7 @@ func testOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding( it := iterable.Iterator(nil) resultSamples, err := storage.ExpandSamples(it, nil) require.NoError(t, err) - requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, true) + requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, requireEqualSamplesIgnoreCounterResets) } }) } diff --git a/tsdb/ooo_head_test.go b/tsdb/ooo_head_test.go index d3cd5f6016..37a46e76da 100644 --- a/tsdb/ooo_head_test.go +++ b/tsdb/ooo_head_test.go @@ -28,15 +28,13 @@ import ( const testMaxSize int = 32 // Formulas chosen to make testing easy. -func valEven(pos int) int { return pos*2 + 2 } // s[0]=2, s[1]=4, s[2]=6, ..., s[31]=64 - Predictable pre-existing values -func valOdd(pos int) int { return pos*2 + 1 } // s[0]=1, s[1]=3, s[2]=5, ..., s[31]=63 - New values will interject at chosen position because they sort before the pre-existing vals. +func valEven(pos int) int64 { return int64(pos*2 + 2) } // s[0]=2, s[1]=4, s[2]=6, ..., s[31]=64 - Predictable pre-existing values +func valOdd(pos int) int64 { return int64(pos*2 + 1) } // s[0]=1, s[1]=3, s[2]=5, ..., s[31]=63 - New values will interject at chosen position because they sort before the pre-existing vals. -func samplify(v int) sample { return sample{int64(v), float64(v), nil, nil} } - -func makeEvenSampleSlice(n int) []sample { +func makeEvenSampleSlice(n int, sampleFunc func(ts int64) sample) []sample { s := make([]sample, n) for i := 0; i < n; i++ { - s[i] = samplify(valEven(i)) + s[i] = sampleFunc(valEven(i)) } return s } @@ -45,8 +43,36 @@ func makeEvenSampleSlice(n int) []sample { // - Number of pre-existing samples anywhere from 0 to testMaxSize-1. // - Insert new sample before first pre-existing samples, after the last, and anywhere in between. // - With a chunk initial capacity of testMaxSize/8 and testMaxSize, which lets us test non-full and full chunks, and chunks that need to expand themselves. 
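The even/odd formulas above guarantee that every new value sorts strictly before the pre-existing value at the same position, so inserts can be exercised at every slot. A quick demonstration under the same formulas:

```go
package main

import "fmt"

func valEven(pos int) int64 { return int64(pos*2 + 2) } // pre-existing: 2, 4, 6, ...
func valOdd(pos int) int64  { return int64(pos*2 + 1) } // new: 1, 3, 5, ...

func main() {
	// valOdd(i) sorts strictly between valEven(i-1) and valEven(i), so a new
	// sample inserted at position i pushes the remaining even samples right.
	for i := 0; i < 4; i++ {
		fmt.Printf("new %d lands just before pre-existing %d\n", valOdd(i), valEven(i))
	}
}
```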
-// Note: In all samples used, t always equals v in numeric value. when we talk about 'value' we just refer to a value that will be used for both sample.t and sample.v. func TestOOOInsert(t *testing.T) { + scenarios := map[string]struct { + sampleFunc func(ts int64) sample + }{ + "float": { + sampleFunc: func(ts int64) sample { + return sample{t: ts, f: float64(ts)} + }, + }, + "integer histogram": { + sampleFunc: func(ts int64) sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(ts))} + }, + }, + "float histogram": { + sampleFunc: func(ts int64) sample { + return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(ts))} + }, + }, + } + for name, scenario := range scenarios { + t.Run(name, func(t *testing.T) { + testOOOInsert(t, scenario.sampleFunc) + }) + } +} + +func testOOOInsert(t *testing.T, + sampleFunc func(ts int64) sample, +) { for numPreExisting := 0; numPreExisting <= testMaxSize; numPreExisting++ { // For example, if we have numPreExisting 2, then: // chunk.samples indexes filled 0 1 @@ -56,20 +82,21 @@ func TestOOOInsert(t *testing.T) { for insertPos := 0; insertPos <= numPreExisting; insertPos++ { chunk := NewOOOChunk() - chunk.samples = makeEvenSampleSlice(numPreExisting) - newSample := samplify(valOdd(insertPos)) - chunk.Insert(newSample.t, newSample.f, nil, nil) + chunk.samples = make([]sample, numPreExisting) + chunk.samples = makeEvenSampleSlice(numPreExisting, sampleFunc) + newSample := sampleFunc(valOdd(insertPos)) + chunk.Insert(newSample.t, newSample.f, newSample.h, newSample.fh) var expSamples []sample // Our expected new samples slice, will be first the original samples. for i := 0; i < insertPos; i++ { - expSamples = append(expSamples, samplify(valEven(i))) + expSamples = append(expSamples, sampleFunc(valEven(i))) } // Then the new sample. expSamples = append(expSamples, newSample) // Followed by any original samples that were pushed back by the new one. for i := insertPos; i < numPreExisting; i++ { - expSamples = append(expSamples, samplify(valEven(i))) + expSamples = append(expSamples, sampleFunc(valEven(i))) } require.Equal(t, expSamples, chunk.samples, "numPreExisting %d, insertPos %d", numPreExisting, insertPos) @@ -81,17 +108,46 @@ func TestOOOInsert(t *testing.T) { // pre-existing samples, with between 1 and testMaxSize pre-existing samples and // with a chunk initial capacity of testMaxSize/8 and testMaxSize, which lets us test non-full and full chunks, and chunks that need to expand themselves. 
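For context, the behaviour these tests exercise is a sorted insert by timestamp; a self-contained sketch of that idea, simplified to bare timestamps rather than the real OOOChunk:

```go
package main

import (
	"fmt"
	"sort"
)

// insertSorted keeps the slice ordered by timestamp, shifting later
// samples right — the invariant TestOOOInsert checks for every position.
func insertSorted(ts []int64, v int64) []int64 {
	i := sort.Search(len(ts), func(i int) bool { return ts[i] >= v })
	ts = append(ts, 0)
	copy(ts[i+1:], ts[i:])
	ts[i] = v
	return ts
}

func main() {
	ts := []int64{2, 4, 6, 8}
	fmt.Println(insertSorted(ts, 5)) // [2 4 5 6 8]
}
```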
func TestOOOInsertDuplicate(t *testing.T) { + scenarios := map[string]struct { + sampleFunc func(ts int64) sample + }{ + "float": { + sampleFunc: func(ts int64) sample { + return sample{t: ts, f: float64(ts)} + }, + }, + "integer histogram": { + sampleFunc: func(ts int64) sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(ts))} + }, + }, + "float histogram": { + sampleFunc: func(ts int64) sample { + return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(ts))} + }, + }, + } + for name, scenario := range scenarios { + t.Run(name, func(t *testing.T) { + testOOOInsertDuplicate(t, scenario.sampleFunc) + }) + } +} + +func testOOOInsertDuplicate(t *testing.T, + sampleFunc func(ts int64) sample, +) { for num := 1; num <= testMaxSize; num++ { for dupPos := 0; dupPos < num; dupPos++ { chunk := NewOOOChunk() - chunk.samples = makeEvenSampleSlice(num) + chunk.samples = makeEvenSampleSlice(num, sampleFunc) dupSample := chunk.samples[dupPos] dupSample.f = 0.123 - ok := chunk.Insert(dupSample.t, dupSample.f, nil, nil) + ok := chunk.Insert(dupSample.t, dupSample.f, dupSample.h, dupSample.fh) - expSamples := makeEvenSampleSlice(num) // We expect no change. + expSamples := makeEvenSampleSlice(num, sampleFunc) // We expect no change. require.False(t, ok) require.Equal(t, expSamples, chunk.samples, "num %d, dupPos %d", num, dupPos) } @@ -110,6 +166,8 @@ func TestOOOChunks_ToEncodedChunks(t *testing.T) { h2 := h1.Copy() h2.PositiveSpans = append(h2.PositiveSpans, histogram.Span{Offset: 1, Length: 1}) h2.PositiveBuckets = append(h2.PositiveBuckets, 12) + h2explicit := h2.Copy() + h2explicit.CounterResetHint = histogram.CounterReset testCases := map[string]struct { samples []sample @@ -142,12 +200,32 @@ func TestOOOChunks_ToEncodedChunks(t *testing.T) { {encoding: chunkenc.EncXOR, minTime: 1200, maxTime: 1200}, }, }, - "has a counter reset": { + "has an implicit counter reset": { samples: []sample{ {t: 1000, h: h2}, {t: 1100, h: h1}, }, - expectedCounterResets: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.CounterReset}, + expectedCounterResets: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.UnknownCounterReset}, + expectedChunks: []chunkVerify{ + {encoding: chunkenc.EncHistogram, minTime: 1000, maxTime: 1000}, + {encoding: chunkenc.EncHistogram, minTime: 1100, maxTime: 1100}, + }, + }, + "has an explicit counter reset": { + samples: []sample{ + {t: 1100, h: h2explicit}, + }, + expectedCounterResets: []histogram.CounterResetHint{histogram.UnknownCounterReset}, + expectedChunks: []chunkVerify{ + {encoding: chunkenc.EncHistogram, minTime: 1100, maxTime: 1100}, + }, + }, + "has an explicit counter reset inside": { + samples: []sample{ + {t: 1000, h: h1}, + {t: 1100, h: h2explicit}, + }, + expectedCounterResets: []histogram.CounterResetHint{histogram.UnknownCounterReset, histogram.UnknownCounterReset}, expectedChunks: []chunkVerify{ {encoding: chunkenc.EncHistogram, minTime: 1000, maxTime: 1000}, {encoding: chunkenc.EncHistogram, minTime: 1100, maxTime: 1100}, diff --git a/tsdb/querier.go b/tsdb/querier.go index 912c950329..34e9a81231 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -193,6 +193,11 @@ func selectChunkSeriesSet(ctx context.Context, sortSeries bool, hints *storage.S // PostingsForMatchers assembles a single postings iterator against the index reader // based on the given matchers. The resulting postings are not ordered by series. 
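The reworked counter-reset expectations above all encode one rule: when out-of-order histogram samples are re-encoded into chunks, explicit counter-reset hints are not preserved, and readers see unknown hints and must re-detect resets themselves. A toy model of that rule (not the real histogram package):

```go
package main

import "fmt"

// counterResetHint is a stand-in for histogram.CounterResetHint.
type counterResetHint int

const (
	unknownCounterReset counterResetHint = iota
	counterReset
)

// reencode models OOO chunk re-encoding: every hint, explicit or not,
// comes out as unknown.
func reencode(hints []counterResetHint) []counterResetHint {
	out := make([]counterResetHint, len(hints))
	for i := range out {
		out[i] = unknownCounterReset // explicit hints are not preserved
	}
	return out
}

func main() {
	fmt.Println(reencode([]counterResetHint{counterReset, unknownCounterReset})) // [0 0]
}
```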
func PostingsForMatchers(ctx context.Context, ix IndexReader, ms ...*labels.Matcher) (index.Postings, error) { + if len(ms) == 1 && ms[0].Name == "" && ms[0].Value == "" { + k, v := index.AllPostingsKey() + return ix.Postings(ctx, k, v) + } + var its, notIts []index.Postings // See which label must be non-empty. // Optimization for case like {l=~".", l!="1"}. @@ -247,13 +252,27 @@ func PostingsForMatchers(ctx context.Context, ix IndexReader, ms ...*labels.Matc return nil, ctx.Err() } switch { - case m.Name == "" && m.Value == "": // Special-case for AllPostings, used in tests at least. - k, v := index.AllPostingsKey() - allPostings, err := ix.Postings(ctx, k, v) - if err != nil { - return nil, err + case m.Name == "" && m.Value == "": + // We already handled the case at the top of the function, + // and it is unexpected to get all postings again here. + return nil, errors.New("unexpected all postings") + + case m.Type == labels.MatchRegexp && m.Value == ".*": + // .* regexp matches any string: do nothing. + case m.Type == labels.MatchNotRegexp && m.Value == ".*": + return index.EmptyPostings(), nil + + case m.Type == labels.MatchRegexp && m.Value == ".+": + // .+ regexp matches any non-empty string: get postings for all label values. + it := ix.PostingsForAllLabelValues(ctx, m.Name) + if index.IsEmptyPostingsType(it) { + return index.EmptyPostings(), nil } - its = append(its, allPostings) + its = append(its, it) + case m.Type == labels.MatchNotRegexp && m.Value == ".+": + // .+ regexp matches any non-empty string: get postings for all label values and remove them. + its = append(notIts, ix.PostingsForAllLabelValues(ctx, m.Name)) + case labelMustBeSet[m.Name]: // If this matcher must be non-empty, we can be smarter. matchesEmpty := m.Matches("") @@ -288,7 +307,7 @@ func PostingsForMatchers(ctx context.Context, ix IndexReader, ms ...*labels.Matc return index.EmptyPostings(), nil } its = append(its, it) - default: // l="a" + default: // l="a", l=~"a|b", l=~"a.b", etc. // Non-Not matcher, use normal postingsForMatcher. it, err := postingsForMatcher(ctx, ix, m) if err != nil { @@ -359,29 +378,16 @@ func inversePostingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Ma return ix.Postings(ctx, m.Name, m.Value) } - vals, err := ix.LabelValues(ctx, m.Name) - if err != nil { - return nil, err - } - - res := vals[:0] - // If the match before inversion was !="" or !~"", we just want all the values. + // If the matcher being inverted is =~"" or ="", we just want all the values. if m.Value == "" && (m.Type == labels.MatchRegexp || m.Type == labels.MatchEqual) { - res = vals - } else { - count := 1 - for _, val := range vals { - if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { - return nil, ctx.Err() - } - count++ - if !m.Matches(val) { - res = append(res, val) - } - } + it := ix.PostingsForAllLabelValues(ctx, m.Name) + return it, it.Err() } - return ix.Postings(ctx, m.Name, res...) 
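The new matcher shortcuts above are easiest to read as a four-way table over anchored regexps: ".*" matches every value including the empty one, ".+" matches every non-empty value. A stand-alone sketch of the dispatch, with plain strings in place of labels.Matcher:

```go
package main

import "fmt"

// shortcut describes what PostingsForMatchers now does for each of the
// four special regexp matchers, before falling back to the general path.
func shortcut(typ, value string) string {
	switch {
	case typ == "=~" && value == ".*":
		return "no-op: matches everything, including absent labels"
	case typ == "!~" && value == ".*":
		return "empty result: nothing can match"
	case typ == "=~" && value == ".+":
		return "postings for all values of the label"
	case typ == "!~" && value == ".+":
		return "subtract postings for all values of the label"
	}
	return "no shortcut: general matcher path"
}

func main() {
	fmt.Println(shortcut("=~", ".*"))
	fmt.Println(shortcut("!~", ".+"))
}
```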
+ it := ix.PostingsForLabelMatching(ctx, m.Name, func(s string) bool { + return !m.Matches(s) + }) + return it, it.Err() } func labelValuesWithMatchers(ctx context.Context, r IndexReader, name string, matchers ...*labels.Matcher) ([]string, error) { @@ -1018,9 +1024,9 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool { if newChunk != nil { if !recoded { p.chunksFromIterable = append(p.chunksFromIterable, chunks.Meta{Chunk: currentChunk, MinTime: cmint, MaxTime: cmaxt}) + cmint = t } currentChunk = newChunk - cmint = t } cmaxt = t diff --git a/tsdb/querier_bench_test.go b/tsdb/querier_bench_test.go index 43accc253b..8b8f092a81 100644 --- a/tsdb/querier_bench_test.go +++ b/tsdb/querier_bench_test.go @@ -75,7 +75,7 @@ func BenchmarkQuerier(b *testing.B) { b.Run("Block", func(b *testing.B) { blockdir := createBlockFromHead(b, b.TempDir(), h) - block, err := OpenBlock(nil, blockdir, nil) + block, err := OpenBlock(nil, blockdir, nil, nil) require.NoError(b, err) defer func() { require.NoError(b, block.Close()) @@ -105,17 +105,17 @@ func benchmarkPostingsForMatchers(b *testing.B, ir IndexReader) { jFoo := labels.MustNewMatcher(labels.MatchEqual, "j", "foo") jNotFoo := labels.MustNewMatcher(labels.MatchNotEqual, "j", "foo") - iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$") - i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.*$") - iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1$") - iStar1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*1.*$") - iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.+$") - i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "^1.+$") - iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "^$") + iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", ".*") + i1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", "1.*") + iStar1 := labels.MustNewMatcher(labels.MatchRegexp, "i", ".*1") + iStar1Star := labels.MustNewMatcher(labels.MatchRegexp, "i", ".*1.*") + iPlus := labels.MustNewMatcher(labels.MatchRegexp, "i", ".+") + i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "1.+") + iEmptyRe := labels.MustNewMatcher(labels.MatchRegexp, "i", "") iNotEmpty := labels.MustNewMatcher(labels.MatchNotEqual, "i", "") iNot2 := labels.MustNewMatcher(labels.MatchNotEqual, "i", "2"+postingsBenchSuffix) - iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^2.*$") - iNotStar2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^.*2.*$") + iNot2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", "2.*") + iNotStar2Star := labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*2.*") jFooBar := labels.MustNewMatcher(labels.MatchRegexp, "j", "foo|bar") jXXXYYY := labels.MustNewMatcher(labels.MatchRegexp, "j", "XXX|YYY") jXplus := labels.MustNewMatcher(labels.MatchRegexp, "j", "X.+") @@ -186,13 +186,13 @@ func benchmarkLabelValuesWithMatchers(b *testing.B, ir IndexReader) { i1Plus := labels.MustNewMatcher(labels.MatchRegexp, "i", "1.+") i1PostingsBenchSuffix := labels.MustNewMatcher(labels.MatchEqual, "i", "1"+postingsBenchSuffix) iSuffix := labels.MustNewMatcher(labels.MatchRegexp, "i", ".+ddd") - iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$") + iStar := labels.MustNewMatcher(labels.MatchRegexp, "i", ".*") jNotFoo := labels.MustNewMatcher(labels.MatchNotEqual, "j", "foo") jXXXYYY := labels.MustNewMatcher(labels.MatchRegexp, "j", "XXX|YYY") jXplus := labels.MustNewMatcher(labels.MatchRegexp, "j", "X.+") n1 := 
labels.MustNewMatcher(labels.MatchEqual, "n", "1"+postingsBenchSuffix) nX := labels.MustNewMatcher(labels.MatchNotEqual, "n", "X"+postingsBenchSuffix) - nPlus := labels.MustNewMatcher(labels.MatchRegexp, "n", "^.+$") + nPlus := labels.MustNewMatcher(labels.MatchRegexp, "n", ".+") ctx := context.Background() @@ -205,12 +205,12 @@ func benchmarkLabelValuesWithMatchers(b *testing.B, ir IndexReader) { {`i with i="1"`, "i", []*labels.Matcher{i1}}, // i has 100k values. {`i with n="1"`, "i", []*labels.Matcher{n1}}, - {`i with n="^.+$"`, "i", []*labels.Matcher{nPlus}}, + {`i with n=".+"`, "i", []*labels.Matcher{nPlus}}, {`i with n="1",j!="foo"`, "i", []*labels.Matcher{n1, jNotFoo}}, {`i with n="1",j=~"X.+"`, "i", []*labels.Matcher{n1, jXplus}}, {`i with n="1",j=~"XXX|YYY"`, "i", []*labels.Matcher{n1, jXXXYYY}}, {`i with n="X",j!="foo"`, "i", []*labels.Matcher{nX, jNotFoo}}, - {`i with n="1",i=~"^.*$",j!="foo"`, "i", []*labels.Matcher{n1, iStar, jNotFoo}}, + {`i with n="1",i=~".*",j!="foo"`, "i", []*labels.Matcher{n1, iStar, jNotFoo}}, // matchers on i itself {`i with i="1aaa...ddd"`, "i", []*labels.Matcher{i1PostingsBenchSuffix}}, {`i with i=~"1.+"`, "i", []*labels.Matcher{i1Plus}}, @@ -315,7 +315,7 @@ func BenchmarkQuerierSelect(b *testing.B) { tmpdir := b.TempDir() blockdir := createBlockFromHead(b, tmpdir, h) - block, err := OpenBlock(nil, blockdir, nil) + block, err := OpenBlock(nil, blockdir, nil, nil) require.NoError(b, err) defer func() { require.NoError(b, block.Close()) diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 50525f65f4..482907757a 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -2030,7 +2030,7 @@ func TestPopulateWithDelSeriesIterator_NextWithMinTime(t *testing.T) { // TODO(bwplotka): Merge with storage merged series set benchmark. func BenchmarkMergedSeriesSet(b *testing.B) { sel := func(sets []storage.SeriesSet) storage.SeriesSet { - return storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge) + return storage.NewMergeSeriesSet(sets, 0, storage.ChainedSeriesMerge) } for _, k := range []int{ @@ -2340,6 +2340,16 @@ func (m mockIndex) PostingsForLabelMatching(ctx context.Context, name string, ma return index.Merge(ctx, res...) } +func (m mockIndex) PostingsForAllLabelValues(ctx context.Context, name string) index.Postings { + var res []index.Postings + for l, srs := range m.postings { + if l.Name == name { + res = append(res, index.NewListPostings(srs)) + } + } + return index.Merge(ctx, res...) 
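Dropping the explicit ^...$ wrappers in the benchmark matchers above is safe because matcher regexps are evaluated fully anchored, so "1.*" already behaves like "^(?:1.*)$". A quick demonstration of that anchoring convention:

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Wrapping the pattern in ^(?:...)$ is what anchored evaluation means:
	// the whole value must match, not just a substring.
	re := regexp.MustCompile("^(?:" + "1.*" + ")$")
	fmt.Println(re.MatchString("1abc")) // true
	fmt.Println(re.MatchString("21"))   // false: anchored at the start
}
```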
+} + func (m mockIndex) ShardedPostings(p index.Postings, shardIndex, shardCount uint64) index.Postings { out := make([]storage.SeriesRef, 0, 128) @@ -2443,7 +2453,7 @@ func BenchmarkQueryIterator(b *testing.B) { } else { generatedSeries = populateSeries(prefilledLabels, mint, maxt) } - block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil) + block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil, nil) require.NoError(b, err) blocks = append(blocks, block) defer block.Close() @@ -2506,7 +2516,7 @@ func BenchmarkQuerySeek(b *testing.B) { } else { generatedSeries = populateSeries(prefilledLabels, mint, maxt) } - block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil) + block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil, nil) require.NoError(b, err) blocks = append(blocks, block) defer block.Close() @@ -2641,7 +2651,7 @@ func BenchmarkSetMatcher(b *testing.B) { } else { generatedSeries = populateSeries(prefilledLabels, mint, maxt) } - block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil) + block, err := OpenBlock(nil, createBlock(b, dir, generatedSeries), nil, nil) require.NoError(b, err) blocks = append(blocks, block) defer block.Close() @@ -2689,6 +2699,7 @@ func TestPostingsForMatchers(t *testing.T) { app.Append(0, labels.FromStrings("n", "1"), 0, 0) app.Append(0, labels.FromStrings("n", "1", "i", "a"), 0, 0) app.Append(0, labels.FromStrings("n", "1", "i", "b"), 0, 0) + app.Append(0, labels.FromStrings("n", "1", "i", "\n"), 0, 0) app.Append(0, labels.FromStrings("n", "2"), 0, 0) app.Append(0, labels.FromStrings("n", "2.5"), 0, 0) require.NoError(t, app.Commit()) @@ -2704,6 +2715,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2722,6 +2734,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), labels.FromStrings("n", "2"), labels.FromStrings("n", "2.5"), }, @@ -2739,6 +2752,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2750,6 +2764,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2757,6 +2772,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, // Regex. @@ -2766,6 +2782,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2801,6 +2818,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2808,6 +2826,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, // Not regex. 
@@ -2816,6 +2835,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2849,12 +2869,14 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "n", "1"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^a?$")}, exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2862,6 +2884,7 @@ func TestPostingsForMatchers(t *testing.T) { exp: []labels.Labels{ labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), }, }, { @@ -2895,6 +2918,7 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "1"), labels.FromStrings("n", "1", "i", "a"), labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), labels.FromStrings("n", "2"), }, }, @@ -2942,6 +2966,57 @@ func TestPostingsForMatchers(t *testing.T) { labels.FromStrings("n", "2.5"), }, }, + // Test shortcut for i=~".*" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "i", ".*")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), + labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), + }, + }, + // Test shortcut for n=~".*" and i=~"^.*$" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", ".*"), labels.MustNewMatcher(labels.MatchRegexp, "i", "^.*$")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1"), + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + labels.FromStrings("n", "1", "i", "\n"), + labels.FromStrings("n", "2"), + labels.FromStrings("n", "2.5"), + }, + }, + // Test shortcut for n=~"^.*$" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", "^.*$"), labels.MustNewMatcher(labels.MatchEqual, "i", "a")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + }, + }, + // Test shortcut for i!~".*" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*")}, + exp: []labels.Labels{}, + }, + // Test shortcut for n!~"^.*$", i!~".*". First one triggers empty result. 
+ { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "n", "^.*$"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*")}, + exp: []labels.Labels{}, + }, + // Test shortcut i!~".*" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "n", ".*"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", ".*")}, + exp: []labels.Labels{}, + }, + // Test shortcut i!~"^.*$" + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "n", "1"), labels.MustNewMatcher(labels.MatchNotRegexp, "i", "^.*$")}, + exp: []labels.Labels{}, + }, } ir, err := h.Index() @@ -3144,7 +3219,7 @@ func BenchmarkQueries(b *testing.B) { qs := make([]storage.Querier, 0, 10) for x := 0; x <= 10; x++ { - block, err := OpenBlock(nil, createBlock(b, dir, series), nil) + block, err := OpenBlock(nil, createBlock(b, dir, series), nil, nil) require.NoError(b, err) q, err := NewBlockQuerier(block, 1, nSamples) require.NoError(b, err) @@ -3170,7 +3245,7 @@ func BenchmarkQueries(b *testing.B) { qHead, err := NewBlockQuerier(NewRangeHead(head, 1, nSamples), 1, nSamples) require.NoError(b, err) isoState := head.oooIso.TrackReadAfter(0) - qOOOHead := NewHeadAndOOOQuerier(1, nSamples, head, isoState, qHead) + qOOOHead := NewHeadAndOOOQuerier(1, 1, nSamples, head, isoState, qHead) queryTypes = append(queryTypes, qt{ fmt.Sprintf("_Head_oooPercent:%d", oooPercentage), qOOOHead, @@ -3235,7 +3310,7 @@ func (m mockMatcherIndex) LabelValueFor(context.Context, storage.SeriesRef, stri } func (m mockMatcherIndex) LabelNamesFor(ctx context.Context, postings index.Postings) ([]string, error) { - return nil, errors.New("label names for for called") + return nil, errors.New("label names for called") } func (m mockMatcherIndex) Postings(context.Context, string, ...string) (index.Postings, error) { @@ -3259,7 +3334,11 @@ func (m mockMatcherIndex) LabelNames(context.Context, ...*labels.Matcher) ([]str } func (m mockMatcherIndex) PostingsForLabelMatching(context.Context, string, func(string) bool) index.Postings { - return index.ErrPostings(fmt.Errorf("PostingsForLabelMatching called")) + return index.ErrPostings(errors.New("PostingsForLabelMatching called")) +} + +func (m mockMatcherIndex) PostingsForAllLabelValues(context.Context, string) index.Postings { + return index.ErrPostings(errors.New("PostingsForAllLabelValues called")) } func TestPostingsForMatcher(t *testing.T) { @@ -3660,17 +3739,6 @@ func TestReader_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { require.Equal(t, failAfter+1, ctx.Count()) // Plus one for the Err() call that puts the error in the result. } -func TestReader_InversePostingsForMatcherHonorsContextCancel(t *testing.T) { - ir := mockReaderOfLabels{} - - failAfter := uint64(mockReaderOfLabelsSeriesCount / 2 / checkContextEveryNIterations) - ctx := &testutil.MockContextErrAfter{FailAfter: failAfter} - _, err := inversePostingsForMatcher(ctx, ir, labels.MustNewMatcher(labels.MatchRegexp, "__name__", ".*")) - - require.Error(t, err) - require.Equal(t, failAfter+1, ctx.Count()) // Plus one for the Err() call that puts the error in the result. 
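The "\n" label value threaded through TestPostingsForMatchers above guards a regexp subtlety: with Go's default flags, "." does not match a newline, so a matcher compiled without single-line mode could disagree with the match-everything shortcut. Assuming the anchored single-line compilation convention, a sketch of the distinction:

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Without (?s), ".*" cannot cross a newline; with it, ".*" really does
	// match every value, which is what the ".*" shortcut assumes.
	plain := regexp.MustCompile("^(?:.*)$")
	dotAll := regexp.MustCompile("^(?s:.*)$")
	fmt.Println(plain.MatchString("\n"))  // false
	fmt.Println(dotAll.MatchString("\n")) // true
}
```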
-} - type mockReaderOfLabels struct{} const mockReaderOfLabelsSeriesCount = checkContextEveryNIterations * 10 @@ -3703,6 +3771,10 @@ func (m mockReaderOfLabels) PostingsForLabelMatching(context.Context, string, fu panic("PostingsForLabelMatching called") } +func (m mockReaderOfLabels) PostingsForAllLabelValues(context.Context, string) index.Postings { + panic("PostingsForAllLabelValues called") +} + func (m mockReaderOfLabels) Postings(context.Context, string, ...string) (index.Postings, error) { panic("Postings called") } @@ -3722,3 +3794,35 @@ func (m mockReaderOfLabels) Series(storage.SeriesRef, *labels.ScratchBuilder, *[ func (m mockReaderOfLabels) Symbols() index.StringIter { panic("Series called") } + +// TestMergeQuerierConcurrentSelectMatchers reproduces the data race bug from +// https://github.com/prometheus/prometheus/issues/14723, when one of the queriers (blockQuerier in this case) +// alters the passed matchers. +func TestMergeQuerierConcurrentSelectMatchers(t *testing.T) { + block, err := OpenBlock(nil, createBlock(t, t.TempDir(), genSeries(1, 1, 0, 1)), nil, nil) + require.NoError(t, err) + defer func() { + require.NoError(t, block.Close()) + }() + p, err := NewBlockQuerier(block, 0, 1) + require.NoError(t, err) + + // A secondary querier is required to enable concurrent select; a blockQuerier is used for simplicity. + s, err := NewBlockQuerier(block, 0, 1) + require.NoError(t, err) + + originalMatchers := []*labels.Matcher{ + labels.MustNewMatcher(labels.MatchRegexp, "baz", ".*"), + labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"), + } + matchers := append([]*labels.Matcher{}, originalMatchers...) + + mergedQuerier := storage.NewMergeQuerier([]storage.Querier{p}, []storage.Querier{s}, storage.ChainedSeriesMerge) + defer func() { + require.NoError(t, mergedQuerier.Close()) + }() + + mergedQuerier.Select(context.Background(), false, nil, matchers...) + + require.Equal(t, originalMatchers, matchers) +} diff --git a/tsdb/record/record_test.go b/tsdb/record/record_test.go index da7748e187..f3a657aecb 100644 --- a/tsdb/record/record_test.go +++ b/tsdb/record/record_test.go @@ -166,7 +166,7 @@ func TestRecord_EncodeDecode(t *testing.T) { require.NoError(t, err) require.Equal(t, floatHistograms, decFloatHistograms) - // Gauge ingeger histograms. + // Gauge integer histograms. for i := range histograms { histograms[i].H.CounterResetHint = histogram.GaugeType } diff --git a/tsdb/repair.go b/tsdb/repair.go index 9d2c5738d1..8bdc645b5e 100644 --- a/tsdb/repair.go +++ b/tsdb/repair.go @@ -17,19 +17,17 @@ import ( "encoding/json" "fmt" "io" + "log/slog" "os" "path/filepath" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" "github.com/prometheus/prometheus/tsdb/fileutil" ) // repairBadIndexVersion repairs an issue in index and meta.json persistence introduced in // commit 129773b41a565fde5156301e37f9a87158030443. -func repairBadIndexVersion(logger log.Logger, dir string) error { +func repairBadIndexVersion(logger *slog.Logger, dir string) error { // All blocks written by Prometheus 2.1 with a meta.json version of 2 are affected. // We must actually set the index file version to 2 and revert the meta.json version back to 1. 
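TestMergeQuerierConcurrentSelectMatchers above pins down a bug class worth naming: a querier that sorts or rewrites the shared matcher slice in place races with concurrent readers of the same backing array. A minimal sketch of the hazard and the defensive copy that avoids it, with plain strings standing in for matchers:

```go
package main

import "fmt"

// mutating models a callee that rewrites the matcher slice in place,
// as blockQuerier did in the linked issue.
func mutating(ms []string) {
	if len(ms) > 0 {
		ms[0] = "rewritten" // mutates the caller's backing array
	}
}

// safe copies before handing the slice to a mutating callee, the same
// move the test makes when it snapshots originalMatchers.
func safe(ms []string) {
	own := append([]string(nil), ms...) // defensive copy
	mutating(own)
}

func main() {
	ms := []string{`baz=~".*"`, `foo="bar"`}
	safe(ms)
	fmt.Println(ms) // unchanged: [baz=~".*" foo="bar"]
}
```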
dirs, err := blockDirs(dir) @@ -41,7 +39,7 @@ func repairBadIndexVersion(logger log.Logger, dir string) error { defer func() { for _, tmp := range tmpFiles { if err := os.RemoveAll(tmp); err != nil { - level.Error(logger).Log("msg", "remove tmp file", "err", err.Error()) + logger.Error("remove tmp file", "err", err.Error()) } } }() @@ -49,20 +47,20 @@ func repairBadIndexVersion(logger log.Logger, dir string) error { for _, d := range dirs { meta, err := readBogusMetaFile(d) if err != nil { - level.Error(logger).Log("msg", "failed to read meta.json for a block during repair process; skipping", "dir", d, "err", err) + logger.Error("failed to read meta.json for a block during repair process; skipping", "dir", d, "err", err) continue } if meta.Version == metaVersion1 { - level.Info(logger).Log( - "msg", "Found healthy block", + logger.Info( + "Found healthy block", "mint", meta.MinTime, "maxt", meta.MaxTime, "ulid", meta.ULID, ) continue } - level.Info(logger).Log( - "msg", "Fixing broken block", + logger.Info( + "Fixing broken block", "mint", meta.MinTime, "maxt", meta.MaxTime, "ulid", meta.ULID, diff --git a/tsdb/repair_test.go b/tsdb/repair_test.go index 8a70e05f31..8a192c4f78 100644 --- a/tsdb/repair_test.go +++ b/tsdb/repair_test.go @@ -79,7 +79,7 @@ func TestRepairBadIndexVersion(t *testing.T) { require.NoError(t, os.MkdirAll(filepath.Join(tmpDbDir, "chunks"), 0o777)) // Read current index to check integrity. - r, err := index.NewFileReader(filepath.Join(tmpDbDir, indexFilename)) + r, err := index.NewFileReader(filepath.Join(tmpDbDir, indexFilename), index.DecodePostingsRaw) require.NoError(t, err) p, err := r.Postings(ctx, "b", "1") require.NoError(t, err) @@ -97,7 +97,7 @@ func TestRepairBadIndexVersion(t *testing.T) { require.NoError(t, err) db.Close() - r, err = index.NewFileReader(filepath.Join(tmpDbDir, indexFilename)) + r, err = index.NewFileReader(filepath.Join(tmpDbDir, indexFilename), index.DecodePostingsRaw) require.NoError(t, err) defer r.Close() p, err = r.Postings(ctx, "b", "1") diff --git a/tsdb/testutil.go b/tsdb/testutil.go index 9730e47132..c39eb133c7 100644 --- a/tsdb/testutil.go +++ b/tsdb/testutil.go @@ -16,6 +16,8 @@ package tsdb import ( "testing" + "github.com/prometheus/prometheus/tsdb/tsdbutil" + prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/require" @@ -27,7 +29,11 @@ import ( ) const ( - float = "float" + float = "float" + intHistogram = "integer histogram" + floatHistogram = "float histogram" + gaugeIntHistogram = "gauge int histogram" + gaugeFloatHistogram = "gauge float histogram" ) type testValue struct { @@ -42,7 +48,6 @@ type sampleTypeScenario struct { sampleFunc func(ts, value int64) sample } -// TODO: native histogram sample types will be added as part of out-of-order native histogram support; see #11220. 
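The logging migration in repair.go follows one mechanical recipe: a go-kit level.Error(logger).Log("msg", ..., k, v) call becomes a leveled slog call with the message first and key/value pairs after it. A runnable sketch of the mapping:

```go
package main

import (
	"log/slog"
	"os"
)

func main() {
	// go-kit: level.Error(logger).Log("msg", "remove tmp file", "err", err)
	// slog:   logger.Error("remove tmp file", "err", err)
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
	logger.Error("remove tmp file", "err", "permission denied")
	logger.Info("Found healthy block", "mint", 0, "maxt", 1000, "ulid", "01ABC")
}
```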
var sampleTypeScenarios = map[string]sampleTypeScenario{ float: { sampleType: sampleMetricTypeFloat, @@ -55,50 +60,50 @@ var sampleTypeScenarios = map[string]sampleTypeScenario{ return sample{t: ts, f: float64(value)} }, }, - // intHistogram: { - // sampleType: sampleMetricTypeHistogram, - // appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { - // s := sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(value))} - // ref, err := appender.AppendHistogram(0, lbls, ts, s.h, nil) - // return ref, s, err - // }, - // sampleFunc: func(ts, value int64) sample { - // return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(value))} - // }, - // }, - // floatHistogram: { - // sampleType: sampleMetricTypeHistogram, - // appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { - // s := sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(value))} - // ref, err := appender.AppendHistogram(0, lbls, ts, nil, s.fh) - // return ref, s, err - // }, - // sampleFunc: func(ts, value int64) sample { - // return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(value))} - // }, - // }, - // gaugeIntHistogram: { - // sampleType: sampleMetricTypeHistogram, - // appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { - // s := sample{t: ts, h: tsdbutil.GenerateTestGaugeHistogram(int(value))} - // ref, err := appender.AppendHistogram(0, lbls, ts, s.h, nil) - // return ref, s, err - // }, - // sampleFunc: func(ts, value int64) sample { - // return sample{t: ts, h: tsdbutil.GenerateTestGaugeHistogram(int(value))} - // }, - // }, - // gaugeFloatHistogram: { - // sampleType: sampleMetricTypeHistogram, - // appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { - // s := sample{t: ts, fh: tsdbutil.GenerateTestGaugeFloatHistogram(int(value))} - // ref, err := appender.AppendHistogram(0, lbls, ts, nil, s.fh) - // return ref, s, err - // }, - // sampleFunc: func(ts, value int64) sample { - // return sample{t: ts, fh: tsdbutil.GenerateTestGaugeFloatHistogram(int(value))} - // }, - // }, + intHistogram: { + sampleType: sampleMetricTypeHistogram, + appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { + s := sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(value))} + ref, err := appender.AppendHistogram(0, lbls, ts, s.h, nil) + return ref, s, err + }, + sampleFunc: func(ts, value int64) sample { + return sample{t: ts, h: tsdbutil.GenerateTestHistogram(int(value))} + }, + }, + floatHistogram: { + sampleType: sampleMetricTypeHistogram, + appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { + s := sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(value))} + ref, err := appender.AppendHistogram(0, lbls, ts, nil, s.fh) + return ref, s, err + }, + sampleFunc: func(ts, value int64) sample { + return sample{t: ts, fh: tsdbutil.GenerateTestFloatHistogram(int(value))} + }, + }, + gaugeIntHistogram: { + sampleType: sampleMetricTypeHistogram, + appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { + s := sample{t: ts, h: tsdbutil.GenerateTestGaugeHistogram(int(value))} + ref, err := appender.AppendHistogram(0, lbls, ts, s.h, nil) + return ref, s, err + }, + sampleFunc: 
func(ts, value int64) sample { + return sample{t: ts, h: tsdbutil.GenerateTestGaugeHistogram(int(value))} + }, + }, + gaugeFloatHistogram: { + sampleType: sampleMetricTypeHistogram, + appendFunc: func(appender storage.Appender, lbls labels.Labels, ts, value int64) (storage.SeriesRef, sample, error) { + s := sample{t: ts, fh: tsdbutil.GenerateTestGaugeFloatHistogram(int(value))} + ref, err := appender.AppendHistogram(0, lbls, ts, nil, s.fh) + return ref, s, err + }, + sampleFunc: func(ts, value int64) sample { + return sample{t: ts, fh: tsdbutil.GenerateTestGaugeFloatHistogram(int(value))} + }, + }, } // requireEqualSeries checks that the actual series are equal to the expected ones. It ignores the counter reset hints for histograms. @@ -106,7 +111,11 @@ func requireEqualSeries(t *testing.T, expected, actual map[string][]chunks.Sampl for name, expectedItem := range expected { actualItem, ok := actual[name] require.True(t, ok, "Expected series %s not found", name) - requireEqualSamples(t, name, expectedItem, actualItem, ignoreCounterResets) + if ignoreCounterResets { + requireEqualSamples(t, name, expectedItem, actualItem, requireEqualSamplesIgnoreCounterResets) + } else { + requireEqualSamples(t, name, expectedItem, actualItem) + } } for name := range actual { _, ok := expected[name] @@ -121,7 +130,28 @@ func requireEqualOOOSamples(t *testing.T, expectedSamples int, db *DB) { "number of ooo appended samples mismatch") } -func requireEqualSamples(t *testing.T, name string, expected, actual []chunks.Sample, ignoreCounterResets bool) { +type requireEqualSamplesOption int + +const ( + requireEqualSamplesNoOption requireEqualSamplesOption = iota + requireEqualSamplesIgnoreCounterResets + requireEqualSamplesInUseBucketCompare +) + +func requireEqualSamples(t *testing.T, name string, expected, actual []chunks.Sample, options ...requireEqualSamplesOption) { + var ( + ignoreCounterResets bool + inUseBucketCompare bool + ) + for _, option := range options { + switch option { + case requireEqualSamplesIgnoreCounterResets: + ignoreCounterResets = true + case requireEqualSamplesInUseBucketCompare: + inUseBucketCompare = true + } + } + require.Equal(t, len(expected), len(actual), "Length not equal to expected for %s", name) for i, s := range expected { expectedSample := s @@ -139,6 +169,10 @@ func requireEqualSamples(t *testing.T, name string, expected, actual []chunks.Sa } else { require.Equal(t, expectedHist.CounterResetHint, actualHist.CounterResetHint, "Sample header doesn't match for %s[%d] at ts %d, expected: %s, actual: %s", name, i, expectedSample.T(), counterResetAsString(expectedHist.CounterResetHint), counterResetAsString(actualHist.CounterResetHint)) } + if inUseBucketCompare { + expectedSample.H().Compact(0) + actualSample.H().Compact(0) + } require.Equal(t, expectedHist, actualHist, "Sample doesn't match for %s[%d] at ts %d", name, i, expectedSample.T()) } case s.FH() != nil: @@ -151,6 +185,10 @@ func requireEqualSamples(t *testing.T, name string, expected, actual []chunks.Sa } else { require.Equal(t, expectedHist.CounterResetHint, actualHist.CounterResetHint, "Sample header doesn't match for %s[%d] at ts %d, expected: %s, actual: %s", name, i, expectedSample.T(), counterResetAsString(expectedHist.CounterResetHint), counterResetAsString(actualHist.CounterResetHint)) } + if inUseBucketCompare { + expectedSample.FH().Compact(0) + actualSample.FH().Compact(0) + } require.Equal(t, expectedHist, actualHist, "Sample doesn't match for %s[%d] at ts %d", name, i, expectedSample.T()) } default: diff 
--git a/tsdb/tombstones/tombstones.go b/tsdb/tombstones/tombstones.go index 4cea5005db..dcba298f3b 100644 --- a/tsdb/tombstones/tombstones.go +++ b/tsdb/tombstones/tombstones.go @@ -19,15 +19,13 @@ import ( "fmt" "hash" "hash/crc32" + "log/slog" "math" "os" "path/filepath" "sort" "sync" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/encoding" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" @@ -76,7 +74,7 @@ type Reader interface { Close() error } -func WriteFile(logger log.Logger, dir string, tr Reader) (int64, error) { +func WriteFile(logger *slog.Logger, dir string, tr Reader) (int64, error) { path := filepath.Join(dir, TombstonesFilename) tmp := path + ".tmp" hash := newCRC32() @@ -89,11 +87,11 @@ func WriteFile(logger log.Logger, dir string, tr Reader) (int64, error) { defer func() { if f != nil { if err := f.Close(); err != nil { - level.Error(logger).Log("msg", "close tmp file", "err", err.Error()) + logger.Error("close tmp file", "err", err.Error()) } } if err := os.RemoveAll(tmp); err != nil { - level.Error(logger).Log("msg", "remove tmp file", "err", err.Error()) + logger.Error("remove tmp file", "err", err.Error()) } }() diff --git a/tsdb/tombstones/tombstones_test.go b/tsdb/tombstones/tombstones_test.go index 36c9f1c1e3..cbf686e4bb 100644 --- a/tsdb/tombstones/tombstones_test.go +++ b/tsdb/tombstones/tombstones_test.go @@ -20,10 +20,11 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/stretchr/testify/require" "go.uber.org/goleak" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/storage" ) @@ -50,7 +51,7 @@ func TestWriteAndReadbackTombstones(t *testing.T) { stones.AddInterval(storage.SeriesRef(ref), dranges...) } - _, err := WriteFile(log.NewNopLogger(), tmpdir, stones) + _, err := WriteFile(promslog.NewNopLogger(), tmpdir, stones) require.NoError(t, err) restr, _, err := ReadTombstones(tmpdir) diff --git a/tsdb/tsdbblockutil.go b/tsdb/tsdbblockutil.go index f7b27c2e08..af2348019a 100644 --- a/tsdb/tsdbblockutil.go +++ b/tsdb/tsdbblockutil.go @@ -15,19 +15,19 @@ package tsdb import ( "context" + "errors" "fmt" + "log/slog" "path/filepath" - "github.com/go-kit/log" - "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" ) -var ErrInvalidTimes = fmt.Errorf("max time is lesser than min time") +var ErrInvalidTimes = errors.New("max time is lesser than min time") // CreateBlock creates a chunkrange block from the samples passed to it, and writes it to disk. 
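requireEqualSamples above trades a positional boolean for variadic option constants, which keeps call sites readable as more comparison modes are added. A minimal sketch of that option pattern:

```go
package main

import "fmt"

// option mirrors requireEqualSamplesOption: callers pass zero or more
// constants instead of positional booleans.
type option int

const (
	ignoreCounterResets option = iota
	inUseBucketCompare
)

func compare(name string, opts ...option) {
	var ignore, compact bool
	for _, o := range opts {
		switch o {
		case ignoreCounterResets:
			ignore = true
		case inUseBucketCompare:
			compact = true
		}
	}
	fmt.Println(name, "ignoreResets:", ignore, "compactFirst:", compact)
}

func main() {
	compare("series", ignoreCounterResets)
	compare("series", ignoreCounterResets, inUseBucketCompare)
}
```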
-func CreateBlock(series []storage.Series, dir string, chunkRange int64, logger log.Logger) (string, error) { +func CreateBlock(series []storage.Series, dir string, chunkRange int64, logger *slog.Logger) (string, error) { if chunkRange == 0 { chunkRange = DefaultBlockDuration } @@ -41,7 +41,7 @@ func CreateBlock(series []storage.Series, dir string, chunkRange int64, logger l } defer func() { if err := w.Close(); err != nil { - logger.Log("err closing blockwriter", err.Error()) + logger.Error("err closing blockwriter", "err", err.Error()) } }() diff --git a/tsdb/tsdbutil/dir_locker.go b/tsdb/tsdbutil/dir_locker.go index fa939879ca..4b69e1f9d6 100644 --- a/tsdb/tsdbutil/dir_locker.go +++ b/tsdb/tsdbutil/dir_locker.go @@ -16,11 +16,10 @@ package tsdbutil import ( "errors" "fmt" + "log/slog" "os" "path/filepath" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" @@ -34,7 +33,7 @@ const ( ) type DirLocker struct { - logger log.Logger + logger *slog.Logger createdCleanly prometheus.Gauge @@ -43,7 +42,7 @@ type DirLocker struct { } // NewDirLocker creates a DirLocker that can obtain an exclusive lock on dir. -func NewDirLocker(dir, subsystem string, l log.Logger, r prometheus.Registerer) (*DirLocker, error) { +func NewDirLocker(dir, subsystem string, l *slog.Logger, r prometheus.Registerer) (*DirLocker, error) { lock := &DirLocker{ logger: l, createdCleanly: prometheus.NewGauge(prometheus.GaugeOpts{ @@ -74,7 +73,7 @@ func (l *DirLocker) Lock() error { } if _, err := os.Stat(l.path); err == nil { - level.Warn(l.logger).Log("msg", "A lockfile from a previous execution already existed. It was replaced", "file", l.path) + l.logger.Warn("A lockfile from a previous execution already existed. 
It was replaced", "file", l.path) l.createdCleanly.Set(lockfileReplaced) } else { diff --git a/tsdb/tsdbutil/dir_locker_test.go b/tsdb/tsdbutil/dir_locker_test.go index fc7d905b2d..65e2761692 100644 --- a/tsdb/tsdbutil/dir_locker_test.go +++ b/tsdb/tsdbutil/dir_locker_test.go @@ -16,15 +16,16 @@ package tsdbutil import ( "testing" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/util/testutil" ) func TestLockfile(t *testing.T) { TestDirLockerUsage(t, func(t *testing.T, data string, createLock bool) (*DirLocker, testutil.Closer) { - locker, err := NewDirLocker(data, "tsdbutil", log.NewNopLogger(), nil) + locker, err := NewDirLocker(data, "tsdbutil", promslog.NewNopLogger(), nil) require.NoError(t, err) if createLock { diff --git a/tsdb/tsdbutil/dir_locker_testutil.go b/tsdb/tsdbutil/dir_locker_testutil.go index a4cf5abd68..7228dbafed 100644 --- a/tsdb/tsdbutil/dir_locker_testutil.go +++ b/tsdb/tsdbutil/dir_locker_testutil.go @@ -18,8 +18,8 @@ import ( "os" "testing" - "github.com/go-kit/log" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/util/testutil" @@ -68,7 +68,7 @@ func TestDirLockerUsage(t *testing.T, open func(t *testing.T, data string, creat // Test preconditions (file already exists + lockfile option) if c.fileAlreadyExists { - tmpLocker, err := NewDirLocker(tmpdir, "tsdb", log.NewNopLogger(), nil) + tmpLocker, err := NewDirLocker(tmpdir, "tsdb", promslog.NewNopLogger(), nil) require.NoError(t, err) err = os.WriteFile(tmpLocker.path, []byte{}, 0o644) require.NoError(t, err) diff --git a/tsdb/wlog/checkpoint.go b/tsdb/wlog/checkpoint.go index a16cd5fc74..58e11c770e 100644 --- a/tsdb/wlog/checkpoint.go +++ b/tsdb/wlog/checkpoint.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math" "os" "path/filepath" @@ -25,9 +26,6 @@ import ( "strconv" "strings" - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/tsdb/chunks" tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" @@ -94,11 +92,11 @@ const checkpointPrefix = "checkpoint." // segmented format as the original WAL itself. // This makes it easy to read it through the WAL package and concatenate // it with the original WAL. 
-func Checkpoint(logger log.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef) bool, mint int64) (*CheckpointStats, error) { +func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef) bool, mint int64) (*CheckpointStats, error) { stats := &CheckpointStats{} var sgmReader io.ReadCloser - level.Info(logger).Log("msg", "Creating checkpoint", "from_segment", from, "to_segment", to, "mint", mint) + logger.Info("Creating checkpoint", "from_segment", from, "to_segment", to, "mint", mint) { var sgmRange []SegmentRange diff --git a/tsdb/wlog/checkpoint_test.go b/tsdb/wlog/checkpoint_test.go index a9786454de..8ee193f5ac 100644 --- a/tsdb/wlog/checkpoint_test.go +++ b/tsdb/wlog/checkpoint_test.go @@ -23,9 +23,10 @@ import ( "strings" "testing" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/tsdb/chunks" @@ -244,7 +245,7 @@ func TestCheckpoint(t *testing.T) { } require.NoError(t, w.Close()) - stats, err := Checkpoint(log.NewNopLogger(), w, 100, 106, func(x chunks.HeadSeriesRef) bool { + stats, err := Checkpoint(promslog.NewNopLogger(), w, 100, 106, func(x chunks.HeadSeriesRef) bool { return x%2 == 0 }, last/2) require.NoError(t, err) @@ -354,7 +355,7 @@ func TestCheckpointNoTmpFolderAfterError(t *testing.T) { require.NoError(t, f.Close()) // Run the checkpoint and since the wlog contains corrupt data this should return an error. - _, err = Checkpoint(log.NewNopLogger(), w, 0, 1, nil, 0) + _, err = Checkpoint(promslog.NewNopLogger(), w, 0, 1, nil, 0) require.Error(t, err) // Walk the wlog dir to make sure there are no tmp folder left behind after the error. diff --git a/tsdb/wlog/live_reader.go b/tsdb/wlog/live_reader.go index 6eaef5f396..a017d362d1 100644 --- a/tsdb/wlog/live_reader.go +++ b/tsdb/wlog/live_reader.go @@ -20,9 +20,8 @@ import ( "fmt" "hash/crc32" "io" + "log/slog" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/golang/snappy" "github.com/klauspost/compress/zstd" "github.com/prometheus/client_golang/prometheus" @@ -51,7 +50,7 @@ func NewLiveReaderMetrics(reg prometheus.Registerer) *LiveReaderMetrics { } // NewLiveReader returns a new live reader. -func NewLiveReader(logger log.Logger, metrics *LiveReaderMetrics, r io.Reader) *LiveReader { +func NewLiveReader(logger *slog.Logger, metrics *LiveReaderMetrics, r io.Reader) *LiveReader { // Calling zstd.NewReader with a nil io.Reader and no options cannot return an error. zstdReader, _ := zstd.NewReader(nil) @@ -73,7 +72,7 @@ func NewLiveReader(logger log.Logger, metrics *LiveReaderMetrics, r io.Reader) * // that are still in the process of being written, and returns records as soon // as they can be read. 
type LiveReader struct { - logger log.Logger + logger *slog.Logger rdr io.Reader err error rec []byte @@ -311,7 +310,7 @@ func (r *LiveReader) readRecord() ([]byte, int, error) { return nil, 0, fmt.Errorf("record would overflow current page: %d > %d", r.readIndex+recordHeaderSize+length, pageSize) } r.metrics.readerCorruptionErrors.WithLabelValues("record_span_page").Inc() - level.Warn(r.logger).Log("msg", "Record spans page boundaries", "start", r.readIndex, "end", recordHeaderSize+length, "pageSize", pageSize) + r.logger.Warn("Record spans page boundaries", "start", r.readIndex, "end", recordHeaderSize+length, "pageSize", pageSize) } if recordHeaderSize+length > pageSize { return nil, 0, fmt.Errorf("record length greater than a single page: %d > %d", recordHeaderSize+length, pageSize) diff --git a/tsdb/wlog/reader_test.go b/tsdb/wlog/reader_test.go index 484eff3664..2ac63cbf15 100644 --- a/tsdb/wlog/reader_test.go +++ b/tsdb/wlog/reader_test.go @@ -29,11 +29,11 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/prometheus/common/promslog" + tsdb_errors "github.com/prometheus/prometheus/tsdb/errors" - "github.com/prometheus/prometheus/util/testutil" ) type reader interface { @@ -53,7 +53,7 @@ var readerConstructors = map[string]func(io.Reader) reader{ return NewReader(r) }, "LiveReader": func(r io.Reader) reader { - lr := NewLiveReader(log.NewNopLogger(), NewLiveReaderMetrics(nil), r) + lr := NewLiveReader(promslog.NewNopLogger(), NewLiveReaderMetrics(nil), r) lr.eofNonErr = true return lr }, @@ -196,7 +196,7 @@ func TestReader(t *testing.T) { } func TestReader_Live(t *testing.T) { - logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() for i := range testReaderCases { t.Run(strconv.Itoa(i), func(t *testing.T) { @@ -353,7 +353,7 @@ func TestReaderFuzz(t *testing.T) { } func TestReaderFuzz_Live(t *testing.T) { - logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() for _, compress := range []CompressionType{CompressionNone, CompressionSnappy, CompressionZstd} { t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) { dir := t.TempDir() @@ -441,7 +441,7 @@ func TestReaderFuzz_Live(t *testing.T) { func TestLiveReaderCorrupt_ShortFile(t *testing.T) { // Write a corrupt WAL segment, there is one record of pageSize in length, // but the segment is only half written. - logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() dir := t.TempDir() w, err := NewSize(nil, nil, dir, pageSize, CompressionNone) @@ -481,7 +481,7 @@ func TestLiveReaderCorrupt_ShortFile(t *testing.T) { func TestLiveReaderCorrupt_RecordTooLongAndShort(t *testing.T) { // Write a corrupt WAL segment, when record len > page size. 
- logger := testutil.NewLogger(t) + logger := promslog.NewNopLogger() dir := t.TempDir() w, err := NewSize(nil, nil, dir, pageSize*2, CompressionNone) diff --git a/tsdb/wlog/watcher.go b/tsdb/wlog/watcher.go index ac5041e87b..89db5d2dd7 100644 --- a/tsdb/wlog/watcher.go +++ b/tsdb/wlog/watcher.go @@ -17,6 +17,7 @@ import ( "errors" "fmt" "io" + "log/slog" "math" "os" "path/filepath" @@ -24,9 +25,8 @@ import ( "strings" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/timestamp" @@ -84,7 +84,7 @@ type WatcherMetrics struct { type Watcher struct { name string writer WriteTo - logger log.Logger + logger *slog.Logger walDir string lastCheckpoint string sendExemplars bool @@ -172,9 +172,9 @@ func NewWatcherMetrics(reg prometheus.Registerer) *WatcherMetrics { } // NewWatcher creates a new WAL watcher for a given WriteTo. -func NewWatcher(metrics *WatcherMetrics, readerMetrics *LiveReaderMetrics, logger log.Logger, name string, writer WriteTo, dir string, sendExemplars, sendHistograms, sendMetadata bool) *Watcher { +func NewWatcher(metrics *WatcherMetrics, readerMetrics *LiveReaderMetrics, logger *slog.Logger, name string, writer WriteTo, dir string, sendExemplars, sendHistograms, sendMetadata bool) *Watcher { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } return &Watcher{ logger: logger, @@ -206,7 +206,7 @@ func (w *Watcher) Notify() { } } -func (w *Watcher) setMetrics() { +func (w *Watcher) SetMetrics() { // Setup the WAL Watchers metrics. We do this here rather than in the // constructor because of the ordering of creating Queue Managers's, // stopping them, and then starting new ones in storage/remote/storage.go ApplyConfig. @@ -221,8 +221,8 @@ func (w *Watcher) setMetrics() { // Start the Watcher. func (w *Watcher) Start() { - w.setMetrics() - level.Info(w.logger).Log("msg", "Starting WAL watcher", "queue", w.name) + w.SetMetrics() + w.logger.Info("Starting WAL watcher", "queue", w.name) go w.loop() } @@ -241,7 +241,7 @@ func (w *Watcher) Stop() { w.metrics.currentSegment.DeleteLabelValues(w.name) } - level.Info(w.logger).Log("msg", "WAL watcher stopped", "queue", w.name) + w.logger.Info("WAL watcher stopped", "queue", w.name) } func (w *Watcher) loop() { @@ -251,7 +251,7 @@ func (w *Watcher) loop() { for !isClosed(w.quit) { w.SetStartTime(time.Now()) if err := w.Run(); err != nil { - level.Error(w.logger).Log("msg", "error tailing WAL", "err", err) + w.logger.Error("error tailing WAL", "err", err) } select { @@ -274,7 +274,7 @@ func (w *Watcher) Run() error { // Run will be called again if there was a failure to read the WAL. w.sendSamples = false - level.Info(w.logger).Log("msg", "Replaying WAL", "queue", w.name) + w.logger.Info("Replaying WAL", "queue", w.name) // Backfill from the checkpoint first if it exists. 
lastCheckpoint, checkpointIndex, err := LastCheckpoint(w.walDir) @@ -294,13 +294,13 @@ func (w *Watcher) Run() error { return err } - level.Debug(w.logger).Log("msg", "Tailing WAL", "lastCheckpoint", lastCheckpoint, "checkpointIndex", checkpointIndex, "currentSegment", currentSegment, "lastSegment", lastSegment) + w.logger.Debug("Tailing WAL", "lastCheckpoint", lastCheckpoint, "checkpointIndex", checkpointIndex, "currentSegment", currentSegment, "lastSegment", lastSegment) for !isClosed(w.quit) { w.currentSegmentMetric.Set(float64(currentSegment)) // On start, after reading the existing WAL for series records, we have a pointer to what is the latest segment. // On subsequent calls to this function, currentSegment will have been incremented and we should open that segment. - level.Debug(w.logger).Log("msg", "Processing segment", "currentSegment", currentSegment) + w.logger.Debug("Processing segment", "currentSegment", currentSegment) if err := w.watch(currentSegment, currentSegment >= lastSegment); err != nil && !errors.Is(err, ErrIgnorable) { return err } @@ -338,9 +338,9 @@ func (w *Watcher) readAndHandleError(r *LiveReader, segmentNum int, tail bool, s // Ignore all errors reading to end of segment whilst replaying the WAL. if !tail { if err != nil && !errors.Is(err, io.EOF) { - level.Warn(w.logger).Log("msg", "Ignoring error reading to end of segment, may have dropped data", "segment", segmentNum, "err", err) + w.logger.Warn("Ignoring error reading to end of segment, may have dropped data", "segment", segmentNum, "err", err) } else if r.Offset() != size { - level.Warn(w.logger).Log("msg", "Expected to have read whole segment, may have dropped data", "segment", segmentNum, "read", r.Offset(), "size", size) + w.logger.Warn("Expected to have read whole segment, may have dropped data", "segment", segmentNum, "read", r.Offset(), "size", size) } return ErrIgnorable } @@ -403,7 +403,7 @@ func (w *Watcher) watch(segmentNum int, tail bool) error { <-gcSem }() if err := w.garbageCollectSeries(segmentNum); err != nil { - level.Warn(w.logger).Log("msg", "Error process checkpoint", "err", err) + w.logger.Warn("Error process checkpoint", "err", err) } }() default: @@ -424,7 +424,7 @@ func (w *Watcher) watch(segmentNum int, tail bool) error { // we haven't read due to a notification in quite some time, try reading anyways case <-readTicker.C: - level.Debug(w.logger).Log("msg", "Watcher is reading the WAL due to timeout, haven't received any write notifications recently", "timeout", readTimeout) + w.logger.Debug("Watcher is reading the WAL due to timeout, haven't received any write notifications recently", "timeout", readTimeout) err := w.readAndHandleError(reader, segmentNum, tail, size) if err != nil { return err @@ -460,11 +460,11 @@ func (w *Watcher) garbageCollectSeries(segmentNum int) error { } if index >= segmentNum { - level.Debug(w.logger).Log("msg", "Current segment is behind the checkpoint, skipping reading of checkpoint", "current", fmt.Sprintf("%08d", segmentNum), "checkpoint", dir) + w.logger.Debug("Current segment is behind the checkpoint, skipping reading of checkpoint", "current", fmt.Sprintf("%08d", segmentNum), "checkpoint", dir) return nil } - level.Debug(w.logger).Log("msg", "New checkpoint detected", "new", dir, "currentSegment", segmentNum) + w.logger.Debug("New checkpoint detected", "new", dir, "currentSegment", segmentNum) if err = w.readCheckpoint(dir, (*Watcher).readSegmentForGC); err != nil { return fmt.Errorf("readCheckpoint: %w", err) @@ -519,7 +519,7 @@ func (w *Watcher) 
readSegment(r *LiveReader, segmentNum int, tail bool) error { if !w.sendSamples { w.sendSamples = true duration := time.Since(w.startTime) - level.Info(w.logger).Log("msg", "Done replaying WAL", "duration", duration) + w.logger.Info("Done replaying WAL", "duration", duration) } samplesToSend = append(samplesToSend, s) } @@ -564,7 +564,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { if !w.sendSamples { w.sendSamples = true duration := time.Since(w.startTime) - level.Info(w.logger).Log("msg", "Done replaying WAL", "duration", duration) + w.logger.Info("Done replaying WAL", "duration", duration) } histogramsToSend = append(histogramsToSend, h) } @@ -592,7 +592,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { if !w.sendSamples { w.sendSamples = true duration := time.Since(w.startTime) - level.Info(w.logger).Log("msg", "Done replaying WAL", "duration", duration) + w.logger.Info("Done replaying WAL", "duration", duration) } floatHistogramsToSend = append(floatHistogramsToSend, fh) } @@ -670,7 +670,7 @@ type segmentReadFn func(w *Watcher, r *LiveReader, segmentNum int, tail bool) er // Read all the series records from a Checkpoint directory. func (w *Watcher) readCheckpoint(checkpointDir string, readFn segmentReadFn) error { - level.Debug(w.logger).Log("msg", "Reading checkpoint", "dir", checkpointDir) + w.logger.Debug("Reading checkpoint", "dir", checkpointDir) index, err := checkpointNum(checkpointDir) if err != nil { return fmt.Errorf("checkpointNum: %w", err) @@ -704,7 +704,7 @@ func (w *Watcher) readCheckpoint(checkpointDir string, readFn segmentReadFn) err } } - level.Debug(w.logger).Log("msg", "Read series references from checkpoint", "checkpoint", checkpointDir) + w.logger.Debug("Read series references from checkpoint", "checkpoint", checkpointDir) return nil } diff --git a/tsdb/wlog/watcher_test.go b/tsdb/wlog/watcher_test.go index dc0314e8c9..398b0f4414 100644 --- a/tsdb/wlog/watcher_test.go +++ b/tsdb/wlog/watcher_test.go @@ -22,9 +22,10 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" + "golang.org/x/sync/errgroup" "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" @@ -52,6 +53,13 @@ func retry(t *testing.T, interval time.Duration, n int, f func() bool) { t.Logf("function returned false") } +// Overwrite readTimeout defined in watcher.go. +func overwriteReadTimeout(t *testing.T, val time.Duration) { + initialVal := readTimeout + readTimeout = val + t.Cleanup(func() { readTimeout = initialVal }) +} + type writeToMock struct { samplesAppended int exemplarsAppended int @@ -226,7 +234,7 @@ func TestTailSamples(t *testing.T) { watcher.SetStartTime(now) // Set the Watcher's metrics so they're not nil pointers. 
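
(The watcher.SetMetrics() call the comment above refers to continues just below.) All of the watcher.go hunks above apply the same mechanical translation from go-kit logging to the standard library's log/slog. A minimal, self-contained sketch of the two styles, using a hypothetical logger rather than anything from this diff:

    package main

    import (
        "log/slog"
        "os"
    )

    func main() {
        // A text handler at Debug level stands in for the logger Prometheus builds elsewhere.
        logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}))

        // Old go-kit style, as removed throughout this diff:
        //   level.Debug(logger).Log("msg", "Processing segment", "currentSegment", 4)
        // New log/slog style: the message is a positional argument, key/value pairs follow.
        logger.Debug("Processing segment", "currentSegment", 4)
        logger.Warn("Ignoring error reading to end of segment, may have dropped data", "segment", 4, "err", "example")
    }
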
- watcher.setMetrics() + watcher.SetMetrics() for i := first; i <= last; i++ { segment, err := OpenReadSegment(SegmentName(watcher.walDir, i)) require.NoError(t, err) @@ -302,7 +310,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) { } } require.NoError(t, w.Log(recs...)) - readTimeout = time.Second + overwriteReadTimeout(t, time.Second) _, _, err = Segments(w.Dir()) require.NoError(t, err) @@ -367,7 +375,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { } } - Checkpoint(log.NewNopLogger(), w, 0, 1, func(x chunks.HeadSeriesRef) bool { return true }, 0) + Checkpoint(promslog.NewNopLogger(), w, 0, 1, func(x chunks.HeadSeriesRef) bool { return true }, 0) w.Truncate(1) // Write more records after checkpointing. @@ -394,7 +402,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { _, _, err = Segments(w.Dir()) require.NoError(t, err) - readTimeout = time.Second + overwriteReadTimeout(t, time.Second) wt := newWriteToMock(0) watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) go watcher.Start() @@ -458,7 +466,7 @@ func TestReadCheckpoint(t *testing.T) { } _, err = w.NextSegmentSync() require.NoError(t, err) - _, err = Checkpoint(log.NewNopLogger(), w, 30, 31, func(x chunks.HeadSeriesRef) bool { return true }, 0) + _, err = Checkpoint(promslog.NewNopLogger(), w, 30, 31, func(x chunks.HeadSeriesRef) bool { return true }, 0) require.NoError(t, err) require.NoError(t, w.Truncate(32)) @@ -540,7 +548,7 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { watcher.MaxSegment = -1 // Set the Watcher's metrics so they're not nil pointers. - watcher.setMetrics() + watcher.SetMetrics() lastCheckpoint, _, err := LastCheckpoint(watcher.walDir) require.NoError(t, err) @@ -607,7 +615,7 @@ func TestCheckpointSeriesReset(t *testing.T) { _, _, err = Segments(w.Dir()) require.NoError(t, err) - readTimeout = time.Second + overwriteReadTimeout(t, time.Second) wt := newWriteToMock(0) watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) watcher.MaxSegment = -1 @@ -621,7 +629,7 @@ func TestCheckpointSeriesReset(t *testing.T) { return wt.checkNumSeries() == seriesCount }, 10*time.Second, 1*time.Second) - _, err = Checkpoint(log.NewNopLogger(), w, 2, 4, func(x chunks.HeadSeriesRef) bool { return true }, 0) + _, err = Checkpoint(promslog.NewNopLogger(), w, 2, 4, func(x chunks.HeadSeriesRef) bool { return true }, 0) require.NoError(t, err) err = w.Truncate(5) @@ -691,7 +699,7 @@ func TestRun_StartupTime(t *testing.T) { watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) watcher.MaxSegment = segments - watcher.setMetrics() + watcher.SetMetrics() startTime := time.Now() err = watcher.Run() @@ -742,9 +750,6 @@ func TestRun_AvoidNotifyWhenBehind(t *testing.T) { const seriesCount = 10 const samplesCount = 50 - // This test can take longer than intended to finish in cloud CI. - readTimeout := 10 * time.Second - for _, compress := range []CompressionType{CompressionNone, CompressionSnappy, CompressionZstd} { t.Run(string(compress), func(t *testing.T) { dir := t.TempDir() @@ -755,36 +760,50 @@ func TestRun_AvoidNotifyWhenBehind(t *testing.T) { w, err := NewSize(nil, nil, wdir, segmentSize, compress) require.NoError(t, err) - var wg sync.WaitGroup - // Generate one segment initially to ensure that watcher.Run() finds at least one segment on disk. + // Write to 00000000, the watcher will read series from it. 
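
(The test body resumes below with the first segment write.) The overwriteReadTimeout helper introduced above leans on t.Cleanup to restore a package-level variable once the test finishes, a pattern that generalizes to any mutable global. A sketch with hypothetical names, not code from this repository:

    package example

    import (
        "testing"
        "time"
    )

    var pollInterval = 15 * time.Second // hypothetical package-level default

    // overridePollInterval shortens the global for one test and restores it afterwards,
    // even if the test fails. Tests sharing the global must not run in parallel.
    func overridePollInterval(t *testing.T, val time.Duration) {
        t.Helper()
        initial := pollInterval
        pollInterval = val
        t.Cleanup(func() { pollInterval = initial })
    }

    func TestWithShortInterval(t *testing.T) {
        overridePollInterval(t, 10*time.Millisecond)
        // ... the code under test now observes the shorter interval ...
    }
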
require.NoError(t, generateWALRecords(w, 0, seriesCount, samplesCount)) - w.NextSegment() // Force creation of the next segment - wg.Add(1) - go func() { - defer wg.Done() - for i := 1; i < segmentsToWrite; i++ { - require.NoError(t, generateWALRecords(w, i, seriesCount, samplesCount)) - w.NextSegment() - } - }() + // Create 00000001, the watcher will tail it once started. + w.NextSegment() + // Set up the watcher and run it in the background. wt := newWriteToMock(time.Millisecond) watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) + watcher.SetMetrics() watcher.MaxSegment = segmentsToRead - watcher.setMetrics() - startTime := time.Now() - err = watcher.Run() - wg.Wait() - require.Less(t, time.Since(startTime), readTimeout) - - // But samples records shouldn't get dropped - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumSeries() > 0 + var g errgroup.Group + g.Go(func() error { + startTime := time.Now() + err = watcher.Run() + if err != nil { + return err + } + // If the watcher was to wait for readTicker to read every new segment, it would need readTimeout * segmentsToRead. + d := time.Since(startTime) + if d > readTimeout { + return fmt.Errorf("watcher ran for %s, it shouldn't rely on readTicker=%s to read the new segments", d, readTimeout) + } + return nil }) - require.Equal(t, segmentsToRead*seriesCount*samplesCount, wt.samplesAppended) - require.NoError(t, err) + // The watcher went through 00000000 and is tailing the next one. + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumSeries() == seriesCount + }) + + // In the meantime, add some new segments in bulk. + // We should end up with segmentsToWrite + 1 segments now. + for i := 1; i < segmentsToWrite; i++ { + require.NoError(t, generateWALRecords(w, i, seriesCount, samplesCount)) + w.NextSegment() + } + + // Wait for the watcher. + require.NoError(t, g.Wait()) + + // All series and samples were read. + require.Equal(t, (segmentsToRead+1)*seriesCount, wt.checkNumSeries()) // Series from 00000000 are also read. + require.Equal(t, segmentsToRead*seriesCount*samplesCount, wt.samplesAppended) require.NoError(t, w.Close()) }) } diff --git a/tsdb/wlog/wlog.go b/tsdb/wlog/wlog.go index b14521f358..54c257d61a 100644 --- a/tsdb/wlog/wlog.go +++ b/tsdb/wlog/wlog.go @@ -21,6 +21,7 @@ import ( "fmt" "hash/crc32" "io" + "log/slog" "os" "path/filepath" "slices" @@ -28,11 +29,10 @@ import ( "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/golang/snappy" "github.com/klauspost/compress/zstd" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/prometheus/tsdb/fileutil" ) @@ -121,7 +121,7 @@ func (e *CorruptionErr) Unwrap() error { } // OpenWriteSegment opens segment k in dir. The returned segment is ready for new appends. -func OpenWriteSegment(logger log.Logger, dir string, k int) (*Segment, error) { +func OpenWriteSegment(logger *slog.Logger, dir string, k int) (*Segment, error) { segName := SegmentName(dir, k) f, err := os.OpenFile(segName, os.O_WRONLY|os.O_APPEND, 0o666) if err != nil { @@ -138,7 +138,7 @@ func OpenWriteSegment(logger log.Logger, dir string, k int) (*Segment, error) { // If it was torn mid-record, a full read (which the caller should do anyway // to ensure integrity) will detect it as a corruption by the end. 
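
(The zero-padding the comment above describes follows next.) Back in the rewritten TestRun_AvoidNotifyWhenBehind, a sync.WaitGroup was replaced by golang.org/x/sync/errgroup so the watcher goroutine can hand an error back to the test. The idiom in isolation, with a hypothetical doWork:

    package main

    import (
        "errors"
        "log"

        "golang.org/x/sync/errgroup"
    )

    func doWork() error { return errors.New("example failure") } // hypothetical

    func main() {
        var g errgroup.Group
        g.Go(doWork) // the returned error is captured instead of being lost in the goroutine
        // Wait blocks until every function started with Go returns,
        // then reports the first non-nil error, if any.
        if err := g.Wait(); err != nil {
            log.Println(err)
        }
    }
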
if d := stat.Size() % pageSize; d != 0 { - level.Warn(logger).Log("msg", "Last page of the wlog is torn, filling it with zeros", "segment", segName) + logger.Warn("Last page of the wlog is torn, filling it with zeros", "segment", segName) if _, err := f.Write(make([]byte, pageSize-d)); err != nil { f.Close() return nil, fmt.Errorf("zero-pad torn page: %w", err) @@ -201,7 +201,7 @@ func ParseCompressionType(compress bool, compressType string) CompressionType { // beyond the most recent segment. type WL struct { dir string - logger log.Logger + logger *slog.Logger segmentSize int mtx sync.RWMutex segment *Segment // Active segment. @@ -286,7 +286,7 @@ func newWLMetrics(w *WL, r prometheus.Registerer) *wlMetrics { }, func() float64 { val, err := w.Size() if err != nil { - level.Error(w.logger).Log("msg", "Failed to calculate size of \"wal\" dir", + w.logger.Error("Failed to calculate size of \"wal\" dir", "err", err.Error()) } return float64(val) @@ -309,13 +309,13 @@ func newWLMetrics(w *WL, r prometheus.Registerer) *wlMetrics { } // New returns a new WAL over the given directory. -func New(logger log.Logger, reg prometheus.Registerer, dir string, compress CompressionType) (*WL, error) { +func New(logger *slog.Logger, reg prometheus.Registerer, dir string, compress CompressionType) (*WL, error) { return NewSize(logger, reg, dir, DefaultSegmentSize, compress) } // NewSize returns a new write log over the given directory. // New segments are created with the specified size. -func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSize int, compress CompressionType) (*WL, error) { +func NewSize(logger *slog.Logger, reg prometheus.Registerer, dir string, segmentSize int, compress CompressionType) (*WL, error) { if segmentSize%pageSize != 0 { return nil, errors.New("invalid segment size") } @@ -323,7 +323,7 @@ func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSi return nil, fmt.Errorf("create dir: %w", err) } if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } var zstdWriter *zstd.Encoder @@ -378,9 +378,9 @@ func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSi } // Open an existing WAL. -func Open(logger log.Logger, dir string) (*WL, error) { +func Open(logger *slog.Logger, dir string) (*WL, error) { if logger == nil { - logger = log.NewNopLogger() + logger = promslog.NewNopLogger() } zstdWriter, err := zstd.NewWriter(nil) if err != nil { @@ -443,7 +443,7 @@ func (w *WL) Repair(origErr error) error { if cerr.Segment < 0 { return errors.New("corruption error does not specify position") } - level.Warn(w.logger).Log("msg", "Starting corruption repair", + w.logger.Warn("Starting corruption repair", "segment", cerr.Segment, "offset", cerr.Offset) // All segments behind the corruption can no longer be used. @@ -451,7 +451,7 @@ func (w *WL) Repair(origErr error) error { if err != nil { return fmt.Errorf("list segments: %w", err) } - level.Warn(w.logger).Log("msg", "Deleting all segments newer than corrupted segment", "segment", cerr.Segment) + w.logger.Warn("Deleting all segments newer than corrupted segment", "segment", cerr.Segment) for _, s := range segs { if w.segment.i == s.index { @@ -473,7 +473,7 @@ func (w *WL) Repair(origErr error) error { // Regardless of the corruption offset, no record reaches into the previous segment. // So we can safely repair the WAL by removing the segment and re-inserting all // its records up to the corruption. 
- level.Warn(w.logger).Log("msg", "Rewrite corrupted segment", "segment", cerr.Segment) + w.logger.Warn("Rewrite corrupted segment", "segment", cerr.Segment) fn := SegmentName(w.Dir(), cerr.Segment) tmpfn := fn + ".repair" @@ -583,10 +583,10 @@ func (w *WL) nextSegment(async bool) (int, error) { // Don't block further writes by fsyncing the last segment. f := func() { if err := w.fsync(prev); err != nil { - level.Error(w.logger).Log("msg", "sync previous segment", "err", err) + w.logger.Error("sync previous segment", "err", err) } if err := prev.Close(); err != nil { - level.Error(w.logger).Log("msg", "close previous segment", "err", err) + w.logger.Error("close previous segment", "err", err) } } if async { @@ -890,10 +890,10 @@ func (w *WL) Close() (err error) { <-donec if err = w.fsync(w.segment); err != nil { - level.Error(w.logger).Log("msg", "sync previous segment", "err", err) + w.logger.Error("sync previous segment", "err", err) } if err := w.segment.Close(); err != nil { - level.Error(w.logger).Log("msg", "close previous segment", "err", err) + w.logger.Error("close previous segment", "err", err) } w.metrics.Unregister() diff --git a/tsdb/wlog/wlog_test.go b/tsdb/wlog/wlog_test.go index 165d2758f0..d195aaee2f 100644 --- a/tsdb/wlog/wlog_test.go +++ b/tsdb/wlog/wlog_test.go @@ -23,14 +23,13 @@ import ( "path/filepath" "testing" - "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" client_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" "go.uber.org/goleak" "github.com/prometheus/prometheus/tsdb/fileutil" - "github.com/prometheus/prometheus/util/testutil" ) func TestMain(m *testing.M) { @@ -215,7 +214,7 @@ func TestCorruptAndCarryOn(t *testing.T) { dir := t.TempDir() var ( - logger = testutil.NewLogger(t) + logger = promslog.NewNopLogger() segmentSize = pageSize * 3 recordSize = (pageSize / 3) - recordHeaderSize ) @@ -568,7 +567,7 @@ func TestUnregisterMetrics(t *testing.T) { reg := prometheus.NewRegistry() for i := 0; i < 2; i++ { - wl, err := New(log.NewNopLogger(), reg, t.TempDir(), CompressionNone) + wl, err := New(promslog.NewNopLogger(), reg, t.TempDir(), CompressionNone) require.NoError(t, err) require.NoError(t, wl.Close()) } diff --git a/ui-commits b/ui-commits new file mode 100644 index 0000000000..7f34e1f95a --- /dev/null +++ b/ui-commits @@ -0,0 +1,12 @@ +dfec29d8e Fix border color for target pools with one target that is failing +65743bf9b ui: drop template readme +a7c1a951d Add general Mantine overrides CSS file +0757fbbec Make sure that alert element table headers are not wrapped +0180cf31a Factor out common icon and card styles +50af7d589 Fix tree line drawing by using a callback ref +ac01dc903 Explain, vector-to-vector: Do not compute results for set operators +9b0dc68d0 PromQL explain view: Support set operators +57898c792 Refactor and fix time formatting functions, add tests +091fc403c Fiddle with targets table styles to try and improve things a bit +a1908df92 Don't wrap action buttons below metric name in metrics explorer +ac5377873 mantine UI: Distinguish between Not Ready and Stopping diff --git a/util/annotations/annotations.go b/util/annotations/annotations.go index b0272b7fee..1b743f7057 100644 --- a/util/annotations/annotations.go +++ b/util/annotations/annotations.go @@ -146,6 +146,8 @@ var ( PossibleNonCounterInfo = fmt.Errorf("%w: metric might not be a counter, name does not end in _total/_sum/_count/_bucket:", PromQLInfo) 
HistogramQuantileForcedMonotonicityInfo = fmt.Errorf("%w: input to histogram_quantile needed to be fixed for monotonicity (see https://prometheus.io/docs/prometheus/latest/querying/functions/#histogram_quantile) for metric name", PromQLInfo) + IncompatibleTypesInBinOpInfo = fmt.Errorf("%w: incompatible sample types encountered for binary operator", PromQLInfo) + HistogramIgnoredInAggregationInfo = fmt.Errorf("%w: ignored histogram in", PromQLInfo) ) type annoErr struct { @@ -273,3 +275,21 @@ func NewHistogramQuantileForcedMonotonicityInfo(metricName string, pos posrange. Err: fmt.Errorf("%w %q", HistogramQuantileForcedMonotonicityInfo, metricName), } } + +// NewIncompatibleTypesInBinOpInfo is used if binary operators act on a +// combination of types that doesn't work and therefore returns no result. +func NewIncompatibleTypesInBinOpInfo(lhsType, operator, rhsType string, pos posrange.PositionRange) error { + return annoErr{ + PositionRange: pos, + Err: fmt.Errorf("%w %q: %s %s %s", IncompatibleTypesInBinOpInfo, operator, lhsType, operator, rhsType), + } +} + +// NewHistogramIgnoredInAggregationInfo is used when a histogram is ignored by +// an aggregation operator that cannot handle histograms. +func NewHistogramIgnoredInAggregationInfo(aggregation string, pos posrange.PositionRange) error { + return annoErr{ + PositionRange: pos, + Err: fmt.Errorf("%w %s aggregation", HistogramIgnoredInAggregationInfo, aggregation), + } +} diff --git a/util/convertnhcb/convertnhcb.go b/util/convertnhcb/convertnhcb.go new file mode 100644 index 0000000000..21ae62b3cb --- /dev/null +++ b/util/convertnhcb/convertnhcb.go @@ -0,0 +1,262 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package convertnhcb + +import ( + "errors" + "fmt" + "math" + "sort" + "strings" + + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" +) + +var ( + errNegativeBucketCount = errors.New("bucket count must be non-negative") + errNegativeCount = errors.New("count must be non-negative") + errCountMismatch = errors.New("count mismatch") + errCountNotCumulative = errors.New("count is not cumulative") +) + +type tempHistogramBucket struct { + le float64 + count float64 +} + +// TempHistogram is used to collect information about classic histogram +// samples incrementally before creating a histogram.Histogram or +// histogram.FloatHistogram based on the values collected. +type TempHistogram struct { + buckets []tempHistogramBucket + count float64 + sum float64 + err error + hasCount bool +} + +// NewTempHistogram creates a new TempHistogram to +// collect information about classic histogram samples. 
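
(The constructor follows.) The intended call pattern, mirroring the table-driven tests at the end of this file, is to feed cumulative classic-histogram samples in and convert once per series; a sketch:

    package main

    import (
        "fmt"
        "math"

        "github.com/prometheus/prometheus/util/convertnhcb"
    )

    func main() {
        h := convertnhcb.NewTempHistogram()
        _ = h.SetBucketCount(0.5, 50)           // cumulative count of the le="0.5" bucket
        _ = h.SetBucketCount(1.0, 950)          // cumulative count of the le="1.0" bucket
        _ = h.SetBucketCount(math.Inf(1), 1000) // the +Inf bucket carries the overall count
        _ = h.SetCount(1000)
        _ = h.SetSum(1000.25)
        ih, fh, err := h.Convert() // all counts are integral, so ih is set and fh is nil
        fmt.Println(ih, fh, err)

        h.Reset() // reuse the allocation for the next series
    }
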
+func NewTempHistogram() TempHistogram { + return TempHistogram{ + buckets: make([]tempHistogramBucket, 0, 10), + } +} + +func (h TempHistogram) Err() error { + return h.err +} + +func (h *TempHistogram) Reset() { + h.buckets = h.buckets[:0] + h.count = 0 + h.sum = 0 + h.err = nil + h.hasCount = false +} + +func (h *TempHistogram) SetBucketCount(boundary, count float64) error { + if h.err != nil { + return h.err + } + if count < 0 { + h.err = fmt.Errorf("%w: le=%g, count=%g", errNegativeBucketCount, boundary, count) + return h.err + } + // Assume that the elements are added in order. + switch { + case len(h.buckets) == 0: + h.buckets = append(h.buckets, tempHistogramBucket{le: boundary, count: count}) + case h.buckets[len(h.buckets)-1].le < boundary: + // Happy case is "<". + if count < h.buckets[len(h.buckets)-1].count { + h.err = fmt.Errorf("%w: %g < %g", errCountNotCumulative, count, h.buckets[len(h.buckets)-1].count) + return h.err + } + h.buckets = append(h.buckets, tempHistogramBucket{le: boundary, count: count}) + case h.buckets[len(h.buckets)-1].le == boundary: + // Ignore this, as it is a duplicate sample. + default: + // Find the correct position to insert. + i := sort.Search(len(h.buckets), func(i int) bool { + return h.buckets[i].le >= boundary + }) + if h.buckets[i].le == boundary { + // Ignore this, as it is a duplicate sample. + return nil + } + if i > 0 && count < h.buckets[i-1].count { + h.err = fmt.Errorf("%w: %g < %g", errCountNotCumulative, count, h.buckets[i-1].count) + return h.err + } + if count > h.buckets[i].count { + h.err = fmt.Errorf("%w: %g > %g", errCountNotCumulative, count, h.buckets[i].count) + return h.err + } + // Insert at the correct position unless duplicate. + h.buckets = append(h.buckets, tempHistogramBucket{}) + copy(h.buckets[i+1:], h.buckets[i:]) + h.buckets[i] = tempHistogramBucket{le: boundary, count: count} + } + return nil +} + +func (h *TempHistogram) SetCount(count float64) error { + if h.err != nil { + return h.err + } + if count < 0 { + h.err = fmt.Errorf("%w: count=%g", errNegativeCount, count) + return h.err + } + h.count = count + h.hasCount = true + return nil +} + +func (h *TempHistogram) SetSum(sum float64) error { + if h.err != nil { + return h.err + } + h.sum = sum + return nil +} + +func (h TempHistogram) Convert() (*histogram.Histogram, *histogram.FloatHistogram, error) { + if h.err != nil { + return nil, nil, h.err + } + + if !h.hasCount && len(h.buckets) > 0 { + // No count, so set count to the highest known bucket's count. + h.count = h.buckets[len(h.buckets)-1].count + h.hasCount = true + } + + if len(h.buckets) == 0 || h.buckets[len(h.buckets)-1].le != math.Inf(1) { + // No +Inf bucket. + // Let the last bucket be +Inf with the overall count. 
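
(Convert continues below, appending the +Inf bucket the comment above describes and then dispatching to an integer or float conversion.) It is worth tracing the encoding convertToIntegerHistogram then applies: the input counts are cumulative, so it first recovers per-bucket counts and then stores the difference between consecutive per-bucket counts. For the cumulative counts 50, 950, 1000 used in the tests below, this yields PositiveBuckets of 50, 850, -850:

    package main

    import "fmt"

    func main() {
        cumulative := []int64{50, 950, 1000} // le="0.5", le="1.0", le="+Inf"
        prevCount, prevDelta := int64(0), int64(0)
        var stored []int64
        for _, c := range cumulative {
            delta := c - prevCount                   // per-bucket counts: 50, 900, 50
            stored = append(stored, delta-prevDelta) // stored values:     50, 850, -850
            prevCount, prevDelta = c, delta
        }
        fmt.Println(stored) // [50 850 -850]
    }
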
+ h.buckets = append(h.buckets, tempHistogramBucket{le: math.Inf(1), count: h.count}) + } + + for _, b := range h.buckets { + intCount := int64(math.Round(b.count)) + if b.count != float64(intCount) { + return h.convertToFloatHistogram() + } + } + + intCount := uint64(math.Round(h.count)) + if h.count != float64(intCount) { + return h.convertToFloatHistogram() + } + return h.convertToIntegerHistogram(intCount) +} + +func (h TempHistogram) convertToIntegerHistogram(count uint64) (*histogram.Histogram, *histogram.FloatHistogram, error) { + rh := &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: count, + Sum: h.sum, + PositiveSpans: []histogram.Span{{Length: uint32(len(h.buckets))}}, + PositiveBuckets: make([]int64, len(h.buckets)), + } + + if len(h.buckets) > 1 { + rh.CustomValues = make([]float64, len(h.buckets)-1) // Not storing the last +Inf bucket. + } + + prevCount := int64(0) + prevDelta := int64(0) + for i, b := range h.buckets { + // delta is the actual bucket count as the input is cumulative. + delta := int64(b.count) - prevCount + rh.PositiveBuckets[i] = delta - prevDelta + prevCount = int64(b.count) + prevDelta = delta + if b.le != math.Inf(1) { + rh.CustomValues[i] = b.le + } + } + + if count != uint64(h.buckets[len(h.buckets)-1].count) { + h.err = fmt.Errorf("%w: count=%d != le=%g count=%g", errCountMismatch, count, h.buckets[len(h.buckets)-1].le, h.buckets[len(h.buckets)-1].count) + return nil, nil, h.err + } + + return rh.Compact(2), nil, nil +} + +func (h TempHistogram) convertToFloatHistogram() (*histogram.Histogram, *histogram.FloatHistogram, error) { + rh := &histogram.FloatHistogram{ + Schema: histogram.CustomBucketsSchema, + Count: h.count, + Sum: h.sum, + PositiveSpans: []histogram.Span{{Length: uint32(len(h.buckets))}}, + PositiveBuckets: make([]float64, len(h.buckets)), + } + + if len(h.buckets) > 1 { + rh.CustomValues = make([]float64, len(h.buckets)-1) // Not storing the last +Inf bucket. + } + + prevCount := 0.0 + for i, b := range h.buckets { + rh.PositiveBuckets[i] = b.count - prevCount + prevCount = b.count + if b.le != math.Inf(1) { + rh.CustomValues[i] = b.le + } + } + + if h.count != h.buckets[len(h.buckets)-1].count { + h.err = fmt.Errorf("%w: count=%g != le=%g count=%g", errCountMismatch, h.count, h.buckets[len(h.buckets)-1].le, h.buckets[len(h.buckets)-1].count) + return nil, nil, h.err + } + + return nil, rh.Compact(0), nil +} + +func GetHistogramMetricBase(m labels.Labels, name string) labels.Labels { + return labels.NewBuilder(m). + Set(labels.MetricName, name). + Del(labels.BucketLabel). + Labels() +} + +type SuffixType int + +const ( + SuffixNone SuffixType = iota + SuffixBucket + SuffixSum + SuffixCount +) + +// GetHistogramMetricBaseName removes the suffixes _bucket, _sum, _count from +// the metric name. We specifically do not remove the _created suffix as that +// should be removed by the caller. 
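
(The implementation follows.) Its expected behavior as a small usage sketch:

    package main

    import (
        "fmt"

        "github.com/prometheus/prometheus/util/convertnhcb"
    )

    func main() {
        suffix, base := convertnhcb.GetHistogramMetricBaseName("http_request_duration_seconds_bucket")
        fmt.Println(suffix == convertnhcb.SuffixBucket, base) // true http_request_duration_seconds

        suffix, base = convertnhcb.GetHistogramMetricBaseName("process_start_time_seconds")
        fmt.Println(suffix == convertnhcb.SuffixNone, base) // true, name returned unchanged
    }
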
+func GetHistogramMetricBaseName(s string) (SuffixType, string) { + if r, ok := strings.CutSuffix(s, "_bucket"); ok { + return SuffixBucket, r + } + if r, ok := strings.CutSuffix(s, "_sum"); ok { + return SuffixSum, r + } + if r, ok := strings.CutSuffix(s, "_count"); ok { + return SuffixCount, r + } + return SuffixNone, s +} diff --git a/util/convertnhcb/convertnhcb_test.go b/util/convertnhcb/convertnhcb_test.go new file mode 100644 index 0000000000..7486ac18bb --- /dev/null +++ b/util/convertnhcb/convertnhcb_test.go @@ -0,0 +1,189 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package convertnhcb + +import ( + "math" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/model/histogram" +) + +func TestNHCBConvert(t *testing.T) { + tests := map[string]struct { + setup func() *TempHistogram + expectedErr error + expectedH *histogram.Histogram + expectedFH *histogram.FloatHistogram + }{ + "empty": { + setup: func() *TempHistogram { + h := NewTempHistogram() + return &h + }, + expectedH: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + PositiveSpans: []histogram.Span{}, + PositiveBuckets: []int64{}, + }, + }, + "sum only": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetSum(1000.25) + return &h + }, + expectedH: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Sum: 1000.25, + PositiveSpans: []histogram.Span{}, + PositiveBuckets: []int64{}, + }, + }, + "single integer bucket": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetSum(1000.25) + h.SetBucketCount(0.5, 1000) + return &h + }, + expectedH: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1000, + Sum: 1000.25, + PositiveSpans: []histogram.Span{{Length: 1}}, + PositiveBuckets: []int64{1000}, + CustomValues: []float64{0.5}, + }, + }, + "single float bucket": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetSum(1000.25) + h.SetBucketCount(0.5, 1337.42) + return &h + }, + expectedFH: &histogram.FloatHistogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1337.42, + Sum: 1000.25, + PositiveSpans: []histogram.Span{{Length: 1}}, + PositiveBuckets: []float64{1337.42}, + CustomValues: []float64{0.5}, + }, + }, + "happy case integer bucket": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetCount(1000) + h.SetSum(1000.25) + h.SetBucketCount(0.5, 50) + h.SetBucketCount(1.0, 950) + h.SetBucketCount(math.Inf(1), 1000) + return &h + }, + expectedH: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1000, + Sum: 1000.25, + PositiveSpans: []histogram.Span{{Length: 3}}, + PositiveBuckets: []int64{50, 850, -850}, + CustomValues: []float64{0.5, 1.0}, + }, + }, + "happy case float bucket": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetCount(1000) + h.SetSum(1000.25) + h.SetBucketCount(0.5, 50) + h.SetBucketCount(1.0, 950.5) + h.SetBucketCount(math.Inf(1), 1000) + return &h + }, + expectedFH: 
&histogram.FloatHistogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1000, + Sum: 1000.25, + PositiveSpans: []histogram.Span{{Length: 3}}, + PositiveBuckets: []float64{50, 900.5, 49.5}, + CustomValues: []float64{0.5, 1.0}, + }, + }, + "non cumulative bucket": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetCount(1000) + h.SetSum(1000.25) + h.SetBucketCount(0.5, 50) + h.SetBucketCount(1.0, 950) + h.SetBucketCount(math.Inf(1), 900) + return &h + }, + expectedErr: errCountNotCumulative, + }, + "negative count": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetCount(-1000) + h.SetSum(1000.25) + h.SetBucketCount(0.5, 50) + h.SetBucketCount(1.0, 950) + h.SetBucketCount(math.Inf(1), 900) + return &h + }, + expectedErr: errNegativeCount, + }, + "mixed order": { + setup: func() *TempHistogram { + h := NewTempHistogram() + h.SetBucketCount(0.5, 50) + h.SetBucketCount(math.Inf(1), 1000) + h.SetBucketCount(1.0, 950) + h.SetCount(1000) + h.SetSum(1000.25) + return &h + }, + expectedH: &histogram.Histogram{ + Schema: histogram.CustomBucketsSchema, + Count: 1000, + Sum: 1000.25, + PositiveSpans: []histogram.Span{{Length: 3}}, + PositiveBuckets: []int64{50, 850, -850}, + CustomValues: []float64{0.5, 1.0}, + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + th := test.setup() + h, fh, err := th.Convert() + if test.expectedErr != nil { + require.ErrorIs(t, err, test.expectedErr) + return + } + require.Equal(t, test.expectedH, h) + if h != nil { + require.NoError(t, h.Validate()) + } + require.Equal(t, test.expectedFH, fh) + if fh != nil { + require.NoError(t, fh.Validate()) + } + }) + } +} diff --git a/util/fmtutil/format.go b/util/fmtutil/format.go index 9034a90fa7..a10908bb8c 100644 --- a/util/fmtutil/format.go +++ b/util/fmtutil/format.go @@ -113,7 +113,7 @@ func makeTimeseries(wr *prompb.WriteRequest, labels map[string]string, m *dto.Me toTimeseries(wr, labels, timestamp, m.GetCounter().GetValue()) case m.Summary != nil: metricName := labels[model.MetricNameLabel] - // Preserve metric name order with first quantile labels timeseries then sum suffix timeserie and finally count suffix timeserie + // Preserve metric name order with first quantile labels timeseries then sum suffix timeseries and finally count suffix timeseries // Add Summary quantile timeseries quantileLabels := make(map[string]string, len(labels)+1) for key, value := range labels { @@ -125,16 +125,16 @@ func makeTimeseries(wr *prompb.WriteRequest, labels map[string]string, m *dto.Me toTimeseries(wr, quantileLabels, timestamp, q.GetValue()) } // Overwrite label model.MetricNameLabel for count and sum metrics - // Add Summary sum timeserie + // Add Summary sum timeseries labels[model.MetricNameLabel] = metricName + sumStr toTimeseries(wr, labels, timestamp, m.GetSummary().GetSampleSum()) - // Add Summary count timeserie + // Add Summary count timeseries labels[model.MetricNameLabel] = metricName + countStr toTimeseries(wr, labels, timestamp, float64(m.GetSummary().GetSampleCount())) case m.Histogram != nil: metricName := labels[model.MetricNameLabel] - // Preserve metric name order with first bucket suffix timeseries then sum suffix timeserie and finally count suffix timeserie + // Preserve metric name order with first bucket suffix timeseries then sum suffix timeseries and finally count suffix timeseries // Add Histogram bucket timeseries bucketLabels := make(map[string]string, len(labels)+1) for key, value := range labels { @@ -146,10 +146,10 @@ func 
makeTimeseries(wr *prompb.WriteRequest, labels map[string]string, m *dto.Me toTimeseries(wr, bucketLabels, timestamp, float64(b.GetCumulativeCount())) } // Overwrite label model.MetricNameLabel for count and sum metrics - // Add Histogram sum timeserie + // Add Histogram sum timeseries labels[model.MetricNameLabel] = metricName + sumStr toTimeseries(wr, labels, timestamp, m.GetHistogram().GetSampleSum()) - // Add Histogram count timeserie + // Add Histogram count timeseries labels[model.MetricNameLabel] = metricName + countStr toTimeseries(wr, labels, timestamp, float64(m.GetHistogram().GetSampleCount())) diff --git a/util/logging/dedupe.go b/util/logging/dedupe.go index d490a6afdf..e7dff20f78 100644 --- a/util/logging/dedupe.go +++ b/util/logging/dedupe.go @@ -14,12 +14,10 @@ package logging import ( - "bytes" + "context" + "log/slog" "sync" "time" - - "github.com/go-kit/log" - "github.com/go-logfmt/logfmt" ) const ( @@ -28,22 +26,11 @@ const ( maxEntries = 1024 ) -type logfmtEncoder struct { - *logfmt.Encoder - buf bytes.Buffer -} +var _ slog.Handler = (*Deduper)(nil) -var logfmtEncoderPool = sync.Pool{ - New: func() interface{} { - var enc logfmtEncoder - enc.Encoder = logfmt.NewEncoder(&enc.buf) - return &enc - }, -} - -// Deduper implement log.Logger, dedupes log lines. +// Deduper implements slog.Handler, deduping log lines based on a time duration. type Deduper struct { - next log.Logger + next *slog.Logger repeat time.Duration quit chan struct{} mtx sync.RWMutex } // Dedupe log lines to next, only repeating every repeat duration. -func Dedupe(next log.Logger, repeat time.Duration) *Deduper { +func Dedupe(next *slog.Logger, repeat time.Duration) *Deduper { d := &Deduper{ next: next, repeat: repeat, @@ -62,6 +49,63 @@ func Dedupe(next log.Logger, repeat time.Duration) *Deduper { return d } +// Enabled returns true if the Deduper's internal slog.Logger is enabled at the +// provided context and log level, and returns false otherwise. It implements +// slog.Handler. +func (d *Deduper) Enabled(ctx context.Context, level slog.Level) bool { + return d.next.Enabled(ctx, level) +} + +// Handle uses the provided context and slog.Record to deduplicate messages +// over the configured repeat interval. Log records received again within the +// interval are not acted on, and thus dropped. Log records that pass +// deduplication invoke the +// Handle() method on the Deduper's internal slog.Logger's handler, effectively +// chaining log calls to the internal slog.Logger. +func (d *Deduper) Handle(ctx context.Context, r slog.Record) error { + line := r.Message + d.mtx.RLock() + last, ok := d.seen[line] + d.mtx.RUnlock() + + if ok && time.Since(last) < d.repeat { + return nil + } + + d.mtx.Lock() + if len(d.seen) < maxEntries { + d.seen[line] = time.Now() + } + d.mtx.Unlock() + + return d.next.Handler().Handle(ctx, r.Clone()) +} + +// WithAttrs adds the provided attributes to the Deduper's internal +// slog.Logger. It implements slog.Handler. +func (d *Deduper) WithAttrs(attrs []slog.Attr) slog.Handler { + return &Deduper{ + next: slog.New(d.next.Handler().WithAttrs(attrs)), + repeat: d.repeat, + quit: d.quit, + seen: d.seen, + } +} + +// WithGroup adds the provided group name to the Deduper's internal +// slog.Logger. It implements slog.Handler.
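
(The WithGroup implementation follows.) For reference, Enabled, Handle, WithAttrs, and WithGroup are exactly the four methods of the standard library's slog.Handler interface, which the var _ slog.Handler assertion above checks at compile time:

    // From the Go standard library's log/slog package:
    type Handler interface {
        Enabled(ctx context.Context, level Level) bool
        Handle(ctx context.Context, r Record) error
        WithAttrs(attrs []Attr) Handler
        WithGroup(name string) Handler
    }
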
+func (d *Deduper) WithGroup(name string) slog.Handler { + if name == "" { + return d + } + + return &Deduper{ + next: slog.New(d.next.Handler().WithGroup(name)), + repeat: d.repeat, + quit: d.quit, + seen: d.seen, + } +} + // Stop the Deduper. func (d *Deduper) Stop() { close(d.quit) @@ -87,44 +131,3 @@ func (d *Deduper) run() { } } } - -// Log implements log.Logger. -func (d *Deduper) Log(keyvals ...interface{}) error { - line, err := encode(keyvals...) - if err != nil { - return err - } - - d.mtx.RLock() - last, ok := d.seen[line] - d.mtx.RUnlock() - - if ok && time.Since(last) < d.repeat { - return nil - } - - d.mtx.Lock() - if len(d.seen) < maxEntries { - d.seen[line] = time.Now() - } - d.mtx.Unlock() - - return d.next.Log(keyvals...) -} - -func encode(keyvals ...interface{}) (string, error) { - enc := logfmtEncoderPool.Get().(*logfmtEncoder) - enc.buf.Reset() - defer logfmtEncoderPool.Put(enc) - - if err := enc.EncodeKeyvals(keyvals...); err != nil { - return "", err - } - - // Add newline to the end of the buffer - if err := enc.EndRecord(); err != nil { - return "", err - } - - return enc.buf.String(), nil -} diff --git a/util/logging/dedupe_test.go b/util/logging/dedupe_test.go index e05d6454c5..5baa90b038 100644 --- a/util/logging/dedupe_test.go +++ b/util/logging/dedupe_test.go @@ -14,34 +14,45 @@ package logging import ( + "bytes" + "log/slog" + "strings" "testing" "time" + "github.com/prometheus/common/promslog" "github.com/stretchr/testify/require" ) -type counter int - -func (c *counter) Log(...interface{}) error { - (*c)++ - return nil -} - func TestDedupe(t *testing.T) { - var c counter - d := Dedupe(&c, 100*time.Millisecond) + var buf bytes.Buffer + d := Dedupe(promslog.New(&promslog.Config{Writer: &buf}), 100*time.Millisecond) + dlog := slog.New(d) defer d.Stop() // Log 10 times quickly, ensure they are deduped. for i := 0; i < 10; i++ { - err := d.Log("msg", "hello") - require.NoError(t, err) + dlog.Info("test", "hello", "world") } - require.Equal(t, 1, int(c)) + + // Trim empty lines + lines := []string{} + for _, line := range strings.Split(buf.String(), "\n") { + if line != "" { + lines = append(lines, line) + } + } + require.Len(t, lines, 1) // Wait, then log again, make sure it is logged. time.Sleep(200 * time.Millisecond) - err := d.Log("msg", "hello") - require.NoError(t, err) - require.Equal(t, 2, int(c)) + dlog.Info("test", "hello", "world") + // Trim empty lines + lines = []string{} + for _, line := range strings.Split(buf.String(), "\n") { + if line != "" { + lines = append(lines, line) + } + } + require.Len(t, lines, 2) } diff --git a/util/logging/file.go b/util/logging/file.go index 2afa828547..9db7fb722b 100644 --- a/util/logging/file.go +++ b/util/logging/file.go @@ -14,21 +14,17 @@ package logging import ( + "context" "fmt" + "log/slog" "os" - "time" - "github.com/go-kit/log" + "github.com/prometheus/common/promslog" ) -var timestampFormat = log.TimestampFormat( - func() time.Time { return time.Now().UTC() }, - "2006-01-02T15:04:05.000Z07:00", -) - -// JSONFileLogger represents a logger that writes JSON to a file. +// JSONFileLogger represents a logger that writes JSON to a file. It implements the promql.QueryLogger interface. 
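
(The JSONFileLogger struct definition continues below.) As the rewritten dedupe test above demonstrates, a Deduper sits under an ordinary *slog.Logger as its handler; the wiring in isolation, sketched under the same API shown in this diff:

    package main

    import (
        "log/slog"
        "time"

        "github.com/prometheus/common/promslog"
        "github.com/prometheus/prometheus/util/logging"
    )

    func main() {
        d := logging.Dedupe(promslog.New(&promslog.Config{}), time.Minute)
        defer d.Stop()

        logger := slog.New(d)        // the Deduper is the slog.Handler
        logger.Info("noisy message") // logged
        logger.Info("noisy message") // dropped: same message seen within the repeat window
    }
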
type JSONFileLogger struct { - logger log.Logger + logger *slog.Logger file *os.File } @@ -40,21 +36,30 @@ func NewJSONFileLogger(s string) (*JSONFileLogger, error) { f, err := os.OpenFile(s, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o666) if err != nil { - return nil, fmt.Errorf("can't create json logger: %w", err) + return nil, fmt.Errorf("can't create json log file: %w", err) } + jsonFmt := &promslog.AllowedFormat{} + _ = jsonFmt.Set("json") return &JSONFileLogger{ - logger: log.With(log.NewJSONLogger(f), "ts", timestampFormat), + logger: promslog.New(&promslog.Config{Format: jsonFmt, Writer: f}), file: f, }, nil } -// Close closes the underlying file. +// Close closes the underlying file. It implements the promql.QueryLogger interface. func (l *JSONFileLogger) Close() error { return l.file.Close() } -// Log calls the Log function of the underlying logger. -func (l *JSONFileLogger) Log(i ...interface{}) error { - return l.logger.Log(i...) +// With calls the `With()` method on the underlying `log/slog.Logger` with the +// provided args. It implements the promql.QueryLogger interface. +func (l *JSONFileLogger) With(args ...any) { + l.logger = l.logger.With(args...) +} + +// Log calls the `Log()` method on the underlying `log/slog.Logger` with the +// provided msg and args. It implements the promql.QueryLogger interface. +func (l *JSONFileLogger) Log(ctx context.Context, level slog.Level, msg string, args ...any) { + l.logger.Log(ctx, level, msg, args...) } diff --git a/util/logging/file_test.go b/util/logging/file_test.go index 0e760a4848..c9f7240fee 100644 --- a/util/logging/file_test.go +++ b/util/logging/file_test.go @@ -14,6 +14,8 @@ package logging import ( + "context" + "log/slog" "os" "strings" "testing" @@ -34,12 +36,13 @@ func TestJSONFileLogger_basic(t *testing.T) { require.NoError(t, err) require.NotNil(t, l, "logger can't be nil") - err = l.Log("test", "yes") + l.Log(context.Background(), slog.LevelInfo, "test", "hello", "world") require.NoError(t, err) r := make([]byte, 1024) _, err = f.Read(r) require.NoError(t, err) - result, err := regexp.Match(`^{"test":"yes","ts":"[^"]+"}\n`, r) + + result, err := regexp.Match(`^{"time":"[^"]+","level":"INFO","source":"file.go:\d+","msg":"test","hello":"world"}\n`, r) require.NoError(t, err) require.True(t, result, "unexpected content: %s", r) @@ -63,14 +66,14 @@ func TestJSONFileLogger_parallel(t *testing.T) { require.NoError(t, err) require.NotNil(t, l, "logger can't be nil") - err = l.Log("test", "yes") + l.Log(context.Background(), slog.LevelInfo, "test", "hello", "world") require.NoError(t, err) l2, err := NewJSONFileLogger(f.Name()) require.NoError(t, err) require.NotNil(t, l, "logger can't be nil") - err = l2.Log("test", "yes") + l2.Log(context.Background(), slog.LevelInfo, "test", "hello", "world") require.NoError(t, err) err = l.Close() diff --git a/util/notifications/notifications.go b/util/notifications/notifications.go new file mode 100644 index 0000000000..4888a0b664 --- /dev/null +++ b/util/notifications/notifications.go @@ -0,0 +1,185 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package notifications + +import ( + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +const ( + ConfigurationUnsuccessful = "Configuration reload has failed." + StartingUp = "Prometheus is starting and replaying the write-ahead log (WAL)." + ShuttingDown = "Prometheus is shutting down and gracefully stopping all operations." +) + +// Notification represents an individual notification message. +type Notification struct { + Text string `json:"text"` + Date time.Time `json:"date"` + Active bool `json:"active"` +} + +// Notifications stores a list of Notification objects. +// It also manages live subscribers that receive notifications via channels. +type Notifications struct { + mu sync.Mutex + notifications []Notification + subscribers map[chan Notification]struct{} // Active subscribers. + maxSubscribers int + + subscriberGauge prometheus.Gauge + notificationsSent prometheus.Counter + notificationsDropped prometheus.Counter +} + +// NewNotifications creates a new Notifications instance. +func NewNotifications(maxSubscribers int, reg prometheus.Registerer) *Notifications { + n := &Notifications{ + subscribers: make(map[chan Notification]struct{}), + maxSubscribers: maxSubscribers, + subscriberGauge: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "prometheus", + Subsystem: "api", + Name: "notification_active_subscribers", + Help: "The current number of active notification subscribers.", + }), + notificationsSent: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "api", + Name: "notification_updates_sent_total", + Help: "Total number of notification updates sent.", + }), + notificationsDropped: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "api", + Name: "notification_updates_dropped_total", + Help: "Total number of notification updates dropped.", + }), + } + + if reg != nil { + reg.MustRegister(n.subscriberGauge, n.notificationsSent, n.notificationsDropped) + } + + return n +} + +// AddNotification adds a new notification or updates the timestamp if it already exists. +func (n *Notifications) AddNotification(text string) { + n.mu.Lock() + defer n.mu.Unlock() + + for i, notification := range n.notifications { + if notification.Text == text { + n.notifications[i].Date = time.Now() + + n.notifySubscribers(n.notifications[i]) + return + } + } + + newNotification := Notification{ + Text: text, + Date: time.Now(), + Active: true, + } + n.notifications = append(n.notifications, newNotification) + + n.notifySubscribers(newNotification) +} + +// notifySubscribers sends a notification to all active subscribers. +func (n *Notifications) notifySubscribers(notification Notification) { + for sub := range n.subscribers { + // Non-blocking send to avoid subscriber blocking issues. + n.notificationsSent.Inc() + select { + case sub <- notification: + // Notification sent to the subscriber. + default: + // Drop the notification if the subscriber's channel is full. 
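
(The dropped-notifications counter increment follows.) The select with a default case used here is Go's non-blocking send idiom, chosen because notifySubscribers runs while holding n.mu and must never stall on a slow subscriber. In isolation, with a hypothetical channel:

    package main

    import "fmt"

    func main() {
        ch := make(chan int, 1) // hypothetical subscriber channel with buffer 1
        for v := 1; v <= 2; v++ {
            select {
            case ch <- v:
                fmt.Println("delivered", v) // the buffer had room
            default:
                fmt.Println("dropped", v) // buffer full: drop rather than block the publisher
            }
        }
    }
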
+ n.notificationsDropped.Inc() + } + } +} + +// DeleteNotification removes the first notification that matches the provided text. +// The deleted notification is sent to subscribers with Active: false before being removed. +func (n *Notifications) DeleteNotification(text string) { + n.mu.Lock() + defer n.mu.Unlock() + + // Iterate through the notifications to find the matching text. + for i, notification := range n.notifications { + if notification.Text == text { + // Mark the notification as inactive and notify subscribers. + notification.Active = false + n.notifySubscribers(notification) + + // Remove the notification from the list. + n.notifications = append(n.notifications[:i], n.notifications[i+1:]...) + return + } + } +} + +// Get returns a copy of the list of notifications for safe access outside the struct. +func (n *Notifications) Get() []Notification { + n.mu.Lock() + defer n.mu.Unlock() + + // Return a copy of the notifications slice to avoid modifying the original slice outside. + notificationsCopy := make([]Notification, len(n.notifications)) + copy(notificationsCopy, n.notifications) + return notificationsCopy +} + +// Sub allows a client to subscribe to live notifications. +// It returns a channel where the subscriber will receive notifications and a function to unsubscribe. +// Each subscriber has its own goroutine to handle notifications and prevent blocking. +func (n *Notifications) Sub() (<-chan Notification, func(), bool) { + n.mu.Lock() + defer n.mu.Unlock() + + if len(n.subscribers) >= n.maxSubscribers { + return nil, nil, false + } + + ch := make(chan Notification, 10) // Buffered channel to prevent blocking. + + // Add the new subscriber to the list. + n.subscribers[ch] = struct{}{} + n.subscriberGauge.Set(float64(len(n.subscribers))) + + // Send all current notifications to the new subscriber. + for _, notification := range n.notifications { + ch <- notification + } + + // Unsubscribe function to remove the channel from subscribers. + unsubscribe := func() { + n.mu.Lock() + defer n.mu.Unlock() + + // Close the channel and remove it from the subscribers map. + close(ch) + delete(n.subscribers, ch) + n.subscriberGauge.Set(float64(len(n.subscribers))) + } + + return ch, unsubscribe, true +} diff --git a/util/notifications/notifications_test.go b/util/notifications/notifications_test.go new file mode 100644 index 0000000000..e487e9ce54 --- /dev/null +++ b/util/notifications/notifications_test.go @@ -0,0 +1,223 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package notifications + +import ( + "sync" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// TestNotificationLifecycle tests adding, modifying, and deleting notifications. +func TestNotificationLifecycle(t *testing.T) { + notifs := NewNotifications(10, nil) + + // Add a notification. + notifs.AddNotification("Test Notification 1") + + // Check if the notification was added. 
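
(The test's assertions resume below.) Sub's three return values shape the consumer side; a hypothetical subscriber, separate from the tests that follow:

    package main

    import (
        "fmt"

        "github.com/prometheus/prometheus/util/notifications"
    )

    func main() {
        notifs := notifications.NewNotifications(16, nil)
        ch, unsubscribe, ok := notifs.Sub()
        if !ok {
            return // maxSubscribers reached; no channel was allocated
        }
        go notifs.AddNotification("example")
        n := <-ch
        fmt.Println(n.Date, n.Text, n.Active)
        unsubscribe() // closes ch and removes it from the subscriber map
    }
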
+ notifications := notifs.Get() + require.Len(t, notifications, 1, "Expected 1 notification after addition.") + require.Equal(t, "Test Notification 1", notifications[0].Text, "Notification text mismatch.") + require.True(t, notifications[0].Active, "Expected notification to be active.") + + // Modify the notification. + notifs.AddNotification("Test Notification 1") + notifications = notifs.Get() + require.Len(t, notifications, 1, "Expected 1 notification after modification.") + + // Delete the notification. + notifs.DeleteNotification("Test Notification 1") + notifications = notifs.Get() + require.Empty(t, notifications, "Expected no notifications after deletion.") +} + +// TestSubscriberReceivesNotifications tests that a subscriber receives notifications, including modifications and deletions. +func TestSubscriberReceivesNotifications(t *testing.T) { + notifs := NewNotifications(10, nil) + + // Subscribe to notifications. + sub, unsubscribe, ok := notifs.Sub() + require.True(t, ok) + + var wg sync.WaitGroup + wg.Add(1) + + receivedNotifications := make([]Notification, 0) + + // Goroutine to listen for notifications. + go func() { + defer wg.Done() + for notification := range sub { + receivedNotifications = append(receivedNotifications, notification) + } + }() + + // Add notifications. + notifs.AddNotification("Test Notification 1") + notifs.AddNotification("Test Notification 2") + + // Modify a notification. + notifs.AddNotification("Test Notification 1") + + // Delete a notification. + notifs.DeleteNotification("Test Notification 2") + + // Wait for notifications to propagate. + time.Sleep(100 * time.Millisecond) + + unsubscribe() + wg.Wait() // Wait for the subscriber goroutine to finish. + + // Verify that we received the expected number of notifications. + require.Len(t, receivedNotifications, 4, "Expected 4 notifications (2 active, 1 modified, 1 deleted).") + + // Check the content and state of received notifications. + expected := []struct { + Text string + Active bool + }{ + {"Test Notification 1", true}, + {"Test Notification 2", true}, + {"Test Notification 1", true}, + {"Test Notification 2", false}, + } + + for i, n := range receivedNotifications { + require.Equal(t, expected[i].Text, n.Text, "Notification text mismatch at index %d.", i) + require.Equal(t, expected[i].Active, n.Active, "Notification active state mismatch at index %d.", i) + } +} + +// TestMultipleSubscribers tests that multiple subscribers receive notifications independently. +func TestMultipleSubscribers(t *testing.T) { + notifs := NewNotifications(10, nil) + + // Subscribe two subscribers to notifications. + sub1, unsubscribe1, ok1 := notifs.Sub() + require.True(t, ok1) + + sub2, unsubscribe2, ok2 := notifs.Sub() + require.True(t, ok2) + + var wg sync.WaitGroup + wg.Add(2) + + receivedSub1 := make([]Notification, 0) + receivedSub2 := make([]Notification, 0) + + // Goroutine for subscriber 1. + go func() { + defer wg.Done() + for notification := range sub1 { + receivedSub1 = append(receivedSub1, notification) + } + }() + + // Goroutine for subscriber 2. + go func() { + defer wg.Done() + for notification := range sub2 { + receivedSub2 = append(receivedSub2, notification) + } + }() + + // Add and delete notifications. + notifs.AddNotification("Test Notification 1") + notifs.DeleteNotification("Test Notification 1") + + // Wait for notifications to propagate. + time.Sleep(100 * time.Millisecond) + + // Unsubscribe both. 
+ unsubscribe1() + unsubscribe2() + + wg.Wait() + + // Both subscribers should have received the same 2 notifications. + require.Len(t, receivedSub1, 2, "Expected 2 notifications for subscriber 1.") + require.Len(t, receivedSub2, 2, "Expected 2 notifications for subscriber 2.") + + // Verify that both subscribers received the same notifications. + for i := 0; i < 2; i++ { + require.Equal(t, receivedSub1[i], receivedSub2[i], "Subscriber notification mismatch at index %d.", i) + } +} + +// TestUnsubscribe tests that unsubscribing prevents further notifications from being received. +func TestUnsubscribe(t *testing.T) { + notifs := NewNotifications(10, nil) + + // Subscribe to notifications. + sub, unsubscribe, ok := notifs.Sub() + require.True(t, ok) + + var wg sync.WaitGroup + wg.Add(1) + + receivedNotifications := make([]Notification, 0) + + // Goroutine to listen for notifications. + go func() { + defer wg.Done() + for notification := range sub { + receivedNotifications = append(receivedNotifications, notification) + } + }() + + // Add a notification and then unsubscribe. + notifs.AddNotification("Test Notification 1") + time.Sleep(100 * time.Millisecond) // Allow time for notification delivery. + unsubscribe() // Unsubscribe. + + // Add another notification after unsubscribing. + notifs.AddNotification("Test Notification 2") + + // Wait for the subscriber goroutine to finish. + wg.Wait() + + // Only the first notification should have been received. + require.Len(t, receivedNotifications, 1, "Expected 1 notification before unsubscribe.") + require.Equal(t, "Test Notification 1", receivedNotifications[0].Text, "Unexpected notification text.") +} + +// TestMaxSubscribers tests that exceeding the max subscribers limit prevents additional subscriptions. +func TestMaxSubscribers(t *testing.T) { + maxSubscribers := 2 + notifs := NewNotifications(maxSubscribers, nil) + + // Subscribe the maximum number of subscribers. + _, unsubscribe1, ok1 := notifs.Sub() + require.True(t, ok1, "Expected first subscription to succeed.") + + _, unsubscribe2, ok2 := notifs.Sub() + require.True(t, ok2, "Expected second subscription to succeed.") + + // Try to subscribe more than the max allowed. + _, _, ok3 := notifs.Sub() + require.False(t, ok3, "Expected third subscription to fail due to max subscriber limit.") + + // Unsubscribe one subscriber and try again. + unsubscribe1() + + _, unsubscribe4, ok4 := notifs.Sub() + require.True(t, ok4, "Expected subscription to succeed after unsubscribing a subscriber.") + + // Clean up the subscriptions. + unsubscribe2() + unsubscribe4() +} diff --git a/util/runtime/limits_default.go b/util/runtime/limits_default.go index 0126adb1a8..156747d450 100644 --- a/util/runtime/limits_default.go +++ b/util/runtime/limits_default.go @@ -23,7 +23,7 @@ import ( // syscall.RLIM_INFINITY is a constant. // Its type is int on most architectures but there are exceptions such as loong64. -// Uniform it to uint accorind to the standard. +// Uniform it to uint according to the standard. // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_resource.h.html var unlimited uint64 = syscall.RLIM_INFINITY & math.MaxUint64 diff --git a/util/teststorage/storage.go b/util/teststorage/storage.go index 7d1f9dda24..e15d591e0c 100644 --- a/util/teststorage/storage.go +++ b/util/teststorage/storage.go @@ -30,15 +30,15 @@ import ( // New returns a new TestStorage for testing purposes // that removes all associated files on closing. 
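
(New's updated signature follows.) Making the window a variadic parameter keeps every existing call site compiling while letting new tests opt in to out-of-order ingestion; a sketch, assuming the window is in milliseconds as elsewhere in tsdb.Options:

    package example

    import (
        "testing"

        "github.com/prometheus/prometheus/util/teststorage"
    )

    func TestWithOutOfOrder(t *testing.T) {
        s1 := teststorage.New(t) // OutOfOrderTimeWindow = 0: the previous behavior
        defer s1.Close()

        s2 := teststorage.New(t, 300_000) // accept samples up to 5 minutes out of order
        defer s2.Close()
    }
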
-func New(t testutil.T) *TestStorage { - stor, err := NewWithError() +func New(t testutil.T, outOfOrderTimeWindow ...int64) *TestStorage { + stor, err := NewWithError(outOfOrderTimeWindow...) require.NoError(t, err) return stor } // NewWithError returns a new TestStorage for user facing tests, which reports // errors directly. -func NewWithError() (*TestStorage, error) { +func NewWithError(outOfOrderTimeWindow ...int64) (*TestStorage, error) { dir, err := os.MkdirTemp("", "test_storage") if err != nil { return nil, fmt.Errorf("opening test directory: %w", err) } @@ -51,6 +51,14 @@ func NewWithError() (*TestStorage, error) { opts.MaxBlockDuration = int64(24 * time.Hour / time.Millisecond) opts.RetentionDuration = 0 opts.EnableNativeHistograms = true + + // Set OutOfOrderTimeWindow if provided, otherwise use default (0) + if len(outOfOrderTimeWindow) > 0 { + opts.OutOfOrderTimeWindow = outOfOrderTimeWindow[0] + } else { + opts.OutOfOrderTimeWindow = 0 // Default value is zero + } + db, err := tsdb.Open(dir, nil, nil, opts, tsdb.NewDBStats()) if err != nil { return nil, fmt.Errorf("opening test storage: %w", err) } diff --git a/util/testutil/port.go b/util/testutil/port.go index 1e449b123d..7cf4cf1ccc 100644 --- a/util/testutil/port.go +++ b/util/testutil/port.go @@ -15,21 +15,56 @@ package testutil import ( "net" + "sync" "testing" ) +var ( + mu sync.Mutex + usedPorts []int +) + // RandomUnprivilegedPort returns valid unprivileged random port number which can be used for testing. func RandomUnprivilegedPort(t *testing.T) int { t.Helper() + mu.Lock() + defer mu.Unlock() + port, err := getPort() + if err != nil { + t.Fatal(err) + } + + for portWasUsed(port) { + port, err = getPort() + if err != nil { + t.Fatal(err) + } + } + + usedPorts = append(usedPorts, port) + + return port +} + +func portWasUsed(port int) bool { + for _, usedPort := range usedPorts { + if port == usedPort { + return true + } + } + return false +} + +func getPort() (int, error) { listener, err := net.Listen("tcp", ":0") if err != nil { - t.Fatalf("Listening on random port: %v", err) + return 0, err } if err := listener.Close(); err != nil { - t.Fatalf("Closing listener: %v", err) + return 0, err } - return listener.Addr().(*net.TCPAddr).Port + return listener.Addr().(*net.TCPAddr).Port, nil } diff --git a/util/treecache/treecache.go b/util/treecache/treecache.go index bbbaaf3d6e..4d4b6f544c 100644 --- a/util/treecache/treecache.go +++ b/util/treecache/treecache.go @@ -17,12 +17,11 @@ import ( "bytes" "errors" "fmt" + "log/slog" "strings" "sync" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/go-zookeeper/zk" "github.com/prometheus/client_golang/prometheus" ) @@ -47,19 +46,19 @@ func init() { prometheus.MustRegister(numWatchers) } -// ZookeeperLogger wraps a log.Logger into a zk.Logger. +// ZookeeperLogger wraps a *slog.Logger into a zk.Logger. type ZookeeperLogger struct { - logger log.Logger + logger *slog.Logger } // NewZookeeperLogger is a constructor for ZookeeperLogger. -func NewZookeeperLogger(logger log.Logger) ZookeeperLogger { +func NewZookeeperLogger(logger *slog.Logger) ZookeeperLogger { return ZookeeperLogger{logger: logger} } // Printf implements zk.Logger. func (zl ZookeeperLogger) Printf(s string, i ...interface{}) { - level.Info(zl.logger).Log("msg", fmt.Sprintf(s, i...)) + zl.logger.Info(fmt.Sprintf(s, i...))
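
(The adapter's closing brace follows.) The fmt.Sprintf in the Printf adapter above matters: slog methods are not printf-style, and everything after the message is parsed as alternating key/value pairs, with an unpaired value rendered under the !BADKEY key. A sketch of the difference:

    package main

    import (
        "fmt"
        "log/slog"
    )

    func main() {
        slog.Info("connected to %s", "zk1:2181")
        // -> msg="connected to %s" !BADKEY=zk1:2181 (format string left unexpanded)
        slog.Info(fmt.Sprintf("connected to %s", "zk1:2181"))
        // -> msg="connected to zk1:2181", which is what a printf-style wrapper intends
    }
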
} // A ZookeeperTreeCache keeps data from all children of a Zookeeper path @@ -72,7 +71,7 @@ type ZookeeperTreeCache struct { wg *sync.WaitGroup head *zookeeperTreeCacheNode - logger log.Logger + logger *slog.Logger } // A ZookeeperTreeCacheEvent models a Zookeeper event for a path. @@ -90,7 +89,7 @@ type zookeeperTreeCacheNode struct { } // NewZookeeperTreeCache creates a new ZookeeperTreeCache for a given path. -func NewZookeeperTreeCache(conn *zk.Conn, path string, events chan ZookeeperTreeCacheEvent, logger log.Logger) *ZookeeperTreeCache { +func NewZookeeperTreeCache(conn *zk.Conn, path string, events chan ZookeeperTreeCacheEvent, logger *slog.Logger) *ZookeeperTreeCache { tc := &ZookeeperTreeCache{ conn: conn, prefix: path, @@ -144,20 +143,20 @@ func (tc *ZookeeperTreeCache) loop(path string) { err := tc.recursiveNodeUpdate(path, tc.head) if err != nil { - level.Error(tc.logger).Log("msg", "Error during initial read of Zookeeper", "err", err) + tc.logger.Error("Error during initial read of Zookeeper", "err", err) failure() } for { select { case ev := <-tc.head.events: - level.Debug(tc.logger).Log("msg", "Received Zookeeper event", "event", ev) + tc.logger.Debug("Received Zookeeper event", "event", ev) if failureMode { continue } if ev.Type == zk.EventNotWatching { - level.Info(tc.logger).Log("msg", "Lost connection to Zookeeper.") + tc.logger.Info("Lost connection to Zookeeper.") failure() } else { path := strings.TrimPrefix(ev.Path, tc.prefix) @@ -178,15 +177,15 @@ func (tc *ZookeeperTreeCache) loop(path string) { switch err := tc.recursiveNodeUpdate(ev.Path, node); { case err != nil: - level.Error(tc.logger).Log("msg", "Error during processing of Zookeeper event", "err", err) + tc.logger.Error("Error during processing of Zookeeper event", "err", err) failure() case tc.head.data == nil: - level.Error(tc.logger).Log("msg", "Error during processing of Zookeeper event", "err", "path no longer exists", "path", tc.prefix) + tc.logger.Error("Error during processing of Zookeeper event", "err", "path no longer exists", "path", tc.prefix) failure() } } case <-retryChan: - level.Info(tc.logger).Log("msg", "Attempting to resync state with Zookeeper") + tc.logger.Info("Attempting to resync state with Zookeeper") previousState := &zookeeperTreeCacheNode{ children: tc.head.children, } @@ -194,13 +193,13 @@ func (tc *ZookeeperTreeCache) loop(path string) { tc.head.children = make(map[string]*zookeeperTreeCacheNode) if err := tc.recursiveNodeUpdate(tc.prefix, tc.head); err != nil { - level.Error(tc.logger).Log("msg", "Error during Zookeeper resync", "err", err) + tc.logger.Error("Error during Zookeeper resync", "err", err) // Revert to our previous state. tc.head.children = previousState.children failure() } else { tc.resyncState(tc.prefix, tc.head, previousState) - level.Info(tc.logger).Log("msg", "Zookeeper resync successful") + tc.logger.Info("Zookeeper resync successful") failureMode = false } case <-tc.stop: diff --git a/util/zeropool/pool_test.go b/util/zeropool/pool_test.go index fea8200226..e9793f64d7 100644 --- a/util/zeropool/pool_test.go +++ b/util/zeropool/pool_test.go @@ -81,7 +81,7 @@ func TestPool(t *testing.T) { t.Run("does not allocate", func(t *testing.T) { pool := zeropool.New(func() []byte { return make([]byte, 1024) }) - // Warm up, this will alloate one slice. + // Warm up, this will allocate one slice. 
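+	// (Background: sync.Pool boxes non-pointer values such as slices in an
+	// interface{} on every Put, and that boxing is itself an allocation;
+	// zeropool exists to avoid exactly that per-Put cost.)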
slice := pool.Get() pool.Put(slice) diff --git a/web/api/v1/api.go b/web/api/v1/api.go index d58be211f2..392dfc6aab 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -15,8 +15,12 @@ package v1 import ( "context" + "crypto/sha1" + "encoding/hex" + "encoding/json" "errors" "fmt" + "log/slog" "math" "math/rand" "net" @@ -30,8 +34,6 @@ import ( "strings" "time" - "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/regexp" jsoniter "github.com/json-iterator/go" "github.com/munnerz/goautoneg" @@ -53,6 +55,7 @@ import ( "github.com/prometheus/prometheus/tsdb/index" "github.com/prometheus/prometheus/util/annotations" "github.com/prometheus/prometheus/util/httputil" + "github.com/prometheus/prometheus/util/notifications" "github.com/prometheus/prometheus/util/stats" ) @@ -202,16 +205,18 @@ type API struct { ready func(http.HandlerFunc) http.HandlerFunc globalURLOptions GlobalURLOptions - db TSDBAdminStats - dbDir string - enableAdmin bool - logger log.Logger - CORSOrigin *regexp.Regexp - buildInfo *PrometheusVersion - runtimeInfo func() (RuntimeInfo, error) - gatherer prometheus.Gatherer - isAgent bool - statsRenderer StatsRenderer + db TSDBAdminStats + dbDir string + enableAdmin bool + logger *slog.Logger + CORSOrigin *regexp.Regexp + buildInfo *PrometheusVersion + runtimeInfo func() (RuntimeInfo, error) + gatherer prometheus.Gatherer + isAgent bool + statsRenderer StatsRenderer + notificationsGetter func() []notifications.Notification + notificationsSub func() (<-chan notifications.Notification, func(), bool) remoteWriteHandler http.Handler remoteReadHandler http.Handler @@ -236,7 +241,7 @@ func NewAPI( db TSDBAdminStats, dbDir string, enableAdmin bool, - logger log.Logger, + logger *slog.Logger, rr func(context.Context) RulesRetriever, remoteReadSampleLimit int, remoteReadConcurrencyLimit int, @@ -245,6 +250,8 @@ func NewAPI( corsOrigin *regexp.Regexp, runtimeInfo func() (RuntimeInfo, error), buildInfo *PrometheusVersion, + notificationsGetter func() []notifications.Notification, + notificationsSub func() (<-chan notifications.Notification, func(), bool), gatherer prometheus.Gatherer, registerer prometheus.Registerer, statsRenderer StatsRenderer, @@ -261,22 +268,24 @@ func NewAPI( targetRetriever: tr, alertmanagerRetriever: ar, - now: time.Now, - config: configFunc, - flagsMap: flagsMap, - ready: readyFunc, - globalURLOptions: globalURLOptions, - db: db, - dbDir: dbDir, - enableAdmin: enableAdmin, - rulesRetriever: rr, - logger: logger, - CORSOrigin: corsOrigin, - runtimeInfo: runtimeInfo, - buildInfo: buildInfo, - gatherer: gatherer, - isAgent: isAgent, - statsRenderer: DefaultStatsRenderer, + now: time.Now, + config: configFunc, + flagsMap: flagsMap, + ready: readyFunc, + globalURLOptions: globalURLOptions, + db: db, + dbDir: dbDir, + enableAdmin: enableAdmin, + rulesRetriever: rr, + logger: logger, + CORSOrigin: corsOrigin, + runtimeInfo: runtimeInfo, + buildInfo: buildInfo, + gatherer: gatherer, + isAgent: isAgent, + statsRenderer: DefaultStatsRenderer, + notificationsGetter: notificationsGetter, + notificationsSub: notificationsSub, remoteReadHandler: remote.NewReadHandler(logger, registerer, q, configFunc, remoteReadSampleLimit, remoteReadConcurrencyLimit, remoteReadMaxBytesInFrame), } @@ -366,6 +375,9 @@ func (api *API) Register(r *route.Router) { r.Get("/format_query", wrapAgent(api.formatQuery)) r.Post("/format_query", wrapAgent(api.formatQuery)) + r.Get("/parse_query", wrapAgent(api.parseQuery)) + r.Post("/parse_query", wrapAgent(api.parseQuery)) + 
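The new `/api/v1/parse_query` endpoint registered above (handler further down, AST translation in `translate_ast.go` later in this patch) returns a JSON tree of the parsed expression inside the usual `{"status":..., "data":...}` envelope. A sketch of calling it from Go, assuming a Prometheus server on localhost:9090:

```go
package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
	"net/url"
)

func main() {
	// The handler reads the expression from the "query" form value.
	resp, err := http.PostForm(
		"http://localhost:9090/api/v1/parse_query",
		url.Values{"query": {"up == 1"}},
	)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(body))
	// For a bare selector such as "up", the "data" field comes out roughly as:
	// {"type":"vectorSelector","name":"up","offset":0,
	//  "matchers":[{"name":"__name__","value":"up","type":"="}],
	//  "timestamp":null,"startOrEnd":null}
}
```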
r.Get("/labels", wrapAgent(api.labelNames)) r.Post("/labels", wrapAgent(api.labelNames)) r.Get("/label/:name/values", wrapAgent(api.labelValues)) @@ -387,6 +399,8 @@ func (api *API) Register(r *route.Router) { r.Get("/status/flags", wrap(api.serveFlags)) r.Get("/status/tsdb", wrapAgent(api.serveTSDBStatus)) r.Get("/status/walreplay", api.serveWALReplayStatus) + r.Get("/notifications", api.notifications) + r.Get("/notifications/live", api.notificationsSSE) r.Post("/read", api.ready(api.remoteRead)) r.Post("/write", api.ready(api.remoteWrite)) r.Post("/otlp/v1/metrics", api.ready(api.otlpWrite)) @@ -485,6 +499,15 @@ func (api *API) formatQuery(r *http.Request) (result apiFuncResult) { return apiFuncResult{expr.Pretty(0), nil, nil, nil} } +func (api *API) parseQuery(r *http.Request) apiFuncResult { + expr, err := parser.ParseExpr(r.FormValue("query")) + if err != nil { + return invalidParamError(err, "query") + } + + return apiFuncResult{data: translateAST(expr), err: nil, warnings: nil, finalizer: nil} +} + func extractQueryOpts(r *http.Request) (promql.QueryOpts, error) { var duration time.Duration @@ -721,7 +744,12 @@ func (api *API) labelValues(r *http.Request) (result apiFuncResult) { ctx := r.Context() name := route.Param(ctx, "name") - if !model.LabelNameRE.MatchString(name) { + if strings.HasPrefix(name, "U__") { + name = model.UnescapeName(name, model.ValueEncodingEscaping) + } + + label := model.LabelName(name) + if !label.IsValid() { return apiFuncResult{nil, &apiError{errorBadData, fmt.Errorf("invalid label name: %q", name)}, nil, nil} } @@ -812,12 +840,22 @@ func (api *API) labelValues(r *http.Request) (result apiFuncResult) { } var ( - // MinTime is the default timestamp used for the begin of optional time ranges. - // Exposed to let downstream projects to reference it. + // MinTime is the default timestamp used for the start of optional time ranges. + // Exposed to let downstream projects reference it. + // + // Historical note: This should just be time.Unix(math.MinInt64/1000, 0).UTC(), + // but it was set to a higher value in the past due to a misunderstanding. + // The value is still low enough for practical purposes, so we don't want + // to change it now, avoiding confusion for importers of this variable. MinTime = time.Unix(math.MinInt64/1000+62135596801, 0).UTC() // MaxTime is the default timestamp used for the end of optional time ranges. // Exposed to let downstream projects to reference it. + // + // Historical note: This should just be time.Unix(math.MaxInt64/1000, 0).UTC(), + // but it was set to a lower value in the past due to a misunderstanding. + // The value is still high enough for practical purposes, so we don't want + // to change it now, avoiding confusion for importers of this variable. MaxTime = time.Unix(math.MaxInt64/1000-62135596801, 999999999).UTC() minTimeFormatted = MinTime.Format(time.RFC3339Nano) @@ -884,7 +922,7 @@ func (api *API) series(r *http.Request) (result apiFuncResult) { s := q.Select(ctx, true, hints, mset...) sets = append(sets, s) } - set = storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge) + set = storage.NewMergeSeriesSet(sets, 0, storage.ChainedSeriesMerge) } else { // At this point at least one match exists. set = q.Select(ctx, false, hints, matcherSets[0]...) @@ -1340,7 +1378,8 @@ func (api *API) metricMetadata(r *http.Request) apiFuncResult { // RuleDiscovery has info for all rules. 
type RuleDiscovery struct { - RuleGroups []*RuleGroup `json:"groups"` + RuleGroups []*RuleGroup `json:"groups"` + GroupNextToken string `json:"groupNextToken,omitempty"` } // RuleGroup has info for rules which are part of a group. @@ -1427,8 +1466,23 @@ func (api *API) rules(r *http.Request) apiFuncResult { return invalidParamError(err, "exclude_alerts") } + maxGroups, nextToken, parseErr := parseListRulesPaginationRequest(r) + if parseErr != nil { + return *parseErr + } + rgs := make([]*RuleGroup, 0, len(ruleGroups)) + + foundToken := false + for _, grp := range ruleGroups { + if maxGroups > 0 && nextToken != "" && !foundToken { + if nextToken != getRuleGroupNextToken(grp.File(), grp.Name()) { + continue + } + foundToken = true + } + if len(rgSet) > 0 { if _, ok := rgSet[grp.Name()]; !ok { continue @@ -1473,6 +1527,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { if !excludeAlerts { activeAlerts = rulesAlertsToAPIAlerts(rule.ActiveAlerts()) } + enrichedRule = AlertingRule{ State: rule.State().String(), Name: rule.Name(), @@ -1488,6 +1543,7 @@ func (api *API) rules(r *http.Request) apiFuncResult { LastEvaluation: rule.GetEvaluationTimestamp(), Type: "alerting", } + case *rules.RecordingRule: if !returnRecording { break @@ -1514,9 +1570,20 @@ func (api *API) rules(r *http.Request) apiFuncResult { // If the rule group response has no rules, skip it - this means we filtered all the rules of this group. if len(apiRuleGroup.Rules) > 0 { + if maxGroups > 0 && len(rgs) == int(maxGroups) { + // We've reached the capacity of our page plus one. That means that for sure there will be at least one + // rule group in a subsequent request. Therefore a next token is required. + res.GroupNextToken = getRuleGroupNextToken(grp.File(), grp.Name()) + break + } rgs = append(rgs, apiRuleGroup) } } + + if maxGroups > 0 && nextToken != "" && !foundToken { + return invalidParamError(fmt.Errorf("invalid group_next_token '%v'. 
were rule groups changed?", nextToken), "group_next_token") + } + res.RuleGroups = rgs return apiFuncResult{res, nil, nil, nil} } @@ -1535,6 +1602,44 @@ func parseExcludeAlerts(r *http.Request) (bool, error) { return excludeAlerts, nil } +func parseListRulesPaginationRequest(r *http.Request) (int64, string, *apiFuncResult) { + var ( + parsedMaxGroups int64 = -1 + err error + ) + maxGroups := r.URL.Query().Get("group_limit") + nextToken := r.URL.Query().Get("group_next_token") + + if nextToken != "" && maxGroups == "" { + errResult := invalidParamError(errors.New("group_limit needs to be present in order to paginate over the groups"), "group_next_token") + return -1, "", &errResult + } + + if maxGroups != "" { + parsedMaxGroups, err = strconv.ParseInt(maxGroups, 10, 32) + if err != nil { + errResult := invalidParamError(fmt.Errorf("group_limit needs to be a valid number: %w", err), "group_limit") + return -1, "", &errResult + } + if parsedMaxGroups <= 0 { + errResult := invalidParamError(errors.New("group_limit needs to be greater than 0"), "group_limit") + return -1, "", &errResult + } + } + + if parsedMaxGroups > 0 { + return parsedMaxGroups, nextToken, nil + } + + return -1, "", nil +} + +func getRuleGroupNextToken(file, group string) string { + h := sha1.New() + h.Write([]byte(file + ";" + group)) + return hex.EncodeToString(h.Sum(nil)) +} + type prometheusConfig struct { YAML string `json:"yaml"` } @@ -1656,6 +1761,57 @@ func (api *API) serveWALReplayStatus(w http.ResponseWriter, r *http.Request) { }, nil, "") } +func (api *API) notifications(w http.ResponseWriter, r *http.Request) { + httputil.SetCORS(w, api.CORSOrigin, r) + api.respond(w, r, api.notificationsGetter(), nil, "") +} + +func (api *API) notificationsSSE(w http.ResponseWriter, r *http.Request) { + httputil.SetCORS(w, api.CORSOrigin, r) + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + + // Subscribe to notifications. + notifications, unsubscribe, ok := api.notificationsSub() + if !ok { + w.WriteHeader(http.StatusNoContent) + return + } + defer unsubscribe() + + // Set up a flusher to push the response to the client. + flusher, ok := w.(http.Flusher) + if !ok { + http.Error(w, "Streaming unsupported", http.StatusInternalServerError) + return + } + + // Flush the response to ensure the headers are immediately and eventSource + // onopen is triggered client-side. + flusher.Flush() + + for { + select { + case notification := <-notifications: + // Marshal the notification to JSON. + jsonData, err := json.Marshal(notification) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + continue + } + + // Write the event data in SSE format with JSON content. + fmt.Fprintf(w, "data: %s\n\n", jsonData) + + // Flush the response to ensure the data is sent immediately. + flusher.Flush() + case <-r.Context().Done(): + return + } + } +} + func (api *API) remoteRead(w http.ResponseWriter, r *http.Request) { // This is only really for tests - this will never be nil IRL. 
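The `notificationsSSE` handler above writes one `data: <json>` frame per notification and flushes after each write. A minimal Go client sketch for the stream, assuming only the framing the handler emits (the JSON field name of the notification text is assumed from the tests earlier in this patch):

```go
package main

import (
	"bufio"
	"encoding/json"
	"log"
	"net/http"
	"strings"
)

type notification struct {
	Text string `json:"text"` // assumed field name
}

func main() {
	resp, err := http.Get("http://localhost:9090/api/v1/notifications/live")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	scanner := bufio.NewScanner(resp.Body)
	for scanner.Scan() {
		// Each event arrives as "data: {...}" followed by a blank line.
		if data, ok := strings.CutPrefix(scanner.Text(), "data: "); ok {
			var n notification
			if err := json.Unmarshal([]byte(data), &n); err == nil {
				log.Println("notification:", n.Text)
			}
		}
	}
}
```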
if api.remoteReadHandler != nil { @@ -1677,7 +1833,7 @@ func (api *API) otlpWrite(w http.ResponseWriter, r *http.Request) { if api.otlpWriteHandler != nil { api.otlpWriteHandler.ServeHTTP(w, r) } else { - http.Error(w, "otlp write receiver needs to be enabled with --enable-feature=otlp-write-receiver", http.StatusNotFound) + http.Error(w, "otlp write receiver needs to be enabled with --web.enable-otlp-receiver", http.StatusNotFound) } } @@ -1780,7 +1936,7 @@ func (api *API) respond(w http.ResponseWriter, req *http.Request, data interface b, err := codec.Encode(resp) if err != nil { - level.Error(api.logger).Log("msg", "error marshaling response", "url", req.URL, "err", err) + api.logger.Error("error marshaling response", "url", req.URL, "err", err) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -1788,7 +1944,7 @@ func (api *API) respond(w http.ResponseWriter, req *http.Request, data interface w.Header().Set("Content-Type", codec.ContentType().String()) w.WriteHeader(http.StatusOK) if n, err := w.Write(b); err != nil { - level.Error(api.logger).Log("msg", "error writing response", "url", req.URL, "bytesWritten", n, "err", err) + api.logger.Error("error writing response", "url", req.URL, "bytesWritten", n, "err", err) } } @@ -1818,7 +1974,7 @@ func (api *API) respondError(w http.ResponseWriter, apiErr *apiError, data inter Data: data, }) if err != nil { - level.Error(api.logger).Log("msg", "error marshaling json response", "err", err) + api.logger.Error("error marshaling json response", "err", err) http.Error(w, err.Error(), http.StatusInternalServerError) return } @@ -1846,7 +2002,7 @@ func (api *API) respondError(w http.ResponseWriter, apiErr *apiError, data inter w.Header().Set("Content-Type", "application/json") w.WriteHeader(code) if n, err := w.Write(b); err != nil { - level.Error(api.logger).Log("msg", "error writing response", "bytesWritten", n, "err", err) + api.logger.Error("error writing response", "bytesWritten", n, "err", err) } } diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index ef9d53dd9d..0168bc57e1 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -34,12 +34,11 @@ import ( "github.com/prometheus/prometheus/util/stats" "github.com/prometheus/prometheus/util/testutil" - "github.com/go-kit/log" jsoniter "github.com/json-iterator/go" "github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" - "github.com/prometheus/common/promlog" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/route" "github.com/stretchr/testify/require" @@ -238,7 +237,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.Labels{}, "", true, - log.NewNopLogger(), + promslog.NewNopLogger(), ) rule2 := rules.NewAlertingRule( "test_metric4", @@ -250,7 +249,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.Labels{}, "", true, - log.NewNopLogger(), + promslog.NewNopLogger(), ) rule3 := rules.NewAlertingRule( "test_metric5", @@ -262,7 +261,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.FromStrings("name", "tm5"), "", false, - log.NewNopLogger(), + promslog.NewNopLogger(), ) rule4 := rules.NewAlertingRule( "test_metric6", @@ -274,7 +273,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.Labels{}, "", true, - log.NewNopLogger(), + promslog.NewNopLogger(), ) rule5 := rules.NewAlertingRule( "test_metric7", @@ -286,7 +285,7 @@ func (m *rulesRetrieverMock) CreateAlertingRules() { labels.Labels{}, "", true, - 
log.NewNopLogger(), + promslog.NewNopLogger(), ) var r []*rules.AlertingRule r = append(r, rule1) @@ -314,7 +313,7 @@ func (m *rulesRetrieverMock) CreateRuleGroups() { QueryFunc: rules.EngineQueryFunc(engine, storage), Appendable: storage, Context: context.Background(), - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {}, } @@ -339,7 +338,15 @@ func (m *rulesRetrieverMock) CreateRuleGroups() { ShouldRestore: false, Opts: opts, }) - m.ruleGroups = []*rules.Group{group} + group2 := rules.NewGroup(rules.GroupOptions{ + Name: "grp2", + File: "/path/to/file", + Interval: time.Second, + Rules: []rules.Rule{r[0]}, + ShouldRestore: false, + Opts: opts, + }) + m.ruleGroups = []*rules.Group{group, group2} } func (m *rulesRetrieverMock) AlertingRules() []*rules.AlertingRule { @@ -380,6 +387,8 @@ func TestEndpoints(t *testing.T) { test_metric4{foo="bar", dup="1"} 1+0x100 test_metric4{foo="boo", dup="1"} 1+0x100 test_metric4{foo="boo"} 1+0x100 + test_metric5{"host.name"="localhost"} 1+0x100 + test_metric5{"junk\n{},=: chars"="bar"} 1+0x100 `) t.Cleanup(func() { storage.Close() }) @@ -471,20 +480,20 @@ func TestEndpoints(t *testing.T) { u, err := url.Parse(server.URL) require.NoError(t, err) - al := promlog.AllowedLevel{} + al := promslog.AllowedLevel{} require.NoError(t, al.Set("debug")) - af := promlog.AllowedFormat{} + af := promslog.AllowedFormat{} require.NoError(t, af.Set("logfmt")) - promlogConfig := promlog.Config{ + promslogConfig := promslog.Config{ Level: &al, Format: &af, } dbDir := t.TempDir() - remote := remote.NewStorage(promlog.New(&promlogConfig), prometheus.DefaultRegisterer, func() (int64, error) { + remote := remote.NewStorage(promslog.New(&promslogConfig), prometheus.DefaultRegisterer, func() (int64, error) { return 0, nil }, dbDir, 1*time.Second, nil, false) @@ -608,7 +617,7 @@ func TestGetSeries(t *testing.T) { matchers: []string{`{foo="boo"}`, `{foo="baz"}`}, expectedErrorType: errorExec, api: &API{ - Queryable: errorTestQueryable{err: fmt.Errorf("generic")}, + Queryable: errorTestQueryable{err: errors.New("generic")}, }, }, { @@ -616,7 +625,7 @@ func TestGetSeries(t *testing.T) { matchers: []string{`{foo="boo"}`, `{foo="baz"}`}, expectedErrorType: errorInternal, api: &API{ - Queryable: errorTestQueryable{err: promql.ErrStorage{Err: fmt.Errorf("generic")}}, + Queryable: errorTestQueryable{err: promql.ErrStorage{Err: errors.New("generic")}}, }, }, } { @@ -710,7 +719,7 @@ func TestQueryExemplars(t *testing.T) { name: "should return errorExec upon genetic error", expectedErrorType: errorExec, api: &API{ - ExemplarQueryable: errorTestQueryable{err: fmt.Errorf("generic")}, + ExemplarQueryable: errorTestQueryable{err: errors.New("generic")}, }, query: url.Values{ "query": []string{`test_metric3{foo="boo"} - test_metric4{foo="bar"}`}, @@ -722,7 +731,7 @@ func TestQueryExemplars(t *testing.T) { name: "should return errorInternal err type is ErrStorage", expectedErrorType: errorInternal, api: &API{ - ExemplarQueryable: errorTestQueryable{err: promql.ErrStorage{Err: fmt.Errorf("generic")}}, + ExemplarQueryable: errorTestQueryable{err: promql.ErrStorage{Err: errors.New("generic")}}, }, query: url.Values{ "query": []string{`test_metric3{foo="boo"} - test_metric4{foo="bar"}`}, @@ -831,7 +840,7 @@ func TestLabelNames(t *testing.T) { matchers: []string{`{foo="boo"}`, `{foo="baz"}`}, expectedErrorType: errorExec, api: &API{ - Queryable: errorTestQueryable{err: fmt.Errorf("generic")}, + Queryable: 
errorTestQueryable{err: errors.New("generic")}, }, }, { @@ -839,7 +848,7 @@ func TestLabelNames(t *testing.T) { matchers: []string{`{foo="boo"}`, `{foo="baz"}`}, expectedErrorType: errorInternal, api: &API{ - Queryable: errorTestQueryable{err: promql.ErrStorage{Err: fmt.Errorf("generic")}}, + Queryable: errorTestQueryable{err: promql.ErrStorage{Err: errors.New("generic")}}, }, }, } { @@ -1110,6 +1119,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E metadata []targetMetadata exemplars []exemplar.QueryResult zeroFunc func(interface{}) + nameValidationScheme model.ValidationScheme } rulesZeroFunc := func(i interface{}) { @@ -2242,6 +2252,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2330,6 +2359,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: nil, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2411,6 +2459,25 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, }, }, + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, }, }, zeroFunc: rulesZeroFunc, @@ -2682,6 +2749,167 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E }, zeroFunc: rulesZeroFunc, }, + { + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + }, + response: &RuleDiscovery{ + GroupNextToken: getRuleGroupNextToken("/path/to/file", "grp2"), + RuleGroups: []*RuleGroup{ + { + Name: "grp", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric4", + Query: "up == 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "pending", + Name: "test_metric5", + Query: "vector(1)", + Duration: 1, + Labels: labels.FromStrings("name", "tm5"), + Annotations: labels.Labels{}, + Alerts: []*Alert{ + { + Labels: labels.FromStrings("alertname", "test_metric5", "name", "tm5"), + Annotations: labels.Labels{}, + State: "pending", + Value: "1e+00", + }, + }, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric6", + Query: "up == 1", + Duration: 1, + Labels: labels.FromStrings("testlabel", "rule"), + Annotations: labels.Labels{}, + 
Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + AlertingRule{ + State: "inactive", + Name: "test_metric7", + Query: "up == 1", + Duration: 1, + Labels: labels.FromStrings("templatedlabel", "{{ $externalURL }}"), + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + RecordingRule{ + Name: "recording-rule-1", + Query: "vector(1)", + Labels: labels.Labels{}, + Health: "ok", + Type: "recording", + }, + RecordingRule{ + Name: "recording-rule-2", + Query: "vector(1)", + Labels: labels.FromStrings("testlabel", "rule"), + Health: "ok", + Type: "recording", + }, + }, + }, + }, + }, + zeroFunc: rulesZeroFunc, + }, + { + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + response: &RuleDiscovery{ + RuleGroups: []*RuleGroup{ + { + Name: "grp2", + File: "/path/to/file", + Interval: 1, + Limit: 0, + Rules: []Rule{ + AlertingRule{ + State: "inactive", + Name: "test_metric3", + Query: "absent(test_metric3) != 1", + Duration: 1, + Labels: labels.Labels{}, + Annotations: labels.Labels{}, + Alerts: []*Alert{}, + Health: "ok", + Type: "alerting", + }, + }, + }, + }, + }, + zeroFunc: rulesZeroFunc, + }, + { // invalid pagination request + endpoint: api.rules, + query: url.Values{ + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, + { // invalid group_limit + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"0"}, + "group_next_token": []string{getRuleGroupNextToken("/path/to/file", "grp2")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, + { // Pagination token is invalid due to changes in the rule groups + endpoint: api.rules, + query: url.Values{ + "group_limit": []string{"1"}, + "group_next_token": []string{getRuleGroupNextToken("/removed/file", "notfound")}, + }, + errType: errorBadData, + zeroFunc: rulesZeroFunc, + }, + { // groupNextToken should not be in empty response + endpoint: api.rules, + query: url.Values{ + "match[]": []string{`{testlabel="abc-cannot-find"}`}, + "group_limit": []string{"1"}, + }, + responseAsJSON: `{"groups":[]}`, + }, { endpoint: api.queryExemplars, query: url.Values{ @@ -2779,6 +3007,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E "test_metric2", "test_metric3", "test_metric4", + "test_metric5", }, }, { @@ -2791,13 +3020,36 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E "boo", }, }, - // Bad name parameter. + // Bad name parameter for legacy validation. { endpoint: api.labelValues, params: map[string]string{ - "name": "not!!!allowed", + "name": "host.name", + }, + nameValidationScheme: model.LegacyValidation, + errType: errorBadData, + }, + // Valid utf8 name parameter for utf8 validation. + { + endpoint: api.labelValues, + params: map[string]string{ + "name": "host.name", + }, + nameValidationScheme: model.UTF8Validation, + response: []string{ + "localhost", + }, + }, + // Valid escaped utf8 name parameter for utf8 validation. + { + endpoint: api.labelValues, + params: map[string]string{ + "name": "U__junk_0a__7b__7d__2c__3d_:_20__20_chars", + }, + nameValidationScheme: model.UTF8Validation, + response: []string{ + "bar", }, - errType: errorBadData, }, // Start and end before LabelValues starts. 
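The three label-values cases above show the new UTF-8 handling end to end: under legacy validation `host.name` is rejected, under UTF-8 validation it is accepted directly, and it can also be addressed through the `U__...` value-encoding escape on the path. A sketch of producing that escaped form with the same `prometheus/common` scheme the handler change uses (the exact output string is illustrative):

```go
package main

import (
	"fmt"

	"github.com/prometheus/common/model"
)

func main() {
	// Escape a UTF-8 label name for use in a path segment such as
	// /api/v1/label/<name>/values, then round-trip it back.
	escaped := model.EscapeName("host.name", model.ValueEncodingEscaping)
	fmt.Println(escaped) // e.g. "U__host_2e_name"

	// This mirrors the strings.HasPrefix(name, "U__") branch added to the
	// labelValues handler earlier in this patch.
	fmt.Println(model.UnescapeName(escaped, model.ValueEncodingEscaping)) // "host.name"
}
```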
{ @@ -3033,15 +3285,15 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E "name": "__name__", }, query: url.Values{ - "limit": []string{"4"}, + "limit": []string{"5"}, }, - responseLen: 4, // API does not specify which particular values will come back. + responseLen: 5, // API does not specify which particular values will come back. warningsCount: 0, // No warnings if limit isn't exceeded. }, // Label names. { endpoint: api.labelNames, - response: []string{"__name__", "dup", "foo"}, + response: []string{"__name__", "dup", "foo", "host.name", "junk\n{},=: chars"}, }, // Start and end before Label names starts. { @@ -3059,7 +3311,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E "start": []string{"1"}, "end": []string{"100"}, }, - response: []string{"__name__", "dup", "foo"}, + response: []string{"__name__", "dup", "foo", "host.name", "junk\n{},=: chars"}, }, // Start before Label names, end within Label names. { @@ -3068,7 +3320,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E "start": []string{"-1"}, "end": []string{"10"}, }, - response: []string{"__name__", "dup", "foo"}, + response: []string{"__name__", "dup", "foo", "host.name", "junk\n{},=: chars"}, }, // Start before Label names starts, end after Label names ends. @@ -3078,7 +3330,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E "start": []string{"-1"}, "end": []string{"100000"}, }, - response: []string{"__name__", "dup", "foo"}, + response: []string{"__name__", "dup", "foo", "host.name", "junk\n{},=: chars"}, }, // Start with bad data for Label names, end within Label names. { @@ -3096,7 +3348,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E "start": []string{"1"}, "end": []string{"1000000006"}, }, - response: []string{"__name__", "dup", "foo"}, + response: []string{"__name__", "dup", "foo", "host.name", "junk\n{},=: chars"}, }, // Start and end after Label names ends. { @@ -3113,7 +3365,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E query: url.Values{ "start": []string{"4"}, }, - response: []string{"__name__", "dup", "foo"}, + response: []string{"__name__", "dup", "foo", "host.name", "junk\n{},=: chars"}, }, // Only provide End within Label names, don't provide a start time. { @@ -3121,7 +3373,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E query: url.Values{ "end": []string{"20"}, }, - response: []string{"__name__", "dup", "foo"}, + response: []string{"__name__", "dup", "foo", "host.name", "junk\n{},=: chars"}, }, // Label names with bad matchers. { @@ -3189,9 +3441,9 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E { endpoint: api.labelNames, query: url.Values{ - "limit": []string{"3"}, + "limit": []string{"5"}, }, - responseLen: 3, // API does not specify which particular values will come back. + responseLen: 5, // API does not specify which particular values will come back. warningsCount: 0, // No warnings if limit isn't exceeded. }, }...) 
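Putting the rule-group pagination pieces of this patch together (the `group_limit`/`group_next_token` parameters, the SHA1-based `getRuleGroupNextToken`, and the `groupNextToken` response field), a client pages through groups roughly as below; the response types here are hand-rolled for the sketch:

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"net/url"
)

type rulesData struct {
	Groups         []json.RawMessage `json:"groups"`
	GroupNextToken string            `json:"groupNextToken"`
}

type rulesResponse struct {
	Status string    `json:"status"`
	Data   rulesData `json:"data"`
}

func main() {
	token := ""
	for page := 1; ; page++ {
		q := url.Values{"group_limit": {"1"}}
		if token != "" {
			q.Set("group_next_token", token)
		}
		resp, err := http.Get("http://localhost:9090/api/v1/rules?" + q.Encode())
		if err != nil {
			log.Fatal(err)
		}
		var rr rulesResponse
		err = json.NewDecoder(resp.Body).Decode(&rr)
		resp.Body.Close()
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("page %d: %d group(s)\n", page, len(rr.Data.Groups))
		if token = rr.Data.GroupNextToken; token == "" {
			break // no more pages
		}
	}
}
```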
@@ -3227,6 +3479,8 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E ctx = route.WithParam(ctx, p, v) } + model.NameValidationScheme = test.nameValidationScheme + req, err := request(method, test.query) require.NoError(t, err) @@ -3530,7 +3784,7 @@ func TestAdminEndpoints(t *testing.T) { func TestRespondSuccess(t *testing.T) { api := API{ - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), } api.ClearCodecs() @@ -3622,7 +3876,7 @@ func TestRespondSuccess(t *testing.T) { func TestRespondSuccess_DefaultCodecCannotEncodeResponse(t *testing.T) { api := API{ - logger: log.NewNopLogger(), + logger: promslog.NewNopLogger(), } api.ClearCodecs() @@ -3645,7 +3899,7 @@ func TestRespondSuccess_DefaultCodecCannotEncodeResponse(t *testing.T) { require.Equal(t, http.StatusNotAcceptable, resp.StatusCode) require.Equal(t, "application/json", resp.Header.Get("Content-Type")) - require.Equal(t, `{"status":"error","errorType":"not_acceptable","error":"cannot encode response as application/default-format"}`, string(body)) + require.JSONEq(t, `{"status":"error","errorType":"not_acceptable","error":"cannot encode response as application/default-format"}`, string(body)) } func TestRespondError(t *testing.T) { @@ -4034,13 +4288,13 @@ func TestGetGlobalURL(t *testing.T) { false, }, { - mustParseURL(t, "http://exemple.com"), + mustParseURL(t, "http://example.com"), GlobalURLOptions{ ListenAddress: "127.0.0.1:9090", Host: "prometheus.io", Scheme: "https", }, - mustParseURL(t, "http://exemple.com"), + mustParseURL(t, "http://example.com"), false, }, { @@ -4176,7 +4430,7 @@ func TestExtractQueryOpts(t *testing.T) { if test.err == nil { require.NoError(t, err) } else { - require.Equal(t, test.err.Error(), err.Error()) + require.EqualError(t, err, test.err.Error()) } }) } diff --git a/web/api/v1/errors_test.go b/web/api/v1/errors_test.go index 7e1fc09d8a..f5e75615ec 100644 --- a/web/api/v1/errors_test.go +++ b/web/api/v1/errors_test.go @@ -23,9 +23,9 @@ import ( "testing" "time" - "github.com/go-kit/log" "github.com/grafana/regexp" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/promslog" "github.com/prometheus/common/route" "github.com/stretchr/testify/require" @@ -105,7 +105,7 @@ func createPrometheusAPI(t *testing.T, q storage.SampleAndChunkQueryable) *route t.Helper() engine := promqltest.NewTestEngineWithOpts(t, promql.EngineOpts{ - Logger: log.NewNopLogger(), + Logger: promslog.NewNopLogger(), Reg: nil, ActiveQueryTracker: nil, MaxSamples: 100, @@ -127,13 +127,15 @@ func createPrometheusAPI(t *testing.T, q storage.SampleAndChunkQueryable) *route nil, // Only needed for admin APIs. "", // This is for snapshots, which is disabled when admin APIs are disabled. Hence empty. false, // Disable admin APIs. - log.NewNopLogger(), + promslog.NewNopLogger(), func(context.Context) RulesRetriever { return &DummyRulesRetriever{} }, 0, 0, 0, // Remote read samples and concurrency limit. false, // Not an agent. regexp.MustCompile(".*"), func() (RuntimeInfo, error) { return RuntimeInfo{}, errors.New("not implemented") }, &PrometheusVersion{}, + nil, + nil, prometheus.DefaultGatherer, nil, nil, diff --git a/web/api/v1/translate_ast.go b/web/api/v1/translate_ast.go new file mode 100644 index 0000000000..afa11f16b9 --- /dev/null +++ b/web/api/v1/translate_ast.go @@ -0,0 +1,157 @@ +// Copyright 2024 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package v1 + +import ( + "strconv" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql/parser" +) + +// Take a Go PromQL AST and translate it to an object that's nicely JSON-serializable +// for the tree view in the UI. +// TODO: Could it make sense to do this via the normal JSON marshalling methods? Maybe +// too UI-specific though. +func translateAST(node parser.Expr) interface{} { + if node == nil { + return nil + } + + switch n := node.(type) { + case *parser.AggregateExpr: + return map[string]interface{}{ + "type": "aggregation", + "op": n.Op.String(), + "expr": translateAST(n.Expr), + "param": translateAST(n.Param), + "grouping": sanitizeList(n.Grouping), + "without": n.Without, + } + case *parser.BinaryExpr: + var matching interface{} + if m := n.VectorMatching; m != nil { + matching = map[string]interface{}{ + "card": m.Card.String(), + "labels": sanitizeList(m.MatchingLabels), + "on": m.On, + "include": sanitizeList(m.Include), + } + } + + return map[string]interface{}{ + "type": "binaryExpr", + "op": n.Op.String(), + "lhs": translateAST(n.LHS), + "rhs": translateAST(n.RHS), + "matching": matching, + "bool": n.ReturnBool, + } + case *parser.Call: + args := []interface{}{} + for _, arg := range n.Args { + args = append(args, translateAST(arg)) + } + + return map[string]interface{}{ + "type": "call", + "func": map[string]interface{}{ + "name": n.Func.Name, + "argTypes": n.Func.ArgTypes, + "variadic": n.Func.Variadic, + "returnType": n.Func.ReturnType, + }, + "args": args, + } + case *parser.MatrixSelector: + vs := n.VectorSelector.(*parser.VectorSelector) + return map[string]interface{}{ + "type": "matrixSelector", + "name": vs.Name, + "range": n.Range.Milliseconds(), + "offset": vs.OriginalOffset.Milliseconds(), + "matchers": translateMatchers(vs.LabelMatchers), + "timestamp": vs.Timestamp, + "startOrEnd": getStartOrEnd(vs.StartOrEnd), + } + case *parser.SubqueryExpr: + return map[string]interface{}{ + "type": "subquery", + "expr": translateAST(n.Expr), + "range": n.Range.Milliseconds(), + "offset": n.OriginalOffset.Milliseconds(), + "step": n.Step.Milliseconds(), + "timestamp": n.Timestamp, + "startOrEnd": getStartOrEnd(n.StartOrEnd), + } + case *parser.NumberLiteral: + return map[string]string{ + "type": "numberLiteral", + "val": strconv.FormatFloat(n.Val, 'f', -1, 64), + } + case *parser.ParenExpr: + return map[string]interface{}{ + "type": "parenExpr", + "expr": translateAST(n.Expr), + } + case *parser.StringLiteral: + return map[string]interface{}{ + "type": "stringLiteral", + "val": n.Val, + } + case *parser.UnaryExpr: + return map[string]interface{}{ + "type": "unaryExpr", + "op": n.Op.String(), + "expr": translateAST(n.Expr), + } + case *parser.VectorSelector: + return map[string]interface{}{ + "type": "vectorSelector", + "name": n.Name, + "offset": n.OriginalOffset.Milliseconds(), + "matchers": translateMatchers(n.LabelMatchers), + "timestamp": n.Timestamp, + "startOrEnd": getStartOrEnd(n.StartOrEnd), + } + } + panic("unsupported node type") +} + +func sanitizeList(l []string) []string { + if l == nil { + 
return []string{} + } + return l +} + +func translateMatchers(in []*labels.Matcher) interface{} { + out := []map[string]interface{}{} + for _, m := range in { + out = append(out, map[string]interface{}{ + "name": m.Name, + "value": m.Value, + "type": m.Type.String(), + }) + } + return out +} + +func getStartOrEnd(startOrEnd parser.ItemType) interface{} { + if startOrEnd == 0 { + return nil + } + + return startOrEnd.String() +} diff --git a/web/federate.go b/web/federate.go index 8176eba365..a1513e46c1 100644 --- a/web/federate.go +++ b/web/federate.go @@ -21,7 +21,6 @@ import ( "sort" "strings" - "github.com/go-kit/log/level" "github.com/gogo/protobuf/proto" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" @@ -101,7 +100,7 @@ func (h *Handler) federation(w http.ResponseWriter, req *http.Request) { sets = append(sets, s) } - set := storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge) + set := storage.NewMergeSeriesSet(sets, 0, storage.ChainedSeriesMerge) it := storage.NewBuffer(int64(h.lookbackDelta / 1e6)) var chkIter chunkenc.Iterator Loop: @@ -157,7 +156,7 @@ Loop: }) } if ws := set.Warnings(); len(ws) > 0 { - level.Debug(h.logger).Log("msg", "Federation select returned warnings", "warnings", ws) + h.logger.Debug("Federation select returned warnings", "warnings", ws) federationWarnings.Add(float64(len(ws))) } if set.Err() != nil { @@ -253,11 +252,11 @@ Loop: }) if err != nil { federationErrors.Inc() - level.Error(h.logger).Log("msg", "federation failed", "err", err) + h.logger.Error("federation failed", "err", err) return } if !nameSeen { - level.Warn(h.logger).Log("msg", "Ignoring nameless metric during federation", "metric", s.Metric) + h.logger.Warn("Ignoring nameless metric during federation", "metric", s.Metric) continue } // Attach global labels if they do not exist yet. @@ -314,7 +313,7 @@ Loop: if protMetricFam != nil { if err := enc.Encode(protMetricFam); err != nil { federationErrors.Inc() - level.Error(h.logger).Log("msg", "federation failed", "err", err) + h.logger.Error("federation failed", "err", err) } } } diff --git a/web/ui/README.md b/web/ui/README.md index 465adf5372..49ec27d8b4 100644 --- a/web/ui/README.md +++ b/web/ui/README.md @@ -1,4 +1,5 @@ ## Overview + The `ui` directory contains static files and templates used in the web UI. For easier distribution they are compressed (c.f. Makefile) and statically compiled into the Prometheus binary using the embed package. @@ -11,19 +12,48 @@ in `.promu.yml`, and then `make build` (or build Prometheus using This will serve all files from your local filesystem. This is for development purposes only. +### Using Prebuilt UI Assets + +If you are only working on the go backend, for faster builds, you can use +prebuilt web UI assets available with each Prometheus release +(`prometheus-web-ui-.tar.gz`). This allows you to skip building the UI +from source. + +1. Download and extract the prebuilt UI tarball: + ```bash + tar -xvf prometheus-web-ui-.tar.gz -C web/ui + ``` + +2. Build Prometheus using the prebuilt assets by passing the following parameter + to `make`: + ```bash + make PREBUILT_ASSETS_STATIC_DIR=web/ui/static build + ``` + +This will include the prebuilt UI files directly in the Prometheus binary, +avoiding the need to install npm or rebuild the frontend from source. + ## React-app ### Introduction -The react application is a monorepo composed by multiple different npm packages. The main one is `react-app` which -contains the code of the react application. 
+This directory contains two generations of Prometheus' React-based web UI:
+
+* `react-app`: The old 2.x web UI
+* `mantine-ui`: The new 3.x web UI
+
+Both UIs are built and compiled into Prometheus. The new UI is served by default, but a feature flag
+(`--enable-feature=old-ui`) can be used to switch back to serving the old UI.
 
 Then you have different npm packages located in the folder `modules`. These packages are supposed to be used by the
-react-app and also by others consumers (like Thanos)
+two React apps and also by other consumers (like Thanos).
+
+While most of these applications / modules are part of the same npm workspace, the old UI in the `react-app` directory
+has been separated out of the workspace setup, since its dependencies were too incompatible.
 
 ### Pre-requisite
 
-To be able to build the react application you need:
+To be able to build either of the React applications, you need:
 
 * npm >= v7
 * node >= v20
@@ -38,46 +68,50 @@ need to move to the directory `web/ui` and then download and install them locall
 npm consults the `package.json` and `package-lock.json` files for dependencies to install. It creates a `node_modules`
 directory with all installed dependencies.
 
-**NOTE**: Do not run `npm install` in the `react-app` folder or in any sub folder of the `module` directory.
+**NOTE**: Do not run `npm install` in the `react-app` / `mantine-ui` folder or in any sub folder of the `module` directory.
 
 ### Upgrading npm dependencies
 
-As it is a monorepo, when upgrading a dependency, you have to upgrade it in every packages that composed this monorepo (
-aka, in all sub folder of `module` and in `react-app`)
+As it is a monorepo, when upgrading a dependency, you have to upgrade it in every package that composes this monorepo
+(aka, in all sub folders of `module` and `react-app` / `mantine-ui`).
 
 Then you have to run the command `npm install` in `web/ui` and not in a sub folder / sub package. It won't simply work.
 
 ### Running a local development server
 
-You can start a development server for the React UI outside of a running Prometheus server by running:
+You can start a development server for the new React UI outside of a running Prometheus server by running:
 
     npm start
 
-This will open a browser window with the React app running on http://localhost:3000/. The page will reload if you make
+(For the old UI, you will have to run the same command from the `react-app` subdirectory.)
+
+This will open a browser window with the React app running on http://localhost:5173/. The page will reload if you make
 edits to the source code. You will also see any lint errors in the console.
 
-**NOTE**: It will reload only if you change the code in `react-app` folder. Any code changes in the folder `module` is
+**NOTE**: It will reload only if you change the code in the `mantine-ui` folder. Any code change in the folder `module` is
 not considered by the command `npm start`. In order to see the changes in the react-app you will have to run
 `npm run build:module`
 
-Due to a `"proxy": "http://localhost:9090"` setting in the `package.json` file, any API requests from the React UI are
+Due to a `"proxy": "http://localhost:9090"` setting in the `mantine-ui/vite.config.ts` file, any API requests from the React UI are
 proxied to `localhost` on port `9090` by the development server. This allows you to run a normal Prometheus server to
 handle API requests, while iterating separately on the UI.
- [browser] ----> [localhost:3000 (dev server)] --(proxy API requests)--> [localhost:9090 (Prometheus)] + [browser] ----> [localhost:5173 (dev server)] --(proxy API requests)--> [localhost:9090 (Prometheus)] ### Running tests -To run the test for the react-app and for all modules, you can simply run: +To run the test for the new React app and for all modules, you can simply run: ```bash npm test ``` -if you want to run the test only for a specific module, you need to go to the folder of the module and run +(For the old UI, you will have to run the same command from the `react-app` subdirectory.) + +If you want to run the test only for a specific module, you need to go to the folder of the module and run again `npm test`. -For example, in case you only want to run the test of the react-app, go to `web/ui/react-app` and run `npm test` +For example, in case you only want to run the test of the new React app, go to `web/ui/mantine-ui` and run `npm test` To generate an HTML-based test coverage report, run: @@ -93,7 +127,7 @@ running tests. ### Building the app for production -To build a production-optimized version of the React app to a `build` subdirectory, run: +To build a production-optimized version of both React app versions to a `static/{react-app,mantine-ui}` subdirectory, run: npm run build @@ -102,10 +136,10 @@ Prometheus `Makefile` when building the full binary. ### Integration into Prometheus -To build a Prometheus binary that includes a compiled-in version of the production build of the React app, change to the +To build a Prometheus binary that includes a compiled-in version of the production build of both React app versions, change to the root of the repository and run: make build -This installs dependencies via npm, builds a production build of the React app, and then finally compiles in all web +This installs dependencies via npm, builds a production build of both React apps, and then finally compiles in all web assets into the Prometheus binary. diff --git a/web/ui/build_ui.sh b/web/ui/build_ui.sh index 8761bfa1e3..360ec33ea8 100644 --- a/web/ui/build_ui.sh +++ b/web/ui/build_ui.sh @@ -21,6 +21,7 @@ then fi buildOrder=(lezer-promql codemirror-promql) +assetsDir="./static" function buildModule() { for module in "${buildOrder[@]}"; do @@ -31,9 +32,18 @@ function buildModule() { function buildReactApp() { echo "build react-app" - npm run build -w @prometheus-io/app - rm -rf ./static/react - mv ./react-app/build ./static/react + (cd react-app && npm run build) + mkdir -p ${assetsDir} + rm -rf ${assetsDir}/react-app + mv ./react-app/build ${assetsDir}/react-app +} + +function buildMantineUI() { + echo "build mantine-ui" + npm run build -w @prometheus-io/mantine-ui + mkdir -p ${assetsDir} + rm -rf ${assetsDir}/mantine-ui + mv ./mantine-ui/dist ${assetsDir}/mantine-ui } for i in "$@"; do @@ -41,6 +51,7 @@ for i in "$@"; do --all) buildModule buildReactApp + buildMantineUI shift ;; --build-module) diff --git a/web/ui/mantine-ui/.gitignore b/web/ui/mantine-ui/.gitignore new file mode 100644 index 0000000000..a547bf36d8 --- /dev/null +++ b/web/ui/mantine-ui/.gitignore @@ -0,0 +1,24 @@ +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +pnpm-debug.log* +lerna-debug.log* + +node_modules +dist +dist-ssr +*.local + +# Editor directories and files +.vscode/* +!.vscode/extensions.json +.idea +.DS_Store +*.suo +*.ntvs* +*.njsproj +*.sln +*.sw? 
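With `build_ui.sh` now assembling both apps under `web/ui/static/{react-app,mantine-ui}`, the Go binary picks the assets up from that directory via the `embed` package mentioned in the README. A minimal sketch of the mechanism (the variable name here is illustrative, not the actual embed file in the repo):

```go
package ui

import "embed"

// EmbedFS holds the compiled UI assets; build_ui.sh moves the production
// builds into static/react-app and static/mantine-ui before compilation.
//
//go:embed static
var EmbedFS embed.FS
```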
diff --git a/web/ui/mantine-ui/eslint.config.mjs b/web/ui/mantine-ui/eslint.config.mjs new file mode 100644 index 0000000000..c3cc58920e --- /dev/null +++ b/web/ui/mantine-ui/eslint.config.mjs @@ -0,0 +1,71 @@ +import { fixupConfigRules } from '@eslint/compat'; +import reactRefresh from 'eslint-plugin-react-refresh'; +import globals from 'globals'; +import tsParser from '@typescript-eslint/parser'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import js from '@eslint/js'; +import { FlatCompat } from '@eslint/eslintrc'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const compat = new FlatCompat({ + baseDirectory: __dirname, + recommendedConfig: js.configs.recommended, + allConfig: js.configs.all +}); + +export default [{ + ignores: ['**/dist', '**/.eslintrc.cjs'], +}, ...fixupConfigRules(compat.extends( + 'eslint:recommended', + 'plugin:@typescript-eslint/recommended', + 'plugin:react-hooks/recommended', +)), { + plugins: { + 'react-refresh': reactRefresh, + }, + + languageOptions: { + globals: { + ...globals.browser, + }, + + parser: tsParser, + }, + + rules: { + 'react-refresh/only-export-components': ['warn', { + allowConstantExport: true, + }], + + // Disable the base rule as it can report incorrect errors + 'no-unused-vars': 'off', + + // Use the TypeScript-specific rule for unused vars + '@typescript-eslint/no-unused-vars': ['warn', { + argsIgnorePattern: '^_', + varsIgnorePattern: '^_', + caughtErrorsIgnorePattern: '^_', + }], + + 'prefer-const': ['error', { + destructuring: 'all', + }], + }, + }, + // Override for Node.js-based config files + { + files: ['postcss.config.cjs'], // Specify any other config files + languageOptions: { + ecmaVersion: 2021, // Optional, set ECMAScript version + sourceType: 'script', // For CommonJS (non-ESM) modules + globals: { + module: 'readonly', + require: 'readonly', + process: 'readonly', + __dirname: 'readonly', // Include other Node.js globals if needed + }, + }, + }, +]; diff --git a/web/ui/mantine-ui/index.html b/web/ui/mantine-ui/index.html new file mode 100644 index 0000000000..d2723488ac --- /dev/null +++ b/web/ui/mantine-ui/index.html @@ -0,0 +1,37 @@ + + + + + + + + + + + + TITLE_PLACEHOLDER + + +
+ + + diff --git a/web/ui/mantine-ui/package.json b/web/ui/mantine-ui/package.json new file mode 100644 index 0000000000..dc32eee96d --- /dev/null +++ b/web/ui/mantine-ui/package.json @@ -0,0 +1,73 @@ +{ + "name": "@prometheus-io/mantine-ui", + "private": true, + "version": "0.300.0", + "type": "module", + "scripts": { + "start": "vite", + "build": "tsc && vite build", + "lint": "eslint . --report-unused-disable-directives --max-warnings 0", + "lint:fix": "eslint . --report-unused-disable-directives --max-warnings 0 --fix", + "preview": "vite preview", + "test": "vitest" + }, + "dependencies": { + "@codemirror/autocomplete": "^6.18.1", + "@codemirror/language": "^6.10.2", + "@codemirror/lint": "^6.8.1", + "@codemirror/state": "^6.4.1", + "@codemirror/view": "^6.34.1", + "@floating-ui/dom": "^1.6.7", + "@lezer/common": "^1.2.1", + "@lezer/highlight": "^1.2.1", + "@mantine/code-highlight": "^7.13.1", + "@mantine/core": "^7.11.2", + "@mantine/dates": "^7.13.1", + "@mantine/hooks": "^7.11.2", + "@mantine/notifications": "^7.13.1", + "@microsoft/fetch-event-source": "^2.0.1", + "@nexucis/fuzzy": "^0.5.1", + "@nexucis/kvsearch": "^0.9.1", + "@prometheus-io/codemirror-promql": "0.300.0", + "@reduxjs/toolkit": "^2.2.1", + "@tabler/icons-react": "^3.19.0", + "@tanstack/react-query": "^5.59.0", + "@testing-library/jest-dom": "^6.5.0", + "@testing-library/react": "^16.0.1", + "@types/lodash": "^4.17.9", + "@types/sanitize-html": "^2.13.0", + "@uiw/react-codemirror": "^4.23.3", + "clsx": "^2.1.1", + "dayjs": "^1.11.10", + "lodash": "^4.17.21", + "react": "^18.3.1", + "react-dom": "^18.3.1", + "react-infinite-scroll-component": "^6.1.0", + "react-redux": "^9.1.2", + "react-router-dom": "^6.26.2", + "sanitize-html": "^2.13.0", + "uplot": "^1.6.30", + "uplot-react": "^1.2.2", + "use-query-params": "^2.2.1" + }, + "devDependencies": { + "@eslint/compat": "^1.1.1", + "@eslint/eslintrc": "^3.1.0", + "@eslint/js": "^9.11.1", + "@types/react": "^18.3.5", + "@types/react-dom": "^18.3.0", + "@typescript-eslint/eslint-plugin": "^6.21.0", + "@typescript-eslint/parser": "^6.21.0", + "@vitejs/plugin-react": "^4.2.1", + "eslint": "^9.11.1", + "eslint-plugin-react-hooks": "^5.1.0-rc-e56f4ae3-20240830", + "eslint-plugin-react-refresh": "^0.4.12", + "globals": "^15.10.0", + "jsdom": "^25.0.1", + "postcss": "^8.4.47", + "postcss-preset-mantine": "^1.17.0", + "postcss-simple-vars": "^7.0.1", + "vite": "^5.4.8", + "vitest": "^2.1.1" + } +} diff --git a/web/ui/mantine-ui/postcss.config.cjs b/web/ui/mantine-ui/postcss.config.cjs new file mode 100644 index 0000000000..bfba0ddfae --- /dev/null +++ b/web/ui/mantine-ui/postcss.config.cjs @@ -0,0 +1,14 @@ +module.exports = { + plugins: { + 'postcss-preset-mantine': {}, + 'postcss-simple-vars': { + variables: { + 'mantine-breakpoint-xs': '36em', + 'mantine-breakpoint-sm': '48em', + 'mantine-breakpoint-md': '62em', + 'mantine-breakpoint-lg': '75em', + 'mantine-breakpoint-xl': '88em', + }, + }, + }, +}; diff --git a/web/ui/mantine-ui/public/favicon.svg b/web/ui/mantine-ui/public/favicon.svg new file mode 100644 index 0000000000..5c51f66d90 --- /dev/null +++ b/web/ui/mantine-ui/public/favicon.svg @@ -0,0 +1,50 @@ + + + +image/svg+xml \ No newline at end of file diff --git a/web/ui/mantine-ui/src/App.module.css b/web/ui/mantine-ui/src/App.module.css new file mode 100644 index 0000000000..2a66b14f36 --- /dev/null +++ b/web/ui/mantine-ui/src/App.module.css @@ -0,0 +1,40 @@ +.control { + display: block; + padding: var(--mantine-spacing-xs) var(--mantine-spacing-md); + border-radius: 
var(--mantine-radius-md); + font-weight: 500; + + @mixin hover { + background-color: var(--mantine-color-gray-8); + } +} + +.link { + display: block; + line-height: 1; + padding: rem(8px) rem(12px); + border-radius: var(--mantine-radius-sm); + text-decoration: none; + color: var(--mantine-color-gray-0); + font-size: var(--mantine-font-size-sm); + font-weight: 500; + background-color: transparent; + + @mixin hover { + background-color: var(--mantine-color-gray-6); + color: var(--mantine-color-gray-0); + } + + [data-mantine-color-scheme] &[aria-current="page"] { + background-color: var(--mantine-color-blue-filled); + color: var(--mantine-color-white); + } +} + +/* Font used for autocompletion item icons. */ +@font-face { + font-family: "codicon"; + src: + local("codicon"), + url(./fonts/codicon.ttf) format("truetype"); +} diff --git a/web/ui/mantine-ui/src/App.tsx b/web/ui/mantine-ui/src/App.tsx new file mode 100644 index 0000000000..3bec30fa31 --- /dev/null +++ b/web/ui/mantine-ui/src/App.tsx @@ -0,0 +1,449 @@ +import "@mantine/core/styles.css"; +import "@mantine/code-highlight/styles.css"; +import "@mantine/notifications/styles.css"; +import "@mantine/dates/styles.css"; +import "./mantine-overrides.css"; +import classes from "./App.module.css"; +import PrometheusLogo from "./images/prometheus-logo.svg"; + +import { + ActionIcon, + AppShell, + Box, + Burger, + Button, + Group, + MantineProvider, + Menu, + Skeleton, + Text, + createTheme, + rem, +} from "@mantine/core"; +import { useDisclosure } from "@mantine/hooks"; +import { + IconBell, + IconBellFilled, + IconBook, + IconChevronDown, + IconChevronRight, + IconCloudDataConnection, + IconDatabase, + IconDeviceDesktopAnalytics, + IconFlag, + IconHeartRateMonitor, + IconInfoCircle, + IconSearch, + IconServer, + IconServerCog, +} from "@tabler/icons-react"; +import { + BrowserRouter, + Link, + NavLink, + Navigate, + Route, + Routes, +} from "react-router-dom"; +import { IconTable } from "@tabler/icons-react"; +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import QueryPage from "./pages/query/QueryPage"; +import AlertsPage from "./pages/AlertsPage"; +import RulesPage from "./pages/RulesPage"; +import TargetsPage from "./pages/targets/TargetsPage"; +import StatusPage from "./pages/StatusPage"; +import TSDBStatusPage from "./pages/TSDBStatusPage"; +import FlagsPage from "./pages/FlagsPage"; +import ConfigPage from "./pages/ConfigPage"; +import AgentPage from "./pages/AgentPage"; +import { Suspense } from "react"; +import ErrorBoundary from "./components/ErrorBoundary"; +import { ThemeSelector } from "./components/ThemeSelector"; +import { Notifications } from "@mantine/notifications"; +import { useSettings } from "./state/settingsSlice"; +import SettingsMenu from "./components/SettingsMenu"; +import ReadinessWrapper from "./components/ReadinessWrapper"; +import NotificationsProvider from "./components/NotificationsProvider"; +import NotificationsIcon from "./components/NotificationsIcon"; +import { QueryParamProvider } from "use-query-params"; +import { ReactRouter6Adapter } from "use-query-params/adapters/react-router-6"; +import ServiceDiscoveryPage from "./pages/service-discovery/ServiceDiscoveryPage"; +import AlertmanagerDiscoveryPage from "./pages/AlertmanagerDiscoveryPage"; +import { actionIconStyle, navIconStyle } from "./styles"; + +const queryClient = new QueryClient(); + +const mainNavPages = [ + { + title: "Query", + path: "/query", + icon: , + element: , + inAgentMode: false, + }, + { + title: "Alerts", + 
path: "/alerts", + icon: , + element: , + inAgentMode: false, + }, +]; + +const monitoringStatusPages = [ + { + title: "Target health", + path: "/targets", + icon: , + element: , + inAgentMode: true, + }, + { + title: "Rule health", + path: "/rules", + icon: , + element: , + inAgentMode: false, + }, + { + title: "Service discovery", + path: "/service-discovery", + icon: , + element: , + inAgentMode: true, + }, +]; + +const serverStatusPages = [ + { + title: "Runtime & build information", + path: "/status", + icon: , + element: , + inAgentMode: true, + }, + { + title: "TSDB status", + path: "/tsdb-status", + icon: , + element: , + inAgentMode: false, + }, + { + title: "Command-line flags", + path: "/flags", + icon: , + element: , + inAgentMode: true, + }, + { + title: "Configuration", + path: "/config", + icon: , + element: , + inAgentMode: true, + }, + { + title: "Alertmanager discovery", + path: "/alertmanager-discovery", + icon: , + element: , + inAgentMode: false, + }, +]; + +const allStatusPages = [...monitoringStatusPages, ...serverStatusPages]; + +const theme = createTheme({ + colors: { + "codebox-bg": [ + "#f5f5f5", + "#e7e7e7", + "#cdcdcd", + "#b2b2b2", + "#9a9a9a", + "#8b8b8b", + "#848484", + "#717171", + "#656565", + "#575757", + ], + }, +}); + +const navLinkXPadding = "md"; + +function App() { + const [opened, { toggle }] = useDisclosure(); + + const { agentMode, consolesLink, pathPrefix } = useSettings(); + + const navLinks = ( + <> + {consolesLink && ( + + )} + + {mainNavPages + .filter((p) => !agentMode || p.inAgentMode) + .map((p) => ( + + ))} + + + + {allStatusPages + .filter((p) => !agentMode || p.inAgentMode) + .map((p) => ( + + + + } + /> + ))} + + + + } + /> + + + + Monitoring status + {monitoringStatusPages + .filter((p) => !agentMode || p.inAgentMode) + .map((p) => ( + + {p.title} + + ))} + + + Server status + {serverStatusPages + .filter((p) => !agentMode || p.inAgentMode) + .map((p) => ( + + {p.title} + + ))} + + + + ); + + const navActionIcons = ( + <> + + + + + + + + ); + + return ( + + + + + + + + + + + + + + + + Prometheus{agentMode && " Agent"} + + + + {navLinks} + + + + {navActionIcons} + + + + + + + + {navLinks} + + {navActionIcons} + + + + + + + + {Array.from(Array(10), (_, i) => ( + + ))} + + } + > + + + } + /> + {agentMode ? 
( + + + + } + /> + ) : ( + <> + + + + } + /> + + + + } + /> + + )} + {allStatusPages.map((p) => ( + {p.element} + } + /> + ))} + + + + + + {/* */} + + + + + ); +} + +export default App; diff --git a/web/ui/mantine-ui/src/Badge.module.css b/web/ui/mantine-ui/src/Badge.module.css new file mode 100644 index 0000000000..afa27cea2e --- /dev/null +++ b/web/ui/mantine-ui/src/Badge.module.css @@ -0,0 +1,58 @@ +.statsBadge { + background-color: light-dark( + var(--mantine-color-gray-1), + var(--mantine-color-gray-8) + ); + color: light-dark(var(--mantine-color-gray-7), var(--mantine-color-gray-5)); +} + +.labelBadge { + background-color: light-dark( + var(--mantine-color-gray-1), + var(--mantine-color-gray-8) + ); + color: light-dark(var(--mantine-color-gray-7), var(--mantine-color-gray-5)); +} + +.healthOk { + background-color: light-dark( + var(--mantine-color-green-1), + var(--mantine-color-green-9) + ); + color: light-dark(var(--mantine-color-green-9), var(--mantine-color-green-1)); +} + +.healthErr { + background-color: light-dark( + var(--mantine-color-red-1), + darken(var(--mantine-color-red-9), 0.25) + ); + color: light-dark(var(--mantine-color-red-9), var(--mantine-color-red-1)); +} + +.healthWarn { + background-color: light-dark( + var(--mantine-color-yellow-1), + var(--mantine-color-yellow-9) + ); + color: light-dark( + var(--mantine-color-yellow-9), + var(--mantine-color-yellow-1) + ); +} + +.healthInfo { + background-color: light-dark( + var(--mantine-color-blue-1), + var(--mantine-color-blue-9) + ); + color: light-dark(var(--mantine-color-blue-9), var(--mantine-color-blue-1)); +} + +.healthUnknown { + background-color: light-dark( + var(--mantine-color-gray-2), + var(--mantine-color-gray-7) + ); + color: light-dark(var(--mantine-color-gray-7), var(--mantine-color-gray-4)); +} diff --git a/web/ui/mantine-ui/src/Panel.module.css b/web/ui/mantine-ui/src/Panel.module.css new file mode 100644 index 0000000000..0c90b70de0 --- /dev/null +++ b/web/ui/mantine-ui/src/Panel.module.css @@ -0,0 +1,19 @@ +.panelHealthOk { + border-left: 5px solid + light-dark(var(--mantine-color-green-3), var(--mantine-color-green-8)) !important; +} + +.panelHealthErr { + border-left: 5px solid + light-dark(var(--mantine-color-red-3), var(--mantine-color-red-9)) !important; +} + +.panelHealthWarn { + border-left: 5px solid + light-dark(var(--mantine-color-orange-3), var(--mantine-color-yellow-9)) !important; +} + +.panelHealthUnknown { + border-left: 5px solid + light-dark(var(--mantine-color-gray-3), var(--mantine-color-gray-6)) !important; +} diff --git a/web/ui/mantine-ui/src/api/api.ts b/web/ui/mantine-ui/src/api/api.ts new file mode 100644 index 0000000000..f1dd2b8c0c --- /dev/null +++ b/web/ui/mantine-ui/src/api/api.ts @@ -0,0 +1,131 @@ +import { QueryKey, useQuery, useSuspenseQuery } from "@tanstack/react-query"; +import { useSettings } from "../state/settingsSlice"; + +export const API_PATH = "api/v1"; + +export type SuccessAPIResponse = { + status: "success"; + data: T; + warnings?: string[]; + infos?: string[]; +}; + +export type ErrorAPIResponse = { + status: "error"; + errorType: string; + error: string; +}; + +export type APIResponse = SuccessAPIResponse | ErrorAPIResponse; + +const createQueryFn = + ({ + pathPrefix, + path, + params, + recordResponseTime, + }: { + pathPrefix: string; + path: string; + params?: Record; + recordResponseTime?: (time: number) => void; + }) => + async ({ signal }: { signal: AbortSignal }) => { + const queryString = params + ? 
`?${new URLSearchParams(params).toString()}` + : ""; + + try { + const startTime = Date.now(); + + const res = await fetch( + `${pathPrefix}/${API_PATH}${path}${queryString}`, + { + cache: "no-store", + credentials: "same-origin", + signal, + } + ); + + if ( + !res.ok && + !res.headers.get("content-type")?.startsWith("application/json") + ) { + // For example, Prometheus may send a 503 Service Unavailable response + // with a "text/plain" content type when it's starting up. But the API + // may also respond with a JSON error message and the same error code. + throw new Error(res.statusText); + } + + const apiRes = (await res.json()) as APIResponse; + + if (recordResponseTime) { + recordResponseTime(Date.now() - startTime); + } + + if (apiRes.status === "error") { + throw new Error( + apiRes.error !== undefined + ? apiRes.error + : 'missing "error" field in response JSON' + ); + } + + return apiRes as SuccessAPIResponse; + } catch (error) { + if (!(error instanceof Error)) { + throw new Error("Unknown error"); + } + + switch (error.name) { + case "TypeError": + throw new Error("Network error or unable to reach the server"); + case "SyntaxError": + throw new Error("Invalid JSON response"); + default: + throw error; + } + } + }; + +type QueryOptions = { + key?: QueryKey; + path: string; + params?: Record; + enabled?: boolean; + refetchInterval?: false | number; + recordResponseTime?: (time: number) => void; +}; + +export const useAPIQuery = ({ + key, + path, + params, + enabled, + recordResponseTime, + refetchInterval, +}: QueryOptions) => { + const { pathPrefix } = useSettings(); + + return useQuery>({ + queryKey: key !== undefined ? key : [path, params], + retry: false, + refetchOnWindowFocus: false, + refetchInterval: refetchInterval, + gcTime: 0, + enabled, + queryFn: createQueryFn({ pathPrefix, path, params, recordResponseTime }), + }); +}; + +export const useSuspenseAPIQuery = ({ key, path, params }: QueryOptions) => { + const { pathPrefix } = useSettings(); + + return useSuspenseQuery>({ + queryKey: key !== undefined ? key : [path, params], + retry: false, + refetchOnWindowFocus: false, + gcTime: 0, + queryFn: createQueryFn({ pathPrefix, path, params }), + }); +}; diff --git a/web/ui/mantine-ui/src/api/responseTypes/alertmanagers.ts b/web/ui/mantine-ui/src/api/responseTypes/alertmanagers.ts new file mode 100644 index 0000000000..2fcb8c23a3 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/alertmanagers.ts @@ -0,0 +1,10 @@ +export type AlertmanagerTarget = { + url: string; +}; + +// Result type for /api/v1/alertmanagers endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#alertmanagers +export type AlertmanagersResult = { + activeAlertmanagers: AlertmanagerTarget[]; + droppedAlertmanagers: AlertmanagerTarget[]; +}; diff --git a/web/ui/mantine-ui/src/api/responseTypes/config.ts b/web/ui/mantine-ui/src/api/responseTypes/config.ts new file mode 100644 index 0000000000..4f509f9e00 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/config.ts @@ -0,0 +1,5 @@ +// Result type for /api/v1/status/config endpoint. 
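As a usage sketch for the query hooks defined in api.ts above: the component below is illustrative only (its name and rendering are not part of this change) and assumes a surrounding `<Suspense>` boundary, since `useSuspenseAPIQuery` suspends until the request resolves.

```tsx
import { Suspense } from "react";
import { useSuspenseAPIQuery } from "../api/api";
import ConfigResult from "../api/responseTypes/config";

// Hypothetical consumer of the /api/v1/status/config endpoint.
const ConfigYaml = () => {
  // On success, `data` is a SuccessAPIResponse<ConfigResult>; API and network
  // errors are thrown, so they surface in the nearest error boundary.
  const { data } = useSuspenseAPIQuery<ConfigResult>({ path: "/status/config" });

  return <pre>{data.data.yaml}</pre>;
};

export const ConfigSection = () => (
  <Suspense fallback={<p>Loading...</p>}>
    <ConfigYaml />
  </Suspense>
);
```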
+// See: https://prometheus.io/docs/prometheus/latest/querying/api/#config +export default interface ConfigResult { + yaml: string; +} diff --git a/web/ui/mantine-ui/src/api/responseTypes/labelValues.ts b/web/ui/mantine-ui/src/api/responseTypes/labelValues.ts new file mode 100644 index 0000000000..3c69b79b5a --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/labelValues.ts @@ -0,0 +1,3 @@ +// Result type for /api/v1/label/<label_name>/values endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values +export type LabelValuesResult = string[]; diff --git a/web/ui/mantine-ui/src/api/responseTypes/metadata.ts b/web/ui/mantine-ui/src/api/responseTypes/metadata.ts new file mode 100644 index 0000000000..1bd168455c --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/metadata.ts @@ -0,0 +1,6 @@ +// Result type for /api/v1/metadata endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#querying-metric-metadata +export type MetadataResult = Record< + string, + { type: string; help: string; unit: string }[] +>; diff --git a/web/ui/mantine-ui/src/api/responseTypes/notifications.ts b/web/ui/mantine-ui/src/api/responseTypes/notifications.ts new file mode 100644 index 0000000000..d6ebf68d41 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/notifications.ts @@ -0,0 +1,8 @@ +export interface Notification { + text: string; + date: string; + active: boolean; + modified: boolean; +} + +export type NotificationsResult = Notification[]; diff --git a/web/ui/mantine-ui/src/api/responseTypes/query.ts b/web/ui/mantine-ui/src/api/responseTypes/query.ts new file mode 100644 index 0000000000..327b049b5f --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/query.ts @@ -0,0 +1,51 @@ +export interface Metric { + [key: string]: string; +} + +export interface Histogram { + count: string; + sum: string; + buckets?: [number, string, string, string][]; +} + +export interface InstantSample { + metric: Metric; + value?: SampleValue; + histogram?: SampleHistogram; +} + +export interface RangeSamples { + metric: Metric; + values?: SampleValue[]; + histograms?: SampleHistogram[]; +} + +export type SampleValue = [number, string]; +export type SampleHistogram = [number, Histogram]; + +// Result type for /api/v1/query endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries +export type InstantQueryResult = + | { + resultType: "vector"; + result: InstantSample[]; + } + | { + resultType: "matrix"; + result: RangeSamples[]; + } + | { + resultType: "scalar"; + result: SampleValue; + } + | { + resultType: "string"; + result: SampleValue; + }; + +// Result type for /api/v1/query_range endpoint.
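Because `InstantQueryResult` above is a discriminated union on `resultType`, TypeScript narrows the `result` field in each branch. A minimal sketch (the helper name is hypothetical):

```ts
import { InstantQueryResult } from "./query";

// How many individual results an instant query returned.
const resultCount = (res: InstantQueryResult): number => {
  switch (res.resultType) {
    case "vector":
    case "matrix":
      // Narrowed to InstantSample[] / RangeSamples[] here.
      return res.result.length;
    case "scalar":
    case "string":
      // A single [timestamp, value] sample.
      return 1;
  }
};
```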
+// See: https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries +export type RangeQueryResult = { + resultType: "matrix"; + result: RangeSamples[]; +}; diff --git a/web/ui/mantine-ui/src/api/responseTypes/rules.ts b/web/ui/mantine-ui/src/api/responseTypes/rules.ts new file mode 100644 index 0000000000..13535315c7 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/rules.ts @@ -0,0 +1,63 @@ +type RuleState = "pending" | "firing" | "inactive"; + +export interface Alert { + labels: Record; + state: RuleState; + value: string; + annotations: Record; + activeAt: string; + keepFiringSince: string; +} + +type CommonRuleFields = { + name: string; + query: string; + evaluationTime: string; + health: "ok" | "unknown" | "err"; + lastError?: string; + lastEvaluation: string; +}; + +export type AlertingRule = { + type: "alerting"; + // For alerting rules, the 'labels' field is always present, even when there are no labels. + labels: Record; + annotations: Record; + duration: number; + keepFiringFor: number; + state: RuleState; + alerts: Alert[]; +} & CommonRuleFields; + +type RecordingRule = { + type: "recording"; + // For recording rules, the 'labels' field is only present when there are labels. + labels?: Record; +} & CommonRuleFields; + +export type Rule = AlertingRule | RecordingRule; + +interface RuleGroup { + name: string; + file: string; + interval: string; + rules: Rule[]; + evaluationTime: string; + lastEvaluation: string; +} + +export type AlertingRuleGroup = Omit & { + rules: AlertingRule[]; +}; + +// Result type for /api/v1/alerts endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#alerts +export interface RulesResult { + groups: RuleGroup[]; +} + +// Same as RulesResult above, but can be used when the caller ensures via a +// "type=alert" query parameter that all rules are alerting rules. +export interface AlertingRulesResult { + groups: AlertingRuleGroup[]; +} diff --git a/web/ui/mantine-ui/src/api/responseTypes/scrapePools.ts b/web/ui/mantine-ui/src/api/responseTypes/scrapePools.ts new file mode 100644 index 0000000000..793ad71571 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/scrapePools.ts @@ -0,0 +1,2 @@ +// Result type for /api/v1/scrape_pools endpoint. +export type ScrapePoolsResult = { scrapePools: string[] }; diff --git a/web/ui/mantine-ui/src/api/responseTypes/series.ts b/web/ui/mantine-ui/src/api/responseTypes/series.ts new file mode 100644 index 0000000000..0c07def223 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/series.ts @@ -0,0 +1,6 @@ +// Result type for /api/v1/series endpoint. + +import { Metric } from "./query"; + +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers +export type SeriesResult = Metric[]; diff --git a/web/ui/mantine-ui/src/api/responseTypes/targets.ts b/web/ui/mantine-ui/src/api/responseTypes/targets.ts new file mode 100644 index 0000000000..cc0e891f05 --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/targets.ts @@ -0,0 +1,29 @@ +export interface Labels { + [key: string]: string; +} + +export type Target = { + discoveredLabels: Labels; + labels: Labels; + scrapePool: string; + scrapeUrl: string; + globalUrl: string; + lastError: string; + lastScrape: string; + lastScrapeDuration: number; + health: string; + scrapeInterval: string; + scrapeTimeout: string; +}; + +export interface DroppedTarget { + discoveredLabels: Labels; +} + +// Result type for /api/v1/targets endpoint. 
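The `Rule` union above discriminates on `type` in the same way, so a type-guard predicate is enough to reach alerting-only fields such as `alerts`. A minimal sketch (the helper is hypothetical, mirroring the counting logic used by the alerts page later in this diff):

```ts
import { AlertingRule, Rule } from "./rules";

// Total number of firing alerts across a mixed list of rules.
const totalFiring = (rules: Rule[]): number =>
  rules
    .filter((r): r is AlertingRule => r.type === "alerting")
    .reduce(
      (sum, r) => sum + r.alerts.filter((a) => a.state === "firing").length,
      0
    );
```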
+// See: https://prometheus.io/docs/prometheus/latest/querying/api/#targets +export type TargetsResult = { + activeTargets: Target[]; + droppedTargets: DroppedTarget[]; + droppedTargetCounts: Record; +}; diff --git a/web/ui/mantine-ui/src/api/responseTypes/tsdbStatus.ts b/web/ui/mantine-ui/src/api/responseTypes/tsdbStatus.ts new file mode 100644 index 0000000000..255249757f --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/tsdbStatus.ts @@ -0,0 +1,22 @@ +interface Stats { + name: string; + value: number; +} + +interface HeadStats { + numSeries: number; + numLabelPairs: number; + chunkCount: number; + minTime: number; + maxTime: number; +} + +// Result type for /api/v1/status/tsdb endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats +export interface TSDBStatusResult { + headStats: HeadStats; + seriesCountByMetricName: Stats[]; + labelValueCountByLabelName: Stats[]; + memoryInBytesByLabelName: Stats[]; + seriesCountByLabelValuePair: Stats[]; +} diff --git a/web/ui/mantine-ui/src/api/responseTypes/walreplay.ts b/web/ui/mantine-ui/src/api/responseTypes/walreplay.ts new file mode 100644 index 0000000000..b881ba750d --- /dev/null +++ b/web/ui/mantine-ui/src/api/responseTypes/walreplay.ts @@ -0,0 +1,7 @@ +// Result type for /api/v1/status/walreplay endpoint. +// See: https://prometheus.io/docs/prometheus/latest/querying/api/#wal-replay-stats +export interface WALReplayStatus { + min: number; + max: number; + current: number; +} diff --git a/web/ui/mantine-ui/src/codemirror/theme.ts b/web/ui/mantine-ui/src/codemirror/theme.ts new file mode 100644 index 0000000000..4649837593 --- /dev/null +++ b/web/ui/mantine-ui/src/codemirror/theme.ts @@ -0,0 +1,323 @@ +import { HighlightStyle } from "@codemirror/language"; +import { EditorView } from "@codemirror/view"; +import { tags } from "@lezer/highlight"; + +export const baseTheme = EditorView.theme({ + ".cm-content": { + paddingTop: "3px", + paddingBottom: "0px", + }, + "&.cm-editor": { + "&.cm-focused": { + outline: "none", + outline_fallback: "none", + }, + backgroundColor: "transparent", + }, + ".cm-scroller": { + overflow: "hidden", + fontFamily: '"DejaVu Sans Mono", monospace', + }, + ".cm-placeholder": { + fontFamily: + '-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Helvetica Neue",Arial,"Noto Sans","Liberation Sans",sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji"', + }, + + ".cm-matchingBracket": { + fontWeight: "bold", + outline: "1px dashed transparent", + }, + ".cm-nonmatchingBracket": { borderColor: "red" }, + + ".cm-tooltip.cm-tooltip-autocomplete": { + "& > ul": { + maxHeight: "350px", + fontFamily: '"DejaVu Sans Mono", monospace', + maxWidth: "unset", + }, + "& > ul > li": { + padding: "2px 1em 2px 3px", + }, + minWidth: "30%", + }, + + ".cm-completionDetail": { + float: "right", + color: "#999", + }, + + ".cm-tooltip.cm-completionInfo": { + padding: "10px", + fontFamily: + "'Open Sans', 'Lucida Sans Unicode', 'Lucida Grande', sans-serif;", + border: "none", + minWidth: "250px", + maxWidth: "min-content", + }, + + ".cm-completionInfo.cm-completionInfo-right": { + "&:before": { + content: "' '", + height: "0", + position: "absolute", + width: "0", + left: "-20px", + borderWidth: "10px", + borderStyle: "solid", + borderColor: "transparent", + }, + marginTop: "-11px", + marginLeft: "12px", + }, + ".cm-completionInfo.cm-completionInfo-left": { + "&:before": { + content: "' '", + height: "0", + position: "absolute", + width: "0", + right: "-20px", + 
borderWidth: "10px", + borderStyle: "solid", + borderColor: "transparent", + }, + marginTop: "-11px", + marginRight: "12px", + }, + ".cm-completionInfo.cm-completionInfo-right-narrow": { + "&:before": { + content: "' '", + height: "0", + position: "absolute", + width: "0", + top: "-20px", + borderWidth: "10px", + borderStyle: "solid", + borderColor: "transparent", + }, + marginTop: "10px", + marginLeft: "150px", + }, + ".cm-completionMatchedText": { + textDecoration: "none", + fontWeight: "bold", + }, + + ".cm-selectionMatch": { + backgroundColor: "#e6f3ff", + }, + + ".cm-diagnostic": { + "&.cm-diagnostic-error": { + borderLeft: "3px solid #e65013", + }, + }, + + ".cm-completionIcon": { + boxSizing: "content-box", + fontSize: "16px", + lineHeight: "1", + marginRight: "10px", + verticalAlign: "top", + "&:after": { content: "'\\ea88'" }, + fontFamily: "codicon", + paddingRight: "0", + opacity: "1", + }, + + ".cm-completionIcon-function, .cm-completionIcon-method": { + "&:after": { content: "'\\ea8c'" }, + }, + ".cm-completionIcon-class": { + "&:after": { content: "'○'" }, + }, + ".cm-completionIcon-interface": { + "&:after": { content: "'◌'" }, + }, + ".cm-completionIcon-variable": { + "&:after": { content: "'𝑥'" }, + }, + ".cm-completionIcon-constant": { + "&:after": { content: "'\\eb5f'" }, + }, + ".cm-completionIcon-type": { + "&:after": { content: "'𝑡'" }, + }, + ".cm-completionIcon-enum": { + "&:after": { content: "'∪'" }, + }, + ".cm-completionIcon-property": { + "&:after": { content: "'□'" }, + }, + ".cm-completionIcon-keyword": { + "&:after": { content: "'\\eb62'" }, + }, + ".cm-completionIcon-namespace": { + "&:after": { content: "'▢'" }, + }, + ".cm-completionIcon-text": { + "&:after": { content: "'\\ea95'" }, + color: "#ee9d28", + }, +}); + +export const lightTheme = EditorView.theme( + { + ".cm-tooltip": { + backgroundColor: "#f8f8f8", + borderColor: "rgba(52, 79, 113, 0.2)", + }, + + ".cm-tooltip.cm-tooltip-autocomplete": { + "& li:hover": { + backgroundColor: "#ddd", + }, + "& > ul > li[aria-selected]": { + backgroundColor: "#d6ebff", + color: "unset", + }, + }, + + ".cm-tooltip.cm-completionInfo": { + backgroundColor: "#d6ebff", + }, + + ".cm-tooltip > .cm-completionInfo.cm-completionInfo-right": { + "&:before": { + borderRightColor: "#d6ebff", + }, + }, + ".cm-tooltip > .cm-completionInfo.cm-completionInfo-right-narrow": { + "&:before": { + borderBottomColor: "#d6ebff", + }, + }, + ".cm-tooltip > .cm-completionInfo.cm-completionInfo-left": { + "&:before": { + borderLeftColor: "#d6ebff", + }, + }, + + ".cm-line": { + "&::selection": { + backgroundColor: "#add6ff", + }, + "& > span::selection": { + backgroundColor: "#add6ff", + }, + }, + + ".cm-matchingBracket": { + color: "#000", + backgroundColor: "#dedede", + }, + + ".cm-completionMatchedText": { + color: "#0066bf", + }, + + ".cm-completionIcon": { + color: "#007acc", + }, + + ".cm-completionIcon-constant": { + color: "#007acc", + }, + + ".cm-completionIcon-function, .cm-completionIcon-method": { + color: "#652d90", + }, + + ".cm-completionIcon-keyword": { + color: "#616161", + }, + }, + { dark: false } +); + +export const darkTheme = EditorView.theme( + { + ".cm-content": { + caretColor: "#fff", + }, + + ".cm-tooltip.cm-completionInfo": { + backgroundColor: "#333338", + }, + + ".cm-tooltip > .cm-completionInfo.cm-completionInfo-right": { + "&:before": { + borderRightColor: "#333338", + }, + }, + ".cm-tooltip > .cm-completionInfo.cm-completionInfo-right-narrow": { + "&:before": { + borderBottomColor: "#333338", + }, + }, + 
".cm-tooltip > .cm-completionInfo.cm-completionInfo-left": { + "&:before": { + borderLeftColor: "#333338", + }, + }, + + ".cm-line": { + "&::selection": { + backgroundColor: "#767676", + }, + "& > span::selection": { + backgroundColor: "#767676", + }, + }, + + ".cm-matchingBracket, &.cm-focused .cm-matchingBracket": { + backgroundColor: "#616161", + }, + + ".cm-completionMatchedText": { + color: "#7dd3fc", + }, + + ".cm-completionIcon, .cm-completionIcon-constant": { + color: "#7dd3fc", + }, + + ".cm-completionIcon-function, .cm-completionIcon-method": { + color: "#d8b4fe", + }, + + ".cm-completionIcon-keyword": { + color: "#cbd5e1 !important", + }, + }, + { dark: true } +); + +export const promqlHighlighter = HighlightStyle.define([ + { tag: tags.number, color: "#09885a" }, + { tag: tags.string, color: "#a31515" }, + { tag: tags.keyword, color: "#008080" }, + { tag: tags.function(tags.variableName), color: "#008080" }, + { tag: tags.labelName, color: "#800000" }, + { tag: tags.operator }, + { tag: tags.modifier, color: "#008080" }, + { tag: tags.paren }, + { tag: tags.squareBracket }, + { tag: tags.brace }, + { tag: tags.invalid, color: "red" }, + { tag: tags.comment, color: "#888", fontStyle: "italic" }, +]); + +export const darkPromqlHighlighter = HighlightStyle.define([ + { tag: tags.number, color: "#22c55e" }, + { tag: tags.string, color: "#fca5a5" }, + { tag: tags.keyword, color: "#14bfad" }, + { tag: tags.function(tags.variableName), color: "#14bfad" }, + { tag: tags.labelName, color: "#ff8585" }, + { tag: tags.operator }, + { tag: tags.modifier, color: "#14bfad" }, + { tag: tags.paren }, + { tag: tags.squareBracket }, + { tag: tags.brace }, + { tag: tags.invalid, color: "#ff3d3d" }, + { tag: tags.comment, color: "#9ca3af", fontStyle: "italic" }, +]); diff --git a/web/ui/mantine-ui/src/components/CustomInfiniteScroll.tsx b/web/ui/mantine-ui/src/components/CustomInfiniteScroll.tsx new file mode 100644 index 0000000000..f926c299c5 --- /dev/null +++ b/web/ui/mantine-ui/src/components/CustomInfiniteScroll.tsx @@ -0,0 +1,54 @@ +import { ComponentType, useEffect, useState } from "react"; +import InfiniteScroll from "react-infinite-scroll-component"; + +const initialNumberOfItemsDisplayed = 50; + +export interface InfiniteScrollItemsProps { + items: T[]; +} + +interface CustomInfiniteScrollProps { + allItems: T[]; + child: ComponentType>; +} + +const CustomInfiniteScroll = ({ + allItems, + child, +}: CustomInfiniteScrollProps) => { + const [items, setItems] = useState(allItems.slice(0, 50)); + const [index, setIndex] = useState(initialNumberOfItemsDisplayed); + const [hasMore, setHasMore] = useState( + allItems.length > initialNumberOfItemsDisplayed + ); + const Child = child; + + useEffect(() => { + setItems(allItems.slice(0, initialNumberOfItemsDisplayed)); + setHasMore(allItems.length > initialNumberOfItemsDisplayed); + }, [allItems]); + + const fetchMoreData = () => { + if (items.length === allItems.length) { + setHasMore(false); + } else { + const newIndex = index + initialNumberOfItemsDisplayed; + setIndex(newIndex); + setItems(allItems.slice(0, newIndex)); + } + }; + + return ( + loading...} + dataLength={items.length} + height={items.length > 25 ? 
"75vh" : ""} + > + + + ); +}; + +export default CustomInfiniteScroll; diff --git a/web/ui/mantine-ui/src/components/EndpointLink.tsx b/web/ui/mantine-ui/src/components/EndpointLink.tsx new file mode 100644 index 0000000000..c9b6a8989c --- /dev/null +++ b/web/ui/mantine-ui/src/components/EndpointLink.tsx @@ -0,0 +1,58 @@ +import { Anchor, Badge, Group, Stack } from "@mantine/core"; +import { FC } from "react"; + +export interface EndpointLinkProps { + endpoint: string; + globalUrl: string; +} + +const EndpointLink: FC = ({ endpoint, globalUrl }) => { + let url: URL; + let search = ""; + let invalidURL = false; + try { + url = new URL(endpoint); + } catch (err: unknown) { + // In cases of IPv6 addresses with a Zone ID, URL may not be parseable. + // See https://github.com/prometheus/prometheus/issues/9760 + // In this case, we attempt to prepare a synthetic URL with the + // same query parameters, for rendering purposes. + invalidURL = true; + if (endpoint.indexOf("?") > -1) { + search = endpoint.substring(endpoint.indexOf("?")); + } + url = new URL("http://0.0.0.0" + search); + } + + const { host, pathname, protocol, searchParams }: URL = url; + const params = Array.from(searchParams.entries()); + const displayLink = invalidURL + ? endpoint.replace(search, "") + : `${protocol}//${host}${pathname}`; + return ( + + + {displayLink} + + {params.length > 0 && ( + + {params.map(([labelName, labelValue]: [string, string]) => { + return ( + + {`${labelName}="${labelValue}"`} + + ); + })} + + )} + + ); +}; + +export default EndpointLink; diff --git a/web/ui/mantine-ui/src/components/ErrorBoundary.tsx b/web/ui/mantine-ui/src/components/ErrorBoundary.tsx new file mode 100644 index 0000000000..6e7e4c0cb5 --- /dev/null +++ b/web/ui/mantine-ui/src/components/ErrorBoundary.tsx @@ -0,0 +1,58 @@ +import { Alert } from "@mantine/core"; +import { IconAlertTriangle } from "@tabler/icons-react"; +import { Component, ErrorInfo, ReactNode } from "react"; +import { useLocation } from "react-router-dom"; + +interface Props { + children?: ReactNode; + title?: string; +} + +interface State { + error: Error | null; +} + +class ErrorBoundary extends Component { + public state: State = { + error: null, + }; + + public static getDerivedStateFromError(error: Error): State { + // Update state so the next render will show the fallback UI. 
+ return { error }; + } + + public componentDidCatch(error: Error, errorInfo: ErrorInfo) { + console.error("Uncaught error:", error, errorInfo); + } + + public render() { + if (this.state.error !== null) { + return ( + } + maw={500} + mx="auto" + mt="lg" + > + Error: {this.state.error.message} + + ); + } + + return this.props.children; + } +} + +const ResettingErrorBoundary = (props: Props) => { + const location = useLocation(); + return ( + + {props.children} + + ); +}; + +export default ResettingErrorBoundary; diff --git a/web/ui/mantine-ui/src/components/InfoPageCard.tsx b/web/ui/mantine-ui/src/components/InfoPageCard.tsx new file mode 100644 index 0000000000..f6797133c4 --- /dev/null +++ b/web/ui/mantine-ui/src/components/InfoPageCard.tsx @@ -0,0 +1,32 @@ +import { Card, Group } from "@mantine/core"; +import { IconProps } from "@tabler/icons-react"; +import { FC, ReactNode } from "react"; +import { infoPageCardTitleIconStyle } from "../styles"; + +const InfoPageCard: FC<{ + children: ReactNode; + title?: string; + icon?: React.ComponentType; +}> = ({ children, title, icon: Icon }) => { + return ( + + {title && ( + + {Icon && } + {title} + + )} + {children} + + ); +}; + +export default InfoPageCard; diff --git a/web/ui/mantine-ui/src/components/InfoPageStack.tsx b/web/ui/mantine-ui/src/components/InfoPageStack.tsx new file mode 100644 index 0000000000..eacc4f8318 --- /dev/null +++ b/web/ui/mantine-ui/src/components/InfoPageStack.tsx @@ -0,0 +1,12 @@ +import { Stack } from "@mantine/core"; +import { FC, ReactNode } from "react"; + +const InfoPageStack: FC<{ children: ReactNode }> = ({ children }) => { + return ( + + {children} + + ); +}; + +export default InfoPageStack; diff --git a/web/ui/mantine-ui/src/components/LabelBadges.tsx b/web/ui/mantine-ui/src/components/LabelBadges.tsx new file mode 100644 index 0000000000..8aa7135564 --- /dev/null +++ b/web/ui/mantine-ui/src/components/LabelBadges.tsx @@ -0,0 +1,39 @@ +import { Badge, BadgeVariant, Group, MantineColor, Stack } from "@mantine/core"; +import { FC } from "react"; +import { escapeString } from "../lib/escapeString"; +import badgeClasses from "../Badge.module.css"; +import { maybeQuoteLabelName } from "../promql/utils"; + +export interface LabelBadgesProps { + labels: Record; + variant?: BadgeVariant; + color?: MantineColor; + wrapper?: typeof Group | typeof Stack; +} + +export const LabelBadges: FC = ({ + labels, + variant, + color, + wrapper: Wrapper = Group, +}) => ( + + {Object.entries(labels).map(([k, v]) => { + return ( + + {maybeQuoteLabelName(k)}="{escapeString(v)}" + + ); + })} + +); diff --git a/web/ui/mantine-ui/src/components/NotificationsIcon.tsx b/web/ui/mantine-ui/src/components/NotificationsIcon.tsx new file mode 100644 index 0000000000..6d5afa1901 --- /dev/null +++ b/web/ui/mantine-ui/src/components/NotificationsIcon.tsx @@ -0,0 +1,106 @@ +import { + ActionIcon, + Indicator, + Popover, + Card, + Text, + Stack, + ScrollArea, + Group, + rem, +} from "@mantine/core"; +import { + IconAlertTriangle, + IconNetworkOff, + IconMessageExclamation, +} from "@tabler/icons-react"; +import { useNotifications } from "../state/useNotifications"; +import { actionIconStyle } from "../styles"; +import { useSettings } from "../state/settingsSlice"; +import { formatTimestamp } from "../lib/formatTime"; + +const NotificationsIcon = () => { + const { notifications, isConnectionError } = useNotifications(); + const { useLocalTime } = useSettings(); + + return notifications.length === 0 && !isConnectionError ? 
null : ( + + + + + + + + + + + + Notifications + + + {isConnectionError ? ( + + + + + + Real-time notifications interrupted. + + + Please refresh the page or check your connection. + + + + + ) : notifications.length === 0 ? ( + + No notifications + + ) : ( + notifications.map((notification, index) => ( + + + + + + {notification.text} + + + {formatTimestamp( + new Date(notification.date).valueOf() / 1000, + useLocalTime + )} + + + + + )) + )} + + + + + + ); +}; + +export default NotificationsIcon; diff --git a/web/ui/mantine-ui/src/components/NotificationsProvider.tsx b/web/ui/mantine-ui/src/components/NotificationsProvider.tsx new file mode 100644 index 0000000000..a331e524b0 --- /dev/null +++ b/web/ui/mantine-ui/src/components/NotificationsProvider.tsx @@ -0,0 +1,79 @@ +import React, { useEffect, useState } from 'react'; +import { useSettings } from '../state/settingsSlice'; +import { NotificationsContext } from '../state/useNotifications'; +import { Notification, NotificationsResult } from "../api/responseTypes/notifications"; +import { useAPIQuery } from '../api/api'; +import { fetchEventSource } from '@microsoft/fetch-event-source'; + +export const NotificationsProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => { + const { pathPrefix } = useSettings(); + const [notifications, setNotifications] = useState([]); + const [isConnectionError, setIsConnectionError] = useState(false); + const [shouldFetchFromAPI, setShouldFetchFromAPI] = useState(false); + + const { data, isError } = useAPIQuery({ + path: '/notifications', + enabled: shouldFetchFromAPI, + refetchInterval: 10000, + }); + + useEffect(() => { + if (data && data.data) { + setNotifications(data.data); + } + setIsConnectionError(isError); + }, [data, isError]); + + useEffect(() => { + const controller = new AbortController(); + fetchEventSource(`${pathPrefix}/api/v1/notifications/live`, { + signal: controller.signal, + async onopen(response) { + if (response.ok) { + if (response.status === 200) { + setNotifications([]); + setIsConnectionError(false); + } else if (response.status === 204) { + controller.abort(); + setShouldFetchFromAPI(true); + } + } else { + setIsConnectionError(true); + throw new Error(`Unexpected response: ${response.status} ${response.statusText}`); + } + }, + onmessage(event) { + const notification: Notification = JSON.parse(event.data); + + setNotifications((prev: Notification[]) => { + const updatedNotifications = [...prev.filter((n: Notification) => n.text !== notification.text)]; + + if (notification.active) { + updatedNotifications.push(notification); + } + + return updatedNotifications; + }); + }, + onclose() { + throw new Error("Server closed the connection"); + }, + onerror() { + setIsConnectionError(true); + return 5000; + }, + }); + + return () => { + controller.abort(); + }; + }, [pathPrefix]); + + return ( + + {children} + + ); +}; + +export default NotificationsProvider; diff --git a/web/ui/mantine-ui/src/components/ReadinessWrapper.tsx b/web/ui/mantine-ui/src/components/ReadinessWrapper.tsx new file mode 100644 index 0000000000..2e471de5e3 --- /dev/null +++ b/web/ui/mantine-ui/src/components/ReadinessWrapper.tsx @@ -0,0 +1,122 @@ +import { FC, PropsWithChildren, useEffect, useState } from "react"; +import { IconAlertTriangle } from "@tabler/icons-react"; +import { useAppDispatch } from "../state/hooks"; +import { updateSettings, useSettings } from "../state/settingsSlice"; +import { useSuspenseAPIQuery } from "../api/api"; +import { WALReplayStatus } from 
"../api/responseTypes/walreplay"; +import { Progress, Alert, Stack } from "@mantine/core"; +import { useSuspenseQuery } from "@tanstack/react-query"; + +const STATUS_STARTING = "is starting up..."; +const STATUS_STOPPING = "is shutting down..."; +const STATUS_LOADING = "is not ready..."; + +const ReadinessLoader: FC = () => { + const { pathPrefix, agentMode } = useSettings(); + const dispatch = useAppDispatch(); + + // Query key is incremented every second to retrigger the status fetching. + const [queryKey, setQueryKey] = useState(0); + const [statusMessage, setStatusMessage] = useState(""); + + // Query readiness status. + const { data: ready } = useSuspenseQuery({ + queryKey: ["ready", queryKey], + retry: false, + refetchOnWindowFocus: false, + gcTime: 0, + queryFn: async ({ signal }: { signal: AbortSignal }) => { + try { + const res = await fetch(`${pathPrefix}/-/ready`, { + cache: "no-store", + credentials: "same-origin", + signal, + }); + switch (res.status) { + case 200: + setStatusMessage(""); // Clear any status message when ready. + return true; + case 503: + // Check the custom header `X-Prometheus-Stopping` for stopping information. + if (res.headers.get("X-Prometheus-Stopping") === "true") { + setStatusMessage(STATUS_STOPPING); + } else { + setStatusMessage(STATUS_STARTING); + } + + return false; + default: + throw new Error(res.statusText); + } + } catch (error) { + throw new Error("Unexpected error while fetching ready status"); + } + }, + }); + + // Only call WAL replay status API if the service is starting up. + const shouldQueryWALReplay = statusMessage === STATUS_STARTING; + + const { data: walData, isSuccess: walSuccess } = + useSuspenseAPIQuery({ + path: "/status/walreplay", + key: ["walreplay", queryKey], + enabled: shouldQueryWALReplay, // Only enabled when service is starting up. 
+ }); + + useEffect(() => { + if (ready) { + dispatch(updateSettings({ ready: ready })); + } + }, [ready, dispatch]); + + useEffect(() => { + const interval = setInterval(() => setQueryKey((v) => v + 1), 1000); + return () => clearInterval(interval); + }, []); + + return ( + } + maw={500} + mx="auto" + mt="lg" + > + {shouldQueryWALReplay && walSuccess && walData && ( + + + Replaying WAL ({walData.data.current}/{walData.data.max}) + + + + )} + + ); +}; + +export const ReadinessWrapper: FC = ({ children }) => { + const { ready } = useSettings(); + + if (ready) { + return <>{children}; + } + + return ; +}; + +export default ReadinessWrapper; diff --git a/web/ui/mantine-ui/src/components/RuleDefinition.module.css b/web/ui/mantine-ui/src/components/RuleDefinition.module.css new file mode 100644 index 0000000000..15a8d4a420 --- /dev/null +++ b/web/ui/mantine-ui/src/components/RuleDefinition.module.css @@ -0,0 +1,15 @@ +.codebox { + background-color: light-dark( + var(--mantine-color-gray-1), + var(--mantine-color-gray-9) + ); +} + +.queryButton { + opacity: 0; + transition: opacity 0.1s ease-in-out; +} + +.codebox:hover .queryButton { + opacity: 1; +} diff --git a/web/ui/mantine-ui/src/components/RuleDefinition.tsx b/web/ui/mantine-ui/src/components/RuleDefinition.tsx new file mode 100644 index 0000000000..f3c4c88485 --- /dev/null +++ b/web/ui/mantine-ui/src/components/RuleDefinition.tsx @@ -0,0 +1,116 @@ +import { + ActionIcon, + Badge, + Box, + Card, + Group, + Table, + Tooltip, + useComputedColorScheme, +} from "@mantine/core"; +import { IconClockPause, IconClockPlay, IconSearch } from "@tabler/icons-react"; +import { FC } from "react"; +import { formatPrometheusDuration } from "../lib/formatTime"; +import codeboxClasses from "./RuleDefinition.module.css"; +import { Rule } from "../api/responseTypes/rules"; +import CodeMirror, { EditorView } from "@uiw/react-codemirror"; +import { syntaxHighlighting } from "@codemirror/language"; +import { + baseTheme, + darkPromqlHighlighter, + lightTheme, + promqlHighlighter, +} from "../codemirror/theme"; +import { PromQLExtension } from "@prometheus-io/codemirror-promql"; +import { LabelBadges } from "./LabelBadges"; +import { useSettings } from "../state/settingsSlice"; +import { actionIconStyle, badgeIconStyle } from "../styles"; + +const promqlExtension = new PromQLExtension(); + +const RuleDefinition: FC<{ rule: Rule }> = ({ rule }) => { + const theme = useComputedColorScheme(); + const { pathPrefix } = useSettings(); + + return ( + <> + + + + + { + window.open( + `${pathPrefix}/query?g0.expr=${encodeURIComponent(rule.query)}&g0.tab=1`, + "_blank" + ); + }} + className={codeboxClasses.queryButton} + > + + + + + {rule.type === "alerting" && ( + + {rule.duration && ( + } + > + for: {formatPrometheusDuration(rule.duration * 1000)} + + )} + {rule.keepFiringFor && ( + } + > + keep_firing_for: {formatPrometheusDuration(rule.keepFiringFor * 1000)} + + )} + + )} + {rule.labels && Object.keys(rule.labels).length > 0 && ( + + + + )} + {rule.type === "alerting" && Object.keys(rule.annotations).length > 0 && ( + + + {Object.entries(rule.annotations).map(([k, v]) => ( + + + {k} + + {v} + + ))} + +
+ )} + + ); +}; + +export default RuleDefinition; diff --git a/web/ui/mantine-ui/src/components/SettingsMenu.tsx b/web/ui/mantine-ui/src/components/SettingsMenu.tsx new file mode 100644 index 0000000000..ada252d57c --- /dev/null +++ b/web/ui/mantine-ui/src/components/SettingsMenu.tsx @@ -0,0 +1,113 @@ +import { Popover, ActionIcon, Fieldset, Checkbox, Stack } from "@mantine/core"; +import { IconSettings } from "@tabler/icons-react"; +import { FC } from "react"; +import { useAppDispatch } from "../state/hooks"; +import { updateSettings, useSettings } from "../state/settingsSlice"; +import { actionIconStyle } from "../styles"; + +const SettingsMenu: FC = () => { + const { + useLocalTime, + enableQueryHistory, + enableAutocomplete, + enableSyntaxHighlighting, + enableLinter, + showAnnotations, + } = useSettings(); + const dispatch = useAppDispatch(); + + return ( + + + + + + + + +
+ + dispatch( + updateSettings({ useLocalTime: event.currentTarget.checked }) + ) + } + /> +
+ +
+ + + dispatch( + updateSettings({ + enableQueryHistory: event.currentTarget.checked, + }) + ) + } + /> + + dispatch( + updateSettings({ + enableAutocomplete: event.currentTarget.checked, + }) + ) + } + /> + + dispatch( + updateSettings({ + enableSyntaxHighlighting: event.currentTarget.checked, + }) + ) + } + /> + + dispatch( + updateSettings({ + enableLinter: event.currentTarget.checked, + }) + ) + } + /> + +
+ +
+ + dispatch( + updateSettings({ + showAnnotations: event.currentTarget.checked, + }) + ) + } + /> +
+
+
+
+ ); +}; + +export default SettingsMenu; diff --git a/web/ui/mantine-ui/src/components/StateMultiSelect.tsx b/web/ui/mantine-ui/src/components/StateMultiSelect.tsx new file mode 100644 index 0000000000..4621c5b9e4 --- /dev/null +++ b/web/ui/mantine-ui/src/components/StateMultiSelect.tsx @@ -0,0 +1,143 @@ +import { FC } from "react"; +import { + CheckIcon, + Combobox, + ComboboxChevron, + ComboboxClearButton, + Group, + Pill, + PillsInput, + useCombobox, +} from "@mantine/core"; +import { IconHeartRateMonitor } from "@tabler/icons-react"; +import { inputIconStyle } from "../styles"; + +interface StatePillProps extends React.ComponentPropsWithoutRef<"div"> { + value: string; + onRemove?: () => void; +} + +export function StatePill({ value, onRemove, ...others }: StatePillProps) { + return ( + + {value} + + ); +} + +interface StateMultiSelectProps { + options: string[]; + optionClass: (option: string) => string; + optionCount?: (option: string) => number; + placeholder: string; + values: string[]; + onChange: (values: string[]) => void; +} + +export const StateMultiSelect: FC = ({ + options, + optionClass, + optionCount, + placeholder, + values, + onChange, +}) => { + const combobox = useCombobox({ + onDropdownClose: () => combobox.resetSelectedOption(), + onDropdownOpen: () => combobox.updateSelectedOptionIndex("active"), + }); + + const handleValueSelect = (val: string) => + onChange( + values.includes(val) ? values.filter((v) => v !== val) : [...values, val] + ); + + const handleValueRemove = (val: string) => + onChange(values.filter((v) => v !== val)); + + const renderedValues = values.map((item) => ( + handleValueRemove(item)} + key={item} + /> + )); + + return ( + + + combobox.toggleDropdown()} + miw={200} + leftSection={} + rightSection={ + values.length > 0 ? ( + onChange([])} /> + ) : ( + + ) + } + > + + {renderedValues.length > 0 ? ( + renderedValues + ) : ( + + )} + + + combobox.closeDropdown()} + onKeyDown={(event) => { + if (event.key === "Backspace") { + event.preventDefault(); + handleValueRemove(values[values.length - 1]); + } + }} + /> + + + + + + + + {options.map((value) => { + return ( + + + {values.includes(value) ? 
( + + ) : null} + + + + ); + })} + + + + ); +}; diff --git a/web/ui/mantine-ui/src/components/ThemeSelector.tsx b/web/ui/mantine-ui/src/components/ThemeSelector.tsx new file mode 100644 index 0000000000..a7f5d1dcaa --- /dev/null +++ b/web/ui/mantine-ui/src/components/ThemeSelector.tsx @@ -0,0 +1,64 @@ +import { + useMantineColorScheme, + SegmentedControl, + rem, + MantineColorScheme, + Tooltip, +} from "@mantine/core"; +import { + IconMoonFilled, + IconSunFilled, + IconUserFilled, +} from "@tabler/icons-react"; +import { FC } from "react"; + +export const ThemeSelector: FC = () => { + const { colorScheme, setColorScheme } = useMantineColorScheme(); + const iconProps = { + style: { width: rem(20), height: rem(20), display: "block" }, + stroke: 1.5, + }; + + return ( + setColorScheme(v as MantineColorScheme)} + data={[ + { + value: "light", + label: ( + + + + ), + }, + { + value: "dark", + label: ( + + + + ), + }, + { + value: "auto", + label: ( + + + + ), + }, + ]} + /> + ); +}; diff --git a/web/ui/mantine-ui/src/fonts/codicon.ttf b/web/ui/mantine-ui/src/fonts/codicon.ttf new file mode 100644 index 0000000000..82acc8995b Binary files /dev/null and b/web/ui/mantine-ui/src/fonts/codicon.ttf differ diff --git a/web/ui/mantine-ui/src/images/prometheus-logo.svg b/web/ui/mantine-ui/src/images/prometheus-logo.svg new file mode 100644 index 0000000000..b914095ecf --- /dev/null +++ b/web/ui/mantine-ui/src/images/prometheus-logo.svg @@ -0,0 +1,19 @@ + + + + + + + + + + diff --git a/web/ui/mantine-ui/src/lib/escapeString.ts b/web/ui/mantine-ui/src/lib/escapeString.ts new file mode 100644 index 0000000000..5fc1955de8 --- /dev/null +++ b/web/ui/mantine-ui/src/lib/escapeString.ts @@ -0,0 +1,4 @@ +// Used for escaping escape sequences and double quotes in double-quoted strings. +export const escapeString = (str: string) => { + return str.replace(/([\\"])/g, "\\$1"); +}; diff --git a/web/ui/mantine-ui/src/lib/formatFloatValue.ts b/web/ui/mantine-ui/src/lib/formatFloatValue.ts new file mode 100644 index 0000000000..6b45e626f3 --- /dev/null +++ b/web/ui/mantine-ui/src/lib/formatFloatValue.ts @@ -0,0 +1,21 @@ +export const parsePrometheusFloat = (str: string): number => { + switch (str) { + case "+Inf": + return Infinity; + case "-Inf": + return -Infinity; + default: + return parseFloat(str); + } +}; + +export const formatPrometheusFloat = (num: number): string => { + switch (num) { + case Infinity: + return "+Inf"; + case -Infinity: + return "-Inf"; + default: + return num.toString(); + } +}; diff --git a/web/ui/mantine-ui/src/lib/formatSeries.ts b/web/ui/mantine-ui/src/lib/formatSeries.ts new file mode 100644 index 0000000000..0076590706 --- /dev/null +++ b/web/ui/mantine-ui/src/lib/formatSeries.ts @@ -0,0 +1,24 @@ +import { + maybeQuoteLabelName, + metricContainsExtendedCharset, +} from "../promql/utils"; +import { escapeString } from "./escapeString"; + +// TODO: Maybe replace this with the new PromLens-derived serialization code in src/promql/serialize.ts? 
+export const formatSeries = (labels: { [key: string]: string }): string => { + if (labels === null) { + return "scalar"; + } + + if (metricContainsExtendedCharset(labels.__name__ || "")) { + return `{"${escapeString(labels.__name__)}",${Object.entries(labels) + .filter(([k]) => k !== "__name__") + .map(([k, v]) => `${maybeQuoteLabelName(k)}="${escapeString(v)}"`) + .join(", ")}}`; + } + + return `${labels.__name__ || ""}{${Object.entries(labels) + .filter(([k]) => k !== "__name__") + .map(([k, v]) => `${maybeQuoteLabelName(k)}="${escapeString(v)}"`) + .join(", ")}}`; +}; diff --git a/web/ui/mantine-ui/src/lib/formatTime.test.ts b/web/ui/mantine-ui/src/lib/formatTime.test.ts new file mode 100644 index 0000000000..597d6909b9 --- /dev/null +++ b/web/ui/mantine-ui/src/lib/formatTime.test.ts @@ -0,0 +1,110 @@ +import { humanizeDuration, formatPrometheusDuration } from "./formatTime"; + +describe("formatPrometheusDuration", () => { + test('returns "0s" for 0 milliseconds', () => { + expect(formatPrometheusDuration(0)).toBe("0s"); + }); + + test("formats milliseconds correctly", () => { + expect(formatPrometheusDuration(1)).toBe("1ms"); + expect(formatPrometheusDuration(999)).toBe("999ms"); + }); + + test("formats seconds correctly", () => { + expect(formatPrometheusDuration(1000)).toBe("1s"); + expect(formatPrometheusDuration(1500)).toBe("1s500ms"); + expect(formatPrometheusDuration(59999)).toBe("59s999ms"); + }); + + test("formats minutes correctly", () => { + expect(formatPrometheusDuration(60000)).toBe("1m"); + expect(formatPrometheusDuration(120000)).toBe("2m"); + expect(formatPrometheusDuration(3599999)).toBe("59m59s999ms"); + }); + + test("formats hours correctly", () => { + expect(formatPrometheusDuration(3600000)).toBe("1h"); + expect(formatPrometheusDuration(7200000)).toBe("2h"); + expect(formatPrometheusDuration(86399999)).toBe("23h59m59s999ms"); + }); + + test("formats days correctly", () => { + expect(formatPrometheusDuration(86400000)).toBe("1d"); + expect(formatPrometheusDuration(172800000)).toBe("2d"); + expect(formatPrometheusDuration(86400000 * 365 - 1)).toBe( + "364d23h59m59s999ms" + ); + }); + + test("handles negative durations", () => { + expect(formatPrometheusDuration(-1000)).toBe("-1s"); + expect(formatPrometheusDuration(-86400000)).toBe("-1d"); + }); + + test("combines multiple units correctly", () => { + expect( + formatPrometheusDuration(86400000 + 3600000 + 60000 + 1000 + 1) + ).toBe("1d1h1m1s1ms"); + }); + + test("omits zero values", () => { + expect(formatPrometheusDuration(86400000 + 1000)).toBe("1d1s"); + }); +}); + +describe("humanizeDuration", () => { + test('returns "0s" for 0 milliseconds', () => { + expect(humanizeDuration(0)).toBe("0s"); + }); + + test("formats submilliseconds correctly", () => { + expect(humanizeDuration(0.1)).toBe("0ms"); + expect(humanizeDuration(0.6)).toBe("1ms"); + expect(humanizeDuration(0.000001)).toBe("0ms"); + }); + + test("formats milliseconds correctly", () => { + expect(humanizeDuration(1)).toBe("1ms"); + expect(humanizeDuration(999)).toBe("999ms"); + }); + + test("formats seconds correctly", () => { + expect(humanizeDuration(1000)).toBe("1s"); + expect(humanizeDuration(1500)).toBe("1.5s"); + expect(humanizeDuration(59999)).toBe("59.999s"); + }); + + test("formats minutes correctly", () => { + expect(humanizeDuration(60000)).toBe("1m"); + expect(humanizeDuration(120000)).toBe("2m"); + expect(humanizeDuration(3599999)).toBe("59m 59.999s"); + }); + + test("formats hours correctly", () => { + 
expect(humanizeDuration(3600000)).toBe("1h"); + expect(humanizeDuration(7200000)).toBe("2h"); + expect(humanizeDuration(86399999)).toBe("23h 59m 59.999s"); + }); + + test("formats days correctly", () => { + expect(humanizeDuration(86400000)).toBe("1d"); + expect(humanizeDuration(172800000)).toBe("2d"); + expect(humanizeDuration(86400000 * 365 - 1)).toBe("364d 23h 59m 59.999s"); + }); + + test("handles negative durations", () => { + expect(humanizeDuration(-1000)).toBe("-1s"); + expect(humanizeDuration(-86400000)).toBe("-1d"); + }); + + test("combines multiple units correctly", () => { + expect(humanizeDuration(86400000 + 3600000 + 60000 + 1000 + 1)).toBe( + "1d 1h 1m 1.001s" + ); + }); + + test("omits zero values", () => { + expect(humanizeDuration(86400000 + 1000)).toBe("1d 1s"); + }); +}); diff --git a/web/ui/mantine-ui/src/lib/formatTime.ts b/web/ui/mantine-ui/src/lib/formatTime.ts new file mode 100644 index 0000000000..95beb21d6b --- /dev/null +++ b/web/ui/mantine-ui/src/lib/formatTime.ts @@ -0,0 +1,127 @@ +import dayjs from "dayjs"; +import duration from "dayjs/plugin/duration"; +dayjs.extend(duration); +import utc from "dayjs/plugin/utc"; +dayjs.extend(utc); + +// Parse Prometheus-specific duration strings such as "5m" or "1d2h3m4s" into milliseconds. +export const parsePrometheusDuration = (durationStr: string): number | null => { + if (durationStr === "") { + return null; + } + if (durationStr === "0") { + // Allow 0 without a unit. + return 0; + } + + const durationRE = new RegExp( + "^(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?$" + ); + const matches = durationStr.match(durationRE); + if (!matches) { + return null; + } + + let dur = 0; + + // Parse the match at pos `pos` in the regex and use `mult` to turn that + // into ms, then add that value to the total parsed duration. + const m = (pos: number, mult: number) => { + if (matches[pos] === undefined) { + return; + } + const n = parseInt(matches[pos]); + dur += n * mult; + }; + + m(2, 1000 * 60 * 60 * 24 * 365); // y + m(4, 1000 * 60 * 60 * 24 * 7); // w + m(6, 1000 * 60 * 60 * 24); // d + m(8, 1000 * 60 * 60); // h + m(10, 1000 * 60); // m + m(12, 1000); // s + m(14, 1); // ms + + return dur; +}; + +// Used by: +// - formatPrometheusDuration() => "5d5m2s123ms" +// - humanizeDuration() => "5d 5m 2.123s" +const formatDuration = ( + d: number, + componentSeparator?: string, + showFractionalSeconds?: boolean +): string => { + if (d === 0) { + return "0s"; + } + + const sign = d < 0 ? "-" : ""; + let ms = Math.abs(d); + const r: string[] = []; + + for (const { unit, mult, exact } of [ + // Only format years and weeks if the remainder is zero, as it is often + // easier to read 90d than 12w6d. + { unit: "y", mult: 1000 * 60 * 60 * 24 * 365, exact: true }, + { unit: "w", mult: 1000 * 60 * 60 * 24 * 7, exact: true }, + { unit: "d", mult: 1000 * 60 * 60 * 24, exact: false }, + { unit: "h", mult: 1000 * 60 * 60, exact: false }, + { unit: "m", mult: 1000 * 60, exact: false }, + { unit: "s", mult: 1000, exact: false }, + { unit: "ms", mult: 1, exact: false }, + ]) { + if (exact && ms % mult !== 0) { + continue; + } + const v = Math.floor(ms / mult); + if (v > 0) { + ms -= v * mult; + if (showFractionalSeconds && unit === "s" && ms > 0) { + // Show "2.34s" instead of "2s 340ms".
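+ // e.g. 2345ms: v = 2, ms = 345 -> (2.345).toFixed(3) = "2.345", and
+ // parseFloat() trims trailing zeros, so 2500ms renders as "2.5s".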
+ r.push(`${parseFloat((v + ms / 1000).toFixed(3))}s`); + break; + } else { + r.push(`${v}${unit}`); + } + } + if (r.length == 0 && unit == "ms") { + r.push(`${Math.round(ms)}ms`) + } + } + + return sign + r.join(componentSeparator || ""); +}; + +// Format a duration in milliseconds into a Prometheus duration string like "1d2h3m4s". +export const formatPrometheusDuration = (d: number): string => { + return formatDuration(d); +}; + +export function parseTime(timeText: string): number { + return dayjs.utc(timeText).valueOf(); +} + +export const now = (): number => dayjs().valueOf(); + +export const humanizeDuration = (milliseconds: number): string => { + return formatDuration(milliseconds, " ", true); +}; + +export const humanizeDurationRelative = ( + startStr: string, + end: number, + suffix: string = " ago" +): string => { + const start = parseTime(startStr); + if (start < 0) { + return "never"; + } + return humanizeDuration(end - start) + suffix; +}; + +export const formatTimestamp = (t: number, useLocalTime: boolean) => + useLocalTime + ? dayjs.unix(t).tz(dayjs.tz.guess()).format() + : dayjs.unix(t).utc().format(); diff --git a/web/ui/mantine-ui/src/main.tsx b/web/ui/mantine-ui/src/main.tsx new file mode 100644 index 0000000000..71b3cf937f --- /dev/null +++ b/web/ui/mantine-ui/src/main.tsx @@ -0,0 +1,15 @@ +import React from "react"; +import ReactDOM from "react-dom/client"; +import App from "./App.tsx"; +import store from "./state/store.ts"; +import { Provider } from "react-redux"; +import "./fonts/codicon.ttf"; +import "./promql.css"; + +ReactDOM.createRoot(document.getElementById("root")!).render( + + + + + +); diff --git a/web/ui/mantine-ui/src/mantine-overrides.css b/web/ui/mantine-ui/src/mantine-overrides.css new file mode 100644 index 0000000000..b9662f8d52 --- /dev/null +++ b/web/ui/mantine-ui/src/mantine-overrides.css @@ -0,0 +1,4 @@ +.mantine-Badge-label { + overflow: unset; + text-overflow: unset; +} diff --git a/web/ui/mantine-ui/src/pages/AgentPage.tsx b/web/ui/mantine-ui/src/pages/AgentPage.tsx new file mode 100644 index 0000000000..d11af49582 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/AgentPage.tsx @@ -0,0 +1,28 @@ +import { Text } from "@mantine/core"; +import { IconSpy } from "@tabler/icons-react"; +import { FC } from "react"; +import InfoPageStack from "../components/InfoPageStack"; +import InfoPageCard from "../components/InfoPageCard"; + +const AgentPage: FC = () => { + return ( + + + + This Prometheus instance is running in agent mode. In + this mode, Prometheus is only used to scrape discovered targets and + forward the scraped metrics to remote write endpoints. + + + Some features are not available in this mode, such as querying and + alerting. 
+ + + + ); +}; + +export default AgentPage; diff --git a/web/ui/mantine-ui/src/pages/AlertmanagerDiscoveryPage.tsx b/web/ui/mantine-ui/src/pages/AlertmanagerDiscoveryPage.tsx new file mode 100644 index 0000000000..d1ffb2f5fe --- /dev/null +++ b/web/ui/mantine-ui/src/pages/AlertmanagerDiscoveryPage.tsx @@ -0,0 +1,70 @@ +import { Alert, Table } from "@mantine/core"; +import { IconBell, IconBellOff, IconInfoCircle } from "@tabler/icons-react"; + +import { useSuspenseAPIQuery } from "../api/api"; +import { AlertmanagersResult } from "../api/responseTypes/alertmanagers"; +import EndpointLink from "../components/EndpointLink"; +import InfoPageCard from "../components/InfoPageCard"; +import InfoPageStack from "../components/InfoPageStack"; + +export const targetPoolDisplayLimit = 20; + +export default function AlertmanagerDiscoveryPage() { + // Load the list of all available scrape pools. + const { + data: { + data: { activeAlertmanagers, droppedAlertmanagers }, + }, + } = useSuspenseAPIQuery({ + path: `/alertmanagers`, + }); + + return ( + + + {activeAlertmanagers.length === 0 ? ( + }> + No active alertmanagers found. + + ) : ( + + + {activeAlertmanagers.map((alertmanager) => ( + + + + + + ))} + +
+ )} +
+ + {droppedAlertmanagers.length === 0 ? ( + }> + No dropped alertmanagers found. + + ) : ( + + + {droppedAlertmanagers.map((alertmanager) => ( + + + + + + ))} + +
+ )} +
+
+ ); +} diff --git a/web/ui/mantine-ui/src/pages/AlertsPage.tsx b/web/ui/mantine-ui/src/pages/AlertsPage.tsx new file mode 100644 index 0000000000..1513f73d58 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/AlertsPage.tsx @@ -0,0 +1,414 @@ +import { + Card, + Group, + Table, + Text, + Accordion, + Badge, + Tooltip, + Box, + Stack, + Alert, + TextInput, + Anchor, +} from "@mantine/core"; +import { useSuspenseAPIQuery } from "../api/api"; +import { AlertingRule, AlertingRulesResult } from "../api/responseTypes/rules"; +import badgeClasses from "../Badge.module.css"; +import panelClasses from "../Panel.module.css"; +import RuleDefinition from "../components/RuleDefinition"; +import { humanizeDurationRelative, now } from "../lib/formatTime"; +import { Fragment, useMemo } from "react"; +import { StateMultiSelect } from "../components/StateMultiSelect"; +import { IconInfoCircle, IconSearch } from "@tabler/icons-react"; +import { LabelBadges } from "../components/LabelBadges"; +import { useSettings } from "../state/settingsSlice"; +import { + ArrayParam, + BooleanParam, + StringParam, + useQueryParam, + withDefault, +} from "use-query-params"; +import { useDebouncedValue } from "@mantine/hooks"; +import { KVSearch } from "@nexucis/kvsearch"; +import { inputIconStyle } from "../styles"; + +type AlertsPageData = { + // How many rules are in each state across all groups. + globalCounts: { + inactive: number; + pending: number; + firing: number; + }; + groups: { + name: string; + file: string; + // How many rules are in each state for this group. + counts: { + total: number; + inactive: number; + pending: number; + firing: number; + }; + rules: { + rule: AlertingRule; + // How many alerts are in each state for this rule. + counts: { + firing: number; + pending: number; + }; + }[]; + }[]; +}; + +const kvSearch = new KVSearch({ + shouldSort: true, + indexedKeys: ["name", "labels", ["labels", /.*/]], +}); + +const buildAlertsPageData = ( + data: AlertingRulesResult, + search: string, + stateFilter: (string | null)[] +) => { + const pageData: AlertsPageData = { + globalCounts: { + inactive: 0, + pending: 0, + firing: 0, + }, + groups: [], + }; + + for (const group of data.groups) { + const groupCounts = { + total: 0, + inactive: 0, + pending: 0, + firing: 0, + }; + + for (const r of group.rules) { + groupCounts.total++; + switch (r.state) { + case "inactive": + pageData.globalCounts.inactive++; + groupCounts.inactive++; + break; + case "firing": + pageData.globalCounts.firing++; + groupCounts.firing++; + break; + case "pending": + pageData.globalCounts.pending++; + groupCounts.pending++; + break; + default: + throw new Error(`Unknown rule state: ${r.state}`); + } + } + + const filteredRules: AlertingRule[] = ( + search === "" + ? group.rules + : kvSearch.filter(search, group.rules).map((value) => value.original) + ).filter((r) => stateFilter.length === 0 || stateFilter.includes(r.state)); + + pageData.groups.push({ + name: group.name, + file: group.file, + counts: groupCounts, + rules: filteredRules.map((r) => ({ + rule: r, + counts: { + firing: r.alerts.filter((a) => a.state === "firing").length, + pending: r.alerts.filter((a) => a.state === "pending").length, + }, + })), + }); + } + + return pageData; +}; + +export default function AlertsPage() { + // Fetch the alerting rules data. + const { data } = useSuspenseAPIQuery({ + path: `/rules`, + params: { + type: "alert", + }, + }); + + const { showAnnotations } = useSettings(); + + // Define URL query params. 
+ const [stateFilter, setStateFilter] = useQueryParam( + "state", + withDefault(ArrayParam, []) + ); + const [searchFilter, setSearchFilter] = useQueryParam( + "search", + withDefault(StringParam, "") + ); + const [debouncedSearch] = useDebouncedValue(searchFilter.trim(), 250); + const [showEmptyGroups, setShowEmptyGroups] = useQueryParam( + "showEmptyGroups", + withDefault(BooleanParam, true) + ); + + // Update the page data whenever the fetched data or filters change. + const alertsPageData: AlertsPageData = useMemo( + () => buildAlertsPageData(data.data, debouncedSearch, stateFilter), + [data, stateFilter, debouncedSearch] + ); + + const shownGroups = showEmptyGroups + ? alertsPageData.groups + : alertsPageData.groups.filter((g) => g.rules.length > 0); + + return ( + + + + o === "inactive" + ? badgeClasses.healthOk + : o === "pending" + ? badgeClasses.healthWarn + : badgeClasses.healthErr + } + optionCount={(o) => + alertsPageData.globalCounts[ + o as keyof typeof alertsPageData.globalCounts + ] + } + placeholder="Filter by rule state" + values={(stateFilter?.filter((v) => v !== null) as string[]) || []} + onChange={(values) => setStateFilter(values)} + /> + } + placeholder="Filter by rule name or labels" + value={searchFilter || ""} + onChange={(event) => + setSearchFilter(event.currentTarget.value || null) + } + > + + {alertsPageData.groups.length === 0 ? ( + }> + No rules found. + + ) : ( + !showEmptyGroups && + alertsPageData.groups.length !== shownGroups.length && ( + } + > + Hiding {alertsPageData.groups.length - shownGroups.length} empty + groups due to filters or no rules. + setShowEmptyGroups(true)}> + Show empty groups + + + ) + )} + + {shownGroups.map((g, i) => { + return ( + + + + + {g.name} + + + {g.file} + + + + {g.counts.firing > 0 && ( + + firing ({g.counts.firing}) + + )} + {g.counts.pending > 0 && ( + + pending ({g.counts.pending}) + + )} + {g.counts.inactive > 0 && ( + + inactive ({g.counts.inactive}) + + )} + + + {g.counts.total === 0 ? ( + }> + No rules in this group. + setShowEmptyGroups(false)} + > + Hide empty groups + + + ) : g.rules.length === 0 ? ( + }> + No rules in this group match your filter criteria (omitted{" "} + {g.counts.total} filtered rules). + setShowEmptyGroups(false)} + > + Hide empty groups + + + ) : ( + + {g.rules.map((r, j) => { + return ( + 0 + ? panelClasses.panelHealthErr + : r.counts.pending > 0 + ? panelClasses.panelHealthWarn + : panelClasses.panelHealthOk + } + > + + + {r.rule.name} + + {r.counts.firing > 0 && ( + + firing ({r.counts.firing}) + + )} + {r.counts.pending > 0 && ( + + pending ({r.counts.pending}) + + )} + + + + + + {r.rule.alerts.length > 0 && ( + + + + Alert labels + State + Active Since + Value + + + + {r.rule.type === "alerting" && + r.rule.alerts.map((a, k) => ( + + + + + + + + {a.state} + + + + + + {humanizeDurationRelative( + a.activeAt, + now(), + "" + )} + + + + + {isNaN(Number(a.value)) + ? a.value + : Number(a.value)} + + + {showAnnotations && ( + + +
+ + {Object.entries( + a.annotations + ).map(([k, v]) => ( + + + {k} + + {v} + + ))} + +
+ + + )} + + ))} + + + )} +
+
+ ); + })} +
+ )} +
+ ); + })} +
+
+ ); +} diff --git a/web/ui/mantine-ui/src/pages/ConfigPage.tsx b/web/ui/mantine-ui/src/pages/ConfigPage.tsx new file mode 100644 index 0000000000..a8419a8db4 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/ConfigPage.tsx @@ -0,0 +1,23 @@ +import { CodeHighlight } from "@mantine/code-highlight"; +import { useSuspenseAPIQuery } from "../api/api"; +import ConfigResult from "../api/responseTypes/config"; + +export default function ConfigPage() { + const { + data: { + data: { yaml }, + }, + } = useSuspenseAPIQuery({ path: `/status/config` }); + + return ( + + ); +} diff --git a/web/ui/mantine-ui/src/pages/FlagsPage.module.css b/web/ui/mantine-ui/src/pages/FlagsPage.module.css new file mode 100644 index 0000000000..221b2de028 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/FlagsPage.module.css @@ -0,0 +1,21 @@ +.th { + padding: 0; +} + +.control { + width: 100%; + padding: var(--mantine-spacing-xs) var(--mantine-spacing-md); + + @mixin hover { + background-color: light-dark( + var(--mantine-color-gray-0), + var(--mantine-color-dark-6) + ); + } +} + +.icon { + width: rem(21px); + height: rem(21px); + border-radius: rem(21px); +} diff --git a/web/ui/mantine-ui/src/pages/FlagsPage.tsx b/web/ui/mantine-ui/src/pages/FlagsPage.tsx new file mode 100644 index 0000000000..856ab9a40d --- /dev/null +++ b/web/ui/mantine-ui/src/pages/FlagsPage.tsx @@ -0,0 +1,181 @@ +import { useState } from "react"; +import { + Table, + UnstyledButton, + Group, + Text, + Center, + TextInput, + rem, + keys, +} from "@mantine/core"; +import { + IconSelector, + IconChevronDown, + IconChevronUp, + IconSearch, +} from "@tabler/icons-react"; +import classes from "./FlagsPage.module.css"; +import { useSuspenseAPIQuery } from "../api/api"; +import InfoPageStack from "../components/InfoPageStack"; +import InfoPageCard from "../components/InfoPageCard"; +import { inputIconStyle } from "../styles"; + +interface RowData { + flag: string; + value: string; +} + +interface ThProps { + children: React.ReactNode; + reversed: boolean; + sorted: boolean; + onSort(): void; +} + +function Th({ children, reversed, sorted, onSort }: ThProps) { + const Icon = sorted + ? reversed + ? IconChevronUp + : IconChevronDown + : IconSelector; + return ( + + + + + {children} + +
+ +
+
+
+
+ ); +} + +function filterData(data: RowData[], search: string) { + const query = search.toLowerCase().trim(); + return data.filter((item) => + keys(data[0]).some((key) => item[key].toLowerCase().includes(query)) + ); +} + +function sortData( + data: RowData[], + payload: { sortBy: keyof RowData | null; reversed: boolean; search: string } +) { + const { sortBy } = payload; + + if (!sortBy) { + return filterData(data, payload.search); + } + + return filterData( + [...data].sort((a, b) => { + if (payload.reversed) { + return b[sortBy].localeCompare(a[sortBy]); + } + + return a[sortBy].localeCompare(b[sortBy]); + }), + payload.search + ); +} + +export default function FlagsPage() { + const { data } = useSuspenseAPIQuery>({ + path: `/status/flags`, + }); + + const flags = Object.entries(data.data).map(([flag, value]) => ({ + flag, + value, + })); + + const [search, setSearch] = useState(""); + const [sortedData, setSortedData] = useState(flags); + const [sortBy, setSortBy] = useState(null); + const [reverseSortDirection, setReverseSortDirection] = useState(false); + + const setSorting = (field: keyof RowData) => { + const reversed = field === sortBy ? !reverseSortDirection : false; + setReverseSortDirection(reversed); + setSortBy(field); + setSortedData(sortData(flags, { sortBy: field, reversed, search })); + }; + + const handleSearchChange = (event: React.ChangeEvent) => { + const { value } = event.currentTarget; + setSearch(value); + setSortedData( + sortData(flags, { sortBy, reversed: reverseSortDirection, search: value }) + ); + }; + + const rows = sortedData.map((row) => ( + + + --{row.flag} + + + {row.value} + + + )); + + return ( + + + } + value={search} + onChange={handleSearchChange} + /> + + + + + + + + + + {rows.length > 0 ? ( + rows + ) : ( + + + + Nothing found + + + + )} + +
setSorting("flag")} + > + Flag + setSorting("value")} + > + Value +
+
+
+ ); +} diff --git a/web/ui/mantine-ui/src/pages/RulesPage.tsx b/web/ui/mantine-ui/src/pages/RulesPage.tsx new file mode 100644 index 0000000000..a4ed44e7c2 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/RulesPage.tsx @@ -0,0 +1,209 @@ +import { + Accordion, + Alert, + Badge, + Card, + Group, + rem, + Stack, + Text, + Tooltip, +} from "@mantine/core"; +// import { useQuery } from "react-query"; +import { + humanizeDurationRelative, + humanizeDuration, + now, +} from "../lib/formatTime"; +import { + IconAlertTriangle, + IconBell, + IconHourglass, + IconInfoCircle, + IconRefresh, + IconRepeat, + IconTimeline, +} from "@tabler/icons-react"; +import { useSuspenseAPIQuery } from "../api/api"; +import { RulesResult } from "../api/responseTypes/rules"; +import badgeClasses from "../Badge.module.css"; +import RuleDefinition from "../components/RuleDefinition"; +import { badgeIconStyle } from "../styles"; + +const healthBadgeClass = (state: string) => { + switch (state) { + case "ok": + return badgeClasses.healthOk; + case "err": + return badgeClasses.healthErr; + case "unknown": + return badgeClasses.healthUnknown; + default: + throw new Error("Unknown rule health state"); + } +}; + +export default function RulesPage() { + const { data } = useSuspenseAPIQuery({ path: `/rules` }); + + return ( + + {data.data.groups.length === 0 && ( + }> + No rule groups configured. + + )} + {data.data.groups.map((g, i) => ( + + + + + {g.name} + + + {g.file} + + + + + } + > + last run {humanizeDurationRelative(g.lastEvaluation, now())} + + + + } + > + took {humanizeDuration(parseFloat(g.evaluationTime) * 1000)} + + + + } + > + every {humanizeDuration(parseFloat(g.interval) * 1000)}{" "} + + + + + {g.rules.length === 0 && ( + }> + No rules in rule group. + + )} + + {g.rules.map((r, j) => ( + + + + + {r.type === "alerting" ? ( + + + + ) : ( + + + + )} + {r.name} + + + + + } + > + {humanizeDurationRelative(r.lastEvaluation, now())} + + + + + + } + > + {humanizeDuration( + parseFloat(r.evaluationTime) * 1000 + )} + + + + + {r.health} + + + + + + + {r.lastError && ( + } + > + Error: {r.lastError} + + )} + + + ))} + + + ))} + + ); +} diff --git a/web/ui/mantine-ui/src/pages/StatusPage.tsx b/web/ui/mantine-ui/src/pages/StatusPage.tsx new file mode 100644 index 0000000000..71dc476a2d --- /dev/null +++ b/web/ui/mantine-ui/src/pages/StatusPage.tsx @@ -0,0 +1,80 @@ +import { Table } from "@mantine/core"; +import { useSuspenseAPIQuery } from "../api/api"; +import { IconRun, IconWall } from "@tabler/icons-react"; +import { formatTimestamp } from "../lib/formatTime"; +import { useSettings } from "../state/settingsSlice"; +import InfoPageCard from "../components/InfoPageCard"; +import InfoPageStack from "../components/InfoPageStack"; + +export default function StatusPage() { + const { data: buildinfo } = useSuspenseAPIQuery>({ + path: `/status/buildinfo`, + }); + const { data: runtimeinfo } = useSuspenseAPIQuery>({ + path: `/status/runtimeinfo`, + }); + + const { useLocalTime } = useSettings(); + + const statusConfig: Record< + string, + { + title?: string; + formatValue?: (v: string | boolean) => string; + } + > = { + startTime: { + title: "Start time", + formatValue: (v: string | boolean) => + formatTimestamp(new Date(v as string).valueOf() / 1000, useLocalTime), + }, + CWD: { title: "Working directory" }, + reloadConfigSuccess: { + title: "Configuration reload", + formatValue: (v: string | boolean) => (v ? 
"Successful" : "Unsuccessful"), + }, + lastConfigTime: { + title: "Last successful configuration reload", + formatValue: (v: string | boolean) => + formatTimestamp(new Date(v as string).valueOf() / 1000, useLocalTime), + }, + corruptionCount: { title: "WAL corruptions" }, + goroutineCount: { title: "Goroutines" }, + storageRetention: { title: "Storage retention" }, + }; + + return ( + + + + + {Object.entries(buildinfo.data).map(([k, v]) => ( + + {k} + {v} + + ))} + +
+
+ + + + {Object.entries(runtimeinfo.data).map(([k, v]) => { + const { title = k, formatValue = (val: string) => val } = + statusConfig[k] || {}; + return ( + + + {title} + + {formatValue(v)} + + ); + })} + +
+
+
+ ); +} diff --git a/web/ui/mantine-ui/src/pages/TSDBStatusPage.tsx b/web/ui/mantine-ui/src/pages/TSDBStatusPage.tsx new file mode 100644 index 0000000000..cf7851c165 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/TSDBStatusPage.tsx @@ -0,0 +1,102 @@ +import { Table } from "@mantine/core"; +import { useSuspenseAPIQuery } from "../api/api"; +import { TSDBStatusResult } from "../api/responseTypes/tsdbStatus"; +import { formatTimestamp } from "../lib/formatTime"; +import { useSettings } from "../state/settingsSlice"; +import InfoPageStack from "../components/InfoPageStack"; +import InfoPageCard from "../components/InfoPageCard"; + +export default function TSDBStatusPage() { + const { + data: { + data: { + headStats, + labelValueCountByLabelName, + seriesCountByMetricName, + memoryInBytesByLabelName, + seriesCountByLabelValuePair, + }, + }, + } = useSuspenseAPIQuery({ path: `/status/tsdb` }); + + const { useLocalTime } = useSettings(); + + const unixToTime = (unix: number): string => { + const formatted = formatTimestamp(unix, useLocalTime); + if (formatted === "Invalid Date") { + if (numSeries === 0) { + return "No datapoints yet"; + } + return `Error parsing time (${unix})`; + } + + return formatted; + }; + + const { chunkCount, numSeries, numLabelPairs, minTime, maxTime } = headStats; + const stats = [ + { name: "Number of Series", value: numSeries }, + { name: "Number of Chunks", value: chunkCount }, + { name: "Number of Label Pairs", value: numLabelPairs }, + { name: "Current Min Time", value: `${unixToTime(minTime / 1000)}` }, + { name: "Current Max Time", value: `${unixToTime(maxTime / 1000)}` }, + ]; + + return ( + + {[ + { + title: "TSDB Head Status", + stats, + formatAsCode: false, + }, + { + title: "Top 10 label names with value count", + stats: labelValueCountByLabelName, + formatAsCode: true, + }, + { + title: "Top 10 series count by metric names", + stats: seriesCountByMetricName, + formatAsCode: true, + }, + { + title: "Top 10 label names with high memory usage", + unit: "Bytes", + stats: memoryInBytesByLabelName, + formatAsCode: true, + }, + { + title: "Top 10 series count by label value pairs", + stats: seriesCountByLabelValuePair, + formatAsCode: true, + }, + ].map(({ title, unit = "Count", stats, formatAsCode }) => ( + + + + + Name + {unit} + + + + {stats.map(({ name, value }) => ( + + + {formatAsCode ? {name} : name} + + {value} + + ))} + +
+
+ ))} +
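unixToTime above guards against an empty TSDB head: with no samples, minTime/maxTime hold sentinel values that do not parse into a valid date, so the page shows "No datapoints yet" instead of an error. A standalone sketch of that guard, with Date standing in for the formatTimestamp helper (note the component passes minTime / 1000, converting milliseconds to seconds first):

```ts
// Standalone sketch of the unixToTime guard above, with Date standing in for
// the formatTimestamp helper. The component passes minTime / 1000, since TSDB
// head timestamps are in milliseconds while the formatter expects seconds.
function unixToTimeSketch(unixSeconds: number, numSeries: number): string {
  const formatted = new Date(unixSeconds * 1000).toString();
  if (formatted === "Invalid Date") {
    // An empty head has sentinel min/max times that don't parse as dates.
    return numSeries === 0 ? "No datapoints yet" : `Error parsing time (${unixSeconds})`;
  }
  return formatted;
}

console.log(unixToTimeSketch(1700000000, 42)); // a real date string
console.log(unixToTimeSketch(Number.MAX_SAFE_INTEGER, 0)); // "No datapoints yet"
```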
+ ); +} diff --git a/web/ui/mantine-ui/src/pages/query/DataTable.module.css b/web/ui/mantine-ui/src/pages/query/DataTable.module.css new file mode 100644 index 0000000000..255173d9bd --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/DataTable.module.css @@ -0,0 +1,10 @@ +.tableWrapper { + border: 1px solid + light-dark(var(--mantine-color-gray-3), var(--mantine-color-dark-5)); + border-radius: var(--mantine-radius-default); +} + +.numberCell { + text-align: right; + font-variant-numeric: tabular-nums; +} diff --git a/web/ui/mantine-ui/src/pages/query/DataTable.tsx b/web/ui/mantine-ui/src/pages/query/DataTable.tsx new file mode 100644 index 0000000000..375b033eda --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/DataTable.tsx @@ -0,0 +1,211 @@ +import { FC, ReactNode, useState } from "react"; +import { + Table, + Alert, + Box, + SegmentedControl, + ScrollArea, + Group, + Stack, + Text, + Anchor, +} from "@mantine/core"; +import { IconAlertTriangle, IconInfoCircle } from "@tabler/icons-react"; +import { + InstantQueryResult, + InstantSample, + RangeSamples, +} from "../../api/responseTypes/query"; +import SeriesName from "./SeriesName"; +import classes from "./DataTable.module.css"; +import dayjs from "dayjs"; +import timezone from "dayjs/plugin/timezone"; +import { formatTimestamp } from "../../lib/formatTime"; +import HistogramChart from "./HistogramChart"; +import { Histogram } from "../../types/types"; +import { bucketRangeString } from "./HistogramHelpers"; +import { useSettings } from "../../state/settingsSlice"; +dayjs.extend(timezone); + +const maxFormattableSeries = 1000; +const maxDisplayableSeries = 10000; + +const limitSeries = ( + series: S[], + limit: boolean +): S[] => { + if (limit && series.length > maxDisplayableSeries) { + return series.slice(0, maxDisplayableSeries); + } + return series; +}; + +export interface DataTableProps { + data: InstantQueryResult; + limitResults: boolean; + setLimitResults: (limit: boolean) => void; +} + +const DataTable: FC = ({ + data, + limitResults, + setLimitResults, +}) => { + const [scale, setScale] = useState("exponential"); + const { useLocalTime } = useSettings(); + + const { result, resultType } = data; + const doFormat = result.length <= maxFormattableSeries; + + return ( + + {limitResults && + ["vector", "matrix"].includes(resultType) && + result.length > maxDisplayableSeries && ( + } + title="Showing limited results" + > + Fetched {data.result.length} metrics, only displaying first{" "} + {maxDisplayableSeries} for performance reasons. + setLimitResults(false)}> + Show all results + + + )} + + {!doFormat && ( + }> + Showing more than {maxFormattableSeries} series, turning off label + formatting to improve rendering performance. + + )} + + + + + {resultType === "vector" ? ( + limitSeries(result, limitResults).map((s, idx) => ( + + + + + + {s.value && s.value[1]} + {s.histogram && ( + + + + + + Count: {s.histogram[1].count} + + + Sum: {s.histogram[1].sum} + + + + x-axis scale: + + + + {histogramTable(s.histogram[1])} + + )} + + + )) + ) : resultType === "matrix" ? ( + limitSeries(result, limitResults).map((s, idx) => ( + + + + + + {s.values && + s.values.map((v, idx) => ( +
+ {v[1]}{" "} + + @ {v[0]} + +
+ ))} +
+
+ )) + ) : resultType === "scalar" ? ( + + Scalar value + {result[1]} + + ) : resultType === "string" ? ( + + String value + {result[1]} + + ) : ( + } + > + Invalid result value type + + )} +
+
+
+
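The [timestamp, value] tuples rendered above as "{v[1]} @ {v[0]}" follow the instant/range result format of the Prometheus HTTP API: timestamps are Unix seconds and sample values arrive as strings so that NaN and +/-Inf survive JSON. A rough shape, inferred from the rendering code and the API docs (the histogram object is abridged here):

```ts
// Rough shape of instant/range query results as rendered above, following the
// Prometheus HTTP API: sample values are [unixSeconds, "value"] tuples with
// string values so NaN and +/-Inf survive JSON. Histogram object abridged.
type Metric = Record<string, string>;
type SampleValue = [number, string];

interface InstantSample {
  metric: Metric;
  value?: SampleValue; // float samples
  histogram?: [number, { count: string; sum: string }]; // native histograms
}

interface RangeSamples {
  metric: Metric;
  values?: SampleValue[];
}

const s: InstantSample = {
  metric: { __name__: "up", job: "prometheus" },
  value: [1700000000, "1"],
};
// Matches the "{value} @ {timestamp}" rendering above:
console.log(`${s.value?.[1]} @ ${s.value?.[0]}`); // "1 @ 1700000000"
```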
+ ); +}; + +const histogramTable = (h: Histogram): ReactNode => ( + + + + Bucket range + Count + + + {h.buckets?.map((b, i) => ( + + + {bucketRangeString(b)} + + {b[3]} + + ))} + + +
+); + +export default DataTable; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/Aggregation.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/Aggregation.tsx new file mode 100644 index 0000000000..3facd692c1 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/Aggregation.tsx @@ -0,0 +1,159 @@ +import { FC } from "react"; +import ASTNode, { Aggregation, aggregationType } from "../../../promql/ast"; +import { labelNameList } from "../../../promql/format"; +import { parsePrometheusFloat } from "../../../lib/formatFloatValue"; +import { Card, Text } from "@mantine/core"; + +const describeAggregationType = ( + aggrType: aggregationType, + param: ASTNode | null +) => { + switch (aggrType) { + case "sum": + return "sums over the sample values of the input series"; + case "min": + return "takes the minimum of the sample values of the input series"; + case "max": + return "takes the maximum of the sample values of the input series"; + case "avg": + return "calculates the average of the sample values of the input series"; + case "stddev": + return "calculates the population standard deviation of the sample values of the input series"; + case "stdvar": + return "calculates the population standard variation of the sample values of the input series"; + case "count": + return "counts the number of input series"; + case "group": + return "groups the input series by the supplied grouping labels, while setting the sample value to 1"; + case "count_values": + if (param === null) { + throw new Error( + "encountered count_values() node without label parameter" + ); + } + if (param.type !== "stringLiteral") { + throw new Error( + "encountered count_values() node without string literal label parameter" + ); + } + return ( + <> + outputs one time series for each unique sample value in the input + series (each counting the number of occurrences of that value and + indicating the original value in the {labelNameList([param.val])}{" "} + label) + + ); + case "bottomk": + if (param === null || param.type !== "numberLiteral") { + return ( + <> + returns the bottom{" "} + K series by value + + ); + } + return ( + <> + returns the bottom{" "} + {param.val} series + by value + + ); + case "topk": + if (param === null || param.type !== "numberLiteral") { + return ( + <> + returns the top K{" "} + series by value + + ); + } + return ( + <> + returns the top{" "} + {param.val} series + by value + + ); + case "quantile": + if (param === null) { + throw new Error( + "encountered quantile() node without quantile parameter" + ); + } + if (param.type === "numberLiteral") { + return `calculates the ${param.val}th quantile (${ + parsePrometheusFloat(param.val) * 100 + }th percentile) over the sample values of the input series`; + } + return "calculates a quantile over the sample values of the input series"; + + case "limitk": + if (param === null || param.type !== "numberLiteral") { + return ( + <> + limits the output to{" "} + K series + + ); + } + return ( + <> + limits the output to{" "} + {param.val} series + + ); + case "limit_ratio": + if (param === null || param.type !== "numberLiteral") { + return "limits the output to a ratio of the input series"; + } + return ( + <> + limits the output to a ratio of{" "} + {param.val} ( + {parsePrometheusFloat(param.val) * 100}%) of the input series + + ); + default: + throw new Error(`invalid aggregation type ${aggrType}`); + } +}; + +const describeAggregationGrouping = (grouping: string[], without: boolean) => { + if (without) { + return ( + 
<>aggregating away the [{labelNameList(grouping)}] label dimensions + ); + } + + if (grouping.length === 1) { + return <>grouped by their {labelNameList(grouping)} label dimension; + } + + if (grouping.length > 1) { + return <>grouped by their [{labelNameList(grouping)}] label dimensions; + } + + return "aggregating away any label dimensions"; +}; + +interface AggregationExplainViewProps { + node: Aggregation; +} + +const AggregationExplainView: FC = ({ node }) => { + return ( + + + Aggregation + + + This node {describeAggregationType(node.op, node.param)},{" "} + {describeAggregationGrouping(node.grouping, node.without)}. + + + ); +}; + +export default AggregationExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/BinaryExpr.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/BinaryExpr.tsx new file mode 100644 index 0000000000..837b84da31 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/BinaryExpr.tsx @@ -0,0 +1,106 @@ +import { FC } from "react"; +import { BinaryExpr } from "../../../../promql/ast"; +import serializeNode from "../../../../promql/serialize"; +import VectorScalarBinaryExprExplainView from "./VectorScalar"; +import VectorVectorBinaryExprExplainView from "./VectorVector"; +import ScalarScalarBinaryExprExplainView from "./ScalarScalar"; +import { nodeValueType } from "../../../../promql/utils"; +import { useSuspenseAPIQuery } from "../../../../api/api"; +import { InstantQueryResult } from "../../../../api/responseTypes/query"; +import { Card, Text } from "@mantine/core"; + +interface BinaryExprExplainViewProps { + node: BinaryExpr; +} + +const BinaryExprExplainView: FC = ({ node }) => { + const { data: lhs } = useSuspenseAPIQuery({ + path: `/query`, + params: { + query: serializeNode(node.lhs), + }, + }); + const { data: rhs } = useSuspenseAPIQuery({ + path: `/query`, + params: { + query: serializeNode(node.rhs), + }, + }); + + if ( + lhs.data.resultType !== nodeValueType(node.lhs) || + rhs.data.resultType !== nodeValueType(node.rhs) + ) { + // This can happen for a brief transitionary render when "node" has changed, but "lhs" and "rhs" + // haven't switched back to loading yet (leading to a crash in e.g. the vector-vector explain view). + return null; + } + + // Scalar-scalar binops. + if (lhs.data.resultType === "scalar" && rhs.data.resultType === "scalar") { + return ( + + + Scalar-to-scalar binary operation + + + + ); + } + + // Vector-scalar binops. + if (lhs.data.resultType === "scalar" && rhs.data.resultType === "vector") { + return ( + + + Scalar-to-vector binary operation + + + + ); + } + if (lhs.data.resultType === "vector" && rhs.data.resultType === "scalar") { + return ( + + + Vector-to-scalar binary operation + + + + ); + } + + // Vector-vector binops. 
+ if (lhs.data.resultType === "vector" && rhs.data.resultType === "vector") { + return ( + + + Vector-to-vector binary operation + + + + ); + } + + throw new Error("invalid binary operator argument types"); +}; + +export default BinaryExprExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/ScalarScalar.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/ScalarScalar.tsx new file mode 100644 index 0000000000..874d824486 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/ScalarScalar.tsx @@ -0,0 +1,54 @@ +import { FC } from "react"; +import { BinaryExpr } from "../../../../promql/ast"; +import { scalarBinOp } from "../../../../promql/binOp"; +import { Table } from "@mantine/core"; +import { SampleValue } from "../../../../api/responseTypes/query"; +import { + formatPrometheusFloat, + parsePrometheusFloat, +} from "../../../../lib/formatFloatValue"; + +interface ScalarScalarBinaryExprExplainViewProps { + node: BinaryExpr; + lhs: SampleValue; + rhs: SampleValue; +} + +const ScalarScalarBinaryExprExplainView: FC< + ScalarScalarBinaryExprExplainViewProps +> = ({ node, lhs, rhs }) => { + const [lhsVal, rhsVal] = [ + parsePrometheusFloat(lhs[1]), + parsePrometheusFloat(rhs[1]), + ]; + + return ( + + + + Left value + Operator + Right value + + Result + + + + + {lhs[1]} + + {node.op} + {node.bool && " bool"} + + {rhs[1]} + = + + {formatPrometheusFloat(scalarBinOp(node.op, lhsVal, rhsVal))} + + + +
+ ); +}; + +export default ScalarScalarBinaryExprExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/VectorScalar.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/VectorScalar.tsx new file mode 100644 index 0000000000..3a0652f818 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/VectorScalar.tsx @@ -0,0 +1,104 @@ +import { FC } from "react"; +import { BinaryExpr } from "../../../../promql/ast"; +// import SeriesName from '../../../../utils/SeriesName'; +import { isComparisonOperator } from "../../../../promql/utils"; +import { vectorElemBinop } from "../../../../promql/binOp"; +import { + InstantSample, + SampleValue, +} from "../../../../api/responseTypes/query"; +import { Alert, Table, Text } from "@mantine/core"; +import { + formatPrometheusFloat, + parsePrometheusFloat, +} from "../../../../lib/formatFloatValue"; +import SeriesName from "../../SeriesName"; + +interface VectorScalarBinaryExprExplainViewProps { + node: BinaryExpr; + scalar: SampleValue; + vector: InstantSample[]; + scalarLeft: boolean; +} + +const VectorScalarBinaryExprExplainView: FC< + VectorScalarBinaryExprExplainViewProps +> = ({ node, scalar, vector, scalarLeft }) => { + if (vector.length === 0) { + return ( + + One side of the binary operation produces 0 results, no matching + information shown. + + ); + } + + return ( + + + + {!scalarLeft && Left labels} + Left value + Operator + {scalarLeft && Right labels} + Right value + + Result + + + + {vector.map((sample: InstantSample, idx) => { + if (!sample.value) { + // TODO: Handle native histograms or show a better error message. + throw new Error("Native histograms are not supported yet"); + } + + const vecVal = parsePrometheusFloat(sample.value[1]); + const scalVal = parsePrometheusFloat(scalar[1]); + + let { value, keep } = scalarLeft + ? vectorElemBinop(node.op, scalVal, vecVal) + : vectorElemBinop(node.op, vecVal, scalVal); + if (isComparisonOperator(node.op) && scalarLeft) { + value = vecVal; + } + if (node.bool) { + value = Number(keep); + keep = true; + } + + const scalarCell = {scalar[1]}; + const vectorCells = ( + <> + + + + {sample.value[1]} + + ); + + return ( + + {scalarLeft ? scalarCell : vectorCells} + + {node.op} + {node.bool && " bool"} + + {scalarLeft ? vectorCells : scalarCell} + = + + {keep ? ( + formatPrometheusFloat(value) + ) : ( + dropped + )} + + + ); + })} + +
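The { value, keep } pair returned by vectorElemBinop above encodes PromQL's element-wise semantics: arithmetic operators always keep the computed value, comparison operators keep or drop the left-hand value, and the caller rewrites both fields when the bool modifier is set. A simplified sketch covering a few operators (the real helper in promql/binOp handles the full operator set and its edge cases):

```ts
// Simplified sketch of the vectorElemBinop contract used above: arithmetic
// operators always keep the computed value, comparison operators keep or
// drop the left-hand value. The real helper in promql/binOp covers the full
// operator set.
function vectorElemBinopSketch(
  op: string,
  lhs: number,
  rhs: number
): { value: number; keep: boolean } {
  switch (op) {
    case "+":
      return { value: lhs + rhs, keep: true };
    case "*":
      return { value: lhs * rhs, keep: true };
    case ">":
      return { value: lhs, keep: lhs > rhs };
    case "==":
      return { value: lhs, keep: lhs === rhs };
    default:
      throw new Error(`operator not covered by this sketch: ${op}`);
  }
}

// Without "bool", a failed comparison drops the sample:
console.log(vectorElemBinopSketch(">", 3, 5)); // { value: 3, keep: false }

// With "bool", the caller rewrites both fields, as the component above does:
let { value, keep } = vectorElemBinopSketch(">", 3, 5);
value = Number(keep); // 0
keep = true; // the sample is always kept
```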
+ ); +}; + +export default VectorScalarBinaryExprExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/VectorVector.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/VectorVector.tsx new file mode 100644 index 0000000000..e70b7a3f3e --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/BinaryExpr/VectorVector.tsx @@ -0,0 +1,658 @@ +import React, { FC, useState } from "react"; +import { BinaryExpr, vectorMatchCardinality } from "../../../../promql/ast"; +import { InstantSample, Metric } from "../../../../api/responseTypes/query"; +import { isComparisonOperator, isSetOperator } from "../../../../promql/utils"; +import { + VectorMatchError, + BinOpMatchGroup, + MatchErrorType, + computeVectorVectorBinOp, + filteredSampleValue, +} from "../../../../promql/binOp"; +import { formatNode, labelNameList } from "../../../../promql/format"; +import { + Alert, + Anchor, + Box, + Group, + List, + Switch, + Table, + Text, +} from "@mantine/core"; +import { useLocalStorage } from "@mantine/hooks"; +import { IconAlertTriangle } from "@tabler/icons-react"; +import SeriesName from "../../SeriesName"; + +// We use this color pool for two purposes: +// +// 1. To distinguish different match groups from each other. +// 2. To distinguish multiple series within one match group from each other. +const colorPool = [ + "#1f77b4", + "#ff7f0e", + "#2ca02c", + "#d62728", + "#9467bd", + "#8c564b", + "#e377c2", + "#7f7f7f", + "#bcbd22", + "#17becf", + "#393b79", + "#637939", + "#8c6d31", + "#843c39", + "#d6616b", + "#7b4173", + "#ce6dbd", + "#9c9ede", + "#c5b0d5", + "#c49c94", + "#f7b6d2", + "#c7c7c7", + "#dbdb8d", + "#9edae5", + "#393b79", + "#637939", + "#8c6d31", + "#843c39", + "#d6616b", + "#7b4173", + "#ce6dbd", + "#9c9ede", + "#c5b0d5", + "#c49c94", + "#f7b6d2", + "#c7c7c7", + "#dbdb8d", + "#9edae5", + "#17becf", + "#393b79", + "#637939", + "#8c6d31", + "#843c39", + "#d6616b", + "#7b4173", + "#ce6dbd", + "#9c9ede", + "#c5b0d5", + "#c49c94", + "#f7b6d2", +]; + +const rhsColorOffset = colorPool.length / 2 + 3; +const colorForIndex = (idx: number, offset?: number) => + `${colorPool[(idx + (offset || 0)) % colorPool.length]}80`; + +const seriesSwatch = (color: string) => ( + +); +interface VectorVectorBinaryExprExplainViewProps { + node: BinaryExpr; + lhs: InstantSample[]; + rhs: InstantSample[]; +} + +const noMatchLabels = ( + metric: Metric, + on: boolean, + labels: string[] +): Metric => { + const result: Metric = {}; + for (const name in metric) { + if (!(labels.includes(name) === on && (on || name !== "__name__"))) { + result[name] = metric[name]; + } + } + return result; +}; + +const explanationText = (node: BinaryExpr): React.ReactNode => { + const matching = node.matching!; + const [oneSide, manySide] = + matching.card === vectorMatchCardinality.oneToMany + ? ["left", "right"] + : ["right", "left"]; + + return ( + <> + + {isComparisonOperator(node.op) ? ( + <> + This node filters the series from the left-hand side based on the + result of a " + {node.op}" + comparison with matching series from the right-hand side. + + ) : ( + <> + This node calculates the result of applying the " + {node.op}" + operator between the sample values of matching series from two sets + of time series. + + )} + + + {(matching.labels.length > 0 || matching.on) && + (matching.on ? ( + + on( + {labelNameList(matching.labels)}):{" "} + {matching.labels.length > 0 ? 
( + <> + series on both sides are matched on the labels{" "} + {labelNameList(matching.labels)} + + ) : ( + <> + all series from one side are matched to all series on the + other side. + + )} + + ) : ( + + ignoring( + {labelNameList(matching.labels)}): series on both sides are + matched on all of their labels, except{" "} + {labelNameList(matching.labels)}. + + ))} + {matching.card === vectorMatchCardinality.oneToOne ? ( + + One-to-one match. Each series from the left-hand side is allowed to + match with at most one series on the right-hand side, and vice + versa. + + ) : ( + + + group_{manySide}({labelNameList(matching.include)}) + + : {matching.card} match. Each series from the {oneSide}-hand side is + allowed to match with multiple series from the {manySide}-hand side. + {matching.include.length !== 0 && ( + <> + {" "} + Any {labelNameList(matching.include)} labels found on the{" "} + {oneSide}-hand side are propagated into the result, in addition + to the match group's labels. + + )} + + )} + {node.bool && ( + + bool: Instead of + filtering series based on the outcome of the comparison for matched + series, keep all series, but return the comparison outcome as a + boolean 0 or{" "} + 1 sample value. + + )} + + + ); +}; + +const explainError = ( + binOp: BinaryExpr, + _mg: BinOpMatchGroup, + err: VectorMatchError +) => { + const fixes = ( + <> + + Possible fixes: + + + {err.type === MatchErrorType.multipleMatchesForOneToOneMatching && ( + + + + Allow {err.dupeSide === "left" ? "many-to-one" : "one-to-many"}{" "} + matching + + : If you want to allow{" "} + {err.dupeSide === "left" ? "many-to-one" : "one-to-many"}{" "} + matching, you need to explicitly request it by adding a{" "} + + group_{err.dupeSide}() + {" "} + modifier to the operator: + + + {formatNode( + { + ...binOp, + matching: { + ...(binOp.matching + ? binOp.matching + : { labels: [], on: false, include: [] }), + card: + err.dupeSide === "left" + ? vectorMatchCardinality.manyToOne + : vectorMatchCardinality.oneToMany, + }, + }, + true, + 1 + )} + + + )} + + Update your matching parameters: Consider including + more differentiating labels in your matching modifiers (via{" "} + on() /{" "} + ignoring()) to + split multiple series into distinct match groups. + + + Aggregate the input: Consider aggregating away the + extra labels that create multiple series per group before applying the + binary operation. + + + + ); + + switch (err.type) { + case MatchErrorType.multipleMatchesForOneToOneMatching: + return ( + <> + + Binary operators only allow one-to-one matching by + default, but we found{" "} + multiple series on the {err.dupeSide} side for this + match group. + + {fixes} + + ); + case MatchErrorType.multipleMatchesOnBothSides: + return ( + <> + + We found multiple series on both sides for this + match group. Since many-to-many matching is not + supported, you need to ensure that at least one of the sides only + yields a single series. + + {fixes} + + ); + case MatchErrorType.multipleMatchesOnOneSide: { + const [oneSide, manySide] = + binOp.matching!.card === vectorMatchCardinality.oneToMany + ? ["left", "right"] + : ["right", "left"]; + return ( + <> + + You requested{" "} + + {oneSide === "right" ? "many-to-one" : "one-to-many"} matching + {" "} + via{" "} + + group_{manySide}() + + , but we also found{" "} + multiple series on the {oneSide} side of the match + group. Make sure that the {oneSide} side only contains a single + series. 
+ + {fixes} + + ); + } + default: + throw new Error("unknown match error"); + } +}; + +const VectorVectorBinaryExprExplainView: FC< + VectorVectorBinaryExprExplainViewProps +> = ({ node, lhs, rhs }) => { + // TODO: Don't use Mantine's local storage as a one-off here. Decide whether we + // want to keep Redux, and then do it only via one or the other everywhere. + const [showSampleValues, setShowSampleValues] = useLocalStorage({ + key: "queryPage.explain.binaryOperators.showSampleValues", + defaultValue: false, + }); + + const [maxGroups, setMaxGroups] = useState(100); + const [maxSeriesPerGroup, setMaxSeriesPerGroup] = useState< + number | undefined + >(100); + + const { matching } = node; + if (matching === null) { + // The parent should make sure to only pass in vector-vector binops that have their "matching" field filled out. + throw new Error("missing matching parameters in vector-to-vector binop"); + } + + const { groups: matchGroups, numGroups } = computeVectorVectorBinOp( + node.op, + matching, + node.bool, + lhs, + rhs, + { + maxGroups: maxGroups, + maxSeriesPerGroup: maxSeriesPerGroup, + } + ); + const errCount = Object.values(matchGroups).filter((mg) => mg.error).length; + + return ( + <> + {explanationText(node)} + + + setShowSampleValues(event.currentTarget.checked)} + /> + + + {numGroups > Object.keys(matchGroups).length && ( + }> + Too many match groups to display, only showing{" "} + {Object.keys(matchGroups).length} out of {numGroups} groups. +
+
+ <Anchor onClick={() => setMaxGroups(undefined)}> + Show all groups + </Anchor> +
+ )} + + {errCount > 0 && ( + }> + Found matching issues in {errCount} match group + {errCount > 1 ? "s" : ""}. See below for per-group error details. + + )} + + + + {Object.values(matchGroups).map((mg, mgIdx) => { + const { groupLabels, lhs, lhsCount, rhs, rhsCount, result, error } = + mg; + + const matchGroupTitleRow = (color: string) => ( + + + + + + ); + + const matchGroupTable = ( + series: InstantSample[], + seriesCount: number, + color: string, + colorOffset?: number + ) => ( + +
+ + {seriesCount === 0 ? ( + + + no matching series + + + ) : ( + <> + {matchGroupTitleRow(color)} + {series.map((s, sIdx) => { + if (s.value === undefined) { + // TODO: Figure out how to handle native histograms. + throw new Error( + "Native histograms are not supported yet" + ); + } + + return ( + + + + {seriesSwatch( + colorForIndex(sIdx, colorOffset) + )} + + + + + {showSampleValues && ( + {s.value[1]} + )} + + ); + })} + + )} + {seriesCount > series.length && ( + + + {seriesCount - series.length} more series omitted +   –   + setMaxSeriesPerGroup(undefined)} + > + Show all series + + + + )} + +
+
+ ); + + const groupColor = colorPool[mgIdx % colorPool.length]; + + const lhsTable = matchGroupTable(lhs, lhsCount, groupColor); + const rhsTable = matchGroupTable( + rhs, + rhsCount, + groupColor, + rhsColorOffset + ); + + const resultTable = ( + + + + {error !== null ? ( + + + error, result omitted + + + ) : result.length === 0 ? ( + + + dropped + + + ) : error !== null ? ( + + + error, result omitted + + + ) : ( + <> + {result.map(({ sample, manySideIdx }, resIdx) => { + if (sample.value === undefined) { + // TODO: Figure out how to handle native histograms. + throw new Error( + "Native histograms are not supported yet" + ); + } + + const filtered = + sample.value[1] === filteredSampleValue; + const [lIdx, rIdx] = + matching.card === vectorMatchCardinality.oneToMany + ? [0, manySideIdx] + : [manySideIdx, 0]; + + return ( + + + + + {seriesSwatch(colorForIndex(lIdx))} + + {seriesSwatch( + colorForIndex(rIdx, rhsColorOffset) + )} + + + + + + {showSampleValues && ( + + {filtered ? ( + + filtered + + ) : ( + {sample.value[1]} + )} + + )} + + ); + })} + + )} + +
+
+ ); + + return ( + + {mgIdx !== 0 && } + + + {error && ( + } + > + {explainError(node, mg, error)} + + )} + + + + + {lhsTable} + + + {node.op} + {node.bool && " bool"} + + + {rhsTable} + + = + + {resultTable} + + + + ); + })} + + + + ); +}; + +export default VectorVectorBinaryExprExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/ExplainView.module.css b/web/ui/mantine-ui/src/pages/query/ExplainViews/ExplainView.module.css new file mode 100644 index 0000000000..7cb36c7624 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/ExplainView.module.css @@ -0,0 +1,8 @@ +.funcDoc code { + background-color: light-dark( + var(--mantine-color-gray-1), + var(--mantine-color-dark-5) + ); + padding: 0.05em 0.2em; + border-radius: 0.2em; +} diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/ExplainView.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/ExplainView.tsx new file mode 100644 index 0000000000..e089c7851a --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/ExplainView.tsx @@ -0,0 +1,201 @@ +import { FC } from "react"; +import { Alert, Text, Anchor, Card, Divider } from "@mantine/core"; +import ASTNode, { nodeType } from "../../../promql/ast"; +// import AggregationExplainView from "./Aggregation"; +// import BinaryExprExplainView from "./BinaryExpr/BinaryExpr"; +// import SelectorExplainView from "./Selector"; +import funcDocs from "../../../promql/functionDocs"; +import { escapeString } from "../../../promql/utils"; +import { formatPrometheusDuration } from "../../../lib/formatTime"; +import classes from "./ExplainView.module.css"; +import SelectorExplainView from "./Selector"; +import AggregationExplainView from "./Aggregation"; +import BinaryExprExplainView from "./BinaryExpr/BinaryExpr"; +import { IconInfoCircle } from "@tabler/icons-react"; +interface ExplainViewProps { + node: ASTNode | null; + treeShown: boolean; + showTree: () => void; +} + +const ExplainView: FC = ({ + node, + treeShown, + showTree: setShowTree, +}) => { + if (node === null) { + return ( + }> + This tab can help you understand the behavior of individual components + of a query. +
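Circling back to the vector-matching view above: series are bucketed into match groups by the label signature that survives on()/ignoring(), which is the complement of what noMatchLabels strips. A dependency-free sketch of deriving that group key (the string encoding is illustrative, not the actual internal representation):

```ts
// Sketch of how series fall into match groups for the view above: with on(),
// only the listed labels form the group key; with ignoring(), all labels
// except the listed ones and __name__ do. This is the complement of the
// noMatchLabels() helper defined in the component.
type Metric = Record<string, string>;

function matchGroupKey(metric: Metric, on: boolean, labels: string[]): string {
  const kept = Object.keys(metric)
    .filter((name) =>
      on ? labels.includes(name) : !labels.includes(name) && name !== "__name__"
    )
    .sort()
    .map((name) => `${name}="${metric[name]}"`);
  return `{${kept.join(",")}}`;
}

const m: Metric = { __name__: "http_requests_total", job: "api", instance: "a:9090" };
console.log(matchGroupKey(m, true, ["job"])); // {job="api"}
console.log(matchGroupKey(m, false, ["instance"])); // {job="api"}
```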
+
+ To use the Explain view,{" "} + {!treeShown && ( + <> + + enable the query tree view + {" "} + (also available via the expression input menu) and then + + )}{" "} + select a node in the tree above. +
+ ); + } + + switch (node.type) { + case nodeType.aggregation: + return ; + case nodeType.binaryExpr: + return ; + case nodeType.call: + return ( + + + Function call + + + This node calls the{" "} + + + {node.func.name}() + + {" "} + function{node.args.length > 0 ? " on the provided inputs" : ""}. + + + {/* TODO: Some docs, like x_over_time, have relative links pointing back to the Prometheus docs, + make sure to modify those links in the docs extraction so they work from the explain view */} + + {funcDocs[node.func.name]} + + + ); + case nodeType.matrixSelector: + return ; + case nodeType.subquery: + return ( + + + Subquery + + + This node evaluates the passed expression as a subquery over the + last{" "} + + {formatPrometheusDuration(node.range)} + {" "} + at a query resolution{" "} + {node.step > 0 ? ( + <> + of{" "} + + {formatPrometheusDuration(node.step)} + + + ) : ( + "equal to the default rule evaluation interval" + )} + {node.timestamp !== null ? ( + <> + , evaluated relative to an absolute evaluation timestamp of{" "} + + {(node.timestamp / 1000).toFixed(3)} + + + ) : node.startOrEnd !== null ? ( + <> + , evaluated relative to the {node.startOrEnd} of the query range + + ) : ( + <> + )} + {node.offset === 0 ? ( + <> + ) : node.offset > 0 ? ( + <> + , time-shifted{" "} + + {formatPrometheusDuration(node.offset)} + {" "} + into the past + + ) : ( + <> + , time-shifted{" "} + + {formatPrometheusDuration(-node.offset)} + {" "} + into the future + + )} + . + + + ); + case nodeType.numberLiteral: + return ( + + + Number literal + + + A scalar number literal with the value{" "} + {node.val}. + + + ); + case nodeType.parenExpr: + return ( + + + Parentheses + + + Parentheses that contain a sub-expression to be evaluated. + + + ); + case nodeType.stringLiteral: + return ( + + + String literal + + + A string literal with the value{" "} + + "{escapeString(node.val)}" + + . + + + ); + case nodeType.unaryExpr: + return ( + + + Unary expression + + + A unary expression that{" "} + {node.op === "+" + ? "does not affect the expression it is applied to" + : "changes the sign of the expression it is applied to"} + . + + + ); + case nodeType.vectorSelector: + return ; + default: + throw new Error("invalid node type"); + } +}; + +export default ExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExplainViews/Selector.tsx b/web/ui/mantine-ui/src/pages/query/ExplainViews/Selector.tsx new file mode 100644 index 0000000000..1b71e3591d --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExplainViews/Selector.tsx @@ -0,0 +1,232 @@ +import { FC, ReactNode } from "react"; +import { + VectorSelector, + MatrixSelector, + nodeType, + LabelMatcher, + matchType, +} from "../../../promql/ast"; +import { escapeString } from "../../../promql/utils"; +import { useSuspenseAPIQuery } from "../../../api/api"; +import { Card, Text, Divider, List } from "@mantine/core"; +import { MetadataResult } from "../../../api/responseTypes/metadata"; +import { formatPrometheusDuration } from "../../../lib/formatTime"; +import { useSettings } from "../../../state/settingsSlice"; + +interface SelectorExplainViewProps { + node: VectorSelector | MatrixSelector; +} + +const matchingCriteriaList = ( + name: string, + matchers: LabelMatcher[] +): ReactNode => { + return ( + + {name.length > 0 && ( + + The metric name is{" "} + {name}. 
+ + )} + {matchers + .filter((m) => !(m.name === "__name__")) + .map((m) => { + switch (m.type) { + case matchType.equal: + return ( + + + {m.name} + + {m.type} + + "{escapeString(m.value)}" + + : The label{" "} + + {m.name} + {" "} + is exactly{" "} + + "{escapeString(m.value)}" + + . + + ); + case matchType.notEqual: + return ( + + + {m.name} + + {m.type} + + "{escapeString(m.value)}" + + : The label{" "} + + {m.name} + {" "} + is not{" "} + + "{escapeString(m.value)}" + + . + + ); + case matchType.matchRegexp: + return ( + + + {m.name} + + {m.type} + + "{escapeString(m.value)}" + + : The label{" "} + + {m.name} + {" "} + matches the regular expression{" "} + + "{escapeString(m.value)}" + + . + + ); + case matchType.matchNotRegexp: + return ( + + + {m.name} + + {m.type} + + "{escapeString(m.value)}" + + : The label{" "} + + {m.name} + {" "} + does not match the regular expression{" "} + + "{escapeString(m.value)}" + + . + + ); + default: + throw new Error("invalid matcher type"); + } + })} + + ); +}; + +const SelectorExplainView: FC = ({ node }) => { + const baseMetricName = node.name.replace(/(_count|_sum|_bucket)$/, ""); + const { lookbackDelta } = useSettings(); + const { data: metricMeta } = useSuspenseAPIQuery({ + path: `/metadata`, + params: { + metric: baseMetricName, + }, + }); + + return ( + + + {node.type === nodeType.vectorSelector ? "Instant" : "Range"} vector + selector + + + {metricMeta.data === undefined || + metricMeta.data[baseMetricName] === undefined || + metricMeta.data[baseMetricName].length < 1 ? ( + <>No metric metadata found. + ) : ( + <> + Metric help:{" "} + {metricMeta.data[baseMetricName][0].help} +
+ Metric type:{" "} + {metricMeta.data[baseMetricName][0].type} + + )} +
+ + + {node.type === nodeType.vectorSelector ? ( + <> + This node selects the latest (non-stale) sample value within the + last {lookbackDelta} + + ) : ( + <> + This node selects{" "} + + {formatPrometheusDuration(node.range)} + {" "} + of data going backward from the evaluation timestamp + + )} + {node.timestamp !== null ? ( + <> + , evaluated relative to an absolute evaluation timestamp of{" "} + + {(node.timestamp / 1000).toFixed(3)} + + + ) : node.startOrEnd !== null ? ( + <>, evaluated relative to the {node.startOrEnd} of the query range + ) : ( + <> + )} + {node.offset === 0 ? ( + <> + ) : node.offset > 0 ? ( + <> + , time-shifted{" "} + + {formatPrometheusDuration(node.offset)} + {" "} + into the past, + + ) : ( + <> + , time-shifted{" "} + + {formatPrometheusDuration(-node.offset)} + {" "} + into the future, + + )}{" "} + for any series that match all of the following criteria: + + {matchingCriteriaList(node.name, node.matchers)} + + If a series has no values in the last{" "} + + {node.type === nodeType.vectorSelector + ? lookbackDelta + : formatPrometheusDuration(node.range)} + + {node.offset > 0 && ( + <> + {" "} + (relative to the time-shifted instant{" "} + + {formatPrometheusDuration(node.offset)} + {" "} + in the past) + + )} + , the series will not be returned. + +
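One detail worth noting in the selector view above: metric metadata is registered under the base metric name, so the histogram and summary suffixes are stripped before the /api/v1/metadata lookup. The regex in isolation:

```ts
// The suffix-stripping used above before the metadata lookup: histogram and
// summary series expose _count/_sum/_bucket children, but metadata is
// registered under the base metric name.
const baseMetricName = (name: string): string =>
  name.replace(/(_count|_sum|_bucket)$/, "");

console.log(baseMetricName("http_request_duration_seconds_bucket"));
// "http_request_duration_seconds"
console.log(baseMetricName("up")); // "up" (no suffix, unchanged)
```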
+ ); +}; + +export default SelectorExplainView; diff --git a/web/ui/mantine-ui/src/pages/query/ExpressionInput.module.css b/web/ui/mantine-ui/src/pages/query/ExpressionInput.module.css new file mode 100644 index 0000000000..027919f2e0 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExpressionInput.module.css @@ -0,0 +1,13 @@ +.input { + /* border: calc(0.0625rem * var(--mantine-scale)) solid var(--input-bd); */ + border-radius: var(--mantine-radius-default); + flex: auto; + /* padding: 4px 0 0 8px; */ + /* font-size: 15px; */ + /* font-family: "DejaVu Sans Mono"; */ + + &:focus-within { + outline: rem(1.3px) solid var(--mantine-color-blue-filled); + border-color: transparent; + } +} diff --git a/web/ui/mantine-ui/src/pages/query/ExpressionInput.tsx b/web/ui/mantine-ui/src/pages/query/ExpressionInput.tsx new file mode 100644 index 0000000000..a4b26cd910 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExpressionInput.tsx @@ -0,0 +1,374 @@ +import { + ActionIcon, + Box, + Button, + Group, + InputBase, + Loader, + Menu, + Modal, + Skeleton, + useComputedColorScheme, +} from "@mantine/core"; +import { + CompleteStrategy, + PromQLExtension, + newCompleteStrategy, +} from "@prometheus-io/codemirror-promql"; +import { FC, Suspense, useEffect, useRef, useState } from "react"; +import CodeMirror, { + EditorState, + EditorView, + Prec, + ReactCodeMirrorRef, + highlightSpecialChars, + keymap, + placeholder, +} from "@uiw/react-codemirror"; +import { + baseTheme, + darkPromqlHighlighter, + darkTheme, + lightTheme, + promqlHighlighter, +} from "../../codemirror/theme"; +import { + bracketMatching, + indentOnInput, + syntaxHighlighting, + syntaxTree, +} from "@codemirror/language"; +import classes from "./ExpressionInput.module.css"; +import { + CompletionContext, + CompletionResult, + autocompletion, + closeBrackets, + closeBracketsKeymap, + completionKeymap, +} from "@codemirror/autocomplete"; +import { + defaultKeymap, + history, + historyKeymap, + insertNewlineAndIndent, +} from "@codemirror/commands"; +import { highlightSelectionMatches } from "@codemirror/search"; +import { lintKeymap } from "@codemirror/lint"; +import { + IconAlignJustified, + IconBinaryTree, + IconDotsVertical, + IconSearch, + IconTerminal, + IconTrash, +} from "@tabler/icons-react"; +import { useAPIQuery } from "../../api/api"; +import { notifications } from "@mantine/notifications"; +import { useSettings } from "../../state/settingsSlice"; +import MetricsExplorer from "./MetricsExplorer/MetricsExplorer"; +import ErrorBoundary from "../../components/ErrorBoundary"; +import { useAppSelector } from "../../state/hooks"; +import { inputIconStyle, menuIconStyle } from "../../styles"; + +const promqlExtension = new PromQLExtension(); + +// Autocompletion strategy that wraps the main one and enriches +// it with past query items. +export class HistoryCompleteStrategy implements CompleteStrategy { + private complete: CompleteStrategy; + private queryHistory: string[]; + constructor(complete: CompleteStrategy, queryHistory: string[]) { + this.complete = complete; + this.queryHistory = queryHistory; + } + + promQL( + context: CompletionContext + ): Promise | CompletionResult | null { + return Promise.resolve(this.complete.promQL(context)).then((res) => { + const { state, pos } = context; + const tree = syntaxTree(state).resolve(pos, -1); + const start = res != null ? 
res.from : tree.from; + + if (start !== 0) { + return res; + } + + const historyItems: CompletionResult = { + from: start, + to: pos, + options: this.queryHistory.map((q) => ({ + label: q.length < 80 ? q : q.slice(0, 76).concat("..."), + detail: "past query", + apply: q, + info: q.length < 80 ? undefined : q, + })), + validFor: /^[a-zA-Z0-9_:]+$/, + }; + + if (res !== null) { + historyItems.options = historyItems.options.concat(res.options); + } + return historyItems; + }); + } +} + +interface ExpressionInputProps { + initialExpr: string; + metricNames: string[]; + executeQuery: (expr: string) => void; + treeShown: boolean; + setShowTree: (showTree: boolean) => void; + removePanel: () => void; +} + +const ExpressionInput: FC = ({ + initialExpr, + metricNames, + executeQuery, + removePanel, + treeShown, + setShowTree, +}) => { + const theme = useComputedColorScheme(); + const { queryHistory } = useAppSelector((state) => state.queryPage); + const { + pathPrefix, + enableAutocomplete, + enableSyntaxHighlighting, + enableLinter, + enableQueryHistory, + } = useSettings(); + const [expr, setExpr] = useState(initialExpr); + useEffect(() => { + setExpr(initialExpr); + }, [initialExpr]); + + const { + data: formatResult, + error: formatError, + isFetching: isFormatting, + refetch: formatQuery, + } = useAPIQuery({ + path: "/format_query", + params: { + query: expr, + }, + enabled: false, + }); + + useEffect(() => { + if (formatError) { + notifications.show({ + color: "red", + title: "Error formatting query", + message: formatError.message, + }); + return; + } + + if (formatResult) { + setExpr(formatResult.data); + notifications.show({ + title: "Expression formatted", + message: "Expression formatted successfully!", + }); + } + }, [formatResult, formatError]); + + const cmRef = useRef(null); + + const [showMetricsExplorer, setShowMetricsExplorer] = useState(false); + + // (Re)initialize editor based on settings / setting changes. + useEffect(() => { + // Build the dynamic part of the config. + promqlExtension + .activateCompletion(enableAutocomplete) + .activateLinter(enableLinter) + .setComplete({ + completeStrategy: new HistoryCompleteStrategy( + newCompleteStrategy({ + remote: { + url: pathPrefix, + cache: { initialMetricList: metricNames }, + }, + }), + enableQueryHistory ? queryHistory : [] + ), + }); + }, [ + pathPrefix, + metricNames, + enableAutocomplete, + enableLinter, + enableQueryHistory, + queryHistory, + ]); // TODO: Maybe use dynamic config compartment again as in the old UI? + + return ( + + {/* eslint-disable-next-line @typescript-eslint/no-explicit-any */} + + leftSection={ + isFormatting ? : + } + rightSection={ + + + + + + + + Query options + } + onClick={() => setShowMetricsExplorer(true)} + > + Explore metrics + + } + onClick={() => formatQuery()} + disabled={ + isFormatting || expr === "" || expr === formatResult?.data + } + > + Format expression + + } + onClick={() => setShowTree(!treeShown)} + > + {treeShown ? 
"Hide" : "Show"} tree view + + } + onClick={removePanel} + > + Remove query + + + + } + component={CodeMirror} + className={classes.input} + basicSetup={false} + value={expr} + onChange={setExpr} + autoFocus + ref={cmRef} + extensions={[ + baseTheme, + highlightSpecialChars(), + history(), + EditorState.allowMultipleSelections.of(true), + indentOnInput(), + bracketMatching(), + closeBrackets(), + autocompletion(), + highlightSelectionMatches(), + EditorView.lineWrapping, + keymap.of([ + ...closeBracketsKeymap, + ...defaultKeymap, + ...historyKeymap, + ...completionKeymap, + ...lintKeymap, + ]), + placeholder("Enter expression (press Shift+Enter for newlines)"), + enableSyntaxHighlighting + ? syntaxHighlighting( + theme === "light" ? promqlHighlighter : darkPromqlHighlighter + ) + : [], + promqlExtension.asExtension(), + theme === "light" ? lightTheme : darkTheme, + keymap.of([ + { + key: "Escape", + run: (v: EditorView): boolean => { + v.contentDOM.blur(); + return false; + }, + }, + ]), + Prec.highest( + keymap.of([ + { + key: "Enter", + run: (): boolean => { + executeQuery(expr); + return true; + }, + }, + { + key: "Shift-Enter", + run: insertNewlineAndIndent, + }, + ]) + ), + ]} + multiline + /> + + + setShowMetricsExplorer(false)} + title="Explore metrics" + > + + + {Array.from(Array(20), (_, i) => ( + + ))} + + } + > + { + if (cmRef.current && cmRef.current.view) { + const view = cmRef.current.view; + view.dispatch( + view.state.update({ + changes: { + from: view.state.selection.ranges[0].from, + to: view.state.selection.ranges[0].to, + insert: text, + }, + }) + ); + } + }} + close={() => setShowMetricsExplorer(false)} + /> + + + + + ); +}; + +export default ExpressionInput; diff --git a/web/ui/mantine-ui/src/pages/query/ExpressionInput.tsx.old b/web/ui/mantine-ui/src/pages/query/ExpressionInput.tsx.old new file mode 100644 index 0000000000..52b10f6713 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ExpressionInput.tsx.old @@ -0,0 +1,367 @@ +import React, { FC, useState, useEffect, useRef } from "react"; + +import { + EditorView, + highlightSpecialChars, + keymap, + ViewUpdate, + placeholder, +} from "@codemirror/view"; +import { EditorState, Prec, Compartment } from "@codemirror/state"; +import { + bracketMatching, + indentOnInput, + syntaxHighlighting, + syntaxTree, +} from "@codemirror/language"; +import { + defaultKeymap, + history, + historyKeymap, + insertNewlineAndIndent, +} from "@codemirror/commands"; +import { highlightSelectionMatches } from "@codemirror/search"; +import { lintKeymap } from "@codemirror/lint"; +import { + autocompletion, + completionKeymap, + CompletionContext, + CompletionResult, + closeBrackets, + closeBracketsKeymap, +} from "@codemirror/autocomplete"; +import { + baseTheme, + lightTheme, + darkTheme, + promqlHighlighter, + darkPromqlHighlighter, +} from "../../codemirror/theme"; + +import { + CompleteStrategy, + PromQLExtension, +} from "@prometheus-io/codemirror-promql"; +import { newCompleteStrategy } from "@prometheus-io/codemirror-promql/dist/esm/complete"; + +const promqlExtension = new PromQLExtension(); + +interface ExpressionInputProps { + value: string; + onChange: (expr: string) => void; + queryHistory: string[]; + metricNames: string[]; + executeQuery: () => void; +} + +const dynamicConfigCompartment = new Compartment(); + +// Autocompletion strategy that wraps the main one and enriches +// it with past query items. 
+export class HistoryCompleteStrategy implements CompleteStrategy { + private complete: CompleteStrategy; + private queryHistory: string[]; + constructor(complete: CompleteStrategy, queryHistory: string[]) { + this.complete = complete; + this.queryHistory = queryHistory; + } + + promQL( + context: CompletionContext + ): Promise | CompletionResult | null { + return Promise.resolve(this.complete.promQL(context)).then((res) => { + const { state, pos } = context; + const tree = syntaxTree(state).resolve(pos, -1); + const start = res != null ? res.from : tree.from; + + if (start !== 0) { + return res; + } + + const historyItems: CompletionResult = { + from: start, + to: pos, + options: this.queryHistory.map((q) => ({ + label: q.length < 80 ? q : q.slice(0, 76).concat("..."), + detail: "past query", + apply: q, + info: q.length < 80 ? undefined : q, + })), + validFor: /^[a-zA-Z0-9_:]+$/, + }; + + if (res !== null) { + historyItems.options = historyItems.options.concat(res.options); + } + return historyItems; + }); + } +} + +const ExpressionInput: FC = ({ + value, + onChange, + queryHistory, + metricNames, +}) => { + const containerRef = useRef(null); + const viewRef = useRef(null); + const [showMetricsExplorer, setShowMetricsExplorer] = + useState(false); + const pathPrefix = usePathPrefix(); + const { theme } = useTheme(); + + const [formatError, setFormatError] = useState(null); + const [isFormatting, setIsFormatting] = useState(false); + const [exprFormatted, setExprFormatted] = useState(false); + + // (Re)initialize editor based on settings / setting changes. + useEffect(() => { + // Build the dynamic part of the config. + promqlExtension + .activateCompletion(enableAutocomplete) + .activateLinter(enableLinter) + .setComplete({ + completeStrategy: new HistoryCompleteStrategy( + newCompleteStrategy({ + remote: { + url: pathPrefix, + cache: { initialMetricList: metricNames }, + }, + }), + queryHistory + ), + }); + + let highlighter = syntaxHighlighting( + theme === "dark" ? darkPromqlHighlighter : promqlHighlighter + ); + if (theme === "dark") { + highlighter = syntaxHighlighting(darkPromqlHighlighter); + } + + const dynamicConfig = [ + enableHighlighting ? highlighter : [], + promqlExtension.asExtension(), + theme === "dark" ? darkTheme : lightTheme, + ]; + + // Create or reconfigure the editor. + const view = viewRef.current; + if (view === null) { + // If the editor does not exist yet, create it. + if (!containerRef.current) { + throw new Error("expected CodeMirror container element to exist"); + } + + const startState = EditorState.create({ + doc: value, + extensions: [ + baseTheme, + highlightSpecialChars(), + history(), + EditorState.allowMultipleSelections.of(true), + indentOnInput(), + bracketMatching(), + closeBrackets(), + autocompletion(), + highlightSelectionMatches(), + EditorView.lineWrapping, + keymap.of([ + ...closeBracketsKeymap, + ...defaultKeymap, + ...historyKeymap, + ...completionKeymap, + ...lintKeymap, + ]), + placeholder("Expression (press Shift+Enter for newlines)"), + dynamicConfigCompartment.of(dynamicConfig), + // This keymap is added without precedence so that closing the autocomplete dropdown + // via Escape works without blurring the editor. 
+ keymap.of([ + { + key: "Escape", + run: (v: EditorView): boolean => { + v.contentDOM.blur(); + return false; + }, + }, + ]), + Prec.highest( + keymap.of([ + { + key: "Enter", + run: (v: EditorView): boolean => { + executeQuery(); + return true; + }, + }, + { + key: "Shift-Enter", + run: insertNewlineAndIndent, + }, + ]) + ), + EditorView.updateListener.of((update: ViewUpdate): void => { + if (update.docChanged) { + onExpressionChange(update.state.doc.toString()); + setExprFormatted(false); + } + }), + ], + }); + + const view = new EditorView({ + state: startState, + parent: containerRef.current, + }); + + viewRef.current = view; + + view.focus(); + } else { + // The editor already exists, just reconfigure the dynamically configured parts. + view.dispatch( + view.state.update({ + effects: dynamicConfigCompartment.reconfigure(dynamicConfig), + }) + ); + } + // "value" is only used in the initial render, so we don't want to + // re-run this effect every time that "value" changes. + // + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [ + enableAutocomplete, + enableHighlighting, + enableLinter, + executeQuery, + onExpressionChange, + queryHistory, + theme, + ]); + + const insertAtCursor = (value: string) => { + const view = viewRef.current; + if (view === null) { + return; + } + const { from, to } = view.state.selection.ranges[0]; + view.dispatch( + view.state.update({ + changes: { from, to, insert: value }, + }) + ); + }; + + const formatExpression = () => { + setFormatError(null); + setIsFormatting(true); + + fetch( + `${pathPrefix}/${API_PATH}/format_query?${new URLSearchParams({ + query: value, + })}`, + { + cache: "no-store", + credentials: "same-origin", + } + ) + .then((resp) => { + if (!resp.ok && resp.status !== 400) { + throw new Error(`format HTTP request failed: ${resp.statusText}`); + } + + return resp.json(); + }) + .then((json) => { + if (json.status !== "success") { + throw new Error(json.error || "invalid response JSON"); + } + + const view = viewRef.current; + if (view === null) { + return; + } + + view.dispatch( + view.state.update({ + changes: { from: 0, to: view.state.doc.length, insert: json.data }, + }) + ); + setExprFormatted(true); + }) + .catch((err) => { + setFormatError(err.message); + }) + .finally(() => { + setIsFormatting(false); + }); + }; + + return ( + <> + + + + {loading ? ( + + ) : ( + + )} + + +
+ + + + + + + + {formatError && ( + Error formatting expression: {formatError} + )} + + + + ); +}; + +export default ExpressionInput; diff --git a/web/ui/mantine-ui/src/pages/query/Graph.module.css b/web/ui/mantine-ui/src/pages/query/Graph.module.css new file mode 100644 index 0000000000..5b7133058c --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/Graph.module.css @@ -0,0 +1,11 @@ +.chartWrapper { + border: 1px solid + light-dark(var(--mantine-color-gray-3), var(--mantine-color-dark-5)); + border-radius: var(--mantine-radius-default); +} + +.uplotChart { + width: 100%; + height: 100%; + padding: 15px; +} diff --git a/web/ui/mantine-ui/src/pages/query/Graph.tsx b/web/ui/mantine-ui/src/pages/query/Graph.tsx new file mode 100644 index 0000000000..c25d5a98eb --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/Graph.tsx @@ -0,0 +1,192 @@ +import { FC, useEffect, useId, useState } from "react"; +import { Alert, Skeleton, Box, LoadingOverlay, Stack } from "@mantine/core"; +import { IconAlertTriangle, IconInfoCircle } from "@tabler/icons-react"; +import { RangeQueryResult } from "../../api/responseTypes/query"; +import { SuccessAPIResponse, useAPIQuery } from "../../api/api"; +import classes from "./Graph.module.css"; +import { + GraphDisplayMode, + GraphResolution, + getEffectiveResolution, +} from "../../state/queryPageSlice"; +import "uplot/dist/uPlot.min.css"; +import "./uplot.css"; +import { useElementSize } from "@mantine/hooks"; +import UPlotChart, { UPlotChartRange } from "./UPlotChart"; +import ASTNode, { nodeType } from "../../promql/ast"; +import serializeNode from "../../promql/serialize"; + +export interface GraphProps { + expr: string; + node: ASTNode | null; + endTime: number | null; + range: number; + resolution: GraphResolution; + showExemplars: boolean; + displayMode: GraphDisplayMode; + retriggerIdx: number; + onSelectRange: (start: number, end: number) => void; +} + +const Graph: FC = ({ + expr, + node, + endTime, + range, + resolution, + showExemplars, + displayMode, + retriggerIdx, + onSelectRange, +}) => { + const { ref, width } = useElementSize(); + const [rerender, setRerender] = useState(true); + + const effectiveExpr = + node === null + ? expr + : serializeNode( + node.type === nodeType.matrixSelector + ? { + type: nodeType.vectorSelector, + name: node.name, + matchers: node.matchers, + offset: node.offset, + timestamp: node.timestamp, + startOrEnd: node.startOrEnd, + } + : node + ); + + const effectiveEndTime = (endTime !== null ? endTime : Date.now()) / 1000; + const startTime = effectiveEndTime - range / 1000; + const effectiveResolution = getEffectiveResolution(resolution, range) / 1000; + + const { data, error, isFetching, isLoading, refetch } = + useAPIQuery({ + key: [useId()], + path: "/query_range", + params: { + query: effectiveExpr, + step: effectiveResolution.toString(), + start: startTime.toString(), + end: effectiveEndTime.toString(), + }, + enabled: effectiveExpr !== "", + }); + + // Bundle the chart data and the displayed range together. This has two purposes: + // 1. If we update them separately, we cause unnecessary rerenders of the uPlot chart itself. + // 2. We want to keep displaying the old range in the chart while a query for a new range + // is still in progress. 
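+  // For example: when the user zooms, the query for the new range may take a
+  // while; until it resolves, both the old data and the old start/end stay in
+  // `dataAndRange`, so the chart does not jump ahead of its data.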
+ const [dataAndRange, setDataAndRange] = useState<{ + data: SuccessAPIResponse; + range: UPlotChartRange; + } | null>(null); + + useEffect(() => { + if (data !== undefined) { + setDataAndRange({ + data: data, + range: { + startTime: startTime, + endTime: effectiveEndTime, + resolution: effectiveResolution, + }, + }); + } + // We actually want to update the displayed range only once the new data is there, + // so we don't want to include any of the range-related parameters in the dependencies. + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [data]); + + // Re-execute the query when the user presses Enter (or hits the Execute button). + useEffect(() => { + effectiveExpr !== "" && refetch(); + }, [retriggerIdx, refetch, effectiveExpr, endTime, range, resolution]); + + // The useElementSize hook above only gets a valid size on the second render, so this + // is a workaround to make the component render twice after mount. + useEffect(() => { + if (dataAndRange !== null && rerender) { + setRerender(false); + } + }, [dataAndRange, rerender, setRerender]); + + // TODO: Share all the loading/error/empty data notices with the DataTable? + + // Show a skeleton only on the first load, not on subsequent ones. + if (isLoading) { + return ( + + {Array.from(Array(5), (_, i) => ( + + ))} + + ); + } + + if (error) { + return ( + } + > + {error.message} + + ); + } + + if (dataAndRange === null) { + return No data queried yet; + } + + const { result } = dataAndRange.data.data; + + if (result.length === 0) { + return ( + }> + This query returned no data. + + ); + } + + return ( + + {node !== null && node.type === nodeType.matrixSelector && ( + } + > + Note: Range vector selectors can't be graphed, so + graphing the equivalent instant vector selector instead. 
+ + )} + + , + // }} + // styles={{ loader: { width: "100%", height: "100%" } }} + /> + + + + ); +}; + +export default Graph; diff --git a/web/ui/mantine-ui/src/pages/query/HistogramChart.module.css b/web/ui/mantine-ui/src/pages/query/HistogramChart.module.css new file mode 100644 index 0000000000..61b9546af7 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/HistogramChart.module.css @@ -0,0 +1,106 @@ +.histogramYWrapper { + display: flex; + flex-wrap: nowrap; + align-items: flex-start; + box-sizing: border-box; + margin: 15px 0; + width: 100%; +} + +.histogramYLabels { + height: 200px; + display: flex; + flex-direction: column; +} + +.histogramYLabel { + margin-right: 8px; + height: 25%; + text-align: right; +} + +.histogramXWrapper { + flex: 1 1 auto; + display: flex; + flex-direction: column; + margin-right: 8px; +} + +.histogramXLabels { + display: flex; +} + +.histogramXLabel { + position: relative; + margin-top: 5px; + width: 100%; + text-align: right; +} + +.histogramContainer { + margin-top: 9px; + position: relative; + height: 200px; +} + +.histogramAxes { + position: absolute; + width: 100%; + height: 100%; + border-bottom: 1px solid var(--mantine-color-gray-7); + border-left: 1px solid var(--mantine-color-gray-7); + pointer-events: none; +} + +.histogramYGrid { + position: absolute; + border-bottom: 1px dashed var(--mantine-color-gray-6); + width: 100%; +} + +.histogramYTick { + position: absolute; + border-bottom: 1px solid var(--mantine-color-gray-7); + left: -5px; + height: 0px; + width: 5px; +} + +.histogramXGrid { + position: absolute; + border-left: 1px dashed var(--mantine-color-gray-6); + height: 100%; + width: 0; +} + +.histogramXTick { + position: absolute; + border-left: 1px solid var(--mantine-color-gray-7); + height: 5px; + width: 0; + bottom: -5px; +} + +.histogramBucketSlot { + position: absolute; + bottom: 0; + top: 0; +} + +.histogramBucket { + position: absolute; + width: 100%; + bottom: 0; + background-color: #2db453; + border: 1px solid #77de94; + pointer-events: none; +} + +.histogramBucketSlot:hover { + background-color: var(--mantine-color-gray-4); +} + +.histogramBucketSlot:hover .histogramBucket { + background-color: #88e1a1; + border: 1px solid #77de94; +} diff --git a/web/ui/mantine-ui/src/pages/query/HistogramChart.tsx b/web/ui/mantine-ui/src/pages/query/HistogramChart.tsx new file mode 100644 index 0000000000..a574d1df44 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/HistogramChart.tsx @@ -0,0 +1,309 @@ +import React, { FC } from "react"; +import { Histogram } from "../../types/types"; +import { + calculateDefaultExpBucketWidth, + findMinPositive, + findMaxNegative, + findZeroAxisLeft, + showZeroAxis, + findZeroBucket, + bucketRangeString, +} from "./HistogramHelpers"; +import classes from "./HistogramChart.module.css"; +import { Tooltip } from "@mantine/core"; + +interface HistogramChartProps { + histogram: Histogram; + index: number; + scale: string; +} + +const HistogramChart: FC = ({ + index, + histogram, + scale, +}) => { + const { buckets } = histogram; + if (!buckets || buckets.length === 0) { + return
<div>No data</div>;
+  }
+  const formatter = Intl.NumberFormat("en", { notation: "compact" });
+
+  // For linear scales, the count of a histogram bucket is represented by its area rather than its height. This means it considers
+  // both the count and the range (width) of the bucket. For this, we can set the height of the bucket proportional
+  // to its frequency density (fd). The fd is the count of the bucket divided by the width of the bucket.
+  const fds = [];
+  for (const bucket of buckets) {
+    const left = parseFloat(bucket[1]);
+    const right = parseFloat(bucket[2]);
+    const count = parseFloat(bucket[3]);
+    const width = right - left;
+
+    // This happens when a user wants observations of precisely zero to be included in the zero bucket.
+    if (width === 0) {
+      fds.push(0);
+      continue;
+    }
+    fds.push(count / width);
+  }
+  const fdMax = Math.max(...fds);
+
+  const first = buckets[0];
+  const last = buckets[buckets.length - 1];
+
+  const rangeMax = parseFloat(last[2]);
+  const rangeMin = parseFloat(first[1]);
+  const countMax = Math.max(...buckets.map((b) => parseFloat(b[3])));
+
+  const defaultExpBucketWidth = calculateDefaultExpBucketWidth(last, buckets);
+
+  const maxPositive = rangeMax > 0 ? rangeMax : 0;
+  const minPositive = findMinPositive(buckets);
+  const maxNegative = findMaxNegative(buckets);
+  const minNegative = parseFloat(first[1]) < 0 ? parseFloat(first[1]) : 0;
+
+  // Calculate the borders of positive and negative buckets in the exponential scale from left to right.
+  const startNegative =
+    minNegative !== 0 ? -Math.log(Math.abs(minNegative)) : 0;
+  const endNegative = maxNegative !== 0 ? -Math.log(Math.abs(maxNegative)) : 0;
+  const startPositive = minPositive !== 0 ? Math.log(minPositive) : 0;
+  const endPositive = maxPositive !== 0 ? Math.log(maxPositive) : 0;
+
+  // Calculate the width of negative, positive, and all exponential bucket ranges on the x-axis.
+  const xWidthNegative = endNegative - startNegative;
+  const xWidthPositive = endPositive - startPositive;
+  const xWidthTotal = xWidthNegative + defaultExpBucketWidth + xWidthPositive;
+
+  const zeroBucketIdx = findZeroBucket(buckets);
+  const zeroAxisLeft = findZeroAxisLeft(
+    scale,
+    rangeMin,
+    rangeMax,
+    minPositive,
+    maxNegative,
+    zeroBucketIdx,
+    xWidthNegative,
+    xWidthTotal,
+    defaultExpBucketWidth,
+  );
+  const zeroAxis = showZeroAxis(zeroAxisLeft);
+
+  return (
+    <div className={classes.histogramYWrapper}>
+      <div className={classes.histogramYLabels}>
+        {[1, 0.75, 0.5, 0.25].map((i) => (
+          <div key={i} className={classes.histogramYLabel}>
+            {scale === "linear" ? "" : formatter.format(countMax * i)}
+          </div>
+        ))}
+        <div className={classes.histogramYLabel} style={{ height: 0 }}>
+          0
+        </div>
+      </div>
+      <div className={classes.histogramXWrapper}>
+        <div className={classes.histogramContainer}>
+          {[0, 0.25, 0.5, 0.75, 1].map((i) => (
+            <React.Fragment key={i}>
+              <div
+                className={classes.histogramYGrid}
+                style={{ bottom: i * 100 + "%" }}
+              ></div>
+              <div
+                className={classes.histogramYTick}
+                style={{ bottom: i * 100 + "%" }}
+              ></div>
+              <div
+                className={classes.histogramXGrid}
+                style={{ left: i * 100 + "%" }}
+              ></div>
+              <div
+                className={classes.histogramXTick}
+                style={{ left: i * 100 + "%" }}
+              ></div>
+            </React.Fragment>
+          ))}
+          <RenderHistogramBars
+            buckets={buckets}
+            scale={scale}
+            rangeMin={rangeMin}
+            rangeMax={rangeMax}
+            index={index}
+            fds={fds}
+            fdMax={fdMax}
+            countMax={countMax}
+            defaultExpBucketWidth={defaultExpBucketWidth}
+            minPositive={minPositive}
+            maxNegative={maxNegative}
+            startPositive={startPositive}
+            startNegative={startNegative}
+            xWidthNegative={xWidthNegative}
+            xWidthTotal={xWidthTotal}
+          />
+          <div className={classes.histogramAxes}></div>
+        </div>
+        <div className={classes.histogramXLabels}>
+          <div className={classes.histogramXLabel}>
+            <div style={{ position: "absolute", left: 0 }}>
+              {formatter.format(rangeMin)}
+            </div>
+            {rangeMin < 0 && zeroAxis && (
+              <div style={{ position: "absolute", left: zeroAxisLeft }}>
+                0
+              </div>
+            )}
+            <div>{formatter.format(rangeMax)}</div>
+          </div>
+        </div>
+      </div>
+    </div>
+ ); +}; + +interface RenderHistogramProps { + buckets: [number, string, string, string][]; + scale: string; + rangeMin: number; + rangeMax: number; + index: number; + fds: number[]; + fdMax: number; + countMax: number; + defaultExpBucketWidth: number; + minPositive: number; + maxNegative: number; + startPositive: number; + startNegative: number; + xWidthNegative: number; + xWidthTotal: number; +} + +const RenderHistogramBars: FC = ({ + buckets, + scale, + rangeMin, + rangeMax, + index, + fds, + fdMax, + countMax, + defaultExpBucketWidth, + minPositive, + maxNegative, + startPositive, + startNegative, + xWidthNegative, + xWidthTotal, +}) => { + return ( + + {buckets.map((b, bIdx) => { + const left = parseFloat(b[1]); + const right = parseFloat(b[2]); + const count = parseFloat(b[3]); + const bucketIdx = `bucket-${index}-${bIdx}-${Math.ceil(parseFloat(b[3]) * 100)}`; + + const logWidth = Math.abs( + Math.log(Math.abs(right)) - Math.log(Math.abs(left)), + ); + const expBucketWidth = + logWidth === 0 ? defaultExpBucketWidth : logWidth; + + let bucketWidth = ""; + let bucketLeft = ""; + let bucketHeight = ""; + + switch (scale) { + case "linear": { + bucketWidth = ((right - left) / (rangeMax - rangeMin)) * 100 + "%"; + bucketLeft = + ((left - rangeMin) / (rangeMax - rangeMin)) * 100 + "%"; + if (left === 0 && right === 0) { + bucketLeft = "0%"; // do not render zero-width zero bucket + bucketWidth = "0%"; + } + bucketHeight = (fds[bIdx] / fdMax) * 100 + "%"; + break; + } + case "exponential": { + let adjust = 0; // if buckets are all positive/negative, we need to remove the width of the zero bucket + if (minPositive === 0 || maxNegative === 0) { + adjust = defaultExpBucketWidth; + } + bucketWidth = (expBucketWidth / (xWidthTotal - adjust)) * 100 + "%"; + if (left < 0) { + // negative buckets boundary + bucketLeft = + (-(Math.log(Math.abs(left)) + startNegative) / + (xWidthTotal - adjust)) * + 100 + + "%"; + } else { + // positive buckets boundary + bucketLeft = + ((Math.log(left) - + startPositive + + defaultExpBucketWidth + + xWidthNegative - + adjust) / + (xWidthTotal - adjust)) * + 100 + + "%"; + } + if (left < 0 && right > 0) { + // if the bucket crosses the zero axis + bucketLeft = (xWidthNegative / xWidthTotal) * 100 + "%"; + } + if (left === 0 && right === 0) { + // do not render zero width zero bucket + bucketLeft = "0%"; + bucketWidth = "0%"; + } + + bucketHeight = (count / countMax) * 100 + "%"; + break; + } + default: + throw new Error("Invalid scale"); + } + + return ( + +
+
+
+
+ ); + })} +
+ ); +}; + +export default HistogramChart; diff --git a/web/ui/mantine-ui/src/pages/query/HistogramHelpers.ts b/web/ui/mantine-ui/src/pages/query/HistogramHelpers.ts new file mode 100644 index 0000000000..12dd3e1bd2 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/HistogramHelpers.ts @@ -0,0 +1,144 @@ +// Calculates a default width of exponential histogram bucket ranges. If the last bucket is [0, 0], +// the width is calculated using the second to last bucket. returns error if the last bucket is [-0, 0], +export function calculateDefaultExpBucketWidth( + last: [number, string, string, string], + buckets: [number, string, string, string][] +): number { + if (parseFloat(last[2]) === 0 || parseFloat(last[1]) === 0) { + if (buckets.length > 1) { + return Math.abs( + Math.log(Math.abs(parseFloat(buckets[buckets.length - 2][2]))) - + Math.log(Math.abs(parseFloat(buckets[buckets.length - 2][1]))) + ); + } else { + throw new Error( + "Only one bucket in histogram ([-0, 0]). Cannot calculate defaultExpBucketWidth." + ); + } + } else { + return Math.abs( + Math.log(Math.abs(parseFloat(last[2]))) - + Math.log(Math.abs(parseFloat(last[1]))) + ); + } +} + +// Finds the lowest positive value from the bucket ranges +// Returns 0 if no positive values are found or if there are no buckets. +export function findMinPositive(buckets: [number, string, string, string][]) { + if (!buckets || buckets.length === 0) { + return 0; // no buckets + } + for (let i = 0; i < buckets.length; i++) { + const right = parseFloat(buckets[i][2]); + const left = parseFloat(buckets[i][1]); + + if (left > 0) { + return left; + } + if (left < 0 && right > 0) { + return right; + } + if (i === buckets.length - 1) { + if (right > 0) { + return right; + } + } + } + return 0; // all buckets are negative +} + +// Finds the lowest negative value from the bucket ranges +// Returns 0 if no negative values are found or if there are no buckets. +export function findMaxNegative(buckets: [number, string, string, string][]) { + if (!buckets || buckets.length === 0) { + return 0; // no buckets + } + for (let i = 0; i < buckets.length; i++) { + const right = parseFloat(buckets[i][2]); + const left = parseFloat(buckets[i][1]); + const prevRight = i > 0 ? parseFloat(buckets[i - 1][2]) : 0; + + if (right >= 0) { + if (i === 0) { + if (left < 0) { + return left; // return the first negative bucket + } + return 0; // all buckets are positive + } + return prevRight; // return the last negative bucket + } + } + console.log("findmaxneg returning: ", buckets[buckets.length - 1][2]); + return parseFloat(buckets[buckets.length - 1][2]); // all buckets are negative +} + +// Calculates the left position of the zero axis as a percentage string. 
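+// For example (hypothetical values): on a linear scale with rangeMin = -2 and
+// rangeMax = 8, the zero axis sits at ((0 - (-2)) / (8 - (-2))) * 100 = "20%".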
+export function findZeroAxisLeft( + scale: string, + rangeMin: number, + rangeMax: number, + minPositive: number, + maxNegative: number, + zeroBucketIdx: number, + widthNegative: number, + widthTotal: number, + expBucketWidth: number +): string { + if (scale === "linear") { + return ((0 - rangeMin) / (rangeMax - rangeMin)) * 100 + "%"; + } else { + if (maxNegative === 0) { + return "0%"; + } + if (minPositive === 0) { + return "100%"; + } + if (zeroBucketIdx === -1) { + // if there is no zero bucket, we must zero axis between buckets around zero + return (widthNegative / widthTotal) * 100 + "%"; + } + if ((widthNegative + 0.5 * expBucketWidth) / widthTotal > 0) { + return ((widthNegative + 0.5 * expBucketWidth) / widthTotal) * 100 + "%"; + } else { + return "0%"; + } + } +} + +// Determines if the zero axis should be shown such that the zero label does not overlap with the range labels. +// The zero axis is shown if it is between 5% and 95% of the graph. +export function showZeroAxis(zeroAxisLeft: string) { + const axisNumber = parseFloat(zeroAxisLeft.slice(0, -1)); + if (5 < axisNumber && axisNumber < 95) { + return true; + } + return false; +} + +// Finds the index of the bucket whose range includes zero +export function findZeroBucket( + buckets: [number, string, string, string][] +): number { + for (let i = 0; i < buckets.length; i++) { + const left = parseFloat(buckets[i][1]); + const right = parseFloat(buckets[i][2]); + if (left <= 0 && right >= 0) { + return i; + } + } + return -1; +} + +const leftDelim = (br: number): string => (br === 3 || br === 1 ? "[" : "("); +const rightDelim = (br: number): string => (br === 3 || br === 0 ? "]" : ")"); + +export const bucketRangeString = ([ + boundaryRule, + leftBoundary, + rightBoundary, + + _, +]: [number, string, string, string]): string => { + return `${leftDelim(boundaryRule)}${leftBoundary} -> ${rightBoundary}${rightDelim(boundaryRule)}`; +}; diff --git a/web/ui/mantine-ui/src/pages/query/MetricsExplorer/LabelsExplorer.module.css b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/LabelsExplorer.module.css new file mode 100644 index 0000000000..cd058e6134 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/LabelsExplorer.module.css @@ -0,0 +1,18 @@ +.labelValue { + cursor: pointer; +} + +.labelValue:hover { + background-color: light-dark( + var(--mantine-color-gray-2), + var(--mantine-color-gray-8) + ); + border-radius: var(--mantine-radius-sm); +} + +.promqlPill { + background-color: light-dark( + var(--mantine-color-gray-1), + var(--mantine-color-dark-5) + ); +} diff --git a/web/ui/mantine-ui/src/pages/query/MetricsExplorer/LabelsExplorer.tsx b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/LabelsExplorer.tsx new file mode 100644 index 0000000000..d18c017b18 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/LabelsExplorer.tsx @@ -0,0 +1,423 @@ +import { FC, useMemo, useState } from "react"; +import { + LabelMatcher, + matchType, + nodeType, + VectorSelector, +} from "../../../promql/ast"; +import { + Alert, + Anchor, + Autocomplete, + Box, + Button, + CopyButton, + Group, + List, + Pill, + Text, + SegmentedControl, + Select, + Skeleton, + Stack, + Table, + rem, +} from "@mantine/core"; +import { escapeString } from "../../../lib/escapeString"; +import serializeNode from "../../../promql/serialize"; +import { SeriesResult } from "../../../api/responseTypes/series"; +import { useAPIQuery } from "../../../api/api"; +import { Metric } from "../../../api/responseTypes/query"; +import { + 
IconAlertTriangle, + IconArrowLeft, + IconCheck, + IconCodePlus, + IconCopy, + IconX, +} from "@tabler/icons-react"; +import { formatNode } from "../../../promql/format"; +import classes from "./LabelsExplorer.module.css"; +import { buttonIconStyle } from "../../../styles"; + +type LabelsExplorerProps = { + metricName: string; + insertText: (_text: string) => void; + hideLabelsExplorer: () => void; +}; + +const LabelsExplorer: FC = ({ + metricName, + insertText, + hideLabelsExplorer, +}) => { + const [expandedLabels, setExpandedLabels] = useState([]); + const [matchers, setMatchers] = useState([]); + const [newMatcher, setNewMatcher] = useState(null); + const [sortByCard, setSortByCard] = useState(true); + + const removeMatcher = (name: string) => { + setMatchers(matchers.filter((m) => m.name !== name)); + }; + + const addMatcher = () => { + if (newMatcher === null) { + throw new Error("tried to add null label matcher"); + } + + setMatchers([...matchers, newMatcher]); + setNewMatcher(null); + }; + + const matcherBadge = (m: LabelMatcher) => ( + { + removeMatcher(m.name); + }} + className={classes.promqlPill} + > + + {m.name} + {m.type} + "{escapeString(m.value)}" + + + ); + + const selector: VectorSelector = { + type: nodeType.vectorSelector, + name: metricName, + matchers, + offset: 0, + timestamp: null, + startOrEnd: null, + }; + + // Based on the selected pool (if any), load the list of targets. + const { data, error, isLoading } = useAPIQuery({ + path: `/series`, + params: { + "match[]": serializeNode(selector), + }, + }); + + // When new series data is loaded, update the corresponding label cardinality and example data. + const [numSeries, sortedLabelCards, labelExamples] = useMemo(() => { + const labelCardinalities: Record = {}; + const labelExamples: Record = + {}; + + const labelValuesByName: Record> = {}; + + if (data !== undefined) { + data.data.forEach((series: Metric) => { + Object.entries(series).forEach(([ln, lv]) => { + if (ln !== "__name__") { + if (!(ln in labelValuesByName)) { + labelValuesByName[ln] = { [lv]: 1 }; + } else { + if (!(lv in labelValuesByName[ln])) { + labelValuesByName[ln][lv] = 1; + } else { + labelValuesByName[ln][lv]++; + } + } + } + }); + }); + + Object.entries(labelValuesByName).forEach(([ln, lvs]) => { + labelCardinalities[ln] = Object.keys(lvs).length; + // labelExamples[ln] = Array.from({ length: Math.min(5, lvs.size) }, (i => () => i.next().value)(lvs.keys())); + // Sort label values by their number of occurrences within this label name. + labelExamples[ln] = Object.entries(lvs) + .sort(([, aCnt], [, bCnt]) => bCnt - aCnt) + .map(([lv, cnt]) => ({ value: lv, count: cnt })); + }); + } + + // Sort labels by cardinality if desired, so the labels with the most values are at the top. + const sortedLabelCards = Object.entries(labelCardinalities).sort((a, b) => + sortByCard ? b[1] - a[1] : 0 + ); + + return [data?.data.length, sortedLabelCards, labelExamples]; + }, [data, sortByCard]); + + if (error) { + return ( + } + > + Error: {error.message} + + ); + } + + return ( + + + {/* Selector */} + + + Selector: + + + + + {formatNode(selector, false)} + + + + + + + {({ copied, copy }) => ( + + )} + + + + {/* Filters */} + + + Filters: + + + {matchers.length > 0 ? ( + {matchers.map((m) => matcherBadge(m))} + ) : ( + <>No label filters + )} + + {/* Number of series */} + + + Results: + + <>{numSeries !== undefined ? 
`${numSeries} series` : "loading..."} + + + {/* Sort order */} + + + + + setSortByCard(value === "cardinality")} + data={[ + { label: "By cardinality", value: "cardinality" }, + { label: "Alphabetic", value: "alphabetic" }, + ]} + /> + + + {/* Labels and their values */} + {isLoading ? ( + + {Array.from(Array(10), (_, i) => ( + + ))} + + ) : ( + + + + Label + Values + + + + {sortedLabelCards.map(([ln, card]) => ( + + +
{ + // Without this, the page gets reloaded for forms that only have a single input field, see + // https://stackoverflow.com/questions/1370021/why-does-forms-with-single-input-field-submit-upon-pressing-enter-key-in-input. + e.preventDefault(); + }} + > + + + {ln} + + {matchers.some((m) => m.name === ln) ? ( + matcherBadge(matchers.find((m) => m.name === ln)!) + ) : newMatcher?.name === ln ? ( + +
+ )} +
+ ); +}; + +export default LabelsExplorer; diff --git a/web/ui/mantine-ui/src/pages/query/MetricsExplorer/MetricsExplorer.module.css b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/MetricsExplorer.module.css new file mode 100644 index 0000000000..5be156d804 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/MetricsExplorer.module.css @@ -0,0 +1,7 @@ +.typeLabel { + color: light-dark(#008080, #14bfad); +} + +.helpLabel { + color: light-dark(#800000, #ff8585); +} diff --git a/web/ui/mantine-ui/src/pages/query/MetricsExplorer/MetricsExplorer.tsx b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/MetricsExplorer.tsx new file mode 100644 index 0000000000..f0f176c15c --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/MetricsExplorer/MetricsExplorer.tsx @@ -0,0 +1,185 @@ +import { FC, useMemo, useState } from "react"; +import { useSuspenseAPIQuery } from "../../../api/api"; +import { MetadataResult } from "../../../api/responseTypes/metadata"; +import { ActionIcon, Group, Stack, Table, TextInput } from "@mantine/core"; +import React from "react"; +import { Fuzzy } from "@nexucis/fuzzy"; +import sanitizeHTML from "sanitize-html"; +import { IconCodePlus, IconCopy, IconZoomCode } from "@tabler/icons-react"; +import LabelsExplorer from "./LabelsExplorer"; +import { useDebouncedValue } from "@mantine/hooks"; +import classes from "./MetricsExplorer.module.css"; +import CustomInfiniteScroll from "../../../components/CustomInfiniteScroll"; + +const fuz = new Fuzzy({ + pre: '', + post: "", + shouldSort: true, +}); + +const sanitizeOpts = { + allowedTags: ["b"], + allowedAttributes: { b: ["style"] }, +}; + +type MetricsExplorerProps = { + metricNames: string[]; + insertText: (text: string) => void; + close: () => void; +}; + +const getSearchMatches = (input: string, expressions: string[]) => + fuz.filter(input.replace(/ /g, ""), expressions, { + pre: '', + post: "", + }); + +const MetricsExplorer: FC = ({ + metricNames, + insertText, + close, +}) => { + // Fetch the alerting rules data. + const { data } = useSuspenseAPIQuery({ + path: `/metadata`, + }); + const [selectedMetric, setSelectedMetric] = useState(null); + + const [filterText, setFilterText] = useState(""); + const [debouncedFilterText] = useDebouncedValue(filterText, 250); + + const searchMatches = useMemo(() => { + if (debouncedFilterText === "") { + return metricNames.map((m) => ({ original: m, rendered: m })); + } + return getSearchMatches(debouncedFilterText, metricNames); + }, [debouncedFilterText, metricNames]); + + const getMeta = (m: string) => + data.data[m.replace(/(_count|_sum|_bucket)$/, "")] || [ + { help: "unknown", type: "unknown", unit: "unknown" }, + ]; + + if (selectedMetric !== null) { + return ( + { + insertText(text); + close(); + }} + hideLabelsExplorer={() => setSelectedMetric(null)} + /> + ); + } + + return ( + + ) => + setFilterText(e.target.value) + } + autoFocus + /> + ( + + + + Metric + Type + Help + + + + {items.map((m) => ( + + + + {debouncedFilterText === "" ? ( + m.original + ) : ( +
+ )} + + { + setSelectedMetric(m.original); + }} + > + + + { + insertText(m.original); + close(); + }} + > + + + { + navigator.clipboard.writeText(m.original); + }} + > + + + + + + + {getMeta(m.original).map((meta, idx) => ( + + {meta.type} +
+
+ ))} +
+ + {getMeta(m.original).map((meta, idx) => ( + + {meta.help} +
+
+ ))} +
+ + ))} + +
+ )} + /> +
+ ); +}; + +export default MetricsExplorer; diff --git a/web/ui/mantine-ui/src/pages/query/QueryPage.tsx b/web/ui/mantine-ui/src/pages/query/QueryPage.tsx new file mode 100644 index 0000000000..71c969daf8 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/QueryPage.tsx @@ -0,0 +1,136 @@ +import { Alert, Box, Button, Stack } from "@mantine/core"; +import { + IconAlertCircle, + IconAlertTriangle, + IconPlus, +} from "@tabler/icons-react"; +import { useAppDispatch, useAppSelector } from "../../state/hooks"; +import { + addPanel, + newDefaultPanel, + setPanels, +} from "../../state/queryPageSlice"; +import Panel from "./QueryPanel"; +import { LabelValuesResult } from "../../api/responseTypes/labelValues"; +import { useAPIQuery } from "../../api/api"; +import { useEffect, useState } from "react"; +import { InstantQueryResult } from "../../api/responseTypes/query"; +import { humanizeDuration } from "../../lib/formatTime"; +import { decodePanelOptionsFromURLParams } from "./urlStateEncoding"; +import { buttonIconStyle } from "../../styles"; + +export default function QueryPage() { + const panels = useAppSelector((state) => state.queryPage.panels); + const dispatch = useAppDispatch(); + const [timeDelta, setTimeDelta] = useState(0); + + // Update the panels whenever the URL params change. + useEffect(() => { + const handleURLChange = () => { + const panels = decodePanelOptionsFromURLParams(window.location.search); + if (panels.length > 0) { + dispatch(setPanels(panels)); + } + }; + + handleURLChange(); + + window.addEventListener("popstate", handleURLChange); + + return () => { + window.removeEventListener("popstate", handleURLChange); + }; + }, [dispatch]); + + // Clear the query page when navigating away from it. + useEffect(() => { + return () => { + dispatch(setPanels([newDefaultPanel()])); + }; + }, [dispatch]); + + const { data: metricNamesResult, error: metricNamesError } = + useAPIQuery({ + path: "/label/__name__/values", + }); + + const { data: timeResult, error: timeError } = + useAPIQuery({ + path: "/query", + params: { + query: "time()", + }, + }); + + useEffect(() => { + if (!timeResult) { + return; + } + + if (timeResult.data.resultType !== "scalar") { + throw new Error("Unexpected result type from time query"); + } + + const browserTime = new Date().getTime() / 1000; + const serverTime = timeResult.data.result[0]; + setTimeDelta(Math.abs(browserTime - serverTime)); + }, [timeResult]); + + return ( + + {metricNamesError && ( + } + color="red" + title="Error fetching metrics list" + > + Unable to fetch list of metric names: {metricNamesError.message} + + )} + {timeError && ( + } + color="red" + title="Error fetching server time" + > + {timeError.message} + + )} + {timeDelta > 30 && ( + } + onClose={() => setTimeDelta(0)} + > + Detected a time difference of{" "} + {humanizeDuration(timeDelta * 1000)} between your + browser and the server. You may see unexpected time-shifted query + results due to the time drift. 
+ + )} + + + {panels.map((p, idx) => ( + + ))} + + + + + ); +} diff --git a/web/ui/mantine-ui/src/pages/query/QueryPanel.module.css b/web/ui/mantine-ui/src/pages/query/QueryPanel.module.css new file mode 100644 index 0000000000..c42dfcb0ec --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/QueryPanel.module.css @@ -0,0 +1,14 @@ +.input { + font-family: "DejaVu Sans Mono"; + padding-top: 7px; + transition: none; + + &:focus-within { + outline: rem(2px) solid var(--mantine-color-blue-filled); + border-color: transparent; + } + + &:placeholder-shown { + font-family: unset; + } +} diff --git a/web/ui/mantine-ui/src/pages/query/QueryPanel.tsx b/web/ui/mantine-ui/src/pages/query/QueryPanel.tsx new file mode 100644 index 0000000000..24eb8c59cb --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/QueryPanel.tsx @@ -0,0 +1,337 @@ +import { + Group, + Tabs, + Center, + Space, + Box, + SegmentedControl, + Stack, + Skeleton, +} from "@mantine/core"; +import { + IconChartAreaFilled, + IconChartLine, + IconGraph, + IconInfoCircle, + IconTable, +} from "@tabler/icons-react"; +import { FC, Suspense, useCallback, useMemo, useState } from "react"; +import { useAppDispatch, useAppSelector } from "../../state/hooks"; +import { + addQueryToHistory, + GraphDisplayMode, + GraphResolution, + removePanel, + setExpr, + setShowTree, + setVisualizer, +} from "../../state/queryPageSlice"; +import TimeInput from "./TimeInput"; +import RangeInput from "./RangeInput"; +import ExpressionInput from "./ExpressionInput"; +import Graph from "./Graph"; +import ResolutionInput from "./ResolutionInput"; +import TableTab from "./TableTab"; +import TreeView from "./TreeView"; +import ErrorBoundary from "../../components/ErrorBoundary"; +import ASTNode from "../../promql/ast"; +import serializeNode from "../../promql/serialize"; +import ExplainView from "./ExplainViews/ExplainView"; + +export interface PanelProps { + idx: number; + metricNames: string[]; +} + +// TODO: This is duplicated everywhere, unify it. +const iconStyle = { width: "0.9rem", height: "0.9rem" }; + +const QueryPanel: FC = ({ idx, metricNames }) => { + // Used to indicate to the selected display component that it should retrigger + // the query, even if the expression has not changed (e.g. when the user presses + // the "Execute" button or hits again). + const [retriggerIdx, setRetriggerIdx] = useState(0); + + const panel = useAppSelector((state) => state.queryPage.panels[idx]); + const dispatch = useAppDispatch(); + + const [selectedNode, setSelectedNode] = useState<{ + id: string; + node: ASTNode; + } | null>(null); + + const expr = useMemo( + () => + selectedNode !== null ? serializeNode(selectedNode.node) : panel.expr, + [selectedNode, panel.expr] + ); + + const onSelectRange = useCallback( + (start: number, end: number) => + dispatch( + setVisualizer({ + idx, + visualizer: { + ...panel.visualizer, + range: (end - start) * 1000, + endTime: end * 1000, + }, + }) + ), + // TODO: How to have panel.visualizer in the dependencies, but not re-create + // the callback every time it changes by the callback's own update? This leads + // to extra renders of the plot further down. 
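+    // (One possible mitigation, not implemented here: depend on
+    // panel.visualizer.range and panel.visualizer.endTime individually rather
+    // than on the whole visualizer object.)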
+ [dispatch, idx, panel.visualizer] + ); + + return ( + + { + setRetriggerIdx((idx) => idx + 1); + dispatch(setExpr({ idx, expr })); + + if (!metricNames.includes(expr) && expr.trim() !== "") { + dispatch(addQueryToHistory(expr)); + } + }} + treeShown={panel.showTree} + setShowTree={(showTree: boolean) => { + dispatch(setShowTree({ idx, showTree })); + if (!showTree) { + setSelectedNode(null); + } + }} + removePanel={() => { + dispatch(removePanel(idx)); + }} + /> + {panel.expr.trim() !== "" && panel.showTree && ( + + + {Array.from(Array(20), (_, i) => ( + + ))} + + } + > + { + dispatch(setShowTree({ idx, showTree: false })); + setSelectedNode(null); + }} + /> + + + )} + + dispatch( + setVisualizer({ + idx, + visualizer: { + ...panel.visualizer, + activeTab: v as "table" | "graph", + }, + }) + ) + } + keepMounted={false} + > + + }> + Table + + }> + Graph + + } + > + Explain + + + + + + + + + + dispatch( + setVisualizer({ + idx, + visualizer: { ...panel.visualizer, range }, + }) + ) + } + /> + + dispatch( + setVisualizer({ + idx, + visualizer: { ...panel.visualizer, endTime: time }, + }) + ) + } + /> + { + dispatch( + setVisualizer({ + idx, + visualizer: { + ...panel.visualizer, + resolution: res, + }, + }) + ); + }} + /> + + + + {/* */} + + + dispatch( + setVisualizer({ + idx, + visualizer: { + ...panel.visualizer, + displayMode: value as GraphDisplayMode, + }, + }) + ) + } + value={panel.visualizer.displayMode} + data={[ + { + value: GraphDisplayMode.Lines, + label: ( +
+ + Unstacked +
+ ), + }, + { + value: GraphDisplayMode.Stacked, + label: ( +
+ + Stacked +
+ ), + }, + // { + // value: GraphDisplayMode.Heatmap, + // label: ( + //
+ // + // Heatmap + //
+ // ), + // }, + ]} + /> +
+
+ + +
+ + + + {Array.from(Array(20), (_, i) => ( + + ))} + + } + > + { + dispatch(setShowTree({ idx, showTree: true })); + }} + /> + + + +
+
+ ); +}; + +export default QueryPanel; diff --git a/web/ui/mantine-ui/src/pages/query/RangeInput.tsx b/web/ui/mantine-ui/src/pages/query/RangeInput.tsx new file mode 100644 index 0000000000..ec70cff969 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/RangeInput.tsx @@ -0,0 +1,120 @@ +import { FC, useEffect, useState } from "react"; +import { ActionIcon, Group, TextInput } from "@mantine/core"; +import { IconMinus, IconPlus } from "@tabler/icons-react"; +import { + formatPrometheusDuration, + parsePrometheusDuration, +} from "../../lib/formatTime"; + +interface RangeInputProps { + range: number; + onChangeRange: (range: number) => void; +} + +const iconStyle = { width: "0.9rem", height: "0.9rem" }; + +const rangeSteps = [ + 1, + 10, + 60, + 5 * 60, + 15 * 60, + 30 * 60, + 60 * 60, + 2 * 60 * 60, + 6 * 60 * 60, + 12 * 60 * 60, + 24 * 60 * 60, + 48 * 60 * 60, + 7 * 24 * 60 * 60, + 14 * 24 * 60 * 60, + 28 * 24 * 60 * 60, + 56 * 24 * 60 * 60, + 112 * 24 * 60 * 60, + 182 * 24 * 60 * 60, + 365 * 24 * 60 * 60, + 730 * 24 * 60 * 60, +].map((s) => s * 1000); + +const RangeInput: FC = ({ range, onChangeRange }) => { + // TODO: Make sure that when "range" changes externally (like via the URL), + // the input is updated, either via useEffect() or some better architecture. + const [rangeInput, setRangeInput] = useState( + formatPrometheusDuration(range) + ); + + useEffect(() => { + setRangeInput(formatPrometheusDuration(range)); + }, [range]); + + const onChangeRangeInput = (rangeText: string): void => { + const newRange = parsePrometheusDuration(rangeText); + if (newRange === null) { + setRangeInput(formatPrometheusDuration(range)); + } else { + onChangeRange(newRange); + } + }; + + const increaseRange = (): void => { + for (const step of rangeSteps) { + if (range < step) { + setRangeInput(formatPrometheusDuration(step)); + onChangeRange(step); + return; + } + } + }; + + const decreaseRange = (): void => { + for (const step of rangeSteps.slice().reverse()) { + if (range > step) { + setRangeInput(formatPrometheusDuration(step)); + onChangeRange(step); + return; + } + } + }; + + return ( + + setRangeInput(event.currentTarget.value)} + onBlur={() => onChangeRangeInput(rangeInput)} + onKeyDown={(event) => + event.key === "Enter" && onChangeRangeInput(rangeInput) + } + aria-label="Range" + style={{ width: `calc(44px + ${rangeInput.length + 3}ch)` }} + leftSection={ + + + + } + rightSection={ + + + + } + leftSectionPointerEvents="all" + rightSectionPointerEvents="all" + /> + + ); +}; + +export default RangeInput; diff --git a/web/ui/mantine-ui/src/pages/query/ResolutionInput.tsx b/web/ui/mantine-ui/src/pages/query/ResolutionInput.tsx new file mode 100644 index 0000000000..8d19deb197 --- /dev/null +++ b/web/ui/mantine-ui/src/pages/query/ResolutionInput.tsx @@ -0,0 +1,142 @@ +import { FC, useState } from "react"; +import { Select, TextInput } from "@mantine/core"; +import { + formatPrometheusDuration, + parsePrometheusDuration, +} from "../../lib/formatTime"; +import { + GraphResolution, + getEffectiveResolution, +} from "../../state/queryPageSlice"; + +interface ResolutionInputProps { + resolution: GraphResolution; + range: number; + onChangeResolution: (resolution: GraphResolution) => void; +} + +const ResolutionInput: FC = ({ + resolution, + range, + onChangeResolution, +}) => { + const [customResolutionInput, setCustomResolutionInput] = useState( + formatPrometheusDuration(getEffectiveResolution(resolution, range)) + ); + + const onChangeCustomResolutionInput = (resText: string): void => { + const 
newResolution = parsePrometheusDuration(resText); + + if (resolution.type === "custom" && newResolution === resolution.step) { + // Nothing changed. + return; + } + + if (newResolution === null) { + setCustomResolutionInput( + formatPrometheusDuration(getEffectiveResolution(resolution, range)) + ); + } else { + onChangeResolution({ type: "custom", step: newResolution }); + } + }; + + return ( + <> + { + setScrapePool(value); + showLimitAlert && dispatch(setShowLimitAlert(false)); + }} + searchable + /> + + o === "active" ? badgeClasses.healthOk : badgeClasses.healthInfo + } + placeholder="Filter by state" + values={(stateFilter?.filter((v) => v !== null) as string[]) || []} + onChange={(values) => setStateFilter(values)} + /> + } + placeholder="Filter by labels" + value={searchFilter || ""} + onChange={(event) => setSearchFilter(event.currentTarget.value)} + > + 0 + ? "Expand all pools" + : "Collapse all pools" + } + variant="light" + onClick={() => + dispatch( + setCollapsedPools(collapsedPools.length > 0 ? [] : scrapePools) + ) + } + > + {collapsedPools.length > 0 ? ( + + ) : ( + + )} + + + + + + {Array.from(Array(10), (_, i) => ( + + ))} + + } + > + + + + + ); +} diff --git a/web/ui/mantine-ui/src/pages/service-discovery/ServiceDiscoveryPoolsList.tsx b/web/ui/mantine-ui/src/pages/service-discovery/ServiceDiscoveryPoolsList.tsx new file mode 100644 index 0000000000..798783da2f --- /dev/null +++ b/web/ui/mantine-ui/src/pages/service-discovery/ServiceDiscoveryPoolsList.tsx @@ -0,0 +1,356 @@ +import { + Accordion, + Alert, + Anchor, + Group, + RingProgress, + Stack, + Table, + Text, +} from "@mantine/core"; +import { KVSearch } from "@nexucis/kvsearch"; +import { IconInfoCircle } from "@tabler/icons-react"; +import { useSuspenseAPIQuery } from "../../api/api"; +import { + DroppedTarget, + Labels, + Target, + TargetsResult, +} from "../../api/responseTypes/targets"; +import { FC, useMemo } from "react"; +import { useAppDispatch, useAppSelector } from "../../state/hooks"; +import { + setCollapsedPools, + setShowLimitAlert, +} from "../../state/serviceDiscoveryPageSlice"; +import CustomInfiniteScroll from "../../components/CustomInfiniteScroll"; + +import { useDebouncedValue } from "@mantine/hooks"; +import { targetPoolDisplayLimit } from "./ServiceDiscoveryPage"; +import { BooleanParam, useQueryParam, withDefault } from "use-query-params"; +import { LabelBadges } from "../../components/LabelBadges"; + +type TargetLabels = { + discoveredLabels: Labels; + labels: Labels; + isDropped: boolean; +}; + +type ScrapePool = { + targets: TargetLabels[]; + active: number; + total: number; +}; + +type ScrapePools = { + [scrapePool: string]: ScrapePool; +}; + +const activeTargetKVSearch = new KVSearch({ + shouldSort: true, + indexedKeys: [ + "labels", + "discoveredLabels", + ["discoveredLabels", /.*/], + ["labels", /.*/], + ], +}); + +const droppedTargetKVSearch = new KVSearch({ + shouldSort: true, + indexedKeys: ["discoveredLabels", ["discoveredLabels", /.*/]], +}); + +const buildPoolsData = ( + poolNames: string[], + activeTargets: Target[], + droppedTargets: DroppedTarget[], + search: string, + stateFilter: (string | null)[] +): ScrapePools => { + const pools: ScrapePools = {}; + + for (const pn of poolNames) { + pools[pn] = { + targets: [], + active: 0, + total: 0, + }; + } + + for (const target of activeTargets) { + const pool = pools[target.scrapePool]; + if (!pool) { + // TODO: Should we do better here? 
+ throw new Error( + "Received target information for an unknown scrape pool, likely the list of scrape pools has changed. Please reload the page." + ); + } + + pool.active++; + pool.total++; + } + + const filteredActiveTargets = + stateFilter.length !== 0 && !stateFilter.includes("active") + ? [] + : search === "" + ? activeTargets + : activeTargetKVSearch + .filter(search, activeTargets) + .map((value) => value.original); + + for (const target of filteredActiveTargets) { + pools[target.scrapePool].targets.push({ + discoveredLabels: target.discoveredLabels, + labels: target.labels, + isDropped: false, + }); + } + + for (const target of droppedTargets) { + const { job: poolName } = target.discoveredLabels; + const pool = pools[poolName]; + if (!pool) { + // TODO: Should we do better here? + throw new Error( + "Received target information for an unknown scrape pool, likely the list of scrape pools has changed. Please reload the page." + ); + } + + pool.total++; + } + + const filteredDroppedTargets = + stateFilter.length !== 0 && !stateFilter.includes("dropped") + ? [] + : search === "" + ? droppedTargets + : droppedTargetKVSearch + .filter(search, droppedTargets) + .map((value) => value.original); + + for (const target of filteredDroppedTargets) { + pools[target.discoveredLabels.job].targets.push({ + discoveredLabels: target.discoveredLabels, + isDropped: true, + labels: {}, + }); + } + + return pools; +}; + +type ScrapePoolListProp = { + poolNames: string[]; + selectedPool: string | null; + stateFilter: string[]; + searchFilter: string; +}; + +const ScrapePoolList: FC = ({ + poolNames, + selectedPool, + stateFilter, + searchFilter, +}) => { + const dispatch = useAppDispatch(); + const [showEmptyPools, setShowEmptyPools] = useQueryParam( + "showEmptyPools", + withDefault(BooleanParam, true) + ); + + // Based on the selected pool (if any), load the list of targets. + const { + data: { + data: { activeTargets, droppedTargets }, + }, + } = useSuspenseAPIQuery({ + path: `/targets`, + params: { + scrapePool: selectedPool === null ? "" : selectedPool, + }, + }); + + const { collapsedPools, showLimitAlert } = useAppSelector( + (state) => state.serviceDiscoveryPage + ); + + const [debouncedSearch] = useDebouncedValue(searchFilter, 250); + + const allPools = useMemo( + () => + buildPoolsData( + selectedPool ? [selectedPool] : poolNames, + activeTargets, + droppedTargets, + debouncedSearch, + stateFilter + ), + [ + selectedPool, + poolNames, + activeTargets, + droppedTargets, + debouncedSearch, + stateFilter, + ] + ); + const allPoolNames = Object.keys(allPools); + const shownPoolNames = showEmptyPools + ? allPoolNames + : allPoolNames.filter((pn) => allPools[pn].targets.length !== 0); + + return ( + + {allPoolNames.length === 0 ? ( + }> + No scrape pools found. + + ) : ( + !showEmptyPools && + allPoolNames.length !== shownPoolNames.length && ( + } + > + Hiding {allPoolNames.length - shownPoolNames.length} empty pools due + to filters or no targets. + setShowEmptyPools(true)}> + Show empty pools + + + ) + )} + {showLimitAlert && ( + } + withCloseButton + onClose={() => dispatch(setShowLimitAlert(false))} + > + There are more than {targetPoolDisplayLimit} scrape pools. Showing + only the first one. Use the dropdown to select a different pool. 
+ + )} + !collapsedPools.includes(p))} + onChange={(value) => + dispatch( + setCollapsedPools(allPoolNames.filter((p) => !value.includes(p))) + ) + } + > + {shownPoolNames.map((poolName) => { + const pool = allPools[poolName]; + return ( + + + + {poolName} + + + {pool.active} / {pool.total} + + + + + + + {pool.total === 0 ? ( + }> + No targets in this scrape pool. + setShowEmptyPools(false)} + > + Hide empty pools + + + ) : pool.targets.length === 0 ? ( + }> + No targets in this pool match your filter criteria (omitted{" "} + {pool.total} filtered targets). + setShowEmptyPools(false)} + > + Hide empty pools + + + ) : ( + ( + + + + Discovered labels + Target labels + + + + {items.map((target, i) => ( + // TODO: Find a stable and definitely unique key. + + + + + + {target.isDropped ? ( + + dropped due to relabeling rules + + ) : ( + + )} + + + ))} + +
+ )} + /> + )} +
+
+ ); + })} +
+
+ ); +}; + +export default ScrapePoolList; diff --git a/web/ui/mantine-ui/src/pages/targets/ScrapePoolsList.tsx b/web/ui/mantine-ui/src/pages/targets/ScrapePoolsList.tsx new file mode 100644 index 0000000000..be91b149bb --- /dev/null +++ b/web/ui/mantine-ui/src/pages/targets/ScrapePoolsList.tsx @@ -0,0 +1,422 @@ +import { + Accordion, + Alert, + Anchor, + Badge, + Group, + RingProgress, + Stack, + Table, + Text, + Tooltip, +} from "@mantine/core"; +import { KVSearch } from "@nexucis/kvsearch"; +import { + IconAlertTriangle, + IconHourglass, + IconInfoCircle, + IconRefresh, +} from "@tabler/icons-react"; +import { useSuspenseAPIQuery } from "../../api/api"; +import { Target, TargetsResult } from "../../api/responseTypes/targets"; +import React, { FC, useMemo } from "react"; +import { + humanizeDurationRelative, + humanizeDuration, + now, +} from "../../lib/formatTime"; +import { useAppDispatch, useAppSelector } from "../../state/hooks"; +import { + setCollapsedPools, + setShowLimitAlert, +} from "../../state/targetsPageSlice"; +import EndpointLink from "../../components/EndpointLink"; +import CustomInfiniteScroll from "../../components/CustomInfiniteScroll"; + +import badgeClasses from "../../Badge.module.css"; +import panelClasses from "../../Panel.module.css"; +import TargetLabels from "./TargetLabels"; +import { useDebouncedValue } from "@mantine/hooks"; +import { targetPoolDisplayLimit } from "./TargetsPage"; +import { BooleanParam, useQueryParam, withDefault } from "use-query-params"; +import { badgeIconStyle } from "../../styles"; + +type ScrapePool = { + targets: Target[]; + count: number; + upCount: number; + downCount: number; + unknownCount: number; +}; + +type ScrapePools = { + [scrapePool: string]: ScrapePool; +}; + +const poolPanelHealthClass = (pool: ScrapePool) => + pool.count > 1 && pool.downCount === pool.count + ? panelClasses.panelHealthErr + : pool.downCount >= 1 + ? panelClasses.panelHealthWarn + : pool.unknownCount === pool.count + ? panelClasses.panelHealthUnknown + : panelClasses.panelHealthOk; + +const healthBadgeClass = (state: string) => { + switch (state.toLowerCase()) { + case "up": + return badgeClasses.healthOk; + case "down": + return badgeClasses.healthErr; + case "unknown": + return badgeClasses.healthUnknown; + default: + // Should never happen. + return badgeClasses.healthWarn; + } +}; + +const kvSearch = new KVSearch({ + shouldSort: true, + indexedKeys: ["labels", "scrapePool", ["labels", /.*/]], +}); + +const buildPoolsData = ( + poolNames: string[], + targets: Target[], + search: string, + healthFilter: (string | null)[] +): ScrapePools => { + const pools: ScrapePools = {}; + + for (const pn of poolNames) { + pools[pn] = { + targets: [], + count: 0, + upCount: 0, + downCount: 0, + unknownCount: 0, + }; + } + + for (const target of targets) { + if (!pools[target.scrapePool]) { + // TODO: Should we do better here? + throw new Error( + "Received target information for an unknown scrape pool, likely the list of scrape pools has changed. Please reload the page." + ); + } + + pools[target.scrapePool].count++; + + switch (target.health.toLowerCase()) { + case "up": + pools[target.scrapePool].upCount++; + break; + case "down": + pools[target.scrapePool].downCount++; + break; + case "unknown": + pools[target.scrapePool].unknownCount++; + break; + } + } + + const filteredTargets: Target[] = ( + search === "" + ? 
+      ? targets
+      : kvSearch.filter(search, targets).map((value) => value.original)
+  ).filter(
+    (target) =>
+      healthFilter.length === 0 || healthFilter.includes(target.health)
+  );
+
+  for (const target of filteredTargets) {
+    pools[target.scrapePool].targets.push(target);
+  }
+
+  return pools;
+};
+
+type ScrapePoolListProp = {
+  poolNames: string[];
+  selectedPool: string | null;
+  healthFilter: string[];
+  searchFilter: string;
+};
+
+const ScrapePoolList: FC<ScrapePoolListProp> = ({
+  poolNames,
+  selectedPool,
+  healthFilter,
+  searchFilter,
+}) => {
+  // Based on the selected pool (if any), load the list of targets.
+  const {
+    data: {
+      data: { activeTargets },
+    },
+  } = useSuspenseAPIQuery<TargetsResult>({
+    path: `/targets`,
+    params: {
+      state: "active",
+      scrapePool: selectedPool === null ? "" : selectedPool,
+    },
+  });
+
+  const dispatch = useAppDispatch();
+  const [showEmptyPools, setShowEmptyPools] = useQueryParam(
+    "showEmptyPools",
+    withDefault(BooleanParam, true)
+  );
+
+  const { collapsedPools, showLimitAlert } = useAppSelector(
+    (state) => state.targetsPage
+  );
+
+  const [debouncedSearch] = useDebouncedValue(searchFilter.trim(), 250);
+
+  const allPools = useMemo(
+    () =>
+      buildPoolsData(
+        selectedPool ? [selectedPool] : poolNames,
+        activeTargets,
+        debouncedSearch,
+        healthFilter
+      ),
+    [selectedPool, poolNames, activeTargets, debouncedSearch, healthFilter]
+  );
+
+  const allPoolNames = Object.keys(allPools);
+  const shownPoolNames = showEmptyPools
+    ? allPoolNames
+    : allPoolNames.filter((pn) => allPools[pn].targets.length !== 0);
+
+  return (
+    <Stack>
+      {allPoolNames.length === 0 ? (
+        <Alert icon={<IconInfoCircle />}>
+          No scrape pools found.
+        </Alert>
+      ) : (
+        !showEmptyPools &&
+        allPoolNames.length !== shownPoolNames.length && (
+          <Alert icon={<IconInfoCircle />}>
+            Hiding {allPoolNames.length - shownPoolNames.length} empty pools due
+            to filters or no targets.
+            <Anchor ml="md" fz="1em" onClick={() => setShowEmptyPools(true)}>
+              Show empty pools
+            </Anchor>
+          </Alert>
+        )
+      )}
+      {showLimitAlert && (
+        <Alert
+          icon={<IconInfoCircle />}
+          withCloseButton
+          onClose={() => dispatch(setShowLimitAlert(false))}
+        >
+          There are more than {targetPoolDisplayLimit} scrape pools. Showing
+          only the first one. Use the dropdown to select a different pool.
+        </Alert>
+      )}
+      <Accordion
+        multiple
+        variant="separated"
+        value={allPoolNames.filter((p) => !collapsedPools.includes(p))}
+        onChange={(value) =>
+          dispatch(
+            setCollapsedPools(allPoolNames.filter((p) => !value.includes(p)))
+          )
+        }
+      >
+        {shownPoolNames.map((poolName) => {
+          const pool = allPools[poolName];
+          return (
+            <Accordion.Item
+              key={poolName}
+              value={poolName}
+              className={poolPanelHealthClass(pool)}
+            >
+              <Accordion.Control>
+                <Group wrap="nowrap" justify="space-between" mr="lg">
+                  <Text>{poolName}</Text>
+                  <Group gap="xs">
+                    <Text c="gray.6" mr="xs" fz="xs">
+                      {pool.upCount} / {pool.count} up
+                    </Text>
+                    <RingProgress
+                      size={25}
+                      thickness={5}
+                      sections={[
+                        {
+                          value: (pool.upCount / pool.count) * 100,
+                          color: "green.4",
+                        },
+                        {
+                          value: (pool.downCount / pool.count) * 100,
+                          color: "red.5",
+                        },
+                        {
+                          value: (pool.unknownCount / pool.count) * 100,
+                          color: "gray.4",
+                        },
+                      ]}
+                    />
+                  </Group>
+                </Group>
+              </Accordion.Control>
+              <Accordion.Panel>
+                {pool.count === 0 ? (
+                  <Alert icon={<IconInfoCircle />}>
+                    No active targets in this scrape pool.
+                    <Anchor
+                      ml="md"
+                      fz="1em"
+                      onClick={() => setShowEmptyPools(false)}
+                    >
+                      Hide empty pools
+                    </Anchor>
+                  </Alert>
+                ) : pool.targets.length === 0 ? (
+                  <Alert icon={<IconInfoCircle />}>
+                    No targets in this pool match your filter criteria (omitted{" "}
+                    {pool.count} filtered targets).
+                    <Anchor
+                      ml="md"
+                      fz="1em"
+                      onClick={() => setShowEmptyPools(false)}
+                    >
+                      Hide empty pools
+                    </Anchor>
+                  </Alert>
+                ) : (
+                  <CustomInfiniteScroll
+                    allItems={pool.targets}
+                    child={({ items }) => (
+                      <Table>
+                        <Table.Thead>
+                          <Table.Tr>
+                            <Table.Th>Endpoint</Table.Th>
+                            <Table.Th>Labels</Table.Th>
+                            <Table.Th>Last scrape</Table.Th>
+                            <Table.Th>State</Table.Th>
+                          </Table.Tr>
+                        </Table.Thead>
+                        <Table.Tbody>
+                          {items.map((target, i) => (
+                            // TODO: Find a stable and definitely unique key.
+                            <React.Fragment key={i}>
+                              <Table.Tr>
+                                <Table.Td valign="top">
+                                  <EndpointLink
+                                    endpoint={target.scrapeUrl}
+                                    globalUrl={target.globalUrl}
+                                  />
+                                </Table.Td>
+                                <Table.Td valign="top">
+                                  <TargetLabels
+                                    labels={target.labels}
+                                    discoveredLabels={target.discoveredLabels}
+                                  />
+                                </Table.Td>
+                                <Table.Td valign="top">
+                                  <Group gap="xs">
+                                    <Tooltip label="Last target scrape">
+                                      <Badge
+                                        variant="light"
+                                        leftSection={
+                                          <IconRefresh style={badgeIconStyle} />
+                                        }
+                                      >
+                                        {humanizeDurationRelative(
+                                          target.lastScrape,
+                                          now()
+                                        )}
+                                      </Badge>
+                                    </Tooltip>
+                                    <Tooltip label="Duration of last target scrape">
+                                      <Badge
+                                        variant="light"
+                                        leftSection={
+                                          <IconHourglass
+                                            style={badgeIconStyle}
+                                          />
+                                        }
+                                      >
+                                        {humanizeDuration(
+                                          target.lastScrapeDuration * 1000
+                                        )}
+                                      </Badge>
+                                    </Tooltip>
+                                  </Group>
+                                </Table.Td>
+                                <Table.Td valign="top">
+                                  <Badge
+                                    className={healthBadgeClass(target.health)}
+                                  >
+                                    {target.health}
+                                  </Badge>
+                                </Table.Td>
+                              </Table.Tr>
+                              {target.lastError && (
+                                <Table.Tr>
+                                  <Table.Td colSpan={4}>
+                                    <Alert
+                                      color="red"
+                                      icon={<IconAlertTriangle />}
+                                    >
+                                      Error scraping target:{" "}
+                                      {target.lastError}
+                                    </Alert>
+                                  </Table.Td>
+                                </Table.Tr>
+                              )}
+                            </React.Fragment>
+                          ))}
+                        </Table.Tbody>
+                      </Table>
+                    )}
+                  />
+                )}
+              </Accordion.Panel>
+            </Accordion.Item>
+          );
+        })}
+      </Accordion>
+    </Stack>
+  );
+};
+
+export default ScrapePoolList;
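ScrapePoolsList.tsx funnels every target through a fuzzy search (via `@nexucis/kvsearch`) and then a health filter before grouping. A trimmed-down sketch of that pipeline, assuming a reduced target shape (`MiniTarget` and `filterTargets` are illustrative, not the real `Target` response type or an export of this PR):

```ts
import { KVSearch } from "@nexucis/kvsearch";

// Simplified stand-in for the API's Target type; the real one carries more fields.
type MiniTarget = {
  scrapePool: string;
  health: string;
  labels: Record<string, string>;
};

const search = new KVSearch<MiniTarget>({
  shouldSort: true,
  // Index the pool name plus every label value, mirroring the component above.
  indexedKeys: ["scrapePool", ["labels", /.*/]],
});

// Fuzzy search first (or pass-through on empty input), then the health filter;
// an empty health selection means "no restriction".
export const filterTargets = (
  targets: MiniTarget[],
  pattern: string,
  healthFilter: string[]
): MiniTarget[] =>
  (pattern === ""
    ? targets
    : search.filter(pattern, targets).map((result) => result.original)
  ).filter((t) => healthFilter.length === 0 || healthFilter.includes(t.health));
```

Note that the per-pool counts (`upCount`, `downCount`, `unknownCount`) are computed from the unfiltered target list, so the ring chart and the "x / y up" figures stay stable while searching; only `pool.targets` reflects the filter.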
diff --git a/web/ui/mantine-ui/src/pages/targets/TargetLabels.tsx b/web/ui/mantine-ui/src/pages/targets/TargetLabels.tsx
new file mode 100644
index 0000000000..472eded0f3
--- /dev/null
+++ b/web/ui/mantine-ui/src/pages/targets/TargetLabels.tsx
@@ -0,0 +1,47 @@
+import { FC } from "react";
+import { Labels } from "../../api/responseTypes/targets";
+import { LabelBadges } from "../../components/LabelBadges";
+import { ActionIcon, Collapse, Group, Stack, Text } from "@mantine/core";
+import { useDisclosure } from "@mantine/hooks";
+import { IconChevronDown, IconChevronUp } from "@tabler/icons-react";
+import { actionIconStyle } from "../../styles";
+
+type TargetLabelsProps = {
+  labels: Labels;
+  discoveredLabels: Labels;
+};
+
+const TargetLabels: FC<TargetLabelsProps> = ({ discoveredLabels, labels }) => {
+  const [showDiscovered, { toggle: toggleDiscovered }] = useDisclosure(false);
+
+  return (
+    <Stack gap="xs">
+      <Group wrap="nowrap" align="flex-start">
+        <LabelBadges labels={labels} />
+        <ActionIcon
+          size="xs"
+          color="gray"
+          variant="light"
+          onClick={toggleDiscovered}
+          title={
+            showDiscovered ? "Hide discovered labels" : "Show discovered labels"
+          }
+        >
+          {showDiscovered ? (
+            <IconChevronUp style={actionIconStyle} />
+          ) : (
+            <IconChevronDown style={actionIconStyle} />
+          )}
+        </ActionIcon>
+      </Group>
+      <Collapse in={showDiscovered}>
+        <Text size="xs" c="gray.6" mb="xs">
+          Discovered labels:
+        </Text>
+        <LabelBadges labels={discoveredLabels} />
+      </Collapse>
+    </Stack>
+  );
+};
+
+export default TargetLabels;
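TargetLabels keeps the discovered labels behind a chevron toggle (`useDisclosure` plus a `<Collapse>`), so the table shows only the final target labels by default. The `Labels` response type itself isn't shown in this diff; assuming it is a flat string-to-string map, badge text for one label set would be produced along these lines (hypothetical helper, for illustration only):

```ts
// Assumed shape of Labels; the real type lives in api/responseTypes/targets.
type Labels = Record<string, string>;

// Hypothetical formatter producing the text of each label badge,
// e.g. { job: "node", instance: "host:9100" } -> ['job="node"', 'instance="host:9100"'].
export const formatLabelPairs = (labels: Labels): string[] =>
  Object.entries(labels).map(([name, value]) => `${name}="${value}"`);
```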
diff --git a/web/ui/mantine-ui/src/pages/targets/TargetsPage.tsx b/web/ui/mantine-ui/src/pages/targets/TargetsPage.tsx
new file mode 100644
index 0000000000..60dae60541
--- /dev/null
+++ b/web/ui/mantine-ui/src/pages/targets/TargetsPage.tsx
@@ -0,0 +1,154 @@
+import {
+  ActionIcon,
+  Box,
+  Group,
+  Select,
+  Skeleton,
+  TextInput,
+} from "@mantine/core";
+import {
+  IconLayoutNavbarCollapse,
+  IconLayoutNavbarExpand,
+  IconSearch,
+} from "@tabler/icons-react";
+import { StateMultiSelect } from "../../components/StateMultiSelect";
+import { Suspense } from "react";
+import badgeClasses from "../../Badge.module.css";
+import { useAppDispatch, useAppSelector } from "../../state/hooks";
+import {
+  setCollapsedPools,
+  setShowLimitAlert,
+} from "../../state/targetsPageSlice";
+import {
+  ArrayParam,
+  StringParam,
+  useQueryParam,
+  withDefault,
+} from "use-query-params";
+import ErrorBoundary from "../../components/ErrorBoundary";
+import ScrapePoolList from "./ScrapePoolsList";
+import { useSuspenseAPIQuery } from "../../api/api";
+import { ScrapePoolsResult } from "../../api/responseTypes/scrapePools";
+import { expandIconStyle, inputIconStyle } from "../../styles";
+
+export const targetPoolDisplayLimit = 20;
+
+export default function TargetsPage() {
+  // Load the list of all available scrape pools.
+  const {
+    data: {
+      data: { scrapePools },
+    },
+  } = useSuspenseAPIQuery<ScrapePoolsResult>({
+    path: `/scrape_pools`,
+  });
+
+  const dispatch = useAppDispatch();
+
+  const [scrapePool, setScrapePool] = useQueryParam("pool", StringParam);
+  const [healthFilter, setHealthFilter] = useQueryParam(
+    "health",
+    withDefault(ArrayParam, [])
+  );
+  const [searchFilter, setSearchFilter] = useQueryParam(
+    "search",
+    withDefault(StringParam, "")
+  );
+
+  const { collapsedPools, showLimitAlert } = useAppSelector(
+    (state) => state.targetsPage
+  );
+
+  // When we have more than X targets, we want to limit the display by selecting the first
+  // scrape pool and reflecting that in the URL as well. We also want to show an alert
+  // about the fact that we are limiting the display, but the tricky bit is that this
+  // alert should only be shown once, upon the first "redirect" that causes the limiting,
+  // not again when the page is reloaded with the same URL parameters. That's why we remember
+  // `showLimitAlert` in Redux (just useState() doesn't work properly, because the component
+  // for some Suspense-related reasons seems to be mounted/unmounted multiple times, so the
+  // state cell would get initialized multiple times as well).
+  const limited =
+    scrapePools.length > targetPoolDisplayLimit && scrapePool === undefined;
+  if (limited) {
+    setScrapePool(scrapePools[0]);
+    dispatch(setShowLimitAlert(true));
+  }
+
+  return (
+    <>
+      <Group mb="md" mt="xs">
+        <Select
+          placeholder="Select scrape pool"
+          data={scrapePools}
+          value={(limited && scrapePools[0]) || scrapePool || null}
+          onChange={(value) => setScrapePool(value)}
+        />
+        <StateMultiSelect
+          options={["unknown", "up", "down"]}
+          optionClass={(o) =>
+            o === "up"
+              ? badgeClasses.healthOk
+              : o === "down"
+                ? badgeClasses.healthErr
+                : badgeClasses.healthUnknown
+          }
+          placeholder="Filter by target health"
+          values={(healthFilter?.filter((v) => v !== null) as string[]) || []}
+          onChange={(values) => setHealthFilter(values)}
+        />
+        <TextInput
+          flex={1}
+          leftSection={<IconSearch style={inputIconStyle} />}
+          placeholder="Filter by endpoint or labels"
+          value={searchFilter || ""}
+          onChange={(event) => setSearchFilter(event.currentTarget.value)}
+        />
+        <ActionIcon
+          size="input-sm"
+          variant="light"
+          title={
+            collapsedPools.length > 0
+              ? "Expand all pools"
+              : "Collapse all pools"
+          }
+          onClick={() =>
+            dispatch(
+              setCollapsedPools(collapsedPools.length > 0 ? [] : scrapePools)
+            )
+          }
+        >
+          {collapsedPools.length > 0 ? (
+            <IconLayoutNavbarExpand style={expandIconStyle} />
+          ) : (
+            <IconLayoutNavbarCollapse style={expandIconStyle} />
+          )}
+        </ActionIcon>
+      </Group>
+      <ErrorBoundary title="Error showing target pools">
+        <Suspense
+          fallback={
+            <Box mt="md">
+              <Skeleton height={500} />
+            </Box>
+          }
+        >
+          <ScrapePoolList
+            poolNames={scrapePools}
+            selectedPool={(limited && scrapePools[0]) || scrapePool || null}
+            healthFilter={healthFilter as string[]}
+            searchFilter={searchFilter}
+          />
+        </Suspense>
+      </ErrorBoundary>
+    </>
+  );
+}
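The display-limiting rule that TargetsPage inlines can be restated as a pure function, which also makes the "only when no pool is picked via the URL" condition explicit. A sketch (illustrative helper, not part of the PR):

```ts
// With more than `limit` pools and no pool selected via the URL parameter,
// fall back to the first pool and flag that the one-time alert should fire.
export const applyPoolLimit = (
  pools: string[],
  selected: string | null | undefined,
  limit: number
): { pool: string | null; limited: boolean } =>
  pools.length > limit && selected === undefined
    ? { pool: pools[0], limited: true }
    : { pool: selected ?? null, limited: false };

// Example: applyPoolLimit(pools, undefined, 20) with 25 pools
// -> { pool: pools[0], limited: true }; with "pool=foo" in the URL the
// selection passes through unchanged and no alert is raised.
```

Keeping the `limited` flag in Redux rather than local state, as the comment above explains, is what makes the alert fire once per redirect instead of once per Suspense-driven remount.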