Merge remote-tracking branch 'origin/main' into discovery-manager-shutdown-ch

This commit is contained in:
liyandi 2025-03-05 17:15:07 +08:00
commit b73c524ffa
754 changed files with 103370 additions and 73002 deletions

View file

@ -1,4 +1,4 @@
blank_issues_enabled: false
blank_issues_enabled: true
contact_links:
- name: Prometheus Community Support
url: https://prometheus.io/community/

View file

@ -16,8 +16,23 @@ updates:
directory: "/documentation/examples/remote_storage"
schedule:
interval: "monthly"
# New manteen-ui packages.
- package-ecosystem: "npm"
directory: "/web/ui"
labels:
- dependencies
- javascript
- manteen-ui
schedule:
interval: "monthly"
open-pull-requests-limit: 20
# Old react-app packages.
- package-ecosystem: "npm"
directory: "/web/ui/react-app"
labels:
- dependencies
- javascript
- old-react-ui
schedule:
interval: "monthly"
open-pull-requests-limit: 20

56
.github/stale.yml vendored
View file

@ -1,56 +0,0 @@
# Configuration for probot-stale - https://github.com/probot/stale
# Number of days of inactivity before an Issue or Pull Request becomes stale
daysUntilStale: 60
# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
daysUntilClose: false
# Only issues or pull requests with all of these labels are checked if stale. Defaults to `[]` (disabled)
onlyLabels: []
# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
exemptLabels:
- keepalive
# Set to true to ignore issues in a project (defaults to false)
exemptProjects: false
# Set to true to ignore issues in a milestone (defaults to false)
exemptMilestones: false
# Set to true to ignore issues with an assignee (defaults to false)
exemptAssignees: false
# Label to use when marking as stale
staleLabel: stale
# Comment to post when marking as stale. Set to `false` to disable
markComment: false
# Comment to post when removing the stale label.
# unmarkComment: >
# Your comment here.
# Comment to post when closing a stale Issue or Pull Request.
# closeComment: >
# Your comment here.
# Limit the number of actions per hour, from 1-30. Default is 30
limitPerRun: 30
# Limit to only `issues` or `pulls`
only: pulls
# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
# pulls:
# daysUntilStale: 30
# markComment: >
# This pull request has been automatically marked as stale because it has not had
# recent activity. It will be closed if no further activity occurs. Thank you
# for your contributions.
# issues:
# exemptLabels:
# - confirmed

View file

@ -0,0 +1,30 @@
---
name: Dependabot auto-merge
on: pull_request
concurrency:
group: ${{ github.workflow }}-${{ (github.event.pull_request && github.event.pull_request.number) || github.ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read
jobs:
dependabot:
permissions:
contents: write
pull-requests: write
runs-on: ubuntu-latest
if: ${{ github.event.pull_request.user.login == 'dependabot[bot]' && github.repository_owner == 'prometheus' }}
steps:
- name: Dependabot metadata
id: metadata
uses: dependabot/fetch-metadata@d7267f607e9d3fb96fc2fbe83e0af444713e90b7 # v2.3.0
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
- name: Enable auto-merge for Dependabot PRs
if: ${{steps.metadata.outputs.update-type == 'version-update:semver-minor' || steps.metadata.outputs.update-type == 'version-update:semver-patch'}}
run: gh pr merge --auto --merge "$PR_URL"
env:
PR_URL: ${{github.event.pull_request.html_url}}
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}

View file

@ -12,8 +12,8 @@ jobs:
name: lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: bufbuild/buf-setup-action@dde0b9351db90fbf78e345f41a57de8514bf1091 # v1.32.2
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: bufbuild/buf-setup-action@a47c93e0b1648d5651a065437926377d060baa99 # v1.50.0
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1

View file

@ -12,8 +12,8 @@ jobs:
runs-on: ubuntu-latest
if: github.repository_owner == 'prometheus'
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: bufbuild/buf-setup-action@dde0b9351db90fbf78e345f41a57de8514bf1091 # v1.32.2
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: bufbuild/buf-setup-action@a47c93e0b1648d5651a065437926377d060baa99 # v1.50.0
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1

View file

@ -11,11 +11,13 @@ jobs:
container:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
image: quay.io/prometheus/golang-builder:1.22-base
image: quay.io/prometheus/golang-builder:1.23-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6
- uses: ./.github/promci/actions/setup_environment
with:
enable_npm: true
- run: make GOOPTS=--tags=stringlabels GO_ONLY=1 SKIP_GOLANGCI_LINT=1
- run: go test --tags=stringlabels ./tsdb/ -test.tsdb-isolation=false
- run: make -C documentation/examples/remote_storage
@ -25,13 +27,13 @@ jobs:
name: More Go tests
runs-on: ubuntu-latest
container:
image: quay.io/prometheus/golang-builder:1.22-base
image: quay.io/prometheus/golang-builder:1.23-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6
- uses: ./.github/promci/actions/setup_environment
- run: go test --tags=dedupelabels ./...
- run: GOARCH=386 go test ./cmd/prometheus
- run: GOARCH=386 go test ./...
- uses: ./.github/promci/actions/check_proto
with:
version: "3.15.8"
@ -39,11 +41,14 @@ jobs:
test_go_oldest:
name: Go tests with previous Go version
runs-on: ubuntu-latest
env:
# Enforce the Go version.
GOTOOLCHAIN: local
container:
# The go version in this image should be N-1 wrt test_go.
image: quay.io/prometheus/golang-builder:1.21-base
image: quay.io/prometheus/golang-builder:1.22-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- run: make build
# Don't run NPM build; don't run race-detector.
- run: make test GO_ONLY=1 test-flags=""
@ -54,11 +59,11 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
container:
image: quay.io/prometheus/golang-builder:1.22-base
image: quay.io/prometheus/golang-builder:1.23-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6
- uses: ./.github/promci/actions/setup_environment
with:
enable_go: false
@ -74,10 +79,10 @@ jobs:
name: Go tests on Windows
runs-on: windows-latest
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0
with:
go-version: 1.22.x
go-version: 1.23.x
- run: |
$TestTargets = go list ./... | Where-Object { $_ -NotMatch "(github.com/prometheus/prometheus/discovery.*|github.com/prometheus/prometheus/config|github.com/prometheus/prometheus/web)"}
go test $TestTargets -vet=off -v
@ -89,9 +94,9 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
container:
image: quay.io/prometheus/golang-builder:1.22-base
image: quay.io/prometheus/golang-builder:1.23-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- run: go install ./cmd/promtool/.
- run: go install github.com/google/go-jsonnet/cmd/jsonnet@latest
- run: go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
@ -107,6 +112,8 @@ jobs:
if: |
!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
&&
!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
&&
!(github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-'))
&&
!(github.event_name == 'push' && github.event.ref == 'refs/heads/main')
@ -114,8 +121,8 @@ jobs:
matrix:
thread: [ 0, 1, 2 ]
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6
- uses: ./.github/promci/actions/build
with:
promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386"
@ -127,6 +134,8 @@ jobs:
if: |
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
||
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
||
(github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-'))
||
(github.event_name == 'push' && github.event.ref == 'refs/heads/main')
@ -137,23 +146,46 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6
- uses: ./.github/promci/actions/build
with:
parallelism: 12
thread: ${{ matrix.thread }}
build_all_status:
# This status check aggregates the individual matrix jobs of the "Build
# Prometheus for all architectures" step into a final status. Fails if a
# single matrix job fails, succeeds if all matrix jobs succeed.
# See https://github.com/orgs/community/discussions/4324 for why this is
# needed
name: Report status of build Prometheus for all architectures
runs-on: ubuntu-latest
needs: [build_all]
# The run condition needs to include always(). Otherwise actions
# behave unexpectedly:
# only "needs" will make the Status Report be skipped if one of the builds fails https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/using-jobs-in-a-workflow#defining-prerequisite-jobs
# And skipped is treated as success https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborat[…]n-repositories-with-code-quality-features/about-status-checks
# Adding always ensures that the status check is run independently of the
# results of Build All
if: always() && github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-')
steps:
- name: Successful build
if: ${{ !(contains(needs.*.result, 'failure')) && !(contains(needs.*.result, 'cancelled')) }}
run: exit 0
- name: Failing or cancelled build
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
run: exit 1
check_generated_parser:
name: Check generated parser
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install Go
uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1
uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0
with:
cache: false
go-version: 1.22.x
go-version: 1.23.x
- name: Run goyacc and check for diff
run: make install-goyacc check-generated-parser
golangci:
@ -161,20 +193,20 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install Go
uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1
uses: actions/setup-go@f111f3307d8850f501ac008e886eec1fd1932a34 # v5.3.0
with:
go-version: 1.22.x
go-version: 1.23.x
- name: Install snmp_exporter/generator dependencies
run: sudo apt-get update && sudo apt-get -y install libsnmp-dev
if: github.repository == 'prometheus/snmp_exporter'
- name: Lint
uses: golangci/golangci-lint-action@a4f60bb28d35aeee14e6880718e0c85ff1882e64 # v6.0.1
uses: golangci/golangci-lint-action@ec5d18412c0aeab7936cb16880d708ba2a64e1ae # v6.2.0
with:
args: --verbose
# Make sure to sync this with Makefile.common and scripts/golangci-lint.yml.
version: v1.59.1
version: v1.63.4
fuzzing:
uses: ./.github/workflows/fuzzing.yml
if: github.event_name == 'pull_request'
@ -187,8 +219,8 @@ jobs:
needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all]
if: github.event_name == 'push' && github.event.ref == 'refs/heads/main'
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6
- uses: ./.github/promci/actions/publish_main
with:
docker_hub_login: ${{ secrets.docker_hub_login }}
@ -199,10 +231,13 @@ jobs:
name: Publish release artefacts
runs-on: ubuntu-latest
needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
if: |
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
||
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6
- uses: ./.github/promci/actions/publish_release
with:
docker_hub_login: ${{ secrets.docker_hub_login }}
@ -216,31 +251,40 @@ jobs:
needs: [test_ui, codeql]
steps:
- name: Checkout
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: prometheus/promci@c3c93a50d581b928af720f0134b2b2dad32a6c41 # v0.4.6
- name: Install nodejs
uses: actions/setup-node@60edb5dd545a775178f52524783378180af0d1f8 # v4.0.2
uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0
with:
node-version-file: "web/ui/.nvmrc"
registry-url: "https://registry.npmjs.org"
- uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
- uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
restore-keys: |
${{ runner.os }}-node-
- name: Check libraries version
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
run: ./scripts/ui_release.sh --check-package "$(echo ${{ github.ref_name }}|sed s/v2/v0/)"
if: |
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
||
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
run: ./scripts/ui_release.sh --check-package "$(./scripts/get_module_version.sh ${{ github.ref_name }})"
- name: build
run: make assets
- name: Copy files before publishing libs
run: ./scripts/ui_release.sh --copy
- name: Publish dry-run libraries
if: "!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))"
if: |
!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
&&
!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
run: ./scripts/ui_release.sh --publish dry-run
- name: Publish libraries
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
if: |
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
||
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
run: ./scripts/ui_release.sh --publish
env:
# The setup-node action writes an .npmrc file with this env variable

View file

@ -24,15 +24,15 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Initialize CodeQL
uses: github/codeql-action/init@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff # v3.25.8
uses: github/codeql-action/init@dd746615b3b9d728a6a37ca2045b68ca76d4841a # v3.28.8
with:
languages: ${{ matrix.language }}
- name: Autobuild
uses: github/codeql-action/autobuild@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff # v3.25.8
uses: github/codeql-action/autobuild@dd746615b3b9d728a6a37ca2045b68ca76d4841a # v3.28.8
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff # v3.25.8
uses: github/codeql-action/analyze@dd746615b3b9d728a6a37ca2045b68ca76d4841a # v3.28.8

View file

@ -18,7 +18,7 @@ jobs:
if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
steps:
- name: git checkout
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set docker hub repo name
run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV
- name: Push README to Dockerhub
@ -40,7 +40,7 @@ jobs:
if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
steps:
- name: git checkout
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set quay.io org name
run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV
- name: Set quay.io repo name

View file

@ -1,61 +0,0 @@
on:
repository_dispatch:
types: [funcbench_start]
name: Funcbench Workflow
permissions:
contents: read
jobs:
run_funcbench:
name: Running funcbench
if: github.event.action == 'funcbench_start'
runs-on: ubuntu-latest
env:
AUTH_FILE: ${{ secrets.TEST_INFRA_PROVIDER_AUTH }}
BRANCH: ${{ github.event.client_payload.BRANCH }}
BENCH_FUNC_REGEX: ${{ github.event.client_payload.BENCH_FUNC_REGEX }}
PACKAGE_PATH: ${{ github.event.client_payload.PACKAGE_PATH }}
GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }}
GITHUB_ORG: prometheus
GITHUB_REPO: prometheus
GITHUB_STATUS_TARGET_URL: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}
LAST_COMMIT_SHA: ${{ github.event.client_payload.LAST_COMMIT_SHA }}
GKE_PROJECT_ID: macro-mile-203600
PR_NUMBER: ${{ github.event.client_payload.PR_NUMBER }}
PROVIDER: gke
ZONE: europe-west3-a
steps:
- name: Update status to pending
run: >-
curl -i -X POST
-H "Authorization: Bearer $GITHUB_TOKEN"
-H "Content-Type: application/json"
--data '{"state":"pending","context":"funcbench-status","target_url":"'$GITHUB_STATUS_TARGET_URL'"}'
"https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA"
- name: Prepare nodepool
uses: docker://prominfra/funcbench:master
with:
entrypoint: "docker_entrypoint"
args: make deploy
- name: Delete all resources
if: always()
uses: docker://prominfra/funcbench:master
with:
entrypoint: "docker_entrypoint"
args: make clean
- name: Update status to failure
if: failure()
run: >-
curl -i -X POST
-H "Authorization: Bearer $GITHUB_TOKEN"
-H "Content-Type: application/json"
--data '{"state":"failure","context":"funcbench-status","target_url":"'$GITHUB_STATUS_TARGET_URL'"}'
"https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA"
- name: Update status to success
if: success()
run: >-
curl -i -X POST
-H "Authorization: Bearer $GITHUB_TOKEN"
-H "Content-Type: application/json"
--data '{"state":"success","context":"funcbench-status","target_url":"'$GITHUB_STATUS_TARGET_URL'"}'
"https://api.github.com/repos/$GITHUB_REPOSITORY/statuses/$LAST_COMMIT_SHA"

View file

@ -21,7 +21,7 @@ jobs:
fuzz-seconds: 600
dry-run: false
- name: Upload Crash
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
if: failure() && steps.build.outcome == 'success'
with:
name: artifacts

View file

@ -15,6 +15,8 @@ env:
PR_NUMBER: ${{ github.event.client_payload.PR_NUMBER }}
PROVIDER: gke
RELEASE: ${{ github.event.client_payload.RELEASE }}
BENCHMARK_VERSION: ${{ github.event.client_payload.BENCHMARK_VERSION }}
BENCHMARK_DIRECTORY: ${{ github.event.client_payload.BENCHMARK_DIRECTORY }}
ZONE: europe-west3-a
jobs:
benchmark_start:

View file

@ -13,7 +13,7 @@ jobs:
container:
image: quay.io/prometheus/golang-builder
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- run: ./scripts/sync_repo_files.sh
env:
GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }}

View file

@ -21,12 +21,12 @@ jobs:
steps:
- name: "Checkout code"
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # tag=v4.1.6
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # tag=v4.2.2
with:
persist-credentials: false
- name: "Run analysis"
uses: ossf/scorecard-action@dc50aa9510b46c811795eb24b2f1ba02a914e534 # tag=v2.3.3
uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # tag=v2.4.0
with:
results_file: results.sarif
results_format: sarif
@ -37,7 +37,7 @@ jobs:
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # tag=v4.3.3
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # tag=v4.6.0
with:
name: SARIF file
path: results.sarif
@ -45,6 +45,6 @@ jobs:
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff # tag=v3.25.8
uses: github/codeql-action/upload-sarif@dd746615b3b9d728a6a37ca2045b68ca76d4841a # tag=v3.28.8
with:
sarif_file: results.sarif

31
.github/workflows/stale.yml vendored Normal file
View file

@ -0,0 +1,31 @@
name: Stale Check
on:
workflow_dispatch: {}
schedule:
- cron: '16 22 * * *'
permissions:
issues: write
pull-requests: write
jobs:
stale:
if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
runs-on: ubuntu-latest
steps:
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
# opt out of defaults to avoid marking issues as stale and closing them
# https://github.com/actions/stale#days-before-close
# https://github.com/actions/stale#days-before-stale
days-before-stale: -1
days-before-close: -1
# Setting it to empty string to skip comments.
# https://github.com/actions/stale#stale-pr-message
# https://github.com/actions/stale#stale-issue-message
stale-pr-message: ''
stale-issue-message: ''
operations-per-run: 30
# override days-before-stale, for only marking the pull requests as stale
days-before-pr-stale: 60
stale-pr-label: stale
exempt-pr-labels: keepalive

2
.gitignore vendored
View file

@ -22,7 +22,7 @@ benchmark.txt
/documentation/examples/remote_storage/example_write_adapter/example_write_adapter
npm_licenses.tar.bz2
/web/ui/static/react
/web/ui/static
/vendor
/.build

View file

@ -1,39 +1,62 @@
run:
timeout: 15m
skip-files:
# Skip autogenerated files.
- ^.*\.(pb|y)\.go$
skip-dirs:
# Copied it from a different source
- storage/remote/otlptranslator/prometheusremotewrite
- storage/remote/otlptranslator/prometheus
output:
sort-results: true
linters:
# Keep this list sorted alphabetically
enable:
- depguard
- errorlint
- exptostd
- gocritic
- godot
- gofumpt
- goimports
- loggercheck
- misspell
- nilnesserr
- nolintlint
- perfsprint
- predeclared
- revive
- sloglint
- testifylint
- unconvert
- unused
- usestdlibvars
- whitespace
- loggercheck
issues:
max-issues-per-linter: 0
max-same-issues: 0
# The default exclusions are too aggressive. For one, they
# essentially disable any linting on doc comments. We disable
# default exclusions here and add exclusions fitting our codebase
# further down.
exclude-use-default: false
exclude-files:
# Skip autogenerated files.
- ^.*\.(pb|y)\.go$
exclude-dirs:
# Copied it from a different source.
- storage/remote/otlptranslator/prometheusremotewrite
- storage/remote/otlptranslator/prometheus
exclude-rules:
- linters:
- errcheck
# Taken from the default exclusions (that are otherwise disabled above).
text: Error return value of .((os\.)?std(out|err)\..*|.*Close|.*Flush|os\.Remove(All)?|.*print(f|ln)?|os\.(Un)?Setenv). is not checked
- linters:
- govet
# We use many Seek methods that do not follow the usual pattern.
text: "stdmethods: method Seek.* should have signature Seek"
- linters:
- revive
# At some point we stopped writing doc comments on exported symbols.
# TODO(beorn7): Maybe we should enforce this again? There are ~500 offenders right now.
text: exported (.+) should have comment( \(or a comment on this block\))? or be unexported
- linters:
- gocritic
text: "appendAssign"
@ -81,8 +104,6 @@ linters-settings:
- (net/http.ResponseWriter).Write
# No need to check for errors on server's shutdown.
- (*net/http.Server).Shutdown
# Never check for logger errors.
- (github.com/go-kit/log.Logger).Log
# Never check for rollback errors as Rollback() is called when a previous error was detected.
- (github.com/prometheus/prometheus/storage.Appender).Rollback
goimports:
@ -91,21 +112,23 @@ linters-settings:
extra-rules: true
perfsprint:
# Optimizes `fmt.Errorf`.
errorf: false
errorf: true
revive:
# By default, revive will enable only the linting rules that are named in the configuration file.
# So, it's needed to explicitly set in configuration all required rules.
# The following configuration enables all the rules from the defaults.toml
# https://github.com/mgechev/revive/blob/master/defaults.toml
# So, it's needed to explicitly enable all required rules here.
rules:
# https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md
- name: blank-imports
- name: comment-spacings
- name: context-as-argument
arguments:
# allow functions with test or bench signatures
# Allow functions with test or bench signatures.
- allowTypesBefore: "*testing.T,testing.TB"
- name: context-keys-type
- name: dot-imports
- name: early-return
arguments:
- "preserveScope"
# A lot of false positives: incorrectly identifies channel draining as "empty code block".
# See https://github.com/mgechev/revive/issues/386
- name: empty-block
@ -117,30 +140,25 @@ linters-settings:
- name: exported
- name: increment-decrement
- name: indent-error-flow
arguments:
- "preserveScope"
- name: package-comments
# TODO(beorn7): Currently, we have a lot of missing package doc comments. Maybe we should have them.
disabled: true
- name: range
- name: receiver-naming
- name: redefines-builtin-id
- name: superfluous-else
arguments:
- "preserveScope"
- name: time-naming
- name: unexported-return
- name: unreachable-code
- name: unused-parameter
disabled: true
- name: var-declaration
- name: var-naming
testifylint:
disable:
- float-compare
- go-require
enable:
- bool-compare
- compares
- empty
- error-is-as
- error-nil
- expected-actual
- len
- require-error
- suite-dont-use-pkg
- suite-extra-assert-call
enable-all: true

View file

@ -1,7 +1,7 @@
go:
# Whenever the Go version is updated here,
# .github/workflows should also be updated.
version: 1.22
version: 1.23
repository:
path: github.com/prometheus/prometheus
build:
@ -28,8 +28,6 @@ tarball:
# Whenever there are new files to include in the tarball,
# remember to make sure the new files will be generated after `make build`.
files:
- consoles
- console_libraries
- documentation/examples/prometheus.yml
- LICENSE
- NOTICE

View file

@ -1,7 +1,7 @@
---
extends: default
ignore: |
ui/react-app/node_modules
**/node_modules
rules:
braces:

View file

@ -2,6 +2,245 @@
## unreleased
## 3.2.1 / 2025-02-25
* [BUGFIX] Don't send `Accept` header `escape=allow-utf-8` when `metric_name_validation_scheme: legacy` is configured. #16061
## 3.2.0 / 2025-02-17
* [CHANGE] relabel: Replace actions can now use UTF-8 characters in `targetLabel` field. Note that `$<chars>` or `${<chars>}` will be expanded. This also applies to the `replacement` field for the `LabelMap` action. #15851
* [CHANGE] rulefmt: Rule names can use UTF-8 characters, except `{` and `}` characters (due to common mistake checks). #15851
* [FEATURE] remote/otlp: Add feature flag `otlp-deltatocumulative` to support conversion from delta to cumulative. #15165
* [ENHANCEMENT] openstack SD: Discover Octavia loadbalancers. #15539
* [ENHANCEMENT] scrape: Add metadata for automatic metrics to WAL for `metadata-wal-records` feature. #15837
* [ENHANCEMENT] promtool: Support linting of scrape interval, through lint option `too-long-scrape-interval`. #15719
* [ENHANCEMENT] promtool: Add --ignore-unknown-fields option. #15706
* [ENHANCEMENT] ui: Make "hide empty rules" and hide empty rules" persistent #15807
* [ENHANCEMENT] web/api: Add a limit parameter to `/query` and `/query_range`. #15552
* [ENHANCEMENT] api: Add fields Node and ServerTime to `/status`. #15784
* [PERF] Scraping: defer computing labels for dropped targets until they are needed by the UI. #15261
* [BUGFIX] remotewrite2: Fix invalid metadata bug for metrics without metadata. #15829
* [BUGFIX] remotewrite2: Fix the unit field propagation. #15825
* [BUGFIX] scrape: Fix WAL metadata for histograms and summaries. #15832
* [BUGFIX] ui: Merge duplicate "Alerts page settings" sections. #15810
* [BUGFIX] PromQL: Fix `<aggr_over_time>` functions with histograms. #15711
## 3.1.0 / 2025-01-02
* [SECURITY] upgrade golang.org/x/crypto to address reported CVE-2024-45337. #15691
* [CHANGE] Notifier: Increment prometheus_notifications_errors_total by the number of affected alerts rather than per batch. #15428
* [CHANGE] API: list rules field "groupNextToken:omitempty" renamed to "groupNextToken". #15400
* [ENHANCEMENT] OTLP translate: keep identifying attributes in target_info. #15448
* [ENHANCEMENT] Paginate rule groups, add infinite scroll to rules within groups. #15677
* [ENHANCEMENT] TSDB: Improve calculation of space used by labels. #13880
* [ENHANCEMENT] Rules: new metric rule_group_last_rule_duration_sum_seconds. #15672
* [ENHANCEMENT] Observability: Export 'go_sync_mutex_wait_total_seconds_total' metric. #15339
* [ENHANCEMENT] Remote-Write: optionally use a DNS resolver that picks a random IP. #15329
* [PERF] Optimize `l=~".+"` matcher. #15474, #15684
* [PERF] TSDB: Cache all symbols for compaction. #15455
* [PERF] TSDB: MemPostings: keep a map of label values slices. #15426
* [PERF] Remote-Write: Remove interning hook. #15456
* [PERF] Scrape: optimize string manipulation for experimental native histograms with custom buckets. #15453
* [PERF] TSDB: reduce memory allocations. #15465, #15427
* [PERF] Storage: Implement limit in mergeGenericQuerier. #14489
* [PERF] TSDB: Optimize inverse matching. #14144
* [PERF] Regex: use stack memory for lowercase copy of string. #15210
* [PERF] TSDB: When deleting from postings index, pause to unlock and let readers read. #15242
* [BUGFIX] Main: Avoid possible segfault at exit. #15724
* [BUGFIX] Rules: Do not run rules concurrently if uncertain about dependencies. #15560
* [BUGFIX] PromQL: Adds test for `absent`, `absent_over_time` and `deriv` func with histograms. #15667
* [BUGFIX] PromQL: Fix various bugs related to quoting UTF-8 characters. #15531
* [BUGFIX] Scrape: fix nil panic after scrape loop reload. #15563
* [BUGFIX] Remote-write: fix panic on repeated log message. #15562
* [BUGFIX] Scrape: reload would ignore always_scrape_classic_histograms and convert_classic_histograms_to_nhcb configs. #15489
* [BUGFIX] TSDB: fix data corruption in experimental native histograms. #15482
* [BUGFIX] PromQL: Ignore histograms in all time related functions. #15479
* [BUGFIX] OTLP receiver: Convert metric metadata. #15416
* [BUGFIX] PromQL: Fix `resets` function for histograms. #15527
* [BUGFIX] PromQL: Fix behaviour of `changes()` for mix of histograms and floats. #15469
* [BUGFIX] PromQL: Fix behaviour of some aggregations with histograms. #15432
* [BUGFIX] allow quoted exemplar keys in openmetrics text format. #15260
* [BUGFIX] TSDB: fixes for rare conditions when loading write-behind-log (WBL). #15380
* [BUGFIX] `round()` function did not remove `__name__` label. #15250
* [BUGFIX] Promtool: analyze block shows metric name with 0 cardinality. #15438
* [BUGFIX] PromQL: Fix `count_values` for histograms. #15422
* [BUGFIX] PromQL: fix issues with comparison binary operations with `bool` modifier and native histograms. #15413
* [BUGFIX] PromQL: fix incorrect "native histogram ignored in aggregation" annotations. #15414
* [BUGFIX] PromQL: Corrects the behaviour of some operator and aggregators with Native Histograms. #15245
* [BUGFIX] TSDB: Always return unknown hint for first sample in non-gauge histogram chunk. #15343
* [BUGFIX] PromQL: Clamp functions: Ignore any points with native histograms. #15169
* [BUGFIX] TSDB: Fix race on stale values in headAppender. #15322
* [BUGFIX] UI: Fix selector / series formatting for empty metric names. #15340
* [BUGFIX] OTLP receiver: Allow colons in non-standard units. #15710
## 3.0.1 / 2024-11-28
The first bug fix release for Prometheus 3.
* [BUGFIX] Promql: Make subqueries left open. #15431
* [BUGFIX] Fix memory leak when query log is enabled. #15434
* [BUGFIX] Support utf8 names on /v1/label/:name/values endpoint. #15399
## 3.0.0 / 2024-11-14
This release includes new features such as a brand new UI and UTF-8 support enabled by default. As this marks the first new major version in seven years, several breaking changes are introduced. The breaking changes are mainly around the removal of deprecated feature flags and CLI arguments, and the full list can be found below. For users that want to upgrade we recommend reading through our [migration guide](https://prometheus.io/docs/prometheus/3.0/migration/).
* [CHANGE] Set the `GOMAXPROCS` variable automatically to match the Linux CPU quota. Use `--no-auto-gomaxprocs` to disable it. The `auto-gomaxprocs` feature flag was removed. #15376
* [CHANGE] Set the `GOMEMLIMIT` variable automatically to match the Linux container memory limit. Use `--no-auto-gomemlimit` to disable it. The `auto-gomemlimit` feature flag was removed. #15373
* [CHANGE] Scraping: Remove implicit fallback to the Prometheus text format in case of invalid/missing Content-Type and fail the scrape instead. Add ability to specify a `fallback_scrape_protocol` in the scrape config. #15136
* [CHANGE] Remote-write: default enable_http2 to false. #15219
* [CHANGE] Scraping: normalize "le" and "quantile" label values upon ingestion. #15164
* [CHANGE] Scraping: config `scrape_classic_histograms` was renamed to `always_scrape_classic_histograms`. #15178
* [CHANGE] Config: remove expand-external-labels flag, expand external labels env vars by default. #14657
* [CHANGE] Disallow configuring AM with the v1 api. #13883
* [CHANGE] regexp `.` now matches all characters (performance improvement). #14505
* [CHANGE] `holt_winters` is now called `double_exponential_smoothing` and moves behind the [experimental-promql-functions feature flag](https://prometheus.io/docs/prometheus/latest/feature_flags/#experimental-promql-functions). #14930
* [CHANGE] API: The OTLP receiver endpoint can now be enabled using `--web.enable-otlp-receiver` instead of `--enable-feature=otlp-write-receiver`. #14894
* [CHANGE] Prometheus will not add or remove port numbers from the target address. `no-default-scrape-port` feature flag removed. #14160
* [CHANGE] Logging: the format of log lines has changed a little, along with the adoption of Go's Structured Logging package. #14906
* [CHANGE] Don't create extra `_created` timeseries if feature-flag `created-timestamp-zero-ingestion` is enabled. #14738
* [CHANGE] Float literals and time durations being the same is now a stable feature. #15111
* [CHANGE] UI: The old web UI has been replaced by a completely new one that is less cluttered and adds a few new features (PromLens-style tree view, better metrics explorer, "Explain" tab). However, it is still missing some features of the old UI (notably, exemplar display and heatmaps). To switch back to the old UI, you can use the feature flag `--enable-feature=old-ui` for the time being. #14872
* [CHANGE] PromQL: Range selectors and the lookback delta are now left-open, i.e. a sample coinciding with the lower time limit is excluded rather than included. #13904
* [CHANGE] Kubernetes SD: Remove support for `discovery.k8s.io/v1beta1` API version of EndpointSlice. This version is no longer served as of Kubernetes v1.25. #14365
* [CHANGE] Kubernetes SD: Remove support for `networking.k8s.io/v1beta1` API version of Ingress. This version is no longer served as of Kubernetes v1.22. #14365
* [CHANGE] UTF-8: Enable UTF-8 support by default. Prometheus now allows all UTF-8 characters in metric and label names. The corresponding `utf8-name` feature flag has been removed. #14705, #15258
* [CHANGE] Console: Remove example files for the console feature. Users can continue using the console feature by supplying their own JavaScript and templates. #14807
* [CHANGE] SD: Enable the new service discovery manager by default. This SD manager does not restart unchanged discoveries upon reloading. This makes reloads faster and reduces pressure on service discoveries' sources. The corresponding `new-service-discovery-manager` feature flag has been removed. #14770
* [CHANGE] Agent mode has been promoted to stable. The feature flag `agent` has been removed. To run Prometheus in Agent mode, use the new `--agent` cmdline arg instead. #14747
* [CHANGE] Remove deprecated `remote-write-receiver`,`promql-at-modifier`, and `promql-negative-offset` feature flags. #13456, #14526
* [CHANGE] Remove deprecated `storage.tsdb.allow-overlapping-blocks`, `alertmanager.timeout`, and `storage.tsdb.retention` flags. #14640, #14643
* [FEATURE] OTLP receiver: Ability to skip UTF-8 normalization using `otlp.translation_strategy = NoUTF8EscapingWithSuffixes` configuration option. #15384
* [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. #14769, #15011
* [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. #14710
* [ENHANCEMENT] Tools: add debug printouts to promtool rules unit testing #15196
* [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694
* [ENHANCEMENT] UI: Many fixes and improvements. #14898, #14899, #14907, #14908, #14912, #14913, #14914, #14931, #14940, #14945, #14946, #14972, #14981, #14982, #14994, #15096
* [ENHANCEMENT] UI: Web UI now displays notifications, e.g. when starting up and shutting down. #15082
* [ENHANCEMENT] PromQL: Introduce exponential interpolation for native histograms. #14677
* [ENHANCEMENT] TSDB: Add support for ingestion of out-of-order native histogram samples. #14850, #14546
* [ENHANCEMENT] Alerts: remove metrics for removed Alertmanagers. #13909
* [ENHANCEMENT] Kubernetes SD: Support sidecar containers in endpoint discovery. #14929
* [ENHANCEMENT] Consul SD: Support catalog filters. #11224
* [ENHANCEMENT] Move AM discovery page from "Monitoring status" to "Server status". #14875
* [PERF] TSDB: Parallelize deletion of postings after head compaction. #14975
* [PERF] TSDB: Chunk encoding: shorten some write sequences. #14932
* [PERF] TSDB: Grow postings by doubling. #14721
* [PERF] Relabeling: Optimize adding a constant label pair. #12180
* [BUGFIX] Scraping: Don't log errors on empty scrapes. #15357
* [BUGFIX] UI: fix selector / series formatting for empty metric names. #15341
* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941
* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941
* [BUGFIX] OTLP receiver: Preserve colons when generating metric names in suffix adding mode (this mode is always enabled, unless one uses Prometheus as a library). #15251
* [BUGFIX] Scraping: Unit was missing when using protobuf format. #15095
* [BUGFIX] PromQL: Only return "possible non-counter" annotation when `rate` returns points. #14910
* [BUGFIX] TSDB: Chunks could have one unnecessary zero byte at the end. #14854
* [BUGFIX] "superfluous response.WriteHeader call" messages in log. #14884
* [BUGFIX] PromQL: Unary negation of native histograms. #14821
* [BUGFIX] PromQL: Handle stale marker in native histogram series (e.g. if series goes away and comes back). #15025
* [BUGFIX] Autoreload: Reload invalid yaml files. #14947
* [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029
## 2.53.3 / 2024-11-04
* [BUGFIX] Scraping: allow multiple samples on same series, with explicit timestamps. #14685, #14740
## 2.53.2 / 2024-08-09
Fix a bug where Prometheus would crash with a segmentation fault if a remote-read
request accessed a block on disk at about the same time as TSDB created a new block.
* [BUGFIX] Remote-Read: Resolve occasional segmentation fault on query. #14515,#14523
## 2.55.1 / 2024-11-04
* [BUGFIX] `round()` function did not remove `__name__` label. #15250
## 2.55.0 / 2024-10-22
* [FEATURE] PromQL: Add experimental `info` function. #14495
* [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727
* [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817
* [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815
* [FEATURE] Scraping: `scrape_failure_log_file` option to log failures to a file. #14734
* [FEATURE] OTLP receiver: Optional promotion of resource attributes to series labels. #14200
* [FEATURE] Remote-Write: Support Google Cloud Monitoring authorization. #14346
* [FEATURE] Promtool: `tsdb create-blocks` new option to add labels. #14403
* [FEATURE] Promtool: `promtool test` adds `--junit` flag to format results. #14506
* [FEATURE] TSDB: Add `delayed-compaction` feature flag, for people running many Prometheus to randomize timing. #12532
* [ENHANCEMENT] OTLP receiver: Warn on exponential histograms with zero count and non-zero sum. #14706
* [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612
* [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379
* [ENHANCEMENT] Remote-Write: Don't reshard if we haven't successfully sent a sample since last update. #14450
* [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477
* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655, #14985
* [ENHANCEMENT] PromQL: Clarify error message logged when Go runtime panic occurs during query evaluation. #14621
* [ENHANCEMENT] PromQL: Use Kahan summation for better accuracy in `avg` and `avg_over_time`. #14413
* [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816
* [ENHANCEMENT] API: Support multiple listening addresses. #14665
* [ENHANCEMENT] TSDB: Backward compatibility with upcoming index v3. #14934
* [PERF] TSDB: Query in-order and out-of-order series together. #14354, #14693, #14714, #14831, #14874, #14948, #15120
* [PERF] TSDB: Streamline reading of overlapping out-of-order head chunks. #14729
* [BUGFIX] PromQL: make sort_by_label stable. #14985
* [BUGFIX] SD: Fix dropping targets (with feature flag `new-service-discovery-manager`). #13147
* [BUGFIX] SD: Stop storing stale targets (with feature flag `new-service-discovery-manager`). #13622
* [BUGFIX] Scraping: exemplars could be dropped in protobuf scraping. #14810
* [BUGFIX] Remote-Write: fix metadata sending for experimental Remote-Write V2. #14766
* [BUGFIX] Remote-Write: Return 4xx not 5xx when timeseries has duplicate label. #14716
* [BUGFIX] Experimental Native Histograms: many fixes for incorrect results, panics, warnings. #14513, #14575, #14598, #14609, #14611, #14771, #14821
* [BUGFIX] TSDB: Only count unknown record types in `record_decode_failures_total` metric. #14042
## 2.54.1 / 2024-08-27
* [BUGFIX] Scraping: allow multiple samples on same series, with explicit timestamps (mixing samples of the same series with and without timestamps is still rejected). #14685
* [BUGFIX] Docker SD: fix crash in `match_first_network` mode when container is reconnected to a new network. #14654
* [BUGFIX] PromQL: fix experimental native histograms getting corrupted due to vector selector bug in range queries. #14538
* [BUGFIX] PromQL: fix experimental native histogram counter reset detection on stale samples. #14514
* [BUGFIX] PromQL: fix native histograms getting corrupted due to vector selector bug in range queries. #14605
## 2.54.0 / 2024-08-09
Release 2.54 brings a release candidate of a major new version of [Remote Write: 2.0](https://prometheus.io/docs/specs/remote_write_spec_2_0/).
This is experimental at this time and may still change.
Remote-write v2 is enabled by default, but can be disabled via feature-flag `web.remote-write-receiver.accepted-protobuf-messages`.
* [CHANGE] Remote-Write: `highest_timestamp_in_seconds` and `queue_highest_sent_timestamp_seconds` metrics now initialized to 0. #14437
* [CHANGE] API: Split warnings from info annotations in API response. #14327
* [FEATURE] Remote-Write: Version 2.0 experimental, plus metadata in WAL via feature flag `metadata-wal-records` (defaults on). #14395,#14427,#14444
* [FEATURE] PromQL: add limitk() and limit_ratio() aggregation operators. #12503
* [ENHANCEMENT] PromQL: Accept underscores in literal numbers, e.g. 1_000_000 for 1 million. #12821
* [ENHANCEMENT] PromQL: float literal numbers and durations are now interchangeable (experimental). Example: `time() - my_timestamp > 10m`. #9138
* [ENHANCEMENT] PromQL: use Kahan summation for sum(). #14074,#14362
* [ENHANCEMENT] PromQL (experimental native histograms): Optimize `histogram_count` and `histogram_sum` functions. #14097
* [ENHANCEMENT] TSDB: Better support for out-of-order experimental native histogram samples. #14438
* [ENHANCEMENT] TSDB: Optimise seek within index. #14393
* [ENHANCEMENT] TSDB: Optimise deletion of stale series. #14307
* [ENHANCEMENT] TSDB: Reduce locking to optimise adding and removing series. #13286,#14286
* [ENHANCEMENT] TSDB: Small optimisation: streamline special handling for out-of-order data. #14396,#14584
* [ENHANCEMENT] Regexps: Optimize patterns with multiple prefixes. #13843,#14368
* [ENHANCEMENT] Regexps: Optimize patterns containing multiple literal strings. #14173
* [ENHANCEMENT] AWS SD: expose Primary IPv6 addresses as __meta_ec2_primary_ipv6_addresses. #14156
* [ENHANCEMENT] Docker SD: add MatchFirstNetwork for containers with multiple networks. #10490
* [ENHANCEMENT] OpenStack SD: Use `flavor.original_name` if available. #14312
* [ENHANCEMENT] UI (experimental native histograms): more accurate representation. #13680,#14430
* [ENHANCEMENT] Agent: `out_of_order_time_window` config option now applies to agent. #14094
* [ENHANCEMENT] Notifier: Send any outstanding Alertmanager notifications when shutting down. #14290
* [ENHANCEMENT] Rules: Add label-matcher support to Rules API. #10194
* [ENHANCEMENT] HTTP API: Add url to message logged on error while sending response. #14209
* [BUGFIX] TSDB: Exclude OOO chunks mapped after compaction starts (introduced by #14396). #14584
* [BUGFIX] CLI: escape `|` characters when generating docs. #14420
* [BUGFIX] PromQL (experimental native histograms): Fix some binary operators between native histogram values. #14454
* [BUGFIX] TSDB: LabelNames API could fail during compaction. #14279
* [BUGFIX] TSDB: Fix rare issue where pending OOO read can be left dangling if creating querier fails. #14341
* [BUGFIX] TSDB: fix check for context cancellation in LabelNamesFor. #14302
* [BUGFIX] Rules: Fix rare panic on reload. #14366
* [BUGFIX] Config: In YAML marshalling, do not output a regexp field if it was never set. #14004
* [BUGFIX] Remote-Write: reject samples with future timestamps. #14304
* [BUGFIX] Remote-Write: Fix data corruption in remote write if max_sample_age is applied. #14078
* [BUGFIX] Notifier: Fix Alertmanager discovery not updating under heavy load. #14174
* [BUGFIX] Regexes: some Unicode characters were not matched by case-insensitive comparison. #14170,#14299
* [BUGFIX] Remote-Read: Resolve occasional segmentation fault on query. #14515
## 2.53.1 / 2024-07-10
Fix a bug which would drop samples in remote-write if the sending flow stalled
@ -35,6 +274,7 @@ This release changes the default for GOGC, the Go runtime control for the trade-
## 2.52.0 / 2024-05-07
* [CHANGE] TSDB: Fix the predicate checking for blocks which are beyond the retention period to include the ones right at the retention boundary. #9633
* [CHANGE] Scrape: Multiple samples (even with different timestamps) are treated as duplicates during one scrape.
* [FEATURE] Kubernetes SD: Add a new metric `prometheus_sd_kubernetes_failures_total` to track failed requests to Kubernetes API. #13554
* [FEATURE] Kubernetes SD: Add node and zone metadata labels when using the endpointslice role. #13935
* [FEATURE] Azure SD/Remote Write: Allow usage of Azure authorization SDK. #13099
@ -48,7 +288,7 @@ This release changes the default for GOGC, the Go runtime control for the trade-
* [ENHANCEMENT] TSDB: Pause regular block compactions if the head needs to be compacted (prioritize head as it increases memory consumption). #13754
* [ENHANCEMENT] Observability: Improved logging during signal handling termination. #13772
* [ENHANCEMENT] Observability: All log lines for drop series use "num_dropped" key consistently. #13823
* [ENHANCEMENT] Observability: Log chunk snapshot and mmaped chunk replay duration during WAL replay. #13838
* [ENHANCEMENT] Observability: Log chunk snapshot and mmapped chunk replay duration during WAL replay. #13838
* [ENHANCEMENT] Observability: Log if the block is being created from WBL during compaction. #13846
* [BUGFIX] PromQL: Fix inaccurate sample number statistic when querying histograms. #13667
* [BUGFIX] PromQL: Fix `histogram_stddev` and `histogram_stdvar` for cases where the histogram has negative buckets. #13852
@ -585,7 +825,7 @@ The binaries published with this release are built with Go1.17.8 to avoid [CVE-2
## 2.33.0 / 2022-01-29
* [CHANGE] PromQL: Promote negative offset and `@` modifer to stable features. #10121
* [CHANGE] PromQL: Promote negative offset and `@` modifier to stable features. #10121
* [CHANGE] Web: Promote remote-write-receiver to stable. #10119
* [FEATURE] Config: Add `stripPort` template function. #10002
* [FEATURE] Promtool: Add cardinality analysis to `check metrics`, enabled by flag `--extended`. #10045
@ -822,7 +1062,7 @@ This vulnerability has been reported by Aaron Devaney from MDSec.
* [ENHANCEMENT] Templating: Enable parsing strings in `humanize` functions. #8682
* [BUGFIX] UI: Provide errors instead of blank page on TSDB Status Page. #8654 #8659
* [BUGFIX] TSDB: Do not panic when writing very large records to the WAL. #8790
* [BUGFIX] TSDB: Avoid panic when mmaped memory is referenced after the file is closed. #8723
* [BUGFIX] TSDB: Avoid panic when mmapped memory is referenced after the file is closed. #8723
* [BUGFIX] Scaleway Discovery: Fix nil pointer dereference. #8737
* [BUGFIX] Consul Discovery: Restart no longer required after config update with no targets. #8766
@ -1748,7 +1988,7 @@ information, read the announcement blog post and migration guide.
## 1.7.0 / 2017-06-06
* [CHANGE] Compress remote storage requests and responses with unframed/raw snappy.
* [CHANGE] Properly ellide secrets in config.
* [CHANGE] Properly elide secrets in config.
* [FEATURE] Add OpenStack service discovery.
* [FEATURE] Add ability to limit Kubernetes service discovery to certain namespaces.
* [FEATURE] Add metric for discovered number of Alertmanagers.

View file

@ -2,27 +2,23 @@ ARG ARCH="amd64"
ARG OS="linux"
FROM quay.io/prometheus/busybox-${OS}-${ARCH}:latest
LABEL maintainer="The Prometheus Authors <prometheus-developers@googlegroups.com>"
LABEL org.opencontainers.image.source="https://github.com/prometheus/prometheus"
ARG ARCH="amd64"
ARG OS="linux"
COPY .build/${OS}-${ARCH}/prometheus /bin/prometheus
COPY .build/${OS}-${ARCH}/promtool /bin/promtool
COPY documentation/examples/prometheus.yml /etc/prometheus/prometheus.yml
COPY console_libraries/ /usr/share/prometheus/console_libraries/
COPY consoles/ /usr/share/prometheus/consoles/
COPY LICENSE /LICENSE
COPY NOTICE /NOTICE
COPY npm_licenses.tar.bz2 /npm_licenses.tar.bz2
WORKDIR /prometheus
RUN ln -s /usr/share/prometheus/console_libraries /usr/share/prometheus/consoles/ /etc/prometheus/ && \
chown -R nobody:nobody /etc/prometheus /prometheus
RUN chown -R nobody:nobody /etc/prometheus /prometheus
USER nobody
EXPOSE 9090
VOLUME [ "/prometheus" ]
ENTRYPOINT [ "/bin/prometheus" ]
CMD [ "--config.file=/etc/prometheus/prometheus.yml", \
"--storage.tsdb.path=/prometheus", \
"--web.console.libraries=/usr/share/prometheus/console_libraries", \
"--web.console.templates=/usr/share/prometheus/consoles" ]
"--storage.tsdb.path=/prometheus" ]

View file

@ -2,7 +2,6 @@
General maintainers:
* Bryan Boreham (bjboreham@gmail.com / @bboreham)
* Levi Harrison (levi@leviharrison.dev / @LeviHarrison)
* Ayoub Mrini (ayoubmrini424@gmail.com / @machine424)
* Julien Pivotto (roidelapluie@prometheus.io / @roidelapluie)
@ -17,9 +16,8 @@ Maintainers for specific parts of the codebase:
George Krajcsovits (<gyorgy.krajcsovits@grafana.com> / @krajorama)
* `storage`
* `remote`: Callum Styan (<callumstyan@gmail.com> / @cstyan), Bartłomiej Płotka (<bwplotka@gmail.com> / @bwplotka), Tom Wilkie (tom.wilkie@gmail.com / @tomwilkie), Nicolás Pazos (<npazosmendez@gmail.com> / @npazosmendez), Alex Greenbank (<alex.greenbank@grafana.com> / @alexgreenbank)
* `otlptranslator`: Arve Knudsen (<arve.knudsen@gmail.com> / @aknuds1), Jesús Vázquez (<jesus.vazquez@grafana.com> / @jesusvazquez)
* `otlptranslator`: Arthur Silva Sens (<arthursens2005@gmail.com> / @ArthurSens), Arve Knudsen (<arve.knudsen@gmail.com> / @aknuds1), Jesús Vázquez (<jesus.vazquez@grafana.com> / @jesusvazquez)
* `tsdb`: Ganesh Vernekar (<ganesh@grafana.com> / @codesome), Bartłomiej Płotka (<bwplotka@gmail.com> / @bwplotka), Jesús Vázquez (<jesus.vazquez@grafana.com> / @jesusvazquez)
* `agent`: Robert Fratto (<robert.fratto@grafana.com> / @rfratto)
* `web`
* `ui`: Julius Volz (<julius.volz@gmail.com> / @juliusv)
* `module`: Augustin Husson (<husson.augustin@gmail.com> / @nexucis)

View file

@ -30,6 +30,11 @@ include Makefile.common
DOCKER_IMAGE_NAME ?= prometheus
# Only build UI if PREBUILT_ASSETS_STATIC_DIR is not set
ifdef PREBUILT_ASSETS_STATIC_DIR
SKIP_UI_BUILD = true
endif
.PHONY: update-npm-deps
update-npm-deps:
@echo ">> updating npm dependencies"
@ -42,13 +47,17 @@ upgrade-npm-deps:
.PHONY: ui-bump-version
ui-bump-version:
version=$$(sed s/2/0/ < VERSION) && ./scripts/ui_release.sh --bump-version "$${version}"
version=$$(./scripts/get_module_version.sh) && ./scripts/ui_release.sh --bump-version "$${version}"
cd web/ui && npm install
git add "./web/ui/package-lock.json" "./**/package.json"
.PHONY: ui-install
ui-install:
cd $(UI_PATH) && npm install
# The old React app has been separated from the npm workspaces setup to avoid
# issues with conflicting dependencies. This is a temporary solution until the
# new Mantine-based UI is fully integrated and the old app can be removed.
cd $(UI_PATH)/react-app && npm install
.PHONY: ui-build
ui-build:
@ -65,10 +74,30 @@ ui-test:
.PHONY: ui-lint
ui-lint:
cd $(UI_PATH) && npm run lint
# The old React app has been separated from the npm workspaces setup to avoid
# issues with conflicting dependencies. This is a temporary solution until the
# new Mantine-based UI is fully integrated and the old app can be removed.
cd $(UI_PATH)/react-app && npm run lint
.PHONY: assets
ifndef SKIP_UI_BUILD
assets: ui-install ui-build
.PHONY: npm_licenses
npm_licenses: ui-install
@echo ">> bundling npm licenses"
rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses
ln -s . npm_licenses
find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=-
rm -f npm_licenses
else
assets:
@echo '>> skipping assets build, pre-built assets provided'
npm_licenses:
@echo '>> skipping assets npm licenses, pre-built assets provided'
endif
.PHONY: assets-compress
assets-compress: assets
@echo '>> compressing assets'
@ -117,14 +146,6 @@ else
test: check-generated-parser common-test ui-build-module ui-test ui-lint check-go-mod-version
endif
.PHONY: npm_licenses
npm_licenses: ui-install
@echo ">> bundling npm licenses"
rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses
ln -s . npm_licenses
find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=-
rm -f npm_licenses
.PHONY: tarball
tarball: npm_licenses common-tarball

View file

@ -61,7 +61,7 @@ PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_
SKIP_GOLANGCI_LINT :=
GOLANGCI_LINT :=
GOLANGCI_LINT_OPTS ?=
GOLANGCI_LINT_VERSION ?= v1.59.1
GOLANGCI_LINT_VERSION ?= v1.63.4
# golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64.
# windows isn't included here because of the path separator being different.
ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))
@ -275,3 +275,9 @@ $(1)_precheck:
exit 1; \
fi
endef
govulncheck: install-govulncheck
govulncheck ./...
install-govulncheck:
command -v govulncheck > /dev/null || go install golang.org/x/vuln/cmd/govulncheck@latest

View file

@ -12,9 +12,10 @@ examples and guides.</p>
[![Docker Pulls](https://img.shields.io/docker/pulls/prom/prometheus.svg?maxAge=604800)][hub]
[![Go Report Card](https://goreportcard.com/badge/github.com/prometheus/prometheus)](https://goreportcard.com/report/github.com/prometheus/prometheus)
[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/486/badge)](https://bestpractices.coreinfrastructure.org/projects/486)
[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/prometheus/prometheus/badge)](https://securityscorecards.dev/viewer/?uri=github.com/prometheus/prometheus)
[![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/prometheus/badge)](https://clomonitor.io/projects/cncf/prometheus)
[![Gitpod ready-to-code](https://img.shields.io/badge/Gitpod-ready--to--code-blue?logo=gitpod)](https://gitpod.io/#https://github.com/prometheus/prometheus)
[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/prometheus.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:prometheus)
[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/prometheus/prometheus/badge)](https://securityscorecards.dev/viewer/?uri=github.com/prometheus/prometheus)
</div>
@ -66,9 +67,9 @@ Prometheus will now be reachable at <http://localhost:9090/>.
To build Prometheus from source code, you need:
* Go [version 1.17 or greater](https://golang.org/doc/install).
* NodeJS [version 16 or greater](https://nodejs.org/).
* npm [version 7 or greater](https://www.npmjs.com/).
* Go [version 1.22 or greater](https://golang.org/doc/install).
* NodeJS [version 22 or greater](https://nodejs.org/).
* npm [version 8 or greater](https://www.npmjs.com/).
Start by cloning the repository:
@ -114,7 +115,7 @@ The Makefile provides several targets:
Prometheus is bundled with many service discovery plugins.
When building Prometheus from source, you can edit the [plugins.yml](./plugins.yml)
file to disable some service discoveries. The file is a yaml-formated list of go
file to disable some service discoveries. The file is a yaml-formatted list of go
import paths that will be built into the Prometheus binary.
After you have changed the file, you
@ -157,8 +158,19 @@ This is experimental.
### Prometheus code base
In order to comply with [go mod](https://go.dev/ref/mod#versions) rules,
Prometheus release number do not exactly match Go module releases. For the
Prometheus v2.y.z releases, we are publishing equivalent v0.y.z tags.
Prometheus release numbers do not exactly match Go module releases.
For the
Prometheus v3.y.z releases, we are publishing equivalent v0.3y.z tags. The y in v0.3y.z is always padded to two digits, with a leading zero if needed.
Therefore, a user that would want to use Prometheus v3.0.0 as a library could do:
```shell
go get github.com/prometheus/prometheus@v0.300.0
```
For the
Prometheus v2.y.z releases, we published the equivalent v0.y.z tags.
Therefore, a user that would want to use Prometheus v2.35.0 as a library could do:
@ -176,7 +188,7 @@ For more information on building, running, and developing on the React-based UI,
## More information
* Godoc documentation is available via [pkg.go.dev](https://pkg.go.dev/github.com/prometheus/prometheus). Due to peculiarities of Go Modules, v2.x.y will be displayed as v0.x.y.
* Godoc documentation is available via [pkg.go.dev](https://pkg.go.dev/github.com/prometheus/prometheus). Due to peculiarities of Go Modules, v3.y.z will be displayed as v0.3y.z (the y in v0.3y.z is always padded to two digits, with a leading zero if needed), while v2.y.z will be displayed as v0.y.z.
* See the [Community page](https://prometheus.io/community) for how to reach the Prometheus developers and users on various communication channels.
## Contributing

View file

@ -5,60 +5,15 @@ This page describes the release process and the currently planned schedule for u
## Release schedule
Release cadence of first pre-releases being cut is 6 weeks.
Please see [the v2.55 RELEASE.md](https://github.com/prometheus/prometheus/blob/release-2.55/RELEASE.md) for the v2 release series schedule.
| release series | date of first pre-release (year-month-day) | release shepherd |
|----------------|--------------------------------------------|---------------------------------------------|
| v2.4 | 2018-09-06 | Goutham Veeramachaneni (GitHub: @gouthamve) |
| v2.5 | 2018-10-24 | Frederic Branczyk (GitHub: @brancz) |
| v2.6 | 2018-12-05 | Simon Pasquier (GitHub: @simonpasquier) |
| v2.7 | 2019-01-16 | Goutham Veeramachaneni (GitHub: @gouthamve) |
| v2.8 | 2019-02-27 | Ganesh Vernekar (GitHub: @codesome) |
| v2.9 | 2019-04-10 | Brian Brazil (GitHub: @brian-brazil) |
| v2.10 | 2019-05-22 | Björn Rabenstein (GitHub: @beorn7) |
| v2.11 | 2019-07-03 | Frederic Branczyk (GitHub: @brancz) |
| v2.12 | 2019-08-14 | Julius Volz (GitHub: @juliusv) |
| v2.13 | 2019-09-25 | Krasi Georgiev (GitHub: @krasi-georgiev) |
| v2.14 | 2019-11-06 | Chris Marchbanks (GitHub: @csmarchbanks) |
| v2.15 | 2019-12-18 | Bartek Plotka (GitHub: @bwplotka) |
| v2.16 | 2020-01-29 | Callum Styan (GitHub: @cstyan) |
| v2.17 | 2020-03-11 | Julien Pivotto (GitHub: @roidelapluie) |
| v2.18 | 2020-04-22 | Bartek Plotka (GitHub: @bwplotka) |
| v2.19 | 2020-06-03 | Ganesh Vernekar (GitHub: @codesome) |
| v2.20 | 2020-07-15 | Björn Rabenstein (GitHub: @beorn7) |
| v2.21 | 2020-08-26 | Julien Pivotto (GitHub: @roidelapluie) |
| v2.22 | 2020-10-07 | Frederic Branczyk (GitHub: @brancz) |
| v2.23 | 2020-11-18 | Ganesh Vernekar (GitHub: @codesome) |
| v2.24 | 2020-12-30 | Björn Rabenstein (GitHub: @beorn7) |
| v2.25 | 2021-02-10 | Julien Pivotto (GitHub: @roidelapluie) |
| v2.26 | 2021-03-24 | Bartek Plotka (GitHub: @bwplotka) |
| v2.27 | 2021-05-05 | Chris Marchbanks (GitHub: @csmarchbanks) |
| v2.28 | 2021-06-16 | Julius Volz (GitHub: @juliusv) |
| v2.29 | 2021-07-28 | Frederic Branczyk (GitHub: @brancz) |
| v2.30 | 2021-09-08 | Ganesh Vernekar (GitHub: @codesome) |
| v2.31 | 2021-10-20 | Julien Pivotto (GitHub: @roidelapluie) |
| v2.32 | 2021-12-01 | Julius Volz (GitHub: @juliusv) |
| v2.33 | 2022-01-12 | Björn Rabenstein (GitHub: @beorn7) |
| v2.34 | 2022-02-23 | Chris Marchbanks (GitHub: @csmarchbanks) |
| v2.35 | 2022-04-06 | Augustin Husson (GitHub: @nexucis) |
| v2.36 | 2022-05-18 | Matthias Loibl (GitHub: @metalmatze) |
| v2.37 LTS | 2022-06-29 | Julien Pivotto (GitHub: @roidelapluie) |
| v2.38 | 2022-08-10 | Julius Volz (GitHub: @juliusv) |
| v2.39 | 2022-09-21 | Ganesh Vernekar (GitHub: @codesome) |
| v2.40 | 2022-11-02 | Ganesh Vernekar (GitHub: @codesome) |
| v2.41 | 2022-12-14 | Julien Pivotto (GitHub: @roidelapluie) |
| v2.42 | 2023-01-25 | Kemal Akkoyun (GitHub: @kakkoyun) |
| v2.43 | 2023-03-08 | Julien Pivotto (GitHub: @roidelapluie) |
| v2.44 | 2023-04-19 | Bryan Boreham (GitHub: @bboreham) |
| v2.45 LTS | 2023-05-31 | Jesus Vazquez (Github: @jesusvazquez) |
| v2.46 | 2023-07-12 | Julien Pivotto (GitHub: @roidelapluie) |
| v2.47 | 2023-08-23 | Bryan Boreham (GitHub: @bboreham) |
| v2.48 | 2023-10-04 | Levi Harrison (GitHub: @LeviHarrison) |
| v2.49 | 2023-12-05 | Bartek Plotka (GitHub: @bwplotka) |
| v2.50 | 2024-01-16 | Augustin Husson (GitHub: @nexucis) |
| v2.51 | 2024-03-07 | Bryan Boreham (GitHub: @bboreham) |
| v2.52 | 2024-04-22 | Arthur Silva Sens (GitHub: @ArthurSens) |
| v2.53 LTS | 2024-06-03 | George Krajcsovits (GitHub: @krajorama) |
| v2.54 | 2024-07-17 | Bryan Boreham (GitHub: @bboreham) |
| release series | date of first pre-release (year-month-day) | release shepherd |
|----------------|--------------------------------------------|-----------------------------------|
| v3.0 | 2024-11-14 | Jan Fajerski (GitHub: @jan--f) |
| v3.1 | 2024-12-17 | Bryan Boreham (GitHub: @bboreham) |
| v3.2 | 2025-01-28 | Jan Fajerski (GitHub: @jan--f) |
| v3.3 | 2025-03-11 | Ayoub Mrini (GitHub: @machine424) |
| v3.4 | 2025-04-22 | **volunteer welcome** |
If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.
@ -187,7 +142,7 @@ the Prometheus server, we use major version zero releases for the libraries.
Tag the new library release via the following commands:
```bash
tag="v$(sed s/2/0/ < VERSION)"
tag="v$(./scripts/get_module_version.sh)"
git tag -s "${tag}" -m "${tag}"
git push origin "${tag}"
```
@ -203,7 +158,7 @@ Then release with `git tag-release`.
Signing a tag with a GPG key is appreciated, but in case you can't add a GPG key to your GitHub account using the following [procedure](https://help.github.com/articles/generating-a-gpg-key/), you can replace the `-s` flag by `-a` flag of the `git tag` command to only annotate the tag without signing.
Once a tag is created, the release process through CircleCI will be triggered for this tag and Circle CI will draft the GitHub release using the `prombot` account.
Once a tag is created, the release process through GitHub Actions will be triggered for this tag and GitHub Actions will draft the GitHub release using the `prombot` account.
Finally, wait for the build step for the tag to finish. The point here is to wait for tarballs to be uploaded to the GitHub release and the container images to be pushed to the Docker Hub and Quay.io. Once that has happened, click _Publish release_, which will make the release publicly visible and create a GitHub notification.
**Note:** for a release candidate version ensure the _This is a pre-release_ box is checked when drafting the release in the GitHub UI. The CI job should take care of this but it's a good idea to double check before clicking _Publish release_.

48
SECURITY-INSIGHTS.yml Normal file
View file

@ -0,0 +1,48 @@
header:
schema-version: '1.0.0'
expiration-date: '2025-07-30T01:00:00.000Z'
last-updated: '2024-07-30'
last-reviewed: '2024-07-30'
project-url: https://github.com/prometheus/prometheus
changelog: https://github.com/prometheus/prometheus/blob/main/CHANGELOG.md
license: https://github.com/prometheus/prometheus/blob/main/LICENSE
project-lifecycle:
status: active
bug-fixes-only: false
core-maintainers:
- https://github.com/prometheus/prometheus/blob/main/MAINTAINERS.md
contribution-policy:
accepts-pull-requests: true
accepts-automated-pull-requests: true
dependencies:
third-party-packages: true
dependencies-lists:
- https://github.com/prometheus/prometheus/blob/main/go.mod
- https://github.com/prometheus/prometheus/blob/main/web/ui/package.json
env-dependencies-policy:
policy-url: https://github.com/prometheus/prometheus/blob/main/CONTRIBUTING.md#dependency-management
distribution-points:
- https://github.com/prometheus/prometheus/releases
documentation:
- https://prometheus.io/docs/introduction/overview/
security-contacts:
- type: email
value: prometheus-team@googlegroups.com
security-testing:
- tool-type: sca
tool-name: Dependabot
tool-version: latest
integration:
ad-hoc: false
ci: true
before-release: true
- tool-type: sast
tool-name: CodeQL
tool-version: latest
integration:
ad-hoc: false
ci: true
before-release: true
vulnerability-reporting:
accepts-vulnerability-reports: true
security-policy: https://github.com/prometheus/prometheus/security/policy

View file

@ -1 +1 @@
2.53.1
3.2.1

File diff suppressed because it is too large Load diff

View file

@ -31,9 +31,9 @@ import (
"time"
"github.com/alecthomas/kingpin/v2"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/config"
@ -42,6 +42,11 @@ import (
"github.com/prometheus/prometheus/rules"
)
// init sets the global model.NameValidationScheme to model.UTF8Validation
// when this package is initialized.
func init() {
// This can be removed when the default validation scheme in common is updated.
model.NameValidationScheme = model.UTF8Validation
}
const startupTime = 10 * time.Second
var (
@ -120,6 +125,7 @@ func TestFailedStartupExitCode(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
t.Parallel()
fakeInputFile := "fake-input-file"
expectedExitStatus := 2
@ -206,83 +212,125 @@ func TestWALSegmentSizeBounds(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
t.Parallel()
for size, expectedExitStatus := range map[string]int{"9MB": 1, "257MB": 1, "10": 2, "1GB": 1, "12MB": 0} {
prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"))
for _, tc := range []struct {
size string
exitCode int
}{
{
size: "9MB",
exitCode: 1,
},
{
size: "257MB",
exitCode: 1,
},
{
size: "10",
exitCode: 2,
},
{
size: "1GB",
exitCode: 1,
},
{
size: "12MB",
exitCode: 0,
},
} {
t.Run(tc.size, func(t *testing.T) {
t.Parallel()
prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"))
// Log stderr in case of failure.
stderr, err := prom.StderrPipe()
require.NoError(t, err)
go func() {
slurp, _ := io.ReadAll(stderr)
t.Log(string(slurp))
}()
// Log stderr in case of failure.
stderr, err := prom.StderrPipe()
require.NoError(t, err)
go func() {
slurp, _ := io.ReadAll(stderr)
t.Log(string(slurp))
}()
err = prom.Start()
require.NoError(t, err)
err = prom.Start()
require.NoError(t, err)
if expectedExitStatus == 0 {
done := make(chan error, 1)
go func() { done <- prom.Wait() }()
select {
case err := <-done:
require.Fail(t, "prometheus should be still running: %v", err)
case <-time.After(startupTime):
prom.Process.Kill()
<-done
if tc.exitCode == 0 {
done := make(chan error, 1)
go func() { done <- prom.Wait() }()
select {
case err := <-done:
require.Fail(t, "prometheus should be still running: %v", err)
case <-time.After(startupTime):
prom.Process.Kill()
<-done
}
return
}
continue
}
err = prom.Wait()
require.Error(t, err)
var exitError *exec.ExitError
require.ErrorAs(t, err, &exitError)
status := exitError.Sys().(syscall.WaitStatus)
require.Equal(t, expectedExitStatus, status.ExitStatus())
err = prom.Wait()
require.Error(t, err)
var exitError *exec.ExitError
require.ErrorAs(t, err, &exitError)
status := exitError.Sys().(syscall.WaitStatus)
require.Equal(t, tc.exitCode, status.ExitStatus())
})
}
}
func TestMaxBlockChunkSegmentSizeBounds(t *testing.T) {
t.Parallel()
if testing.Short() {
t.Skip("skipping test in short mode.")
}
t.Parallel()
for size, expectedExitStatus := range map[string]int{"512KB": 1, "1MB": 0} {
prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"))
for _, tc := range []struct {
size string
exitCode int
}{
{
size: "512KB",
exitCode: 1,
},
{
size: "1MB",
exitCode: 0,
},
} {
t.Run(tc.size, func(t *testing.T) {
t.Parallel()
prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"))
// Log stderr in case of failure.
stderr, err := prom.StderrPipe()
require.NoError(t, err)
go func() {
slurp, _ := io.ReadAll(stderr)
t.Log(string(slurp))
}()
// Log stderr in case of failure.
stderr, err := prom.StderrPipe()
require.NoError(t, err)
go func() {
slurp, _ := io.ReadAll(stderr)
t.Log(string(slurp))
}()
err = prom.Start()
require.NoError(t, err)
err = prom.Start()
require.NoError(t, err)
if expectedExitStatus == 0 {
done := make(chan error, 1)
go func() { done <- prom.Wait() }()
select {
case err := <-done:
require.Fail(t, "prometheus should be still running: %v", err)
case <-time.After(startupTime):
prom.Process.Kill()
<-done
if tc.exitCode == 0 {
done := make(chan error, 1)
go func() { done <- prom.Wait() }()
select {
case err := <-done:
require.Fail(t, "prometheus should be still running: %v", err)
case <-time.After(startupTime):
prom.Process.Kill()
<-done
}
return
}
continue
}
err = prom.Wait()
require.Error(t, err)
var exitError *exec.ExitError
require.ErrorAs(t, err, &exitError)
status := exitError.Sys().(syscall.WaitStatus)
require.Equal(t, expectedExitStatus, status.ExitStatus())
err = prom.Wait()
require.Error(t, err)
var exitError *exec.ExitError
require.ErrorAs(t, err, &exitError)
status := exitError.Sys().(syscall.WaitStatus)
require.Equal(t, tc.exitCode, status.ExitStatus())
})
}
}
@ -290,7 +338,7 @@ func TestTimeMetrics(t *testing.T) {
tmpDir := t.TempDir()
reg := prometheus.NewRegistry()
db, err := openDBWithMetrics(tmpDir, log.NewNopLogger(), reg, nil, nil)
db, err := openDBWithMetrics(tmpDir, promslog.NewNopLogger(), reg, nil, nil)
require.NoError(t, err)
defer func() {
require.NoError(t, db.Close())
@ -348,7 +396,9 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames
}
func TestAgentSuccessfulStartup(t *testing.T) {
prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig)
t.Parallel()
prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig)
require.NoError(t, prom.Start())
actualExitStatus := 0
@ -366,7 +416,9 @@ func TestAgentSuccessfulStartup(t *testing.T) {
}
func TestAgentFailedStartupWithServerFlag(t *testing.T) {
prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
t.Parallel()
prom := exec.Command(promPath, "-test.main", "--agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
output := bytes.Buffer{}
prom.Stderr = &output
@ -393,7 +445,9 @@ func TestAgentFailedStartupWithServerFlag(t *testing.T) {
}
func TestAgentFailedStartupWithInvalidConfig(t *testing.T) {
prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
t.Parallel()
prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
require.NoError(t, prom.Start())
actualExitStatus := 0
@ -414,6 +468,7 @@ func TestModeSpecificFlags(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
t.Parallel()
testcases := []struct {
mode string
@ -428,10 +483,11 @@ func TestModeSpecificFlags(t *testing.T) {
for _, tc := range testcases {
t.Run(fmt.Sprintf("%s mode with option %s", tc.mode, tc.arg), func(t *testing.T) {
t.Parallel()
args := []string{"-test.main", tc.arg, t.TempDir(), "--web.listen-address=0.0.0.0:0"}
if tc.mode == "agent" {
args = append(args, "--enable-feature=agent", "--config.file="+agentConfig)
args = append(args, "--agent", "--config.file="+agentConfig)
} else {
args = append(args, "--config.file="+promConfig)
}
@ -479,6 +535,8 @@ func TestDocumentation(t *testing.T) {
if runtime.GOOS == "windows" {
t.SkipNow()
}
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
@ -503,6 +561,8 @@ func TestDocumentation(t *testing.T) {
}
func TestRwProtoMsgFlagParser(t *testing.T) {
t.Parallel()
defaultOpts := config.RemoteWriteProtoMsgs{
config.RemoteWriteProtoMsgV1, config.RemoteWriteProtoMsgV2,
}

View file

@ -34,6 +34,7 @@ func TestStartupInterrupt(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
t.Parallel()
port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t))

View file

@ -125,12 +125,61 @@ func (p *queryLogTest) query(t *testing.T) {
require.NoError(t, err)
require.Equal(t, 200, r.StatusCode)
case ruleOrigin:
time.Sleep(2 * time.Second)
// Poll the /api/v1/rules endpoint until a new rule evaluation is detected.
var lastEvalTime time.Time
for {
r, err := http.Get(fmt.Sprintf("http://%s:%d/api/v1/rules", p.host, p.port))
require.NoError(t, err)
rulesBody, err := io.ReadAll(r.Body)
require.NoError(t, err)
defer r.Body.Close()
// Parse the rules response to find the last evaluation time.
newEvalTime := parseLastEvaluation(rulesBody)
if newEvalTime.After(lastEvalTime) {
if !lastEvalTime.IsZero() {
break
}
lastEvalTime = newEvalTime
}
time.Sleep(100 * time.Millisecond)
}
default:
panic("can't query this origin")
}
}
// parseLastEvaluation decodes a /api/v1/rules response body and returns the
// lastEvaluation timestamp of the first rule (in group order, then rule order)
// whose value parses as RFC 3339 with optional nanoseconds.
//
// The zero time.Time is returned when the body is not valid JSON for the
// expected shape or when no rule carries a parseable timestamp.
func parseLastEvaluation(rulesBody []byte) time.Time {
	type ruleEntry struct {
		LastEvaluation string `json:"lastEvaluation"`
	}
	type groupEntry struct {
		Rules []ruleEntry `json:"rules"`
	}
	var payload struct {
		Status string `json:"status"`
		Data   struct {
			Groups []groupEntry `json:"groups"`
		} `json:"data"`
	}

	if json.Unmarshal(rulesBody, &payload) != nil {
		return time.Time{}
	}

	for _, g := range payload.Data.Groups {
		for _, r := range g.Rules {
			ts, parseErr := time.Parse(time.RFC3339Nano, r.LastEvaluation)
			if parseErr != nil {
				// Unparseable (e.g. empty) timestamps are skipped, not fatal.
				continue
			}
			return ts
		}
	}
	return time.Time{}
}
// queryString returns the expected queryString of a this test.
func (p *queryLogTest) queryString() string {
switch p.origin {
@ -322,7 +371,7 @@ func (p *queryLogTest) run(t *testing.T) {
if p.exactQueryCount() {
require.Len(t, ql, qc)
} else {
require.Greater(t, len(ql), qc, "no queries logged")
require.GreaterOrEqual(t, len(ql), qc, "no queries logged")
}
p.validateLastQuery(t, ql)
qc = len(ql)
@ -353,7 +402,7 @@ func (p *queryLogTest) run(t *testing.T) {
if p.exactQueryCount() {
require.Len(t, ql, qc)
} else {
require.Greater(t, len(ql), qc, "no queries logged")
require.GreaterOrEqual(t, len(ql), qc, "no queries logged")
}
p.validateLastQuery(t, ql)
@ -393,6 +442,7 @@ func readQueryLog(t *testing.T, path string) []queryLogLine {
file, err := os.Open(path)
require.NoError(t, err)
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
var q queryLogLine
@ -406,6 +456,7 @@ func TestQueryLog(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
t.Parallel()
cwd, err := os.Getwd()
require.NoError(t, err)
@ -424,6 +475,7 @@ func TestQueryLog(t *testing.T) {
}
t.Run(p.String(), func(t *testing.T) {
t.Parallel()
p.run(t)
})
}

View file

@ -0,0 +1,229 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"bufio"
"encoding/json"
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"sync"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/util/testutil"
)
const configReloadMetric = "prometheus_config_last_reload_successful"
// TestAutoReloadConfig_ValidToValid rewrites the config file with a sequence
// of valid configurations (30s -> 15s -> 30s scrape interval) and hands the
// steps to runTestSteps, which checks after each rewrite that the served
// config reflects the new interval and that the reload-success metric is 1.
func TestAutoReloadConfig_ValidToValid(t *testing.T) {
// Each step is one config file revision plus the state expected after
// Prometheus has picked it up.
steps := []struct {
configText string
expectedInterval string
expectedMetric float64
}{
// Initial configuration.
{
configText: `
global:
scrape_interval: 30s
`,
expectedInterval: "30s",
expectedMetric: 1,
},
// Change the interval to a different valid value.
{
configText: `
global:
scrape_interval: 15s
`,
expectedInterval: "15s",
expectedMetric: 1,
},
// Change it back to the initial value.
{
configText: `
global:
scrape_interval: 30s
`,
expectedInterval: "30s",
expectedMetric: 1,
},
}
runTestSteps(t, steps)
}
// TestAutoReloadConfig_ValidToInvalidToValid rewrites the config file with a
// valid, then an invalid, then again a valid configuration and hands the
// steps to runTestSteps. For the invalid step the expectations are that the
// previously loaded 30s interval is still served and that the reload-success
// metric reads 0.
func TestAutoReloadConfig_ValidToInvalidToValid(t *testing.T) {
// Each step is one config file revision plus the state expected after
// Prometheus has attempted to load it.
steps := []struct {
configText string
expectedInterval string
expectedMetric float64
}{
// Initial, valid configuration.
{
configText: `
global:
scrape_interval: 30s
`,
expectedInterval: "30s",
expectedMetric: 1,
},
// Invalid configuration: the expected interval stays at the value from
// the previous step and the metric is expected to be 0.
{
configText: `
global:
scrape_interval: 15s
invalid_syntax
`,
expectedInterval: "30s",
expectedMetric: 0,
},
// Valid configuration again; the metric is expected to return to 1.
{
configText: `
global:
scrape_interval: 30s
`,
expectedInterval: "30s",
expectedMetric: 1,
},
}
runTestSteps(t, steps)
}
// runTestSteps drives one auto-reload scenario.
//
// It writes the first step's config into a fresh temporary directory, starts
// Prometheus against it on a random unprivileged port, and waits up to 5s for
// /-/ready to answer 200. It then walks through every step (including the
// first): the config file is overwritten with the step's text and, for up to
// 10s, the served scrape interval and the reload-success metric are polled
// until both match the step's expectations.
func runTestSteps(t *testing.T, steps []struct {
	configText       string
	expectedInterval string
	expectedMetric   float64
},
) {
	configFilePath := filepath.Join(t.TempDir(), "prometheus.yml")
	t.Logf("Config file path: %s", configFilePath)

	initialConfig := []byte(steps[0].configText)
	require.NoError(t, os.WriteFile(configFilePath, initialConfig, 0o644), "Failed to write initial config file")

	port := testutil.RandomUnprivilegedPort(t)
	runPrometheusWithLogging(t, configFilePath, port)

	baseURL := "http://localhost:" + strconv.Itoa(port)

	// isReady reports whether the readiness endpoint currently answers 200.
	isReady := func() bool {
		resp, err := http.Get(baseURL + "/-/ready")
		if err != nil {
			return false
		}
		defer resp.Body.Close()
		return resp.StatusCode == http.StatusOK
	}
	require.Eventually(t, isReady, 5*time.Second, 100*time.Millisecond, "Prometheus didn't become ready in time")

	for idx := 0; idx < len(steps); idx++ {
		current := steps[idx]
		t.Logf("Step %d", idx)

		require.NoError(t, os.WriteFile(configFilePath, []byte(current.configText), 0o644), "Failed to write config file for step")

		// reloaded reports whether both observable effects of the step are visible.
		reloaded := func() bool {
			if !verifyScrapeInterval(t, baseURL, current.expectedInterval) {
				return false
			}
			return verifyConfigReloadMetric(t, baseURL, current.expectedMetric)
		}
		require.Eventually(t, reloaded, 10*time.Second, 500*time.Millisecond, "Prometheus config reload didn't happen in time")
	}
}
// verifyScrapeInterval reports whether the configuration served at
// baseURL + "/api/v1/status/config" contains the line fragment
// "scrape_interval: <expectedInterval>".
//
// It is called from inside a require.Eventually condition, which is executed
// on a goroutine other than the one running the test. Calling require.* (and
// thus t.FailNow) from there is not allowed by the testing package, so any
// request, read or decode error is logged and reported as "not yet" (false),
// letting the caller poll again until its own timeout expires.
func verifyScrapeInterval(t *testing.T, baseURL, expectedInterval string) bool {
	resp, err := http.Get(baseURL + "/api/v1/status/config")
	if err != nil {
		t.Logf("fetching config status: %v", err)
		return false
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		t.Logf("reading config status response: %v", err)
		return false
	}

	// Only the YAML rendering of the loaded config is needed from the reply.
	config := struct {
		Data struct {
			YAML string `json:"yaml"`
		} `json:"data"`
	}{}
	if err := json.Unmarshal(body, &config); err != nil {
		t.Logf("decoding config status response: %v", err)
		return false
	}

	return strings.Contains(config.Data.YAML, "scrape_interval: "+expectedInterval)
}
func verifyConfigReloadMetric(t *testing.T, baseURL string, expectedValue float64) bool {
resp, err := http.Get(baseURL + "/metrics")
require.NoError(t, err)
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
require.NoError(t, err)
lines := string(body)
var actualValue float64
found := false
for _, line := range strings.Split(lines, "\n") {
if strings.HasPrefix(line, configReloadMetric) {
parts := strings.Fields(line)
if len(parts) >= 2 {
actualValue, err = strconv.ParseFloat(parts[1], 64)
require.NoError(t, err)
found = true
break
}
}
}
return found && actualValue == expectedValue
}
// captureLogsToTLog forwards everything read from r to the test log, one
// t.Log call per line, until r is exhausted. A scanner error encountered while
// reading is itself written to the test log rather than failing the test.
func captureLogsToTLog(t *testing.T, r io.Reader) {
	lines := bufio.NewScanner(r)
	for lines.Scan() {
		t.Log(lines.Text())
	}

	err := lines.Err()
	if err == nil {
		return
	}
	t.Logf("Error reading logs: %v", err)
}
// runPrometheusWithLogging starts the Prometheus test binary with the
// auto-reload-config feature enabled (1s reload interval), using
// configFilePath as its config file and listening on the given port. The
// process's stdout and stderr are streamed line by line into the test log.
//
// A cleanup is registered on t that kills the process, waits for it, closes
// the log pipes and waits for both log-forwarding goroutines to finish. The
// test fails immediately if the process cannot be started.
func runPrometheusWithLogging(t *testing.T, configFilePath string, port int) {
	stdoutPipe, stdoutWriter := io.Pipe()
	stderrPipe, stderrWriter := io.Pipe()

	var wg sync.WaitGroup
	wg.Add(2)

	prom := exec.Command(promPath, "-test.main", "--enable-feature=auto-reload-config", "--config.file="+configFilePath, "--config.auto-reload-interval=1s", "--web.listen-address=0.0.0.0:"+strconv.Itoa(port))
	prom.Stdout = stdoutWriter
	prom.Stderr = stderrWriter

	go func() {
		defer wg.Done()
		captureLogsToTLog(t, stdoutPipe)
	}()
	go func() {
		defer wg.Done()
		captureLogsToTLog(t, stderrPipe)
	}()

	// The cleanup is registered before Start so that the pipes and goroutines
	// are released even when Start fails.
	t.Cleanup(func() {
		// prom.Process is only set once Start has succeeded; guarding on it
		// avoids a nil-pointer panic in the cleanup when Start failed.
		if prom.Process != nil {
			prom.Process.Kill()
			prom.Wait()
		}
		// Closing the writers ends the scanners in the forwarding goroutines.
		stdoutWriter.Close()
		stderrWriter.Close()
		wg.Wait()
	})

	require.NoError(t, prom.Start())
}

View file

@ -0,0 +1,193 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"bytes"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"os"
"os/exec"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/require"
"go.uber.org/atomic"
"github.com/prometheus/prometheus/util/testutil"
)
// TestScrapeFailureLogFile checks the scrape_failure_log_file setting across
// configuration reloads, scraping a mock target that always answers HTTP 500:
//
//  1. With the setting in the global section, failures are written to the file.
//  2. After the setting is removed and the config reloaded, the file no longer
//     grows.
//  3. After the setting is re-added under the scrape config and the config
//     reloaded, the file grows again.
//
// The test is skipped in short mode.
func TestScrapeFailureLogFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
// Tracks the number of requests made to the mock server.
var requestCount atomic.Int32
// Starts a server that always returns HTTP 500 errors.
mockServerAddress := startGarbageServer(t, &requestCount)
// Create a temporary directory for Prometheus configuration and logs.
tempDir := t.TempDir()
// Define file paths for the scrape failure log and Prometheus configuration.
// Like other files, the scrape failure log file should be relative to the
// config file. Therefore, we split the name we put in the file and the full
// path used to check the content of the file.
scrapeFailureLogFileName := "scrape_failure.log"
scrapeFailureLogFile := filepath.Join(tempDir, scrapeFailureLogFileName)
promConfigFile := filepath.Join(tempDir, "prometheus.yml")
// Step 1: Set up an initial Prometheus configuration that globally
// specifies a scrape failure log file.
promConfig := fmt.Sprintf(`
global:
scrape_interval: 500ms
scrape_failure_log_file: %s
scrape_configs:
- job_name: 'test_job'
static_configs:
- targets: ['%s']
`, scrapeFailureLogFileName, mockServerAddress)
err := os.WriteFile(promConfigFile, []byte(promConfig), 0o644)
require.NoError(t, err, "Failed to write Prometheus configuration file")
// Start Prometheus with the generated configuration and a random port, enabling the lifecycle API.
port := testutil.RandomUnprivilegedPort(t)
params := []string{
"-test.main",
"--config.file=" + promConfigFile,
"--storage.tsdb.path=" + filepath.Join(tempDir, "data"),
fmt.Sprintf("--web.listen-address=127.0.0.1:%d", port),
"--web.enable-lifecycle",
}
prometheusProcess := exec.Command(promPath, params...)
prometheusProcess.Stdout = os.Stdout
prometheusProcess.Stderr = os.Stderr
err = prometheusProcess.Start()
require.NoError(t, err, "Failed to start Prometheus")
// NOTE(review): the process is killed on return but never waited for;
// confirm whether a Wait is needed before the temp dir is removed.
defer prometheusProcess.Process.Kill()
// Wait until the mock server receives at least two requests from Prometheus.
require.Eventually(t, func() bool {
return requestCount.Load() >= 2
}, 30*time.Second, 500*time.Millisecond, "Expected at least two requests to the mock server")
// Verify that the scrape failures have been logged to the specified file.
content, err := os.ReadFile(scrapeFailureLogFile)
require.NoError(t, err, "Failed to read scrape failure log")
require.Contains(t, string(content), "server returned HTTP status 500 Internal Server Error", "Expected scrape failure log entry not found")
// Step 2: Update the Prometheus configuration to remove the scrape failure
// log file setting.
promConfig = fmt.Sprintf(`
global:
scrape_interval: 1s
scrape_configs:
- job_name: 'test_job'
static_configs:
- targets: ['%s']
`, mockServerAddress)
err = os.WriteFile(promConfigFile, []byte(promConfig), 0o644)
require.NoError(t, err, "Failed to update Prometheus configuration file")
// Reload Prometheus with the updated configuration.
reloadPrometheus(t, port)
// Count the number of lines in the scrape failure log file before any
// further requests.
preReloadLogLineCount := countLinesInFile(scrapeFailureLogFile)
// Wait for at least two more requests to the mock server to ensure
// Prometheus continues scraping.
requestsBeforeReload := requestCount.Load()
require.Eventually(t, func() bool {
return requestCount.Load() >= requestsBeforeReload+2
}, 30*time.Second, 500*time.Millisecond, "Expected two more requests to the mock server after configuration reload")
// Ensure that no new lines were added to the scrape failure log file after
// the configuration change.
require.Equal(t, preReloadLogLineCount, countLinesInFile(scrapeFailureLogFile), "No new lines should be added to the scrape failure log file after removing the log setting")
// Step 3: Re-add the scrape failure log file setting, but this time under
// scrape_configs, and reload Prometheus.
promConfig = fmt.Sprintf(`
global:
scrape_interval: 1s
scrape_configs:
- job_name: 'test_job'
scrape_failure_log_file: %s
static_configs:
- targets: ['%s']
`, scrapeFailureLogFileName, mockServerAddress)
err = os.WriteFile(promConfigFile, []byte(promConfig), 0o644)
require.NoError(t, err, "Failed to update Prometheus configuration file")
// Reload Prometheus with the updated configuration.
reloadPrometheus(t, port)
// Wait for at least two more requests to the mock server and verify that
// new log entries are created.
postReloadLogLineCount := countLinesInFile(scrapeFailureLogFile)
requestsBeforeReAddingLog := requestCount.Load()
require.Eventually(t, func() bool {
return requestCount.Load() >= requestsBeforeReAddingLog+2
}, 30*time.Second, 500*time.Millisecond, "Expected two additional requests after re-adding the log setting")
// Confirm that new lines were added to the scrape failure log file.
require.Greater(t, countLinesInFile(scrapeFailureLogFile), postReloadLogLineCount, "New lines should be added to the scrape failure log file after re-adding the log setting")
}
// reloadPrometheus sends a POST to the /-/reload endpoint of the Prometheus
// server listening on 127.0.0.1:port so that it applies the updated
// configuration. The test fails if the request errors or the response status
// is not 200 OK.
func reloadPrometheus(t *testing.T, port int) {
	resp, err := http.Post(fmt.Sprintf("http://127.0.0.1:%d/-/reload", port), "", nil)
	require.NoError(t, err, "Failed to reload Prometheus")
	// Close the body so the underlying connection is released.
	defer resp.Body.Close()
	require.Equal(t, http.StatusOK, resp.StatusCode, "Unexpected status code when reloading Prometheus")
}
// startGarbageServer launches an httptest server whose handler bumps
// requestCount and answers every request with 500 Internal Server Error.
// The server is closed via t.Cleanup. The returned string is the host part
// (host:port) of the server's URL.
func startGarbageServer(t *testing.T, requestCount *atomic.Int32) string {
	alwaysFail := func(w http.ResponseWriter, _ *http.Request) {
		requestCount.Inc()
		w.WriteHeader(http.StatusInternalServerError)
	}

	srv := httptest.NewServer(http.HandlerFunc(alwaysFail))
	t.Cleanup(srv.Close)

	u, err := url.Parse(srv.URL)
	require.NoError(t, err, "Failed to parse mock server URL")

	return u.Host
}
// countLinesInFile returns how many newline characters the file at filePath
// contains. A file that cannot be read (for example because it does not
// exist) is reported as having 0 lines.
func countLinesInFile(filePath string) int {
	if contents, readErr := os.ReadFile(filePath); readErr == nil {
		return bytes.Count(contents, []byte("\n"))
	}
	return 0
}

View file

@ -34,8 +34,8 @@ import (
)
var (
errNotNativeHistogram = fmt.Errorf("not a native histogram")
errNotEnoughData = fmt.Errorf("not enough data")
errNotNativeHistogram = errors.New("not a native histogram")
errNotEnoughData = errors.New("not enough data")
outputHeader = `Bucket stats for each histogram series over time
------------------------------------------------
@ -169,7 +169,7 @@ func querySamples(ctx context.Context, api v1.API, query string, end time.Time)
matrix, ok := values.(model.Matrix)
if !ok {
return nil, fmt.Errorf("query of buckets resulted in non-Matrix")
return nil, errors.New("query of buckets resulted in non-Matrix")
}
return matrix, nil
@ -259,7 +259,7 @@ func getBucketCountsAtTime(matrix model.Matrix, numBuckets, timeIdx int) ([]int,
prev := matrix[i].Values[timeIdx]
// Assume the results are nicely aligned.
if curr.Timestamp != prev.Timestamp {
return counts, fmt.Errorf("matrix result is not time aligned")
return counts, errors.New("matrix result is not time aligned")
}
counts[i+1] = int(curr.Value - prev.Value)
}

View file

@ -109,6 +109,7 @@ func init() {
}
func TestGetBucketCountsAtTime(t *testing.T) {
t.Parallel()
cases := []struct {
matrix model.Matrix
length int
@ -137,6 +138,7 @@ func TestGetBucketCountsAtTime(t *testing.T) {
for _, c := range cases {
t.Run(fmt.Sprintf("exampleMatrix@%d", c.timeIdx), func(t *testing.T) {
t.Parallel()
res, err := getBucketCountsAtTime(c.matrix, c.length, c.timeIdx)
require.NoError(t, err)
require.Equal(t, c.expected, res)
@ -145,6 +147,7 @@ func TestGetBucketCountsAtTime(t *testing.T) {
}
func TestCalcClassicBucketStatistics(t *testing.T) {
t.Parallel()
cases := []struct {
matrix model.Matrix
expected *statistics
@ -162,6 +165,7 @@ func TestCalcClassicBucketStatistics(t *testing.T) {
for i, c := range cases {
t.Run(fmt.Sprintf("case %d", i), func(t *testing.T) {
t.Parallel()
res, err := calcClassicBucketStatistics(c.matrix)
require.NoError(t, err)
require.Equal(t, c.expected, res)

View file

@ -21,9 +21,10 @@ import (
"math"
"time"
"github.com/go-kit/log"
"github.com/oklog/ulid"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/textparse"
"github.com/prometheus/prometheus/tsdb"
@ -48,7 +49,7 @@ func getMinAndMaxTimestamps(p textparse.Parser) (int64, int64, error) {
_, ts, _ := p.Series()
if ts == nil {
return 0, 0, fmt.Errorf("expected timestamp for series got none")
return 0, 0, errors.New("expected timestamp for series got none")
}
if *ts > maxt {
@ -85,7 +86,7 @@ func getCompatibleBlockDuration(maxBlockDuration int64) int64 {
return blockDuration
}
func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool) (returnErr error) {
func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool, customLabels map[string]string) (returnErr error) {
blockDuration := getCompatibleBlockDuration(maxBlockDuration)
mint = blockDuration * (mint / blockDuration)
@ -102,6 +103,8 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
nextSampleTs int64 = math.MaxInt64
)
lb := labels.NewBuilder(labels.EmptyLabels())
for t := mint; t <= maxt; t += blockDuration {
tsUpper := t + blockDuration
if nextSampleTs != math.MaxInt64 && nextSampleTs >= tsUpper {
@ -118,7 +121,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
// also need to append samples throughout the whole block range. To allow that, we
// pretend that the block is twice as large here, but only really add sample in the
// original interval later.
w, err := tsdb.NewBlockWriter(log.NewNopLogger(), outputDir, 2*blockDuration)
w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), outputDir, 2*blockDuration)
if err != nil {
return fmt.Errorf("block writer: %w", err)
}
@ -146,7 +149,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
_, ts, v := p.Series()
if ts == nil {
l := labels.Labels{}
p.Metric(&l)
p.Labels(&l)
return fmt.Errorf("expected timestamp for series %v, got none", l)
}
if *ts < t {
@ -160,9 +163,15 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
}
l := labels.Labels{}
p.Metric(&l)
p.Labels(&l)
if _, err := app.Append(0, l, *ts, v); err != nil {
lb.Reset(l)
for name, value := range customLabels {
lb.Set(name, value)
}
lbls := lb.Labels()
if _, err := app.Append(0, lbls, *ts, v); err != nil {
return fmt.Errorf("add sample: %w", err)
}
@ -221,13 +230,13 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
return nil
}
func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) (err error) {
func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration, customLabels map[string]string) (err error) {
p := textparse.NewOpenMetricsParser(input, nil) // Don't need a SymbolTable to get max and min timestamps.
maxt, mint, err := getMinAndMaxTimestamps(p)
if err != nil {
return fmt.Errorf("getting min and max timestamp: %w", err)
}
if err = createBlocks(input, mint, maxt, int64(maxBlockDuration/time.Millisecond), maxSamplesInAppender, outputDir, humanReadable, quiet); err != nil {
if err = createBlocks(input, mint, maxt, int64(maxBlockDuration/time.Millisecond), maxSamplesInAppender, outputDir, humanReadable, quiet, customLabels); err != nil {
return fmt.Errorf("block creation: %w", err)
}
return nil

View file

@ -45,7 +45,7 @@ func sortSamples(samples []backfillSample) {
})
}
func queryAllSeries(t testing.TB, q storage.Querier, expectedMinTime, expectedMaxTime int64) []backfillSample {
func queryAllSeries(t testing.TB, q storage.Querier, _, _ int64) []backfillSample {
ss := q.Select(context.Background(), false, nil, labels.MustNewMatcher(labels.MatchRegexp, "", ".*"))
samples := []backfillSample{}
for ss.Next() {
@ -86,12 +86,14 @@ func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime, exp
}
func TestBackfill(t *testing.T) {
t.Parallel()
tests := []struct {
ToParse string
IsOk bool
Description string
MaxSamplesInAppender int
MaxBlockDuration time.Duration
Labels map[string]string
Expected struct {
MinTime int64
MaxTime int64
@ -636,6 +638,49 @@ http_requests_total{code="400"} 1024 7199
},
},
},
{
ToParse: `# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter
http_requests_total{code="200"} 1 1624463088.000
http_requests_total{code="200"} 2 1629503088.000
http_requests_total{code="200"} 3 1629863088.000
# EOF
`,
IsOk: true,
Description: "Sample with external labels.",
MaxSamplesInAppender: 5000,
MaxBlockDuration: 2048 * time.Hour,
Labels: map[string]string{"cluster_id": "123", "org_id": "999"},
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1624463088000,
MaxTime: 1629863088000,
NumBlocks: 2,
BlockDuration: int64(1458 * time.Hour / time.Millisecond),
Samples: []backfillSample{
{
Timestamp: 1624463088000,
Value: 1,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"),
},
{
Timestamp: 1629503088000,
Value: 2,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"),
},
{
Timestamp: 1629863088000,
Value: 3,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"),
},
},
},
},
{
ToParse: `# HELP rpc_duration_seconds A summary of the RPC duration in seconds.
# TYPE rpc_duration_seconds summary
@ -685,11 +730,12 @@ after_eof 1 2
}
for _, test := range tests {
t.Run(test.Description, func(t *testing.T) {
t.Parallel()
t.Logf("Test:%s", test.Description)
outputDir := t.TempDir()
err := backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false, test.MaxBlockDuration)
err := backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false, test.MaxBlockDuration, test.Labels)
if !test.IsOk {
require.Error(t, err, test.Description)

View file

@ -32,20 +32,19 @@ import (
"time"
"github.com/alecthomas/kingpin/v2"
"github.com/go-kit/log"
"github.com/google/pprof/profile"
"github.com/prometheus/client_golang/api"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil/promlint"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/expfmt"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/common/version"
"github.com/prometheus/exporter-toolkit/web"
"gopkg.in/yaml.v2"
dto "github.com/prometheus/client_model/go"
promconfig "github.com/prometheus/common/config"
"github.com/prometheus/common/expfmt"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
@ -58,24 +57,35 @@ import (
_ "github.com/prometheus/prometheus/plugins" // Register plugins.
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/promql/promqltest"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/scrape"
"github.com/prometheus/prometheus/util/documentcli"
)
// init sets the package-global model.NameValidationScheme to
// model.UTF8Validation.
func init() {
// This can be removed when the default validation scheme in common is updated.
model.NameValidationScheme = model.UTF8Validation
}
const (
successExitCode = 0
failureExitCode = 1
// Exit code 3 is used for "one or more lint issues detected".
lintErrExitCode = 3
lintOptionAll = "all"
lintOptionDuplicateRules = "duplicate-rules"
lintOptionNone = "none"
checkHealth = "/-/healthy"
checkReadiness = "/-/ready"
lintOptionAll = "all"
lintOptionDuplicateRules = "duplicate-rules"
lintOptionTooLongScrapeInterval = "too-long-scrape-interval"
lintOptionNone = "none"
checkHealth = "/-/healthy"
checkReadiness = "/-/ready"
)
var lintOptions = []string{lintOptionAll, lintOptionDuplicateRules, lintOptionNone}
var (
lintRulesOptions = []string{lintOptionAll, lintOptionDuplicateRules, lintOptionNone}
// Same as lintRulesOptions, but including scrape config linting options as well.
lintConfigOptions = append(append([]string{}, lintRulesOptions...), lintOptionTooLongScrapeInterval)
)
func main() {
var (
@ -92,6 +102,10 @@ func main() {
app.HelpFlag.Short('h')
checkCmd := app.Command("check", "Check the resources for validity.")
checkLookbackDelta := checkCmd.Flag(
"query.lookback-delta",
"The server's maximum query lookback duration.",
).Default("5m").Duration()
experimental := app.Flag("experimental", "Enable experimental commands.").Bool()
@ -108,11 +122,12 @@ func main() {
checkConfigSyntaxOnly := checkConfigCmd.Flag("syntax-only", "Only check the config file syntax, ignoring file and content validation referenced in the config").Bool()
checkConfigLint := checkConfigCmd.Flag(
"lint",
"Linting checks to apply to the rules specified in the config. Available options are: "+strings.Join(lintOptions, ", ")+". Use --lint=none to disable linting",
"Linting checks to apply to the rules/scrape configs specified in the config. Available options are: "+strings.Join(lintConfigOptions, ", ")+". Use --lint=none to disable linting",
).Default(lintOptionDuplicateRules).String()
checkConfigLintFatal := checkConfigCmd.Flag(
"lint-fatal",
"Make lint errors exit with exit code 3.").Default("false").Bool()
checkConfigIgnoreUnknownFields := checkConfigCmd.Flag("ignore-unknown-fields", "Ignore unknown fields in the rule groups read by the config files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default.").Default("false").Bool()
checkWebConfigCmd := checkCmd.Command("web-config", "Check if the web config files are valid or not.")
webConfigFiles := checkWebConfigCmd.Arg(
@ -135,11 +150,12 @@ func main() {
).ExistingFiles()
checkRulesLint := checkRulesCmd.Flag(
"lint",
"Linting checks to apply. Available options are: "+strings.Join(lintOptions, ", ")+". Use --lint=none to disable linting",
"Linting checks to apply. Available options are: "+strings.Join(lintRulesOptions, ", ")+". Use --lint=none to disable linting",
).Default(lintOptionDuplicateRules).String()
checkRulesLintFatal := checkRulesCmd.Flag(
"lint-fatal",
"Make lint errors exit with exit code 3.").Default("false").Bool()
checkRulesIgnoreUnknownFields := checkRulesCmd.Flag("ignore-unknown-fields", "Ignore unknown fields in the rule files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default.").Default("false").Bool()
checkMetricsCmd := checkCmd.Command("metrics", checkMetricsUsage)
checkMetricsExtended := checkCmd.Flag("extended", "Print extended information related to the cardinality of the metrics.").Bool()
@ -204,13 +220,16 @@ func main() {
pushMetricsHeaders := pushMetricsCmd.Flag("header", "Prometheus remote write header.").StringMap()
testCmd := app.Command("test", "Unit testing.")
junitOutFile := testCmd.Flag("junit", "File path to store JUnit XML test results.").OpenFile(os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
testRulesCmd := testCmd.Command("rules", "Unit tests for rules.")
testRulesRun := testRulesCmd.Flag("run", "If set, will only run test groups whose names match the regular expression. Can be specified multiple times.").Strings()
testRulesFiles := testRulesCmd.Arg(
"test-rule-file",
"The unit test file.",
).Required().ExistingFiles()
testRulesDebug := testRulesCmd.Flag("debug", "Enable unit test debugging.").Default("false").Bool()
testRulesDiff := testRulesCmd.Flag("diff", "[Experimental] Print colored differential output between expected & received output.").Default("false").Bool()
testRulesIgnoreUnknownFields := testRulesCmd.Flag("ignore-unknown-fields", "Ignore unknown fields in the test files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default.").Default("false").Bool()
defaultDBPath := "data/"
tsdbCmd := app.Command("tsdb", "Run tsdb commands.")
@ -235,14 +254,14 @@ func main() {
tsdbDumpCmd := tsdbCmd.Command("dump", "Dump samples from a TSDB.")
dumpPath := tsdbDumpCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
dumpSandboxDirRoot := tsdbDumpCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String()
dumpSandboxDirRoot := tsdbDumpCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory will be created, this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end.").String()
dumpMinTime := tsdbDumpCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64()
dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64()
dumpMatch := tsdbDumpCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings()
tsdbDumpOpenMetricsCmd := tsdbCmd.Command("dump-openmetrics", "[Experimental] Dump samples from a TSDB into OpenMetrics text format, excluding native histograms and staleness markers, which are not representable in OpenMetrics.")
dumpOpenMetricsPath := tsdbDumpOpenMetricsCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
dumpOpenMetricsSandboxDirRoot := tsdbDumpOpenMetricsCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String()
dumpOpenMetricsSandboxDirRoot := tsdbDumpOpenMetricsCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory will be created, this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end.").String()
dumpOpenMetricsMinTime := tsdbDumpOpenMetricsCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64()
dumpOpenMetricsMaxTime := tsdbDumpOpenMetricsCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64()
dumpOpenMetricsMatch := tsdbDumpOpenMetricsCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings()
@ -252,6 +271,7 @@ func main() {
importQuiet := importCmd.Flag("quiet", "Do not print created blocks.").Short('q').Bool()
maxBlockDuration := importCmd.Flag("max-block-duration", "Maximum duration created blocks may span. Anything less than 2h is ignored.").Hidden().PlaceHolder("<duration>").Duration()
openMetricsImportCmd := importCmd.Command("openmetrics", "Import samples from OpenMetrics input and produce TSDB blocks. Please refer to the storage docs for more details.")
openMetricsLabels := openMetricsImportCmd.Flag("label", "Label to attach to metrics. Can be specified multiple times. Example --label=label_name=label_value").StringMap()
importFilePath := openMetricsImportCmd.Arg("input file", "OpenMetrics file to read samples from.").Required().String()
importDBPath := openMetricsImportCmd.Arg("output directory", "Output directory for generated blocks.").Default(defaultDBPath).String()
importRulesCmd := importCmd.Command("rules", "Create blocks of data for new recording rules.")
@ -284,7 +304,7 @@ func main() {
promQLLabelsDeleteQuery := promQLLabelsDeleteCmd.Arg("query", "PromQL query.").Required().String()
promQLLabelsDeleteName := promQLLabelsDeleteCmd.Arg("name", "Name of the label to delete.").Required().String()
featureList := app.Flag("enable-feature", "Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details.").Default("").Strings()
featureList := app.Flag("enable-feature", "Comma separated feature names to enable. Currently unused.").Default("").Strings()
documentationCmd := app.Command("write-documentation", "Generate command line documentation. Internal use.").Hidden()
@ -303,40 +323,35 @@ func main() {
kingpin.Fatalf("Cannot set base auth in the server URL and use a http.config.file at the same time")
}
var err error
httpConfig, _, err := config_util.LoadHTTPConfigFile(httpConfigFilePath)
httpConfig, _, err := promconfig.LoadHTTPConfigFile(httpConfigFilePath)
if err != nil {
kingpin.Fatalf("Failed to load HTTP config file: %v", err)
}
httpRoundTripper, err = promconfig.NewRoundTripperFromConfig(*httpConfig, "promtool", config_util.WithUserAgent("promtool/"+version.Version))
httpRoundTripper, err = promconfig.NewRoundTripperFromConfig(*httpConfig, "promtool", promconfig.WithUserAgent(version.ComponentUserAgent("promtool")))
if err != nil {
kingpin.Fatalf("Failed to create a new HTTP round tripper: %v", err)
}
}
var noDefaultScrapePort bool
for _, f := range *featureList {
opts := strings.Split(f, ",")
for _, o := range opts {
switch o {
case "no-default-scrape-port":
noDefaultScrapePort = true
case "":
continue
case "promql-at-modifier", "promql-negative-offset":
fmt.Printf(" WARNING: Option for --enable-feature is a no-op after promotion to a stable feature: %q\n", o)
default:
fmt.Printf(" WARNING: Unknown option for --enable-feature: %q\n", o)
fmt.Printf(" WARNING: --enable-feature is currently a no-op")
}
}
}
switch parsedCmd {
case sdCheckCmd.FullCommand():
os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort, prometheus.DefaultRegisterer))
os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, prometheus.DefaultRegisterer))
case checkConfigCmd.FullCommand():
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...))
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newConfigLintConfig(*checkConfigLint, *checkConfigLintFatal, *checkConfigIgnoreUnknownFields, model.Duration(*checkLookbackDelta)), *configFiles...))
case checkServerHealthCmd.FullCommand():
os.Exit(checkErr(CheckServerStatus(serverURL, checkHealth, httpRoundTripper)))
@ -348,7 +363,7 @@ func main() {
os.Exit(CheckWebConfig(*webConfigFiles...))
case checkRulesCmd.FullCommand():
os.Exit(CheckRules(newLintConfig(*checkRulesLint, *checkRulesLintFatal), *ruleFiles...))
os.Exit(CheckRules(newRulesLintConfig(*checkRulesLint, *checkRulesLintFatal, *checkRulesIgnoreUnknownFields), *ruleFiles...))
case checkMetricsCmd.FullCommand():
os.Exit(CheckMetrics(*checkMetricsExtended))
@ -378,13 +393,19 @@ func main() {
os.Exit(QueryLabels(serverURL, httpRoundTripper, *queryLabelsMatch, *queryLabelsName, *queryLabelsBegin, *queryLabelsEnd, p))
case testRulesCmd.FullCommand():
os.Exit(RulesUnitTest(
results := io.Discard
if *junitOutFile != nil {
results = *junitOutFile
}
os.Exit(RulesUnitTestResult(results,
promqltest.LazyLoaderOpts{
EnableAtModifier: true,
EnableNegativeOffset: true,
},
*testRulesRun,
*testRulesDiff,
*testRulesDebug,
*testRulesIgnoreUnknownFields,
*testRulesFiles...),
)
@ -403,7 +424,7 @@ func main() {
os.Exit(checkErr(dumpSamples(ctx, *dumpOpenMetricsPath, *dumpOpenMetricsSandboxDirRoot, *dumpOpenMetricsMinTime, *dumpOpenMetricsMaxTime, *dumpOpenMetricsMatch, formatSeriesSetOpenMetrics)))
// TODO(aSquare14): Work on adding support for custom block size.
case openMetricsImportCmd.FullCommand():
os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration))
os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration, *openMetricsLabels))
case importRulesCmd.FullCommand():
os.Exit(checkErr(importRules(serverURL, httpRoundTripper, *importRulesStart, *importRulesEnd, *importRulesOutputDir, *importRulesEvalInterval, *maxBlockDuration, *importRulesFiles...)))
@ -435,18 +456,20 @@ func checkExperimental(f bool) {
}
}
var errLint = fmt.Errorf("lint error")
var errLint = errors.New("lint error")
type lintConfig struct {
all bool
duplicateRules bool
fatal bool
type rulesLintConfig struct {
all bool
duplicateRules bool
fatal bool
ignoreUnknownFields bool
}
func newLintConfig(stringVal string, fatal bool) lintConfig {
func newRulesLintConfig(stringVal string, fatal, ignoreUnknownFields bool) rulesLintConfig {
items := strings.Split(stringVal, ",")
ls := lintConfig{
fatal: fatal,
ls := rulesLintConfig{
fatal: fatal,
ignoreUnknownFields: ignoreUnknownFields,
}
for _, setting := range items {
switch setting {
@ -456,17 +479,58 @@ func newLintConfig(stringVal string, fatal bool) lintConfig {
ls.duplicateRules = true
case lintOptionNone:
default:
fmt.Printf("WARNING: unknown lint option %s\n", setting)
fmt.Printf("WARNING: unknown lint option: %q\n", setting)
}
}
return ls
}
func (ls lintConfig) lintDuplicateRules() bool {
func (ls rulesLintConfig) lintDuplicateRules() bool {
return ls.all || ls.duplicateRules
}
// Check server status - healthy & ready.
// configLintConfig holds the lint settings used when checking a config file:
// the embedded rule-linting settings plus the scrape-config specific ones.
type configLintConfig struct {
rulesLintConfig
// lookbackDelta is the threshold that scrape intervals are compared against
// when linting scrape configs. A value of 0 leaves that check disabled.
lookbackDelta model.Duration
}
// newConfigLintConfig builds the lint settings for checking a config file
// from optionsStr, a comma-separated list of lint options.
//
//   - lintOptionAll and lintOptionTooLongScrapeInterval enable the scrape
//     interval check by storing lookbackDelta; lintOptionAll is additionally
//     forwarded to the rules linter.
//   - lintOptionNone disables all linting, wherever it appears in the list.
//   - Every other option is forwarded to newRulesLintConfig, which warns
//     about the ones it does not know.
//
// fatal and ignoreUnknownFields are always carried into the embedded
// rulesLintConfig, including when no option is forwarded to the rules linter.
func newConfigLintConfig(optionsStr string, fatal, ignoreUnknownFields bool, lookbackDelta model.Duration) configLintConfig {
	c := configLintConfig{
		rulesLintConfig: rulesLintConfig{
			fatal: fatal,
			// Rule files are still parsed when rule linting is off (for
			// example with --lint=none), so this setting must not depend on
			// any rules lint option being selected.
			ignoreUnknownFields: ignoreUnknownFields,
		},
	}

	lintNone := false
	var rulesOptions []string
	for _, option := range strings.Split(optionsStr, ",") {
		switch option {
		case lintOptionAll, lintOptionTooLongScrapeInterval:
			c.lookbackDelta = lookbackDelta
			if option == lintOptionAll {
				rulesOptions = append(rulesOptions, lintOptionAll)
			}
		case lintOptionNone:
			lintNone = true
		default:
			rulesOptions = append(rulesOptions, option)
		}
	}

	// "none" wins over every other option in the list.
	if lintNone {
		c.lookbackDelta = 0
		rulesOptions = nil
	}

	if len(rulesOptions) > 0 {
		c.rulesLintConfig = newRulesLintConfig(strings.Join(rulesOptions, ","), fatal, ignoreUnknownFields)
	}

	return c
}
// CheckServerStatus - healthy & ready.
func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper http.RoundTripper) error {
if serverURL.Scheme == "" {
serverURL.Scheme = "http"
@ -504,12 +568,12 @@ func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper ht
}
// CheckConfig validates configuration files.
func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings lintConfig, files ...string) int {
func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings configLintConfig, files ...string) int {
failed := false
hasErrors := false
for _, f := range files {
ruleFiles, err := checkConfig(agentMode, f, checkSyntaxOnly)
ruleFiles, scrapeConfigs, err := checkConfig(agentMode, f, checkSyntaxOnly)
if err != nil {
fmt.Fprintln(os.Stderr, " FAILED:", err)
hasErrors = true
@ -522,12 +586,12 @@ func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings lintConfig, files
}
fmt.Println()
rulesFailed, rulesHasErrors := checkRules(ruleFiles, lintSettings)
if rulesFailed {
failed = rulesFailed
}
if rulesHasErrors {
hasErrors = rulesHasErrors
if !checkSyntaxOnly {
scrapeConfigsFailed := lintScrapeConfigs(scrapeConfigs, lintSettings)
failed = failed || scrapeConfigsFailed
rulesFailed, rulesHaveErrors := checkRules(ruleFiles, lintSettings.rulesLintConfig)
failed = failed || rulesFailed
hasErrors = hasErrors || rulesHaveErrors
}
}
if failed && hasErrors {
@ -566,12 +630,12 @@ func checkFileExists(fn string) error {
return err
}
func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, error) {
func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, []*config.ScrapeConfig, error) {
fmt.Println("Checking", filename)
cfg, err := config.LoadFile(filename, agentMode, false, log.NewNopLogger())
cfg, err := config.LoadFile(filename, agentMode, promslog.NewNopLogger())
if err != nil {
return nil, err
return nil, nil, err
}
var ruleFiles []string
@ -579,15 +643,15 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
for _, rf := range cfg.RuleFiles {
rfs, err := filepath.Glob(rf)
if err != nil {
return nil, err
return nil, nil, err
}
// If an explicit file was given, error if it is not accessible.
if !strings.Contains(rf, "*") {
if len(rfs) == 0 {
return nil, fmt.Errorf("%q does not point to an existing file", rf)
return nil, nil, fmt.Errorf("%q does not point to an existing file", rf)
}
if err := checkFileExists(rfs[0]); err != nil {
return nil, fmt.Errorf("error checking rule file %q: %w", rfs[0], err)
return nil, nil, fmt.Errorf("error checking rule file %q: %w", rfs[0], err)
}
}
ruleFiles = append(ruleFiles, rfs...)
@ -601,26 +665,26 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
var err error
scfgs, err = cfg.GetScrapeConfigs()
if err != nil {
return nil, fmt.Errorf("error loading scrape configs: %w", err)
return nil, nil, fmt.Errorf("error loading scrape configs: %w", err)
}
}
for _, scfg := range scfgs {
if !checkSyntaxOnly && scfg.HTTPClientConfig.Authorization != nil {
if err := checkFileExists(scfg.HTTPClientConfig.Authorization.CredentialsFile); err != nil {
return nil, fmt.Errorf("error checking authorization credentials or bearer token file %q: %w", scfg.HTTPClientConfig.Authorization.CredentialsFile, err)
return nil, nil, fmt.Errorf("error checking authorization credentials or bearer token file %q: %w", scfg.HTTPClientConfig.Authorization.CredentialsFile, err)
}
}
if err := checkTLSConfig(scfg.HTTPClientConfig.TLSConfig, checkSyntaxOnly); err != nil {
return nil, err
return nil, nil, err
}
for _, c := range scfg.ServiceDiscoveryConfigs {
switch c := c.(type) {
case *kubernetes.SDConfig:
if err := checkTLSConfig(c.HTTPClientConfig.TLSConfig, checkSyntaxOnly); err != nil {
return nil, err
return nil, nil, err
}
case *file.SDConfig:
if checkSyntaxOnly {
@ -629,17 +693,17 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
for _, file := range c.Files {
files, err := filepath.Glob(file)
if err != nil {
return nil, err
return nil, nil, err
}
if len(files) != 0 {
for _, f := range files {
var targetGroups []*targetgroup.Group
targetGroups, err = checkSDFile(f)
if err != nil {
return nil, fmt.Errorf("checking SD file %q: %w", file, err)
return nil, nil, fmt.Errorf("checking SD file %q: %w", file, err)
}
if err := checkTargetGroupsForScrapeConfig(targetGroups, scfg); err != nil {
return nil, err
return nil, nil, err
}
}
continue
@ -648,7 +712,7 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
}
case discovery.StaticConfig:
if err := checkTargetGroupsForScrapeConfig(c, scfg); err != nil {
return nil, err
return nil, nil, err
}
}
}
@ -665,18 +729,18 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
for _, file := range c.Files {
files, err := filepath.Glob(file)
if err != nil {
return nil, err
return nil, nil, err
}
if len(files) != 0 {
for _, f := range files {
var targetGroups []*targetgroup.Group
targetGroups, err = checkSDFile(f)
if err != nil {
return nil, fmt.Errorf("checking SD file %q: %w", file, err)
return nil, nil, fmt.Errorf("checking SD file %q: %w", file, err)
}
if err := checkTargetGroupsForAlertmanager(targetGroups, amcfg); err != nil {
return nil, err
return nil, nil, err
}
}
continue
@ -685,15 +749,15 @@ func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]strin
}
case discovery.StaticConfig:
if err := checkTargetGroupsForAlertmanager(c, amcfg); err != nil {
return nil, err
return nil, nil, err
}
}
}
}
return ruleFiles, nil
return ruleFiles, scfgs, nil
}
func checkTLSConfig(tlsConfig config_util.TLSConfig, checkSyntaxOnly bool) error {
func checkTLSConfig(tlsConfig promconfig.TLSConfig, checkSyntaxOnly bool) error {
if len(tlsConfig.CertFile) > 0 && len(tlsConfig.KeyFile) == 0 {
return fmt.Errorf("client cert file %q specified without client key file", tlsConfig.CertFile)
}
@ -752,7 +816,7 @@ func checkSDFile(filename string) ([]*targetgroup.Group, error) {
}
// CheckRules validates rule files.
func CheckRules(ls lintConfig, files ...string) int {
func CheckRules(ls rulesLintConfig, files ...string) int {
failed := false
hasErrors := false
if len(files) == 0 {
@ -772,7 +836,7 @@ func CheckRules(ls lintConfig, files ...string) int {
}
// checkRulesFromStdin validates rule from stdin.
func checkRulesFromStdin(ls lintConfig) (bool, bool) {
func checkRulesFromStdin(ls rulesLintConfig) (bool, bool) {
failed := false
hasErrors := false
fmt.Println("Checking standard input")
@ -781,7 +845,7 @@ func checkRulesFromStdin(ls lintConfig) (bool, bool) {
fmt.Fprintln(os.Stderr, " FAILED:", err)
return true, true
}
rgs, errs := rulefmt.Parse(data)
rgs, errs := rulefmt.Parse(data, ls.ignoreUnknownFields)
if errs != nil {
failed = true
fmt.Fprintln(os.Stderr, " FAILED:")
@ -810,12 +874,12 @@ func checkRulesFromStdin(ls lintConfig) (bool, bool) {
}
// checkRules validates rule files.
func checkRules(files []string, ls lintConfig) (bool, bool) {
func checkRules(files []string, ls rulesLintConfig) (bool, bool) {
failed := false
hasErrors := false
for _, f := range files {
fmt.Println("Checking", f)
rgs, errs := rulefmt.ParseFile(f)
rgs, errs := rulefmt.ParseFile(f, ls.ignoreUnknownFields)
if errs != nil {
failed = true
fmt.Fprintln(os.Stderr, " FAILED:")
@ -844,7 +908,7 @@ func checkRules(files []string, ls lintConfig) (bool, bool) {
return failed, hasErrors
}
func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings lintConfig) (int, []error) {
func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings rulesLintConfig) (int, []error) {
numRules := 0
for _, rg := range rgs.Groups {
numRules += len(rg.Rules)
@ -868,6 +932,16 @@ func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings lintConfig) (int, []e
return numRules, nil
}
// lintScrapeConfigs reports whether any of the given scrape configs uses a
// scrape interval that is greater than or equal to the configured lookback
// delta. The first offending job is written to stderr and the function
// returns true immediately; later configs are not inspected. It returns
// false when no config offends or when the lookback delta is not positive.
func lintScrapeConfigs(scrapeConfigs []*config.ScrapeConfig, lintSettings configLintConfig) bool {
	delta := lintSettings.lookbackDelta
	if delta <= 0 {
		// A non-positive lookback delta disables the check entirely.
		return false
	}
	for _, sc := range scrapeConfigs {
		if sc.ScrapeInterval < delta {
			continue
		}
		fmt.Fprintf(os.Stderr, " FAILED: too long scrape interval found, data point will be marked as stale - job: %s, interval: %s\n", sc.JobName, sc.ScrapeInterval)
		return true
	}
	return false
}
type compareRuleType struct {
metric string
label labels.Labels
@ -889,40 +963,40 @@ func compare(a, b compareRuleType) int {
func checkDuplicates(groups []rulefmt.RuleGroup) []compareRuleType {
var duplicates []compareRuleType
var rules compareRuleTypes
var cRules compareRuleTypes
for _, group := range groups {
for _, rule := range group.Rules {
rules = append(rules, compareRuleType{
cRules = append(cRules, compareRuleType{
metric: ruleMetric(rule),
label: labels.FromMap(rule.Labels),
label: rules.FromMaps(group.Labels, rule.Labels),
})
}
}
if len(rules) < 2 {
if len(cRules) < 2 {
return duplicates
}
sort.Sort(rules)
sort.Sort(cRules)
last := rules[0]
for i := 1; i < len(rules); i++ {
if compare(last, rules[i]) == 0 {
last := cRules[0]
for i := 1; i < len(cRules); i++ {
if compare(last, cRules[i]) == 0 {
// Don't add a duplicated rule multiple times.
if len(duplicates) == 0 || compare(last, duplicates[len(duplicates)-1]) != 0 {
duplicates = append(duplicates, rules[i])
duplicates = append(duplicates, cRules[i])
}
}
last = rules[i]
last = cRules[i]
}
return duplicates
}
func ruleMetric(rule rulefmt.RuleNode) string {
if rule.Alert.Value != "" {
return rule.Alert.Value
func ruleMetric(rule rulefmt.Rule) string {
if rule.Alert != "" {
return rule.Alert
}
return rule.Record.Value
return rule.Record
}
var checkMetricsUsage = strings.TrimSpace(`
@ -1176,7 +1250,7 @@ func importRules(url *url.URL, roundTripper http.RoundTripper, start, end, outpu
return fmt.Errorf("new api client error: %w", err)
}
ruleImporter := newRuleImporter(log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), cfg, api)
ruleImporter := newRuleImporter(promslog.New(&promslog.Config{}), cfg, api)
errs := ruleImporter.loadGroups(ctx, files)
for _, err := range errs {
if err != nil {
@ -1210,7 +1284,7 @@ func checkTargetGroupsForScrapeConfig(targetGroups []*targetgroup.Group, scfg *c
lb := labels.NewBuilder(labels.EmptyLabels())
for _, tg := range targetGroups {
var failures []error
targets, failures = scrape.TargetsFromGroup(tg, scfg, false, targets, lb)
targets, failures = scrape.TargetsFromGroup(tg, scfg, targets, lb)
if len(failures) > 0 {
first := failures[0]
return first
@ -1250,7 +1324,7 @@ func labelsSetPromQL(query, labelMatchType, name, value string) error {
return fmt.Errorf("invalid label match type: %s", labelMatchType)
}
parser.Inspect(expr, func(node parser.Node, path []parser.Node) error {
parser.Inspect(expr, func(node parser.Node, _ []parser.Node) error {
if n, ok := node.(*parser.VectorSelector); ok {
var found bool
for i, l := range n.LabelMatchers {
@ -1281,7 +1355,7 @@ func labelsDeletePromQL(query, name string) error {
return err
}
parser.Inspect(expr, func(node parser.Node, path []parser.Node) error {
parser.Inspect(expr, func(node parser.Node, _ []parser.Node) error {
if n, ok := node.(*parser.VectorSelector); ok {
for i, l := range n.LabelMatchers {
if l.Name == name {

View file

@ -31,12 +31,19 @@ import (
"testing"
"time"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/rulefmt"
"github.com/prometheus/prometheus/promql/promqltest"
)
// init switches the global name validation scheme in prometheus/common to
// UTF-8 before any test in this package runs.
func init() {
// This can be removed when the default validation scheme in common is updated.
model.NameValidationScheme = model.UTF8Validation
}
var promtoolPath = os.Args[0]
func TestMain(m *testing.M) {
@ -53,6 +60,7 @@ func TestMain(m *testing.M) {
}
func TestQueryRange(t *testing.T) {
t.Parallel()
s, getRequest := mockServer(200, `{"status": "success", "data": {"resultType": "matrix", "result": []}}`)
defer s.Close()
@ -76,6 +84,7 @@ func TestQueryRange(t *testing.T) {
}
func TestQueryInstant(t *testing.T) {
t.Parallel()
s, getRequest := mockServer(200, `{"status": "success", "data": {"resultType": "vector", "result": []}}`)
defer s.Close()
@ -107,6 +116,7 @@ func mockServer(code int, body string) (*httptest.Server, func() *http.Request)
}
func TestCheckSDFile(t *testing.T) {
t.Parallel()
cases := []struct {
name string
file string
@ -126,8 +136,8 @@ func TestCheckSDFile(t *testing.T) {
},
{
name: "bad file extension",
file: "./testdata/bad-sd-file-extension.nonexistant",
err: "invalid file extension: \".nonexistant\"",
file: "./testdata/bad-sd-file-extension.nonexistent",
err: "invalid file extension: \".nonexistent\"",
},
{
name: "bad format",
@ -137,9 +147,10 @@ func TestCheckSDFile(t *testing.T) {
}
for _, test := range cases {
t.Run(test.name, func(t *testing.T) {
t.Parallel()
_, err := checkSDFile(test.file)
if test.err != "" {
require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error())
require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error())
return
}
require.NoError(t, err)
@ -148,6 +159,7 @@ func TestCheckSDFile(t *testing.T) {
}
func TestCheckDuplicates(t *testing.T) {
t.Parallel()
cases := []struct {
name string
ruleFile string
@ -172,7 +184,8 @@ func TestCheckDuplicates(t *testing.T) {
for _, test := range cases {
c := test
t.Run(c.name, func(t *testing.T) {
rgs, err := rulefmt.ParseFile(c.ruleFile)
t.Parallel()
rgs, err := rulefmt.ParseFile(c.ruleFile, false)
require.Empty(t, err)
dups := checkDuplicates(rgs.Groups)
require.Equal(t, c.expectedDups, dups)
@ -181,7 +194,7 @@ func TestCheckDuplicates(t *testing.T) {
}
func BenchmarkCheckDuplicates(b *testing.B) {
rgs, err := rulefmt.ParseFile("./testdata/rules_large.yml")
rgs, err := rulefmt.ParseFile("./testdata/rules_large.yml", false)
require.Empty(b, err)
b.ResetTimer()
@ -191,6 +204,7 @@ func BenchmarkCheckDuplicates(b *testing.B) {
}
func TestCheckTargetConfig(t *testing.T) {
t.Parallel()
cases := []struct {
name string
file string
@ -219,9 +233,10 @@ func TestCheckTargetConfig(t *testing.T) {
}
for _, test := range cases {
t.Run(test.name, func(t *testing.T) {
_, err := checkConfig(false, "testdata/"+test.file, false)
t.Parallel()
_, _, err := checkConfig(false, "testdata/"+test.file, false)
if test.err != "" {
require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error())
require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error())
return
}
require.NoError(t, err)
@ -230,6 +245,7 @@ func TestCheckTargetConfig(t *testing.T) {
}
func TestCheckConfigSyntax(t *testing.T) {
t.Parallel()
cases := []struct {
name string
file string
@ -302,13 +318,14 @@ func TestCheckConfigSyntax(t *testing.T) {
}
for _, test := range cases {
t.Run(test.name, func(t *testing.T) {
_, err := checkConfig(false, "testdata/"+test.file, test.syntaxOnly)
t.Parallel()
_, _, err := checkConfig(false, "testdata/"+test.file, test.syntaxOnly)
expectedErrMsg := test.err
if strings.Contains(runtime.GOOS, "windows") {
expectedErrMsg = test.errWindows
}
if expectedErrMsg != "" {
require.Equalf(t, expectedErrMsg, err.Error(), "Expected error %q, got %q", test.err, err.Error())
require.EqualErrorf(t, err, expectedErrMsg, "Expected error %q, got %q", test.err, err.Error())
return
}
require.NoError(t, err)
@ -317,6 +334,7 @@ func TestCheckConfigSyntax(t *testing.T) {
}
func TestAuthorizationConfig(t *testing.T) {
t.Parallel()
cases := []struct {
name string
file string
@ -336,9 +354,10 @@ func TestAuthorizationConfig(t *testing.T) {
for _, test := range cases {
t.Run(test.name, func(t *testing.T) {
_, err := checkConfig(false, "testdata/"+test.file, false)
t.Parallel()
_, _, err := checkConfig(false, "testdata/"+test.file, false)
if test.err != "" {
require.Contains(t, err.Error(), test.err, "Expected error to contain %q, got %q", test.err, err.Error())
require.ErrorContains(t, err, test.err, "Expected error to contain %q, got %q", test.err, err.Error())
return
}
require.NoError(t, err)
@ -350,6 +369,7 @@ func TestCheckMetricsExtended(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("Skipping on windows")
}
t.Parallel()
f, err := os.Open("testdata/metrics-test.prom")
require.NoError(t, err)
@ -386,6 +406,7 @@ func TestExitCodes(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
t.Parallel()
for _, c := range []struct {
file string
@ -410,8 +431,10 @@ func TestExitCodes(t *testing.T) {
},
} {
t.Run(c.file, func(t *testing.T) {
t.Parallel()
for _, lintFatal := range []bool{true, false} {
t.Run(strconv.FormatBool(lintFatal), func(t *testing.T) {
t.Parallel()
args := []string{"-test.main", "check", "config", "testdata/" + c.file}
if lintFatal {
args = append(args, "--lint-fatal")
@ -442,6 +465,7 @@ func TestDocumentation(t *testing.T) {
if runtime.GOOS == "windows" {
t.SkipNow()
}
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
@ -484,7 +508,7 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false))
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false))
require.Equal(t, successExitCode, exitCode, "")
})
@ -506,7 +530,7 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false))
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false))
require.Equal(t, failureExitCode, exitCode, "")
})
@ -528,24 +552,115 @@ func TestCheckRules(t *testing.T) {
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true))
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true, false))
require.Equal(t, lintErrExitCode, exitCode, "")
})
}
func TestCheckRulesWithRuleFiles(t *testing.T) {
t.Run("rules-good", func(t *testing.T) {
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules.yml")
t.Parallel()
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false), "./testdata/rules.yml")
require.Equal(t, successExitCode, exitCode, "")
})
t.Run("rules-bad", func(t *testing.T) {
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules-bad.yml")
t.Parallel()
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, false, false), "./testdata/rules-bad.yml")
require.Equal(t, failureExitCode, exitCode, "")
})
t.Run("rules-lint-fatal", func(t *testing.T) {
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true), "./testdata/prometheus-rules.lint.yml")
t.Parallel()
exitCode := CheckRules(newRulesLintConfig(lintOptionDuplicateRules, true, false), "./testdata/prometheus-rules.lint.yml")
require.Equal(t, lintErrExitCode, exitCode, "")
})
}
// TestCheckScrapeConfigs runs CheckConfig against a fixture config while
// varying the lookback delta, and verifies the exit code for four modes:
// non-fatal linting, fatal linting, syntax-only checking, and linting
// disabled through the "none" lint option.
func TestCheckScrapeConfigs(t *testing.T) {
	const configFile = "./testdata/prometheus-config.lint.too_long_scrape_interval.yml"

	testCases := []struct {
		name          string
		lookbackDelta model.Duration
		expectError   bool
	}{
		{
			name:          "scrape interval less than lookback delta",
			lookbackDelta: model.Duration(11 * time.Minute),
			expectError:   false,
		},
		{
			name:          "scrape interval greater than lookback delta",
			lookbackDelta: model.Duration(5 * time.Minute),
			expectError:   true,
		},
		{
			name:          "scrape interval same as lookback delta",
			lookbackDelta: model.Duration(10 * time.Minute),
			expectError:   true,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// check runs CheckConfig on the fixture with this case's lookback delta.
			check := func(syntaxOnly bool, lintOptions string, lintFatal bool) int {
				return CheckConfig(false, syntaxOnly, newConfigLintConfig(lintOptions, lintFatal, false, tc.lookbackDelta), configFile)
			}

			// Non-fatal linting.
			require.Equal(t, successExitCode, check(false, lintOptionTooLongScrapeInterval, false), "Non-fatal linting should return success")

			// Fatal linting: the expected exit code depends on whether the case has a lint problem.
			fatalCode := check(false, lintOptionTooLongScrapeInterval, true)
			switch {
			case tc.expectError:
				require.Equal(t, lintErrExitCode, fatalCode, "Fatal linting should return error")
			default:
				require.Equal(t, successExitCode, fatalCode, "Fatal linting should return success when there are no problems")
			}

			// Check syntax only, no linting.
			require.Equal(t, successExitCode, check(true, lintOptionTooLongScrapeInterval, true), "Fatal linting should return success when checking syntax only")

			// Lint option "none" should disable linting.
			require.Equal(t, successExitCode, check(false, lintOptionNone+","+lintOptionTooLongScrapeInterval, true), `Fatal linting should return success when lint option "none" is specified`)
		})
	}
}
// TestTSDBDumpCommand runs the promtool binary's "tsdb dump" and
// "tsdb dump-openmetrics" subcommands against a small loaded storage and
// requires each invocation to exit successfully. Every subcommand is run
// both with the default sandbox location and with an explicit sandbox root.
//
// The test is skipped in short mode because it spawns child processes.
func TestTSDBDumpCommand(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}
	t.Parallel()

	storage := promqltest.LoadedStorage(t, `
load 1m
metric{foo="bar"} 1 2 3
`)
	t.Cleanup(func() { storage.Close() })

	for _, c := range []struct {
		name           string
		subCmd         string
		sandboxDirRoot string
	}{
		{
			name:   "dump",
			subCmd: "dump",
		},
		{
			name:           "dump with sandbox dir root",
			subCmd:         "dump",
			sandboxDirRoot: t.TempDir(),
		},
		{
			name:   "dump-openmetrics",
			subCmd: "dump-openmetrics",
		},
		{
			name:           "dump-openmetrics with sandbox dir root",
			subCmd:         "dump-openmetrics",
			sandboxDirRoot: t.TempDir(),
		},
	} {
		t.Run(c.name, func(t *testing.T) {
			t.Parallel()
			// "-test.main" makes the re-executed test binary behave as promtool itself.
			args := []string{"-test.main", "tsdb", c.subCmd, storage.Dir()}
			// Pass the sandbox root through to the command; without this the
			// "with sandbox dir root" cases would be identical to the plain ones.
			if c.sandboxDirRoot != "" {
				args = append(args, "--sandbox-dir-root", c.sandboxDirRoot)
			}
			cmd := exec.Command(promtoolPath, args...)
			require.NoError(t, cmd.Run())
		})
	}
}

View file

@ -31,7 +31,7 @@ import (
"github.com/prometheus/prometheus/util/fmtutil"
)
// Push metrics to a prometheus remote write (for testing purpose only).
// PushMetrics pushes metrics to a Prometheus remote write endpoint (for testing purposes only).
func PushMetrics(url *url.URL, roundTripper http.RoundTripper, headers map[string]string, timeout time.Duration, labels map[string]string, files ...string) int {
addressURL, err := url.Parse(url.String())
if err != nil {
@ -101,6 +101,7 @@ func PushMetrics(url *url.URL, roundTripper http.RoundTripper, headers map[strin
return successExitCode
}
// TODO(bwplotka): Add PRW 2.0 support.
func parseAndPushMetrics(client *remote.Client, data []byte, labels map[string]string) bool {
metricsData, err := fmtutil.MetricTextToWriteRequest(bytes.NewReader(data), labels)
if err != nil {
@ -116,7 +117,7 @@ func parseAndPushMetrics(client *remote.Client, data []byte, labels map[string]s
// Encode the request body into snappy encoding.
compressed := snappy.Encode(nil, raw)
err = client.Store(context.Background(), compressed, 0)
_, err = client.Store(context.Background(), compressed, 0)
if err != nil {
fmt.Fprintln(os.Stderr, " FAILED:", err)
return false

View file

@ -16,12 +16,12 @@ package main
import (
"context"
"fmt"
"log/slog"
"time"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/timestamp"
@ -38,7 +38,7 @@ type queryRangeAPI interface {
}
type ruleImporter struct {
logger log.Logger
logger *slog.Logger
config ruleImporterConfig
apiClient queryRangeAPI
@ -57,8 +57,8 @@ type ruleImporterConfig struct {
// newRuleImporter creates a new rule importer that can be used to parse and evaluate recording rule files and create new series
// written to disk in blocks.
func newRuleImporter(logger log.Logger, config ruleImporterConfig, apiClient queryRangeAPI) *ruleImporter {
level.Info(logger).Log("backfiller", "new rule importer", "start", config.start.Format(time.RFC822), "end", config.end.Format(time.RFC822))
func newRuleImporter(logger *slog.Logger, config ruleImporterConfig, apiClient queryRangeAPI) *ruleImporter {
logger.Info("new rule importer", "component", "backfiller", "start", config.start.Format(time.RFC822), "end", config.end.Format(time.RFC822))
return &ruleImporter{
logger: logger,
config: config,
@ -69,7 +69,7 @@ func newRuleImporter(logger log.Logger, config ruleImporterConfig, apiClient que
// loadGroups parses groups from a list of recording rule files.
func (importer *ruleImporter) loadGroups(_ context.Context, filenames []string) (errs []error) {
groups, errs := importer.ruleManager.LoadGroups(importer.config.evalInterval, labels.Labels{}, "", nil, filenames...)
groups, errs := importer.ruleManager.LoadGroups(importer.config.evalInterval, labels.Labels{}, "", nil, false, filenames...)
if errs != nil {
return errs
}
@ -80,10 +80,10 @@ func (importer *ruleImporter) loadGroups(_ context.Context, filenames []string)
// importAll evaluates all the recording rules and creates new time series and writes them to disk in blocks.
func (importer *ruleImporter) importAll(ctx context.Context) (errs []error) {
for name, group := range importer.groups {
level.Info(importer.logger).Log("backfiller", "processing group", "name", name)
importer.logger.Info("processing group", "component", "backfiller", "name", name)
for i, r := range group.Rules() {
level.Info(importer.logger).Log("backfiller", "processing rule", "id", i, "name", r.Name())
importer.logger.Info("processing rule", "component", "backfiller", "id", i, "name", r.Name())
if err := importer.importRule(ctx, r.Query().String(), r.Name(), r.Labels(), importer.config.start, importer.config.end, int64(importer.config.maxBlockDuration/time.Millisecond), group); err != nil {
errs = append(errs, err)
}
@ -124,7 +124,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName
return fmt.Errorf("query range: %w", err)
}
if warnings != nil {
level.Warn(importer.logger).Log("msg", "Range query returned warnings.", "warnings", warnings)
importer.logger.Warn("Range query returned warnings.", "warnings", warnings)
}
// To prevent races with compaction, a block writer only allows appending samples
@ -133,7 +133,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName
// also need to append samples throughout the whole block range. To allow that, we
// pretend that the block is twice as large here, but only really add sample in the
// original interval later.
w, err := tsdb.NewBlockWriter(log.NewNopLogger(), importer.config.outputDir, 2*blockDuration)
w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), importer.config.outputDir, 2*blockDuration)
if err != nil {
return fmt.Errorf("new block writer: %w", err)
}

View file

@ -21,9 +21,9 @@ import (
"testing"
"time"
"github.com/go-kit/log"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/labels"
@ -35,7 +35,7 @@ type mockQueryRangeAPI struct {
samples model.Matrix
}
func (mockAPI mockQueryRangeAPI) QueryRange(_ context.Context, query string, r v1.Range, opts ...v1.Option) (model.Value, v1.Warnings, error) {
func (mockAPI mockQueryRangeAPI) QueryRange(_ context.Context, _ string, _ v1.Range, _ ...v1.Option) (model.Value, v1.Warnings, error) {
return mockAPI.samples, v1.Warnings{}, nil
}
@ -43,6 +43,7 @@ const defaultBlockDuration = time.Duration(tsdb.DefaultBlockDuration) * time.Mil
// TestBackfillRuleIntegration is an integration test that runs all the rule importer code to confirm the parts work together.
func TestBackfillRuleIntegration(t *testing.T) {
t.Parallel()
const (
testMaxSampleCount = 50
testValue = 123
@ -72,6 +73,7 @@ func TestBackfillRuleIntegration(t *testing.T) {
}
for _, tt := range testCases {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
tmpDir := t.TempDir()
ctx := context.Background()
@ -161,7 +163,7 @@ func TestBackfillRuleIntegration(t *testing.T) {
}
func newTestRuleImporter(_ context.Context, start time.Time, tmpDir string, testSamples model.Matrix, maxBlockDuration time.Duration) (*ruleImporter, error) {
logger := log.NewNopLogger()
logger := promslog.NewNopLogger()
cfg := ruleImporterConfig{
outputDir: tmpDir,
start: start.Add(-10 * time.Hour),
@ -210,6 +212,7 @@ func createMultiRuleTestFiles(path string) error {
// TestBackfillLabels confirms that the labels in the rule file override the labels from the metrics
// received from Prometheus Query API, including the __name__ label.
func TestBackfillLabels(t *testing.T) {
t.Parallel()
tmpDir := t.TempDir()
ctx := context.Background()
@ -251,6 +254,7 @@ func TestBackfillLabels(t *testing.T) {
require.NoError(t, err)
t.Run("correct-labels", func(t *testing.T) {
t.Parallel()
selectedSeries := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "", ".*"))
for selectedSeries.Next() {
series := selectedSeries.At()

View file

@ -20,9 +20,9 @@ import (
"os"
"time"
"github.com/go-kit/log"
"github.com/google/go-cmp/cmp"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
@ -38,10 +38,10 @@ type sdCheckResult struct {
}
// CheckSD performs service discovery for the given job name and reports the results.
func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool, registerer prometheus.Registerer) int {
logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))
func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, _ prometheus.Registerer) int {
logger := promslog.New(&promslog.Config{})
cfg, err := config.LoadFile(sdConfigFiles, false, false, logger)
cfg, err := config.LoadFile(sdConfigFiles, false, logger)
if err != nil {
fmt.Fprintln(os.Stderr, "Cannot load config", err)
return failureExitCode
@ -114,7 +114,7 @@ outerLoop:
}
results := []sdCheckResult{}
for _, tgs := range sdCheckResults {
results = append(results, getSDCheckResult(tgs, scrapeConfig, noDefaultScrapePort)...)
results = append(results, getSDCheckResult(tgs, scrapeConfig)...)
}
res, err := json.MarshalIndent(results, "", " ")
@ -127,7 +127,7 @@ outerLoop:
return successExitCode
}
func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.ScrapeConfig, noDefaultScrapePort bool) []sdCheckResult {
func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.ScrapeConfig) []sdCheckResult {
sdCheckResults := []sdCheckResult{}
lb := labels.NewBuilder(labels.EmptyLabels())
for _, targetGroup := range targetGroups {
@ -144,7 +144,9 @@ func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.Sc
}
}
res, orig, err := scrape.PopulateLabels(lb, scrapeConfig, noDefaultScrapePort)
scrape.PopulateDiscoveredLabels(lb, scrapeConfig, target, targetGroup.Labels)
orig := lb.Labels()
res, err := scrape.PopulateLabels(lb, scrapeConfig, target, targetGroup.Labels)
result := sdCheckResult{
DiscoveredLabels: orig,
Labels: res,

View file

@ -29,6 +29,7 @@ import (
)
func TestSDCheckResult(t *testing.T) {
t.Parallel()
targetGroups := []*targetgroup.Group{{
Targets: []model.LabelSet{
map[model.LabelName]model.LabelValue{"__address__": "localhost:8080", "foo": "bar"},
@ -70,5 +71,5 @@ func TestSDCheckResult(t *testing.T) {
},
}
testutil.RequireEqual(t, expectedSDCheckResult, getSDCheckResult(targetGroups, scrapeConfig, true))
testutil.RequireEqual(t, expectedSDCheckResult, getSDCheckResult(targetGroups, scrapeConfig))
}

View file

@ -6,7 +6,7 @@ scrape_configs:
alerting:
alertmanagers:
- scheme: http
api_version: v1
api_version: v2
file_sd_configs:
- files:
- nonexistent_file.yml

View file

@ -0,0 +1,3 @@
scrape_configs:
- job_name: too_long_scrape_interval_test
scrape_interval: 10m

View file

@ -0,0 +1,33 @@
# Rules file for the --ignore-unknown-fields test. It contains fields that are
# not part of the rule group schema, such as "ownership" in the second group.
# With --ignore-unknown-fields, promtool should ignore these fields instead of
# returning an error.
groups:
- name: alerts
namespace: "foobar"
rules:
- alert: InstanceDown
expr: up == 0
for: 5m
labels:
severity: page
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
- alert: AlwaysFiring
expr: 1
- name: rules
ownership:
service: "test"
rules:
- record: job:test:count_over_time1m
expr: sum without(instance) (count_over_time(test[1m]))
# A recording rule that doesn't depend on input series.
- record: fixed_data
expr: 1
# Subquery with default resolution test.
- record: suquery_interval_test
expr: count_over_time(up[5m:])

View file

@ -0,0 +1,21 @@
# Minimal test case to see that --ignore-unknown-fields
# is working as expected. It should not return an error
# when any extra fields are present in the rules file.
rule_files:
- rules_extrafields.yml
evaluation_interval: 1m
tests:
- name: extra ownership field test
input_series:
- series: test
values: 1
promql_expr_test:
- expr: test
eval_time: 0
exp_samples:
- value: 1
labels: test

View file

@ -69,13 +69,13 @@ tests:
eval_time: 2m
exp_samples:
- labels: "test_histogram_repeat"
histogram: "{{count:2 sum:3 buckets:[2]}}"
histogram: "{{count:2 sum:3 counter_reset_hint:not_reset buckets:[2]}}"
- expr: test_histogram_increase
eval_time: 2m
exp_samples:
- labels: "test_histogram_increase"
histogram: "{{count:4 sum:5.6 buckets:[4]}}"
histogram: "{{count:4 sum:5.6 counter_reset_hint:not_reset buckets:[4]}}"
# Ensure a value is stale as soon as it is marked as such.
- expr: test_stale
@ -89,11 +89,11 @@ tests:
# Ensure lookback delta is respected, when a value is missing.
- expr: timestamp(test_missing)
eval_time: 5m
eval_time: 4m59s
exp_samples:
- value: 0
- expr: timestamp(test_missing)
eval_time: 5m1s
eval_time: 5m
exp_samples: []
# Minimal test case to check edge case of a single sample.
@ -113,7 +113,7 @@ tests:
- expr: count_over_time(fixed_data[1h])
eval_time: 1h
exp_samples:
- value: 61
- value: 60
- expr: timestamp(fixed_data)
eval_time: 1h
exp_samples:
@ -183,7 +183,7 @@ tests:
- expr: job:test:count_over_time1m
eval_time: 1m
exp_samples:
- value: 61
- value: 60
labels: 'job:test:count_over_time1m{job="test"}'
- expr: timestamp(job:test:count_over_time1m)
eval_time: 1m10s
@ -194,7 +194,7 @@ tests:
- expr: job:test:count_over_time1m
eval_time: 2m
exp_samples:
- value: 61
- value: 60
labels: 'job:test:count_over_time1m{job="test"}'
- expr: timestamp(job:test:count_over_time1m)
eval_time: 2m59s999ms

View file

@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"runtime"
@ -32,9 +33,10 @@ import (
"time"
"github.com/alecthomas/units"
"github.com/go-kit/log"
"go.uber.org/atomic"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/storage"
@ -60,7 +62,7 @@ type writeBenchmark struct {
memprof *os.File
blockprof *os.File
mtxprof *os.File
logger log.Logger
logger *slog.Logger
}
func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) error {
@ -68,7 +70,7 @@ func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) err
outPath: outPath,
samplesFile: samplesFile,
numMetrics: numMetrics,
logger: log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)),
logger: promslog.New(&promslog.Config{}),
}
if b.outPath == "" {
dir, err := os.MkdirTemp("", "tsdb_bench")
@ -87,9 +89,7 @@ func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) err
dir := filepath.Join(b.outPath, "storage")
l := log.With(b.logger, "ts", log.DefaultTimestampUTC, "caller", log.DefaultCaller)
st, err := tsdb.Open(dir, l, nil, &tsdb.Options{
st, err := tsdb.Open(dir, b.logger, nil, &tsdb.Options{
RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond),
MinBlockDuration: int64(2 * time.Hour / time.Millisecond),
}, tsdb.NewDBStats())
@ -315,12 +315,11 @@ func readPrometheusLabels(r io.Reader, n int) ([]labels.Labels, error) {
i := 0
for scanner.Scan() && i < n {
m := make([]labels.Label, 0, 10)
r := strings.NewReplacer("\"", "", "{", "", "}", "")
s := r.Replace(scanner.Text())
labelChunks := strings.Split(s, ",")
m := make([]labels.Label, 0, len(labelChunks))
for _, labelChunk := range labelChunks {
split := strings.Split(labelChunk, ":")
m = append(m, labels.Label{Name: split[0], Value: split[1]})
@ -367,25 +366,25 @@ func printBlocks(blocks []tsdb.BlockReader, writeHeader, humanReadable bool) {
fmt.Fprintf(tw,
"%v\t%v\t%v\t%v\t%v\t%v\t%v\t%v\n",
meta.ULID,
getFormatedTime(meta.MinTime, humanReadable),
getFormatedTime(meta.MaxTime, humanReadable),
getFormattedTime(meta.MinTime, humanReadable),
getFormattedTime(meta.MaxTime, humanReadable),
time.Duration(meta.MaxTime-meta.MinTime)*time.Millisecond,
meta.Stats.NumSamples,
meta.Stats.NumChunks,
meta.Stats.NumSeries,
getFormatedBytes(b.Size(), humanReadable),
getFormattedBytes(b.Size(), humanReadable),
)
}
}
func getFormatedTime(timestamp int64, humanReadable bool) string {
func getFormattedTime(timestamp int64, humanReadable bool) string {
if humanReadable {
return time.Unix(timestamp/1000, 0).UTC().String()
}
return strconv.FormatInt(timestamp, 10)
}
func getFormatedBytes(bytes int64, humanReadable bool) string {
func getFormattedBytes(bytes int64, humanReadable bool) string {
if humanReadable {
return units.Base2Bytes(bytes).String()
}
@ -405,7 +404,7 @@ func openBlock(path, blockID string) (*tsdb.DBReadOnly, tsdb.BlockReader, error)
}
}
b, err := db.Block(blockID)
b, err := db.Block(blockID, tsdb.DefaultPostingsDecoderFactory)
if err != nil {
return nil, nil, err
}
@ -589,7 +588,10 @@ func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExten
if err != nil {
return err
}
postings = index.Intersect(postings, index.NewListPostings(refs))
// Only intersect postings if matchers are specified.
if len(matchers) > 0 {
postings = index.Intersect(postings, index.NewListPostings(refs))
}
count := 0
for postings.Next() {
count++
@ -662,7 +664,7 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
fhchk, ok := chk.(*chunkenc.FloatHistogramChunk)
if !ok {
return fmt.Errorf("chunk is not FloatHistogramChunk")
return errors.New("chunk is not FloatHistogramChunk")
}
it := fhchk.Iterator(nil)
bucketCount := 0
@ -677,7 +679,7 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
hchk, ok := chk.(*chunkenc.HistogramChunk)
if !ok {
return fmt.Errorf("chunk is not HistogramChunk")
return errors.New("chunk is not HistogramChunk")
}
it := hchk.Iterator(nil)
bucketCount := 0
@ -733,7 +735,7 @@ func dumpSamples(ctx context.Context, dbDir, sandboxDirRoot string, mint, maxt i
for _, mset := range matcherSets {
sets = append(sets, q.Select(ctx, true, nil, mset...))
}
ss = storage.NewMergeSeriesSet(sets, storage.ChainedSeriesMerge)
ss = storage.NewMergeSeriesSet(sets, 0, storage.ChainedSeriesMerge)
} else {
ss = q.Select(ctx, false, nil, matcherSets[0]...)
}
@ -823,18 +825,32 @@ func checkErr(err error) int {
return 0
}
func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) int {
inputFile, err := fileutil.OpenMmapFile(path)
func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration, customLabels map[string]string) int {
var buf []byte
info, err := os.Stat(path)
if err != nil {
return checkErr(err)
}
defer inputFile.Close()
if info.Mode()&(os.ModeNamedPipe|os.ModeCharDevice) != 0 {
// Read the pipe chunks by chunks as it cannot be mmap-ed
buf, err = os.ReadFile(path)
if err != nil {
return checkErr(err)
}
} else {
inputFile, err := fileutil.OpenMmapFile(path)
if err != nil {
return checkErr(err)
}
defer inputFile.Close()
buf = inputFile.Bytes()
}
if err := os.MkdirAll(outputDir, 0o777); err != nil {
return checkErr(fmt.Errorf("create output dir: %w", err))
}
return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration))
return checkErr(backfill(5000, buf, outputDir, humanReadable, quiet, maxBlockDuration, customLabels))
}
func displayHistogram(dataType string, datas []int, total int) {
@ -866,16 +882,16 @@ func displayHistogram(dataType string, datas []int, total int) {
fmt.Println()
}
func generateBucket(min, max int) (start, end, step int) {
s := (max - min) / 10
func generateBucket(minVal, maxVal int) (start, end, step int) {
s := (maxVal - minVal) / 10
step = 10
for step < s && step <= 10000 {
step *= 10
}
start = min - min%step
end = max - max%step + step
start = minVal - minVal%step
end = maxVal - maxVal%step + step
return
}

View file

@ -0,0 +1,69 @@
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !windows
package main
import (
"bytes"
"io"
"math"
"os"
"path"
"syscall"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/tsdb"
)
// TestTSDBDumpOpenMetricsRoundTripPipe checks that OpenMetrics data fed to
// backfillOpenMetrics through a named pipe (FIFO) instead of a regular file is
// imported into a TSDB and dumps back out unchanged.
func TestTSDBDumpOpenMetricsRoundTripPipe(t *testing.T) {
	initialMetrics, err := os.ReadFile("testdata/dump-openmetrics-roundtrip-test.prom")
	require.NoError(t, err)
	initialMetrics = normalizeNewLine(initialMetrics)

	pipeDir := t.TempDir()
	dbDir := t.TempDir()

	// Create the named pipe that stands in for the input file.
	pipe := path.Join(pipeDir, "pipe")
	err = syscall.Mkfifo(pipe, 0o666)
	require.NoError(t, err)

	// Feed the pipe from a separate goroutine: opening a FIFO for writing
	// blocks until a reader opens it, and the reader is backfillOpenMetrics
	// below. The writer's outcome is handed back over a buffered channel
	// because require/FailNow must only be called from the test goroutine.
	writeErr := make(chan error, 1)
	go func() {
		writeErr <- func() error {
			in, err := os.OpenFile(pipe, os.O_WRONLY, os.ModeNamedPipe)
			if err != nil {
				return err
			}
			if _, err := io.Copy(in, bytes.NewReader(initialMetrics)); err != nil {
				// The copy error is the one worth reporting; closing is best effort here.
				in.Close()
				return err
			}
			return in.Close()
		}()
	}()

	// Import samples from OM format.
	code := backfillOpenMetrics(pipe, dbDir, false, false, 2*time.Hour, map[string]string{})
	require.Equal(t, 0, code)

	// A successful import means the reader saw EOF, i.e. the writer has closed
	// its end, so this receive does not block indefinitely.
	require.NoError(t, <-writeErr)

	db, err := tsdb.Open(dbDir, nil, nil, tsdb.DefaultOptions(), nil)
	require.NoError(t, err)
	t.Cleanup(func() {
		require.NoError(t, db.Close())
	})

	// Dump the blocks into OM format.
	dumpedMetrics := getDumpedSamples(t, dbDir, "", math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)

	// Should get back the initial metrics.
	require.Equal(t, string(initialMetrics), dumpedMetrics)
}

View file

@ -20,6 +20,7 @@ import (
"math"
"os"
"runtime"
"slices"
"strings"
"testing"
"time"
@ -31,6 +32,7 @@ import (
)
func TestGenerateBucket(t *testing.T) {
t.Parallel()
tcs := []struct {
min, max int
start, end, step int
@ -54,7 +56,7 @@ func TestGenerateBucket(t *testing.T) {
}
// getDumpedSamples dumps samples and returns them.
func getDumpedSamples(t *testing.T, path string, mint, maxt int64, match []string, formatter SeriesSetFormatter) string {
func getDumpedSamples(t *testing.T, databasePath, sandboxDirRoot string, mint, maxt int64, match []string, formatter SeriesSetFormatter) string {
t.Helper()
oldStdout := os.Stdout
@ -63,8 +65,8 @@ func getDumpedSamples(t *testing.T, path string, mint, maxt int64, match []strin
err := dumpSamples(
context.Background(),
path,
t.TempDir(),
databasePath,
sandboxDirRoot,
mint,
maxt,
match,
@ -95,13 +97,15 @@ func TestTSDBDump(t *testing.T) {
heavy_metric{foo="bar"} 5 4 3 2 1
heavy_metric{foo="foo"} 5 4 3 2 1
`)
t.Cleanup(func() { storage.Close() })
tests := []struct {
name string
mint int64
maxt int64
match []string
expectedDump string
name string
mint int64
maxt int64
sandboxDirRoot string
match []string
expectedDump string
}{
{
name: "default match",
@ -110,6 +114,14 @@ func TestTSDBDump(t *testing.T) {
match: []string{"{__name__=~'(?s:.*)'}"},
expectedDump: "testdata/dump-test-1.prom",
},
{
name: "default match with sandbox dir root set",
mint: math.MinInt64,
maxt: math.MaxInt64,
sandboxDirRoot: t.TempDir(),
match: []string{"{__name__=~'(?s:.*)'}"},
expectedDump: "testdata/dump-test-1.prom",
},
{
name: "same matcher twice",
mint: math.MinInt64,
@ -148,28 +160,51 @@ func TestTSDBDump(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.mint, tt.maxt, tt.match, formatSeriesSet)
dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.sandboxDirRoot, tt.mint, tt.maxt, tt.match, formatSeriesSet)
expectedMetrics, err := os.ReadFile(tt.expectedDump)
require.NoError(t, err)
expectedMetrics = normalizeNewLine(expectedMetrics)
// even though in case of one matcher samples are not sorted, the order in the cases above should stay the same.
require.Equal(t, string(expectedMetrics), dumpedMetrics)
// Sort both, because Prometheus does not guarantee the output order.
require.Equal(t, sortLines(string(expectedMetrics)), sortLines(dumpedMetrics))
})
}
}
// sortLines splits buf on newline characters, sorts the resulting lines in
// ascending order and joins them back together with newlines. Tests use it to
// compare dumps whose line order is not guaranteed.
func sortLines(buf string) string {
	const newline = "\n"

	sorted := strings.Split(buf, newline)
	slices.Sort(sorted)

	return strings.Join(sorted, newline)
}
func TestTSDBDumpOpenMetrics(t *testing.T) {
storage := promqltest.LoadedStorage(t, `
load 1m
my_counter{foo="bar", baz="abc"} 1 2 3 4 5
my_gauge{bar="foo", abc="baz"} 9 8 0 4 7
`)
t.Cleanup(func() { storage.Close() })
expectedMetrics, err := os.ReadFile("testdata/dump-openmetrics-test.prom")
require.NoError(t, err)
expectedMetrics = normalizeNewLine(expectedMetrics)
dumpedMetrics := getDumpedSamples(t, storage.Dir(), math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
require.Equal(t, string(expectedMetrics), dumpedMetrics)
tests := []struct {
name string
sandboxDirRoot string
}{
{
name: "default match",
},
{
name: "default match with sandbox dir root set",
sandboxDirRoot: t.TempDir(),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
expectedMetrics, err := os.ReadFile("testdata/dump-openmetrics-test.prom")
require.NoError(t, err)
expectedMetrics = normalizeNewLine(expectedMetrics)
dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.sandboxDirRoot, math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
require.Equal(t, sortLines(string(expectedMetrics)), sortLines(dumpedMetrics))
})
}
}
func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) {
@ -179,7 +214,7 @@ func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) {
dbDir := t.TempDir()
// Import samples from OM format
err = backfill(5000, initialMetrics, dbDir, false, false, 2*time.Hour)
err = backfill(5000, initialMetrics, dbDir, false, false, 2*time.Hour, map[string]string{})
require.NoError(t, err)
db, err := tsdb.Open(dbDir, nil, nil, tsdb.DefaultOptions(), nil)
require.NoError(t, err)
@ -188,7 +223,7 @@ func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) {
})
// Dump the blocks into OM format
dumpedMetrics := getDumpedSamples(t, dbDir, math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
dumpedMetrics := getDumpedSamples(t, dbDir, "", math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
// Should get back the initial metrics.
require.Equal(t, string(initialMetrics), dumpedMetrics)

View file

@ -18,6 +18,7 @@ import (
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"sort"
@ -25,13 +26,14 @@ import (
"strings"
"time"
"github.com/go-kit/log"
"github.com/google/go-cmp/cmp"
"github.com/grafana/regexp"
"github.com/nsf/jsondiff"
"github.com/prometheus/common/model"
"gopkg.in/yaml.v2"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/promql"
@ -39,12 +41,18 @@ import (
"github.com/prometheus/prometheus/promql/promqltest"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/util/junitxml"
)
// RulesUnitTest does unit testing of rules based on the unit testing files provided.
// More info about the file format can be found in the docs.
func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int {
func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug, ignoreUnknownFields bool, files ...string) int {
return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, debug, ignoreUnknownFields, files...)
}
func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug, ignoreUnknownFields bool, files ...string) int {
failed := false
junit := &junitxml.JUnitXML{}
var run *regexp.Regexp
if runStrings != nil {
@ -52,7 +60,7 @@ func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, dif
}
for _, f := range files {
if errs := ruleUnitTest(f, queryOpts, run, diffFlag); errs != nil {
if errs := ruleUnitTest(f, queryOpts, run, diffFlag, debug, ignoreUnknownFields, junit.Suite(f)); errs != nil {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
@ -64,25 +72,30 @@ func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, dif
}
fmt.Println()
}
err := junit.WriteXML(results)
if err != nil {
fmt.Fprintf(os.Stderr, "failed to write JUnit XML: %s\n", err)
}
if failed {
return failureExitCode
}
return successExitCode
}
func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool) []error {
fmt.Println("Unit Testing: ", filename)
func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag, debug, ignoreUnknownFields bool, ts *junitxml.TestSuite) []error {
b, err := os.ReadFile(filename)
if err != nil {
ts.Abort(err)
return []error{err}
}
var unitTestInp unitTestFile
if err := yaml.UnmarshalStrict(b, &unitTestInp); err != nil {
ts.Abort(err)
return []error{err}
}
if err := resolveAndGlobFilepaths(filepath.Dir(filename), &unitTestInp); err != nil {
ts.Abort(err)
return []error{err}
}
@ -91,29 +104,38 @@ func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *reg
}
evalInterval := time.Duration(unitTestInp.EvaluationInterval)
ts.Settime(time.Now().Format("2006-01-02T15:04:05"))
// Giving number for groups mentioned in the file for ordering.
// Lower number group should be evaluated before higher number group.
groupOrderMap := make(map[string]int)
for i, gn := range unitTestInp.GroupEvalOrder {
if _, ok := groupOrderMap[gn]; ok {
return []error{fmt.Errorf("group name repeated in evaluation order: %s", gn)}
err := fmt.Errorf("group name repeated in evaluation order: %s", gn)
ts.Abort(err)
return []error{err}
}
groupOrderMap[gn] = i
}
// Testing.
var errs []error
for _, t := range unitTestInp.Tests {
for i, t := range unitTestInp.Tests {
if !matchesRun(t.TestGroupName, run) {
continue
}
testname := t.TestGroupName
if testname == "" {
testname = fmt.Sprintf("unnamed#%d", i)
}
tc := ts.Case(testname)
if t.Interval == 0 {
t.Interval = unitTestInp.EvaluationInterval
}
ers := t.test(evalInterval, groupOrderMap, queryOpts, diffFlag, unitTestInp.RuleFiles...)
ers := t.test(testname, evalInterval, groupOrderMap, queryOpts, diffFlag, debug, ignoreUnknownFields, unitTestInp.RuleFiles...)
if ers != nil {
for _, e := range ers {
tc.Fail(e.Error())
}
errs = append(errs, ers...)
}
}
@ -176,7 +198,14 @@ type testGroup struct {
}
// test performs the unit tests.
func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) (outErr []error) {
func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag, debug, ignoreUnknownFields bool, ruleFiles ...string) (outErr []error) {
if debug {
testStart := time.Now()
fmt.Printf("DEBUG: Starting test %s\n", testname)
defer func() {
fmt.Printf("DEBUG: Test %s finished, took %v\n", testname, time.Since(testStart))
}()
}
// Setup testing suite.
suite, err := promqltest.NewLazyLoader(tg.seriesLoadingString(), queryOpts)
if err != nil {
@ -195,11 +224,11 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
QueryFunc: rules.EngineQueryFunc(suite.QueryEngine(), suite.Storage()),
Appendable: suite.Storage(),
Context: context.Background(),
NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {},
Logger: log.NewNopLogger(),
NotifyFunc: func(_ context.Context, _ string, _ ...*rules.Alert) {},
Logger: promslog.NewNopLogger(),
}
m := rules.NewManager(opts)
groupsMap, ers := m.LoadGroups(time.Duration(tg.Interval), tg.ExternalLabels, tg.ExternalURL, nil, ruleFiles...)
groupsMap, ers := m.LoadGroups(time.Duration(tg.Interval), tg.ExternalLabels, tg.ExternalURL, nil, ignoreUnknownFields, ruleFiles...)
if ers != nil {
return ers
}
@ -460,6 +489,32 @@ Outer:
}
}
if debug {
ts := tg.maxEvalTime()
// Potentially a test can be specified at a time with fractional seconds,
// which PromQL cannot represent, so round up to the next whole second.
ts = (ts + time.Second).Truncate(time.Second)
expr := fmt.Sprintf(`{__name__=~".+"}[%v]`, ts)
q, err := suite.QueryEngine().NewInstantQuery(context.Background(), suite.Queryable(), nil, expr, mint.Add(ts))
if err != nil {
fmt.Printf("DEBUG: Failed querying, expr: %q, err: %v\n", expr, err)
return errs
}
res := q.Exec(suite.Context())
if res.Err != nil {
fmt.Printf("DEBUG: Failed query exec, expr: %q, err: %v\n", expr, res.Err)
return errs
}
switch v := res.Value.(type) {
case promql.Matrix:
fmt.Printf("DEBUG: Dump of all data (input_series and rules) at %v:\n", ts)
fmt.Println(v.String())
default:
fmt.Printf("DEBUG: Got unexpected type %T\n", v)
return errs
}
}
if len(errs) > 0 {
return errs
}

View file

@ -14,14 +14,19 @@
package main
import (
"bytes"
"encoding/xml"
"fmt"
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/promql/promqltest"
"github.com/prometheus/prometheus/util/junitxml"
)
func TestRulesUnitTest(t *testing.T) {
t.Parallel()
type args struct {
files []string
}
@ -125,25 +130,75 @@ func TestRulesUnitTest(t *testing.T) {
want: 0,
},
}
reuseFiles := []string{}
reuseCount := [2]int{}
for _, tt := range tests {
if (tt.queryOpts == promqltest.LazyLoaderOpts{
EnableNegativeOffset: true,
} || tt.queryOpts == promqltest.LazyLoaderOpts{
EnableAtModifier: true,
}) {
reuseFiles = append(reuseFiles, tt.args.files...)
reuseCount[tt.want] += len(tt.args.files)
}
t.Run(tt.name, func(t *testing.T) {
if got := RulesUnitTest(tt.queryOpts, nil, false, tt.args.files...); got != tt.want {
t.Parallel()
if got := RulesUnitTest(tt.queryOpts, nil, false, false, false, tt.args.files...); got != tt.want {
t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want)
}
})
}
t.Run("Junit xml output ", func(t *testing.T) {
t.Parallel()
var buf bytes.Buffer
if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, false, false, reuseFiles...); got != 1 {
t.Errorf("RulesUnitTestResults() = %v, want 1", got)
}
var test junitxml.JUnitXML
output := buf.Bytes()
err := xml.Unmarshal(output, &test)
if err != nil {
fmt.Println("error in decoding XML:", err)
return
}
var total int
var passes int
var failures int
var cases int
total = len(test.Suites)
if total != len(reuseFiles) {
t.Errorf("JUnit output had %d testsuite elements; expected %d\n", total, len(reuseFiles))
}
for _, i := range test.Suites {
if i.FailureCount == 0 {
passes++
} else {
failures++
}
cases += len(i.Cases)
}
if total != passes+failures {
t.Errorf("JUnit output mismatch: Total testsuites (%d) does not equal the sum of passes (%d) and failures (%d).", total, passes, failures)
}
if cases < total {
t.Errorf("JUnit output had %d suites without test cases\n", total-cases)
}
})
}
func TestRulesUnitTestRun(t *testing.T) {
t.Parallel()
type args struct {
run []string
files []string
}
tests := []struct {
name string
args args
queryOpts promqltest.LazyLoaderOpts
want int
name string
args args
queryOpts promqltest.LazyLoaderOpts
want int
ignoreUnknownFields bool
}{
{
name: "Test all without run arg",
@ -177,10 +232,19 @@ func TestRulesUnitTestRun(t *testing.T) {
},
want: 1,
},
{
name: "Test all with extra fields",
args: args{
files: []string{"./testdata/rules_run_extrafields.yml"},
},
ignoreUnknownFields: true,
want: 0,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := RulesUnitTest(tt.queryOpts, tt.args.run, false, tt.args.files...)
t.Parallel()
got := RulesUnitTest(tt.queryOpts, tt.args.run, false, false, tt.ignoreUnknownFields, tt.args.files...)
require.Equal(t, tt.want, got)
})
}

View file

@ -16,6 +16,8 @@ package config
import (
"errors"
"fmt"
"log/slog"
"mime"
"net/url"
"os"
"path/filepath"
@ -25,18 +27,17 @@ import (
"time"
"github.com/alecthomas/units"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/regexp"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/sigv4"
"github.com/prometheus/sigv4"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/relabel"
"github.com/prometheus/prometheus/storage/remote/azuread"
"github.com/prometheus/prometheus/storage/remote/googleiam"
)
var (
@ -66,8 +67,13 @@ var (
}
)
// Recognized values of the metric_name_validation_scheme configuration
// setting. ScrapeConfig.Validate rejects any other non-empty value.
const (
// LegacyValidationConfig selects the legacy name validation scheme.
LegacyValidationConfig = "legacy"
// UTF8ValidationConfig selects UTF-8 name validation. ScrapeConfig.Validate
// treats an empty setting the same way as this value.
UTF8ValidationConfig = "utf8"
)
// Load parses the YAML input s into a Config.
func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, error) {
func Load(s string, logger *slog.Logger) (*Config, error) {
cfg := &Config{}
// If the entire config body is empty the UnmarshalYAML method is
// never called. We thus have to set the DefaultConfig at the entry
@ -79,10 +85,6 @@ func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, erro
return nil, err
}
if !expandExternalLabels {
return cfg, nil
}
b := labels.NewScratchBuilder(0)
cfg.GlobalConfig.ExternalLabels.Range(func(v labels.Label) {
newV := os.Expand(v.Value, func(s string) string {
@ -92,26 +94,41 @@ func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, erro
if v := os.Getenv(s); v != "" {
return v
}
level.Warn(logger).Log("msg", "Empty environment variable", "name", s)
logger.Warn("Empty environment variable", "name", s)
return ""
})
if newV != v.Value {
level.Debug(logger).Log("msg", "External label replaced", "label", v.Name, "input", v.Value, "output", newV)
logger.Debug("External label replaced", "label", v.Name, "input", v.Value, "output", newV)
}
// Note newV can be blank. https://github.com/prometheus/prometheus/issues/11024
b.Add(v.Name, newV)
})
cfg.GlobalConfig.ExternalLabels = b.Labels()
if !b.Labels().IsEmpty() {
cfg.GlobalConfig.ExternalLabels = b.Labels()
}
switch cfg.OTLPConfig.TranslationStrategy {
case UnderscoreEscapingWithSuffixes:
case "":
case NoUTF8EscapingWithSuffixes:
if cfg.GlobalConfig.MetricNameValidationScheme == LegacyValidationConfig {
return nil, errors.New("OTLP translation strategy NoUTF8EscapingWithSuffixes is not allowed when UTF8 is disabled")
}
default:
return nil, fmt.Errorf("unsupported OTLP translation strategy %q", cfg.OTLPConfig.TranslationStrategy)
}
cfg.loaded = true
return cfg, nil
}
// LoadFile parses the given YAML file into a Config.
func LoadFile(filename string, agentMode, expandExternalLabels bool, logger log.Logger) (*Config, error) {
// LoadFile parses and validates the given YAML file into a read-only Config.
// Callers should never write to or shallow copy the returned Config.
func LoadFile(filename string, agentMode bool, logger *slog.Logger) (*Config, error) {
content, err := os.ReadFile(filename)
if err != nil {
return nil, err
}
cfg, err := Load(string(content), expandExternalLabels, logger)
cfg, err := Load(string(content), logger)
if err != nil {
return nil, fmt.Errorf("parsing YAML file %s: %w", filename, err)
}
@ -139,6 +156,7 @@ var (
// DefaultConfig is the default top-level configuration.
DefaultConfig = Config{
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
}
// DefaultGlobalConfig is the default global configuration.
@ -160,13 +178,13 @@ var (
// DefaultScrapeConfig is the default scrape configuration.
DefaultScrapeConfig = ScrapeConfig{
// ScrapeTimeout, ScrapeInterval and ScrapeProtocols default to the configured globals.
ScrapeClassicHistograms: false,
MetricsPath: "/metrics",
Scheme: "http",
HonorLabels: false,
HonorTimestamps: true,
HTTPClientConfig: config.DefaultHTTPClientConfig,
EnableCompression: true,
AlwaysScrapeClassicHistograms: false,
MetricsPath: "/metrics",
Scheme: "http",
HonorLabels: false,
HonorTimestamps: true,
HTTPClientConfig: config.DefaultHTTPClientConfig,
EnableCompression: true,
}
// DefaultAlertmanagerConfig is the default alertmanager configuration.
@ -177,13 +195,18 @@ var (
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
DefaultRemoteWriteHTTPClientConfig = config.HTTPClientConfig{
FollowRedirects: true,
EnableHTTP2: false,
}
// DefaultRemoteWriteConfig is the default remote write configuration.
DefaultRemoteWriteConfig = RemoteWriteConfig{
RemoteTimeout: model.Duration(30 * time.Second),
ProtobufMessage: RemoteWriteProtoMsgV1,
QueueConfig: DefaultQueueConfig,
MetadataConfig: DefaultMetadataConfig,
HTTPClientConfig: config.DefaultHTTPClientConfig,
HTTPClientConfig: DefaultRemoteWriteHTTPClientConfig,
}
// DefaultQueueConfig is the default remote queue configuration.
@ -215,6 +238,7 @@ var (
// DefaultRemoteReadConfig is the default remote read configuration.
DefaultRemoteReadConfig = RemoteReadConfig{
RemoteTimeout: model.Duration(1 * time.Minute),
ChunkedReadLimit: DefaultChunkedReadLimit,
HTTPClientConfig: config.DefaultHTTPClientConfig,
FilterExternalLabels: true,
}
@ -227,6 +251,11 @@ var (
DefaultExemplarsConfig = ExemplarsConfig{
MaxExemplars: 100000,
}
// DefaultOTLPConfig is the default OTLP configuration.
DefaultOTLPConfig = OTLPConfig{
TranslationStrategy: UnderscoreEscapingWithSuffixes,
}
)
// Config is the top-level configuration for Prometheus's config files.
@ -242,9 +271,13 @@ type Config struct {
RemoteWriteConfigs []*RemoteWriteConfig `yaml:"remote_write,omitempty"`
RemoteReadConfigs []*RemoteReadConfig `yaml:"remote_read,omitempty"`
OTLPConfig OTLPConfig `yaml:"otlp,omitempty"`
loaded bool // Certain methods require configuration to use Load validation.
}
// SetDirectory joins any relative file paths with dir.
// This method writes to config, and it's not concurrency safe.
func (c *Config) SetDirectory(dir string) {
c.GlobalConfig.SetDirectory(dir)
c.AlertingConfig.SetDirectory(dir)
@ -274,24 +307,26 @@ func (c Config) String() string {
return string(b)
}
// GetScrapeConfigs returns the scrape configurations.
// GetScrapeConfigs returns the read-only, validated scrape configurations including
// the ones from the scrape_config_files.
// This method does not write to config, and it's concurrency safe (the pointer receiver is for efficiency).
// This method also assumes the Config was created by Load or LoadFile function, it returns error
// if it was not. We can't re-validate or apply globals here due to races,
// read more https://github.com/prometheus/prometheus/issues/15538.
func (c *Config) GetScrapeConfigs() ([]*ScrapeConfig, error) {
scfgs := make([]*ScrapeConfig, len(c.ScrapeConfigs))
if !c.loaded {
// Programmatic error, we warn before more confusing errors would happen due to lack of the globalization.
return nil, errors.New("scrape config cannot be fetched, main config was not validated and loaded correctly; should not happen")
}
scfgs := make([]*ScrapeConfig, len(c.ScrapeConfigs))
jobNames := map[string]string{}
for i, scfg := range c.ScrapeConfigs {
// We do these checks for library users that would not call validate in
// Unmarshal.
if err := scfg.Validate(c.GlobalConfig); err != nil {
return nil, err
}
if _, ok := jobNames[scfg.JobName]; ok {
return nil, fmt.Errorf("found multiple scrape configs with job name %q", scfg.JobName)
}
jobNames[scfg.JobName] = "main config file"
scfgs[i] = scfg
}
// Re-read and validate the dynamic scrape config rules.
for _, pat := range c.ScrapeConfigFiles {
fs, err := filepath.Glob(pat)
if err != nil {
@ -327,6 +362,7 @@ func (c *Config) GetScrapeConfigs() ([]*ScrapeConfig, error) {
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
// NOTE: This method should not be used outside of this package. Use Load or LoadFile instead.
func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
*c = DefaultConfig
// We want to set c to the defaults and then overwrite it with the input.
@ -363,18 +399,18 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
}
// Do global overrides and validate unique names.
// Do global overrides and validation.
jobNames := map[string]struct{}{}
for _, scfg := range c.ScrapeConfigs {
if err := scfg.Validate(c.GlobalConfig); err != nil {
return err
}
if _, ok := jobNames[scfg.JobName]; ok {
return fmt.Errorf("found multiple scrape configs with job name %q", scfg.JobName)
}
jobNames[scfg.JobName] = struct{}{}
}
rwNames := map[string]struct{}{}
for _, rwcfg := range c.RemoteWriteConfigs {
if rwcfg == nil {
@ -418,6 +454,8 @@ type GlobalConfig struct {
RuleQueryOffset model.Duration `yaml:"rule_query_offset,omitempty"`
// File to which PromQL queries are logged.
QueryLogFile string `yaml:"query_log_file,omitempty"`
// File to which scrape failures are logged.
ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"`
// The labels to add to any timeseries that this Prometheus instance scrapes.
ExternalLabels labels.Labels `yaml:"external_labels,omitempty"`
// An uncompressed response body larger than this many bytes will cause the
@ -441,6 +479,8 @@ type GlobalConfig struct {
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
// Allow UTF8 Metric and Label Names.
MetricNameValidationScheme string `yaml:"metric_name_validation_scheme,omitempty"`
}
// ScrapeProtocol represents supported protocol for scraping metrics.
@ -461,15 +501,30 @@ func (s ScrapeProtocol) Validate() error {
return nil
}
// HeaderMediaType returns the MIME media type for a particular ScrapeProtocol,
// i.e. the media type portion of its ScrapeProtocolsHeaders entry with any
// parameters (such as version) removed. It returns an empty string when the
// protocol has no entry in ScrapeProtocolsHeaders or when the entry cannot be
// parsed as a media type.
func (s ScrapeProtocol) HeaderMediaType() string {
	// Single comma-ok lookup: fetch the header and learn whether it exists at once.
	header, ok := ScrapeProtocolsHeaders[s]
	if !ok {
		return ""
	}
	mediaType, _, err := mime.ParseMediaType(header)
	if err != nil {
		return ""
	}
	return mediaType
}
var (
PrometheusProto ScrapeProtocol = "PrometheusProto"
PrometheusText0_0_4 ScrapeProtocol = "PrometheusText0.0.4"
PrometheusText1_0_0 ScrapeProtocol = "PrometheusText1.0.0"
OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1"
OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0"
UTF8NamesHeader string = model.EscapingKey + "=" + model.AllowUTF8
ScrapeProtocolsHeaders = map[ScrapeProtocol]string{
PrometheusProto: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited",
PrometheusText0_0_4: "text/plain;version=0.0.4",
PrometheusText1_0_0: "text/plain;version=1.0.0",
OpenMetricsText0_0_1: "application/openmetrics-text;version=0.0.1",
OpenMetricsText1_0_0: "application/openmetrics-text;version=1.0.0",
}
@ -479,6 +534,7 @@ var (
DefaultScrapeProtocols = []ScrapeProtocol{
OpenMetricsText1_0_0,
OpenMetricsText0_0_1,
PrometheusText1_0_0,
PrometheusText0_0_4,
}
@ -490,6 +546,7 @@ var (
PrometheusProto,
OpenMetricsText1_0_0,
OpenMetricsText0_0_1,
PrometheusText1_0_0,
PrometheusText0_0_4,
}
)
@ -515,6 +572,7 @@ func validateAcceptScrapeProtocols(sps []ScrapeProtocol) error {
// SetDirectory joins any relative file paths with dir.
func (c *GlobalConfig) SetDirectory(dir string) {
c.QueryLogFile = config.JoinDir(dir, c.QueryLogFile)
c.ScrapeFailureLogFile = config.JoinDir(dir, c.ScrapeFailureLogFile)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -577,6 +635,7 @@ func (c *GlobalConfig) isZero() bool {
c.EvaluationInterval == 0 &&
c.RuleQueryOffset == 0 &&
c.QueryLogFile == "" &&
c.ScrapeFailureLogFile == "" &&
c.ScrapeProtocols == nil
}
@ -614,10 +673,19 @@ type ScrapeConfig struct {
// The protocols to negotiate during a scrape. It tells clients what
// protocol are accepted by Prometheus and with what preference (most wanted is first).
// Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1,
// OpenMetricsText1.0.0, PrometheusText0.0.4.
// OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4.
ScrapeProtocols []ScrapeProtocol `yaml:"scrape_protocols,omitempty"`
// Whether to scrape a classic histogram that is also exposed as a native histogram.
ScrapeClassicHistograms bool `yaml:"scrape_classic_histograms,omitempty"`
// The fallback protocol to use if the Content-Type provided by the target
// is not provided, blank, or not one of the expected values.
// Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1,
// OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4.
ScrapeFallbackProtocol ScrapeProtocol `yaml:"fallback_scrape_protocol,omitempty"`
// Whether to scrape a classic histogram, even if it is also exposed as a native histogram.
AlwaysScrapeClassicHistograms bool `yaml:"always_scrape_classic_histograms,omitempty"`
// Whether to convert all scraped classic histograms into a native histogram with custom buckets.
ConvertClassicHistogramsToNHCB bool `yaml:"convert_classic_histograms_to_nhcb,omitempty"`
// File to which scrape failures are logged.
ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"`
// The HTTP resource path on which to fetch metrics from targets.
MetricsPath string `yaml:"metrics_path,omitempty"`
// The URL scheme with which to fetch metrics from targets.
@ -651,6 +719,8 @@ type ScrapeConfig struct {
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
// Allow UTF8 Metric and Label Names.
MetricNameValidationScheme string `yaml:"metric_name_validation_scheme,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types.
@ -668,6 +738,7 @@ type ScrapeConfig struct {
func (c *ScrapeConfig) SetDirectory(dir string) {
c.ServiceDiscoveryConfigs.SetDirectory(dir)
c.HTTPClientConfig.SetDirectory(dir)
c.ScrapeFailureLogFile = config.JoinDir(dir, c.ScrapeFailureLogFile)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -749,6 +820,9 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
if c.KeepDroppedTargets == 0 {
c.KeepDroppedTargets = globalConfig.KeepDroppedTargets
}
if c.ScrapeFailureLogFile == "" {
c.ScrapeFailureLogFile = globalConfig.ScrapeFailureLogFile
}
if c.ScrapeProtocols == nil {
c.ScrapeProtocols = globalConfig.ScrapeProtocols
@ -757,6 +831,25 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName)
}
if c.ScrapeFallbackProtocol != "" {
if err := c.ScrapeFallbackProtocol.Validate(); err != nil {
return fmt.Errorf("invalid fallback_scrape_protocol for scrape config with job name %q: %w", c.JobName, err)
}
}
switch globalConfig.MetricNameValidationScheme {
case LegacyValidationConfig:
case "", UTF8ValidationConfig:
if model.NameValidationScheme != model.UTF8Validation {
panic("utf8 name validation requested but model.NameValidationScheme is not set to UTF8")
}
default:
return fmt.Errorf("unknown name validation method specified, must be either 'legacy' or 'utf8', got %s", globalConfig.MetricNameValidationScheme)
}
if c.MetricNameValidationScheme == "" {
c.MetricNameValidationScheme = globalConfig.MetricNameValidationScheme
}
return nil
}
@ -919,6 +1012,7 @@ func (a AlertmanagerConfigs) ToMap() map[string]*AlertmanagerConfig {
// AlertmanagerAPIVersion represents a version of the
// github.com/prometheus/alertmanager/api, e.g. 'v1' or 'v2'.
// 'v1' is no longer supported.
type AlertmanagerAPIVersion string
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -948,7 +1042,7 @@ const (
)
var SupportedAlertmanagerAPIVersions = []AlertmanagerAPIVersion{
AlertmanagerAPIVersionV1, AlertmanagerAPIVersionV2,
AlertmanagerAPIVersionV2,
}
// AlertmanagerConfig configures how Alertmanagers can be discovered and communicated with.
@ -1000,7 +1094,7 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er
c.HTTPClientConfig.Authorization != nil || c.HTTPClientConfig.OAuth2 != nil
if httpClientConfigAuthEnabled && c.SigV4Config != nil {
return fmt.Errorf("at most one of basic_auth, authorization, oauth2, & sigv4 must be configured")
return errors.New("at most one of basic_auth, authorization, oauth2, & sigv4 must be configured")
}
// Check for users putting URLs in target groups.
@ -1085,8 +1179,9 @@ func (m RemoteWriteProtoMsgs) String() string {
}
var (
// RemoteWriteProtoMsgV1 represents the deprecated `prometheus.WriteRequest` protobuf
// message introduced in the https://prometheus.io/docs/specs/remote_write_spec/.
// RemoteWriteProtoMsgV1 represents the `prometheus.WriteRequest` protobuf
// message introduced in the https://prometheus.io/docs/specs/remote_write_spec/,
// which will eventually be deprecated.
//
// NOTE: This string is used for both HTTP header values and config value, so don't change
// this reference.
@ -1108,6 +1203,7 @@ type RemoteWriteConfig struct {
Name string `yaml:"name,omitempty"`
SendExemplars bool `yaml:"send_exemplars,omitempty"`
SendNativeHistograms bool `yaml:"send_native_histograms,omitempty"`
RoundRobinDNS bool `yaml:"round_robin_dns,omitempty"`
// ProtobufMessage specifies the protobuf message to use against the remote
// receiver as specified in https://prometheus.io/docs/specs/remote_write_spec_2_0/
ProtobufMessage RemoteWriteProtoMsg `yaml:"protobuf_message,omitempty"`
@ -1119,6 +1215,7 @@ type RemoteWriteConfig struct {
MetadataConfig MetadataConfig `yaml:"metadata_config,omitempty"`
SigV4Config *sigv4.SigV4Config `yaml:"sigv4,omitempty"`
AzureADConfig *azuread.AzureADConfig `yaml:"azuread,omitempty"`
GoogleIAMConfig *googleiam.Config `yaml:"google_iam,omitempty"`
}
// SetDirectory joins any relative file paths with dir.
@ -1156,17 +1253,33 @@ func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
return err
}
httpClientConfigAuthEnabled := c.HTTPClientConfig.BasicAuth != nil ||
c.HTTPClientConfig.Authorization != nil || c.HTTPClientConfig.OAuth2 != nil
return validateAuthConfigs(c)
}
if httpClientConfigAuthEnabled && (c.SigV4Config != nil || c.AzureADConfig != nil) {
return fmt.Errorf("at most one of basic_auth, authorization, oauth2, sigv4, & azuread must be configured")
// validateAuthConfigs validates that at most one of basic_auth, authorization, oauth2, sigv4, azuread or google_iam must be configured.
func validateAuthConfigs(c *RemoteWriteConfig) error {
var authConfigured []string
if c.HTTPClientConfig.BasicAuth != nil {
authConfigured = append(authConfigured, "basic_auth")
}
if c.SigV4Config != nil && c.AzureADConfig != nil {
return fmt.Errorf("at most one of basic_auth, authorization, oauth2, sigv4, & azuread must be configured")
if c.HTTPClientConfig.Authorization != nil {
authConfigured = append(authConfigured, "authorization")
}
if c.HTTPClientConfig.OAuth2 != nil {
authConfigured = append(authConfigured, "oauth2")
}
if c.SigV4Config != nil {
authConfigured = append(authConfigured, "sigv4")
}
if c.AzureADConfig != nil {
authConfigured = append(authConfigured, "azuread")
}
if c.GoogleIAMConfig != nil {
authConfigured = append(authConfigured, "google_iam")
}
if len(authConfigured) > 1 {
return fmt.Errorf("at most one of basic_auth, authorization, oauth2, sigv4, azuread or google_iam must be configured. Currently configured: %v", authConfigured)
}
return nil
}
@ -1185,7 +1298,7 @@ func validateHeadersForTracing(headers map[string]string) error {
func validateHeaders(headers map[string]string) error {
for header := range headers {
if strings.ToLower(header) == "authorization" {
return errors.New("authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, or azuread parameter")
return errors.New("authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, azuread or google_iam parameter")
}
if _, ok := reservedHeaders[strings.ToLower(header)]; ok {
return fmt.Errorf("%s is a reserved header. It must not be changed", header)
@ -1233,13 +1346,20 @@ type MetadataConfig struct {
MaxSamplesPerSend int `yaml:"max_samples_per_send,omitempty"`
}
const (
// DefaultChunkedReadLimit is the default maximum size, in bytes, of a protobuf
// frame that the client accepts: 5e+7 bytes, i.e. 50MB. This is equivalent to
// ~100k full XOR chunks and average labelset.
DefaultChunkedReadLimit = 5e+7
)
// RemoteReadConfig is the configuration for reading from remote storage.
type RemoteReadConfig struct {
URL *config.URL `yaml:"url"`
RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"`
Headers map[string]string `yaml:"headers,omitempty"`
ReadRecent bool `yaml:"read_recent,omitempty"`
Name string `yaml:"name,omitempty"`
URL *config.URL `yaml:"url"`
RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"`
ChunkedReadLimit uint64 `yaml:"chunked_read_limit,omitempty"`
Headers map[string]string `yaml:"headers,omitempty"`
ReadRecent bool `yaml:"read_recent,omitempty"`
Name string `yaml:"name,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types.
@ -1304,3 +1424,50 @@ func getGoGCEnv() int {
}
return DefaultRuntimeConfig.GoGC
}
// translationStrategyOption names a strategy for translating OTLP metric and
// label names to Prometheus. The supported values are declared below.
type translationStrategyOption string
var (
// NoUTF8EscapingWithSuffixes will accept metric/label names as they are.
// Unit and type suffixes may be added to metric names, according to certain rules.
NoUTF8EscapingWithSuffixes translationStrategyOption = "NoUTF8EscapingWithSuffixes"
// UnderscoreEscapingWithSuffixes is the default option for translating OTLP to Prometheus.
// This option will translate metric name characters that are not alphanumerics/underscores/colons to underscores,
// and label name characters that are not alphanumerics/underscores to underscores.
// Unit and type suffixes may be appended to metric names, according to certain rules.
UnderscoreEscapingWithSuffixes translationStrategyOption = "UnderscoreEscapingWithSuffixes"
)
// OTLPConfig is the configuration for writing to the OTLP endpoint.
type OTLPConfig struct {
// Names of the OTel resource attributes to promote. UnmarshalYAML trims
// surrounding whitespace from each entry and rejects empty or duplicated ones.
// NOTE(review): presumably these are promoted to labels on the resulting series — confirm.
PromoteResourceAttributes []string `yaml:"promote_resource_attributes,omitempty"`
// Strategy used to translate OTLP metric and label names; one of the
// translationStrategyOption values.
TranslationStrategy translationStrategyOption `yaml:"translation_strategy,omitempty"`
// NOTE(review): semantics are not visible here; presumably keeps the identifying
// resource attributes instead of dropping them — confirm against the OTLP write path.
KeepIdentifyingResourceAttributes bool `yaml:"keep_identifying_resource_attributes,omitempty"`
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
//
// The receiver is first reset to DefaultOTLPConfig and then overlaid with the
// decoded YAML. Afterwards every promoted resource attribute is trimmed of
// surrounding whitespace and written back in place; empty and duplicated
// entries are each reported, and all such problems are returned together as a
// single joined error.
func (c *OTLPConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
	*c = DefaultOTLPConfig
	// The alias type has no methods, so decoding into it does not recurse
	// back into this UnmarshalYAML.
	type plain OTLPConfig
	if decodeErr := unmarshal((*plain)(c)); decodeErr != nil {
		return decodeErr
	}

	var problems error
	known := make(map[string]struct{}, len(c.PromoteResourceAttributes))
	for idx := range c.PromoteResourceAttributes {
		name := strings.TrimSpace(c.PromoteResourceAttributes[idx])
		_, duplicate := known[name]
		switch {
		case name == "":
			problems = errors.Join(problems, errors.New("empty promoted OTel resource attribute"))
		case duplicate:
			problems = errors.Join(problems, fmt.Errorf("duplicated promoted OTel resource attribute %q", name))
		default:
			// First valid occurrence: remember it and store the trimmed form.
			known[name] = struct{}{}
			c.PromoteResourceAttributes[idx] = name
		}
	}
	return problems
}

View file

@ -18,6 +18,8 @@ package config
const ruleFilesConfigFile = "testdata/rules_abs_path.good.yml"
var ruleFilesExpectedConf = &Config{
loaded: true,
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
RuleFiles: []string{

View file

@ -16,6 +16,7 @@ package config
import (
"crypto/tls"
"encoding/json"
"fmt"
"net/url"
"os"
"path/filepath"
@ -23,10 +24,10 @@ import (
"time"
"github.com/alecthomas/units"
"github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@ -61,6 +62,11 @@ import (
"github.com/prometheus/prometheus/util/testutil"
)
// init switches the process-wide model.NameValidationScheme to UTF-8 validation
// before any test in this package runs.
func init() {
// This can be removed when the default validation scheme in common is updated.
model.NameValidationScheme = model.UTF8Validation
}
func mustParseURL(u string) *config.URL {
parsed, err := url.Parse(u)
if err != nil {
@ -77,14 +83,17 @@ const (
globLabelNameLengthLimit = 200
globLabelValueLengthLimit = 200
globalGoGC = 42
globScrapeFailureLogFile = "testdata/fail.log"
)
var expectedConf = &Config{
loaded: true,
GlobalConfig: GlobalConfig{
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EvaluationInterval: model.Duration(30 * time.Second),
QueryLogFile: "",
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EvaluationInterval: model.Duration(30 * time.Second),
QueryLogFile: "testdata/query.log",
ScrapeFailureLogFile: globScrapeFailureLogFile,
ExternalLabels: labels.FromStrings("foo", "bar", "monitor", "codelab"),
@ -134,7 +143,7 @@ var expectedConf = &Config{
},
},
FollowRedirects: true,
EnableHTTP2: true,
EnableHTTP2: false,
},
},
{
@ -150,18 +159,26 @@ var expectedConf = &Config{
KeyFile: filepath.FromSlash("testdata/valid_key_file"),
},
FollowRedirects: true,
EnableHTTP2: true,
EnableHTTP2: false,
},
Headers: map[string]string{"name": "value"},
},
},
OTLPConfig: OTLPConfig{
PromoteResourceAttributes: []string{
"k8s.cluster.name", "k8s.job.name", "k8s.namespace.name",
},
TranslationStrategy: UnderscoreEscapingWithSuffixes,
},
RemoteReadConfigs: []*RemoteReadConfig{
{
URL: mustParseURL("http://remote1/read"),
RemoteTimeout: model.Duration(1 * time.Minute),
ReadRecent: true,
Name: "default",
URL: mustParseURL("http://remote1/read"),
RemoteTimeout: model.Duration(1 * time.Minute),
ChunkedReadLimit: DefaultChunkedReadLimit,
ReadRecent: true,
Name: "default",
HTTPClientConfig: config.HTTPClientConfig{
FollowRedirects: true,
EnableHTTP2: false,
@ -171,6 +188,7 @@ var expectedConf = &Config{
{
URL: mustParseURL("http://remote3/read"),
RemoteTimeout: model.Duration(1 * time.Minute),
ChunkedReadLimit: DefaultChunkedReadLimit,
ReadRecent: false,
Name: "read_special",
RequiredMatchers: model.LabelSet{"job": "special"},
@ -190,18 +208,20 @@ var expectedConf = &Config{
{
JobName: "prometheus",
HonorLabels: true,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
HonorLabels: true,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFallbackProtocol: PrometheusText0_0_4,
ScrapeFailureLogFile: "testdata/fail_prom.log",
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -216,6 +236,15 @@ var expectedConf = &Config{
TLSConfig: config.TLSConfig{
MinVersion: config.TLSVersion(tls.VersionTLS10),
},
HTTPHeaders: &config.Headers{
Headers: map[string]config.Header{
"foo": {
Values: []string{"foobar"},
Secrets: []config.Secret{"bar", "foo"},
Files: []string{filepath.FromSlash("testdata/valid_password_file")},
},
},
},
},
ServiceDiscoveryConfigs: discovery.Configs{
@ -305,6 +334,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: 210,
LabelValueLengthLimit: 210,
ScrapeProtocols: []ScrapeProtocol{PrometheusText0_0_4},
ScrapeFailureLogFile: globScrapeFailureLogFile,
HTTPClientConfig: config.HTTPClientConfig{
BasicAuth: &config.BasicAuth{
@ -402,6 +432,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -457,6 +488,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: "/metrics",
Scheme: "http",
@ -490,6 +522,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -529,6 +562,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -568,6 +602,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -597,6 +632,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -634,6 +670,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -668,6 +705,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -709,6 +747,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -740,6 +779,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -774,6 +814,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -801,6 +842,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -831,6 +873,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: "/federate",
Scheme: DefaultScrapeConfig.Scheme,
@ -861,6 +904,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -891,6 +935,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -918,6 +963,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -953,6 +999,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -987,6 +1034,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1018,6 +1066,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1048,6 +1097,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1082,6 +1132,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1119,6 +1170,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1175,6 +1227,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1202,6 +1255,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
@ -1240,6 +1294,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
@ -1284,6 +1339,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1319,6 +1375,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
@ -1348,6 +1405,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1380,6 +1438,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1444,46 +1503,147 @@ var expectedConf = &Config{
},
}
// TestYAMLNotLongerSupportedAMApi checks that loading
// testdata/config_with_no_longer_supported_am_api_config.yml fails.
// NOTE(review): presumably the fixture selects an Alertmanager API version that
// is no longer supported — confirm against the testdata file.
func TestYAMLNotLongerSupportedAMApi(t *testing.T) {
_, err := LoadFile("testdata/config_with_no_longer_supported_am_api_config.yml", false, promslog.NewNopLogger())
// Only the presence of an error is asserted, not its message.
require.Error(t, err)
}
func TestYAMLRoundtrip(t *testing.T) {
want, err := LoadFile("testdata/roundtrip.good.yml", false, false, log.NewNopLogger())
want, err := LoadFile("testdata/roundtrip.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
require.NoError(t, err)
got := &Config{}
require.NoError(t, yaml.UnmarshalStrict(out, got))
got, err := Load(string(out), promslog.NewNopLogger())
require.NoError(t, err)
require.Equal(t, want, got)
}
func TestRemoteWriteRetryOnRateLimit(t *testing.T) {
want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, false, log.NewNopLogger())
want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
require.NoError(t, err)
got := &Config{}
require.NoError(t, yaml.UnmarshalStrict(out, got))
got, err := Load(string(out), promslog.NewNopLogger())
require.NoError(t, err)
require.True(t, got.RemoteWriteConfigs[0].QueueConfig.RetryOnRateLimit)
require.False(t, got.RemoteWriteConfigs[1].QueueConfig.RetryOnRateLimit)
}
// TestOTLPSanitizeResourceAttributes covers the handling of promoted OTel
// resource attributes: a good config must survive a YAML marshal/unmarshal
// round trip with the expected attribute list, and a bad config must report
// both the duplicated and the empty attribute.
func TestOTLPSanitizeResourceAttributes(t *testing.T) {
	t.Run("good config", func(t *testing.T) {
		path := filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml")
		loaded, err := LoadFile(path, false, promslog.NewNopLogger())
		require.NoError(t, err)

		encoded, err := yaml.Marshal(loaded)
		require.NoError(t, err)

		// Decode the marshalled form strictly into a fresh Config.
		roundTripped := Config{}
		err = yaml.UnmarshalStrict(encoded, &roundTripped)
		require.NoError(t, err)

		expected := []string{"k8s.cluster.name", "k8s.job.name", "k8s.namespace.name"}
		require.Equal(t, expected, roundTripped.OTLPConfig.PromoteResourceAttributes)
	})

	t.Run("bad config", func(t *testing.T) {
		path := filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml")
		_, loadErr := LoadFile(path, false, promslog.NewNopLogger())

		// Both problems must be present in the one returned error.
		for _, fragment := range []string{
			`duplicated promoted OTel resource attribute "k8s.job.name"`,
			`empty promoted OTel resource attribute`,
		} {
			require.ErrorContains(t, loadErr, fragment)
		}
	})
}
// TestOTLPAllowServiceNameInTargetInfo loads a config that enables
// keep_identifying_resource_attributes and checks that the flag is still set
// after a YAML marshal/unmarshal round trip.
func TestOTLPAllowServiceNameInTargetInfo(t *testing.T) {
	t.Run("good config", func(t *testing.T) {
		path := filepath.Join("testdata", "otlp_allow_keep_identifying_resource_attributes.good.yml")
		loaded, err := LoadFile(path, false, promslog.NewNopLogger())
		require.NoError(t, err)

		encoded, err := yaml.Marshal(loaded)
		require.NoError(t, err)

		// Decode the marshalled form strictly into a fresh Config.
		roundTripped := Config{}
		err = yaml.UnmarshalStrict(encoded, &roundTripped)
		require.NoError(t, err)

		require.True(t, roundTripped.OTLPConfig.KeepIdentifyingResourceAttributes)
	})
}
// TestOTLPAllowUTF8 exercises the OTLP translation_strategy setting with three
// fixtures (good, incompatible, bad). Each fixture is loaded twice: once via
// LoadFile and once by reading the file and passing its content to Load, so
// both entry points are checked against the same expectation.
func TestOTLPAllowUTF8(t *testing.T) {
// A valid config must load and select NoUTF8EscapingWithSuffixes.
t.Run("good config", func(t *testing.T) {
fpath := filepath.Join("testdata", "otlp_allow_utf8.good.yml")
// verify is shared by both loading paths; t.Helper attributes failures to the caller.
verify := func(t *testing.T, conf *Config, err error) {
t.Helper()
require.NoError(t, err)
require.Equal(t, NoUTF8EscapingWithSuffixes, conf.OTLPConfig.TranslationStrategy)
}
t.Run("LoadFile", func(t *testing.T) {
conf, err := LoadFile(fpath, false, promslog.NewNopLogger())
verify(t, conf, err)
})
t.Run("Load", func(t *testing.T) {
content, err := os.ReadFile(fpath)
require.NoError(t, err)
conf, err := Load(string(content), promslog.NewNopLogger())
verify(t, conf, err)
})
})
// A config that requests NoUTF8EscapingWithSuffixes while UTF8 is disabled
// must be rejected with the dedicated error message.
t.Run("incompatible config", func(t *testing.T) {
fpath := filepath.Join("testdata", "otlp_allow_utf8.incompatible.yml")
verify := func(t *testing.T, err error) {
t.Helper()
require.ErrorContains(t, err, `OTLP translation strategy NoUTF8EscapingWithSuffixes is not allowed when UTF8 is disabled`)
}
t.Run("LoadFile", func(t *testing.T) {
_, err := LoadFile(fpath, false, promslog.NewNopLogger())
verify(t, err)
})
t.Run("Load", func(t *testing.T) {
content, err := os.ReadFile(fpath)
require.NoError(t, err)
_, err = Load(string(content), promslog.NewNopLogger())
// Log the error so it shows up in verbose/failed test output.
t.Log("err", err)
verify(t, err)
})
})
// An unknown strategy name must be rejected as unsupported.
t.Run("bad config", func(t *testing.T) {
fpath := filepath.Join("testdata", "otlp_allow_utf8.bad.yml")
verify := func(t *testing.T, err error) {
t.Helper()
require.ErrorContains(t, err, `unsupported OTLP translation strategy "Invalid"`)
}
t.Run("LoadFile", func(t *testing.T) {
_, err := LoadFile(fpath, false, promslog.NewNopLogger())
verify(t, err)
})
t.Run("Load", func(t *testing.T) {
content, err := os.ReadFile(fpath)
require.NoError(t, err)
_, err = Load(string(content), promslog.NewNopLogger())
verify(t, err)
})
})
}
func TestLoadConfig(t *testing.T) {
// Parse a valid file that sets a global scrape timeout. This tests whether parsing
// an overwritten default field in the global config permanently changes the default.
_, err := LoadFile("testdata/global_timeout.good.yml", false, false, log.NewNopLogger())
_, err := LoadFile("testdata/global_timeout.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
c, err := LoadFile("testdata/conf.good.yml", false, false, log.NewNopLogger())
c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
require.Equal(t, expectedConf, c)
}
func TestScrapeIntervalLarger(t *testing.T) {
c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, false, log.NewNopLogger())
c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
require.Len(t, c.ScrapeConfigs, 1)
for _, sc := range c.ScrapeConfigs {
@ -1493,7 +1653,7 @@ func TestScrapeIntervalLarger(t *testing.T) {
// YAML marshaling must not reveal authentication credentials.
func TestElideSecrets(t *testing.T) {
c, err := LoadFile("testdata/conf.good.yml", false, false, log.NewNopLogger())
c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
secretRe := regexp.MustCompile(`\\u003csecret\\u003e|<secret>`)
@ -1503,38 +1663,38 @@ func TestElideSecrets(t *testing.T) {
yamlConfig := string(config)
matches := secretRe.FindAllStringIndex(yamlConfig, -1)
require.Len(t, matches, 22, "wrong number of secret matches found")
require.Len(t, matches, 24, "wrong number of secret matches found")
require.NotContains(t, yamlConfig, "mysecret",
"yaml marshal reveals authentication credentials.")
}
func TestLoadConfigRuleFilesAbsolutePath(t *testing.T) {
// Parse a valid file that sets rule files with an absolute path.
c, err := LoadFile(ruleFilesConfigFile, false, false, log.NewNopLogger())
c, err := LoadFile(ruleFilesConfigFile, false, promslog.NewNopLogger())
require.NoError(t, err)
require.Equal(t, ruleFilesExpectedConf, c)
}
func TestKubernetesEmptyAPIServer(t *testing.T) {
_, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, false, log.NewNopLogger())
_, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
}
func TestKubernetesWithKubeConfig(t *testing.T) {
_, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, false, log.NewNopLogger())
_, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
}
func TestKubernetesSelectors(t *testing.T) {
_, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, false, log.NewNopLogger())
_, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
_, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, false, log.NewNopLogger())
_, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
_, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, false, log.NewNopLogger())
_, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
_, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, false, log.NewNopLogger())
_, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
_, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, false, log.NewNopLogger())
_, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
}
@ -1800,7 +1960,7 @@ var expectedErrors = []struct {
},
{
filename: "remote_write_authorization_header.bad.yml",
errMsg: `authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, or azuread parameter`,
errMsg: `authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, azuread or google_iam parameter`,
},
{
filename: "remote_write_wrong_msg.bad.yml",
@ -2004,24 +2164,39 @@ var expectedErrors = []struct {
},
{
filename: "scrape_config_files_scrape_protocols.bad.yml",
errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4] for scrape config with job name "node"`,
errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0] for scrape config with job name "node"`,
},
{
filename: "scrape_config_files_scrape_protocols2.bad.yml",
errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols2.bad.yml: duplicated protocol in scrape_protocols, got [OpenMetricsText1.0.0 PrometheusProto OpenMetricsText1.0.0] for scrape config with job name "node"`,
},
{
filename: "scrape_config_files_fallback_scrape_protocol1.bad.yml",
errMsg: `parsing YAML file testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml: invalid fallback_scrape_protocol for scrape config with job name "node": unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0]`,
},
{
filename: "scrape_config_files_fallback_scrape_protocol2.bad.yml",
errMsg: `unmarshal errors`,
},
}
func TestBadConfigs(t *testing.T) {
model.NameValidationScheme = model.LegacyValidation
defer func() {
model.NameValidationScheme = model.UTF8Validation
}()
for _, ee := range expectedErrors {
_, err := LoadFile("testdata/"+ee.filename, false, false, log.NewNopLogger())
require.Error(t, err, "%s", ee.filename)
require.Contains(t, err.Error(), ee.errMsg,
_, err := LoadFile("testdata/"+ee.filename, false, promslog.NewNopLogger())
require.ErrorContains(t, err, ee.errMsg,
"Expected error for %s to contain %q but got: %s", ee.filename, ee.errMsg, err)
}
}
func TestBadStaticConfigsJSON(t *testing.T) {
model.NameValidationScheme = model.LegacyValidation
defer func() {
model.NameValidationScheme = model.UTF8Validation
}()
content, err := os.ReadFile("testdata/static_config.bad.json")
require.NoError(t, err)
var tg targetgroup.Group
@ -2030,6 +2205,10 @@ func TestBadStaticConfigsJSON(t *testing.T) {
}
func TestBadStaticConfigsYML(t *testing.T) {
model.NameValidationScheme = model.LegacyValidation
defer func() {
model.NameValidationScheme = model.UTF8Validation
}()
content, err := os.ReadFile("testdata/static_config.bad.yml")
require.NoError(t, err)
var tg targetgroup.Group
@ -2038,48 +2217,46 @@ func TestBadStaticConfigsYML(t *testing.T) {
}
func TestEmptyConfig(t *testing.T) {
c, err := Load("", false, log.NewNopLogger())
c, err := Load("", promslog.NewNopLogger())
require.NoError(t, err)
exp := DefaultConfig
exp.loaded = true
require.Equal(t, exp, *c)
require.Equal(t, 75, c.Runtime.GoGC)
}
func TestExpandExternalLabels(t *testing.T) {
// Clean up any TEST env variable that could exist on the system.
os.Setenv("TEST", "")
c, err := LoadFile("testdata/external_labels.good.yml", false, false, log.NewNopLogger())
require.NoError(t, err)
testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foo${TEST}bar", "foo", "${TEST}", "qux", "foo$${TEST}", "xyz", "foo$$bar"), c.GlobalConfig.ExternalLabels)
c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger())
c, err := LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foobar", "foo", "", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels)
os.Setenv("TEST", "TestValue")
c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger())
c, err = LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "fooTestValuebar", "foo", "TestValue", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels)
}
func TestAgentMode(t *testing.T) {
_, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, false, log.NewNopLogger())
_, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, promslog.NewNopLogger())
require.ErrorContains(t, err, "field alerting is not allowed in agent mode")
_, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, false, log.NewNopLogger())
_, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, promslog.NewNopLogger())
require.ErrorContains(t, err, "field alerting is not allowed in agent mode")
_, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, false, log.NewNopLogger())
_, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, promslog.NewNopLogger())
require.ErrorContains(t, err, "field rule_files is not allowed in agent mode")
_, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, false, log.NewNopLogger())
_, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, promslog.NewNopLogger())
require.ErrorContains(t, err, "field remote_read is not allowed in agent mode")
c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, false, log.NewNopLogger())
c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, promslog.NewNopLogger())
require.NoError(t, err)
require.Empty(t, c.RemoteWriteConfigs)
c, err = LoadFile("testdata/agent_mode.good.yml", true, false, log.NewNopLogger())
c, err = LoadFile("testdata/agent_mode.good.yml", true, promslog.NewNopLogger())
require.NoError(t, err)
require.Len(t, c.RemoteWriteConfigs, 1)
require.Equal(
@ -2090,10 +2267,10 @@ func TestAgentMode(t *testing.T) {
}
func TestEmptyGlobalBlock(t *testing.T) {
c, err := Load("global:\n", false, log.NewNopLogger())
c, err := Load("global:\n", promslog.NewNopLogger())
require.NoError(t, err)
exp := DefaultConfig
exp.Runtime = DefaultRuntimeConfig
exp.loaded = true
require.Equal(t, exp, *c)
}
@ -2245,7 +2422,7 @@ func TestGetScrapeConfigs(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
c, err := LoadFile(tc.configFile, false, false, log.NewNopLogger())
c, err := LoadFile(tc.configFile, false, promslog.NewNopLogger())
require.NoError(t, err)
scfgs, err := c.GetScrapeConfigs()
@ -2263,7 +2440,7 @@ func kubernetesSDHostURL() config.URL {
}
func TestScrapeConfigDisableCompression(t *testing.T) {
want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, false, log.NewNopLogger())
want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
@ -2274,3 +2451,118 @@ func TestScrapeConfigDisableCompression(t *testing.T) {
require.False(t, got.ScrapeConfigs[0].EnableCompression)
}
// TestScrapeConfigNameValidationSettings loads each fixture, round-trips the
// resulting config through YAML marshal/unmarshal, and checks the
// MetricNameValidationScheme of the first scrape config against the expected
// value for that combination of global and per-job settings.
func TestScrapeConfigNameValidationSettings(t *testing.T) {
	// Run under UTF-8 validation, then put back whatever scheme was active
	// before this test. Restoring a hard-coded value would leave the global in
	// a different state for tests that run afterwards.
	previousScheme := model.NameValidationScheme
	model.NameValidationScheme = model.UTF8Validation
	defer func() {
		model.NameValidationScheme = previousScheme
	}()

	tests := []struct {
		name         string
		inputFile    string
		expectScheme string
	}{
		{
			name:         "blank config implies default",
			inputFile:    "scrape_config_default_validation_mode",
			expectScheme: "",
		},
		{
			name:         "global setting implies local settings",
			inputFile:    "scrape_config_global_validation_mode",
			expectScheme: "legacy",
		},
		{
			name:         "local setting",
			inputFile:    "scrape_config_local_validation_mode",
			expectScheme: "legacy",
		},
		{
			name:         "local setting overrides global setting",
			inputFile:    "scrape_config_local_global_validation_mode",
			expectScheme: "utf8",
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, promslog.NewNopLogger())
			require.NoError(t, err)

			// Serialise the loaded config and parse it again strictly, so the
			// assertion covers the value that survives the round trip.
			out, err := yaml.Marshal(want)
			require.NoError(t, err)

			got := &Config{}
			require.NoError(t, yaml.UnmarshalStrict(out, got))

			require.Equal(t, tc.expectScheme, got.ScrapeConfigs[0].MetricNameValidationScheme)
		})
	}
}
// TestScrapeProtocolHeader checks the media type returned by
// ScrapeProtocol.HeaderMediaType for each protocol listed below, including a
// blank and an unrecognised protocol, both of which are expected to yield an
// empty string.
func TestScrapeProtocolHeader(t *testing.T) {
	type headerCase struct {
		name          string
		proto         ScrapeProtocol
		expectedValue string
	}

	cases := []headerCase{
		{name: "blank", proto: ScrapeProtocol(""), expectedValue: ""},
		{name: "invalid", proto: ScrapeProtocol("invalid"), expectedValue: ""},
		{name: "prometheus protobuf", proto: PrometheusProto, expectedValue: "application/vnd.google.protobuf"},
		{name: "prometheus text 0.0.4", proto: PrometheusText0_0_4, expectedValue: "text/plain"},
		{name: "prometheus text 1.0.0", proto: PrometheusText1_0_0, expectedValue: "text/plain"},
		{name: "openmetrics 0.0.1", proto: OpenMetricsText0_0_1, expectedValue: "application/openmetrics-text"},
		{name: "openmetrics 1.0.0", proto: OpenMetricsText1_0_0, expectedValue: "application/openmetrics-text"},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			require.Equal(t, c.expectedValue, c.proto.HeaderMediaType())
		})
	}
}
// Regression test against https://github.com/prometheus/prometheus/issues/15538
//
// GetScrapeConfigs must fail on a Config that was built directly rather than
// returned by Load, and must succeed on one that Load produced.
func TestGetScrapeConfigs_Loaded(t *testing.T) {
	t.Run("without load", func(t *testing.T) {
		unloaded := new(Config)

		_, fetchErr := unloaded.GetScrapeConfigs()
		require.EqualError(t, fetchErr, "scrape config cannot be fetched, main config was not validated and loaded correctly; should not happen")
	})

	t.Run("with load", func(t *testing.T) {
		loaded, loadErr := Load("", promslog.NewNopLogger())
		require.NoError(t, loadErr)

		_, fetchErr := loaded.GetScrapeConfigs()
		require.NoError(t, fetchErr)
	})
}

View file

@ -16,6 +16,8 @@ package config
const ruleFilesConfigFile = "testdata/rules_abs_path_windows.good.yml"
var ruleFilesExpectedConf = &Config{
loaded: true,
GlobalConfig: DefaultGlobalConfig,
Runtime: DefaultRuntimeConfig,
RuleFiles: []string{

92
config/reload.go Normal file
View file

@ -0,0 +1,92 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package config
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"os"
"path/filepath"
"gopkg.in/yaml.v2"
)
// ExternalFilesConfig is the subset of the main configuration file that
// GenerateChecksum decodes: the entries that name other files on disk.
type ExternalFilesConfig struct {
// RuleFiles holds the entries of the rule_files key.
RuleFiles []string `yaml:"rule_files"`
// ScrapeConfigFiles holds the entries of the scrape_config_files key.
ScrapeConfigFiles []string `yaml:"scrape_config_files"`
}
// GenerateChecksum generates a checksum of the YAML file and the files it references.
//
// The SHA-256 digest covers, in order: the raw bytes of the file at
// yamlFilePath, then every file matched by a rule_files pattern, then every
// file matched by a scrape_config_files pattern. Each matched file contributes
// a one-letter tag ("r" or "s"), its path and its content, each terminated by
// a NUL byte. Relative patterns are resolved against the directory of
// yamlFilePath; absolute patterns are used as given. The digest is returned
// hex-encoded.
//
// An error is returned when the main file or a matched file cannot be read,
// the YAML cannot be decoded, or a pattern is malformed.
func GenerateChecksum(yamlFilePath string) (string, error) {
	hash := sha256.New()

	yamlContent, err := os.ReadFile(yamlFilePath)
	if err != nil {
		return "", fmt.Errorf("error reading YAML file: %w", err)
	}
	if _, err := hash.Write(yamlContent); err != nil {
		return "", fmt.Errorf("error writing YAML file to hash: %w", err)
	}

	var config ExternalFilesConfig
	if err := yaml.Unmarshal(yamlContent, &config); err != nil {
		return "", fmt.Errorf("error unmarshalling YAML: %w", err)
	}

	dir := filepath.Dir(yamlFilePath)

	// resolve anchors relative patterns at the config file's directory.
	// Absolute patterns must be left alone: filepath.Join would re-root them
	// under dir, the glob would then match nothing, and changes to those files
	// would never be reflected in the checksum.
	resolve := func(patterns []string) []string {
		resolved := make([]string, len(patterns))
		for i, pattern := range patterns {
			if filepath.IsAbs(pattern) {
				resolved[i] = pattern
				continue
			}
			resolved[i] = filepath.Join(dir, pattern)
		}
		return resolved
	}

	// The order of the groups is part of the checksum: rule files first, then
	// scrape config files.
	groups := []struct {
		prefix   string
		patterns []string
	}{
		{prefix: "r", patterns: resolve(config.RuleFiles)},         // "r" for rule files
		{prefix: "s", patterns: resolve(config.ScrapeConfigFiles)}, // "s" for scrape config files
	}

	for _, group := range groups {
		for _, pattern := range group.patterns {
			matchingFiles, err := filepath.Glob(pattern)
			if err != nil {
				return "", fmt.Errorf("error finding files with pattern %q: %w", pattern, err)
			}

			for _, file := range matchingFiles {
				// Write prefix to the hash ("r" or "s") followed by \0, then
				// the file path.
				if _, err := hash.Write([]byte(group.prefix + "\x00" + file + "\x00")); err != nil {
					return "", fmt.Errorf("error writing %q path to hash: %w", file, err)
				}

				// Read and hash the content of the file.
				content, err := os.ReadFile(file)
				if err != nil {
					return "", fmt.Errorf("error reading file %s: %w", file, err)
				}
				// Content and its NUL terminator are written separately so the
				// file bytes are not copied just to append one byte; the
				// resulting digest is the same.
				if _, err := hash.Write(content); err != nil {
					return "", fmt.Errorf("error writing %q content to hash: %w", file, err)
				}
				if _, err := hash.Write([]byte{0}); err != nil {
					return "", fmt.Errorf("error writing %q content to hash: %w", file, err)
				}
			}
		}
	}

	return hex.EncodeToString(hash.Sum(nil)), nil
}

222
config/reload_test.go Normal file
View file

@ -0,0 +1,222 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package config
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/require"
)
// TestGenerateChecksum verifies that the checksum returned by GenerateChecksum
// changes when the main YAML file or a file it references is modified,
// emptied, deleted or dropped from the configuration, and that it returns to
// the original value once the original files are back in place.
func TestGenerateChecksum(t *testing.T) {
	tmpDir := t.TempDir()

	// Define paths for the temporary files.
	yamlFilePath := filepath.Join(tmpDir, "test.yml")
	ruleFilePath := filepath.Join(tmpDir, "rule_file.yml")
	scrapeConfigFilePath := filepath.Join(tmpDir, "scrape_config.yml")

	// Define initial and modified content for the files.
	originalRuleContent := "groups:\n- name: example\n rules:\n - alert: ExampleAlert"
	modifiedRuleContent := "groups:\n- name: example\n rules:\n - alert: ModifiedAlert"

	originalScrapeConfigContent := "scrape_configs:\n- job_name: example"
	modifiedScrapeConfigContent := "scrape_configs:\n- job_name: modified_example"

	// Define YAML content referencing the rule and scrape config files.
	yamlContent := `
rule_files:
- rule_file.yml
scrape_config_files:
- scrape_config.yml
`

	// fixtures lists every file the test touches together with its pristine content.
	fixtures := []struct{ path, content string }{
		{ruleFilePath, originalRuleContent},
		{scrapeConfigFilePath, originalScrapeConfigContent},
		{yamlFilePath, yamlContent},
	}

	// restoreFixtures (re)writes all fixture files with their pristine content.
	restoreFixtures := func() error {
		for _, f := range fixtures {
			if err := os.WriteFile(f.path, []byte(f.content), 0o644); err != nil {
				return err
			}
		}
		return nil
	}

	// writeFile overwrites path with content, aborting the calling test on error.
	writeFile := func(t *testing.T, path, content string) {
		t.Helper()
		require.NoError(t, os.WriteFile(path, []byte(content), 0o644))
	}

	// Write initial content to files.
	require.NoError(t, restoreFixtures())

	// Generate the original checksum.
	originalChecksum := calculateChecksum(t, yamlFilePath)

	// Each case applies a single change that must alter the checksum.
	testCases := []struct {
		name   string
		mutate func(t *testing.T)
	}{
		{
			name: "Rule File Change",
			mutate: func(t *testing.T) {
				writeFile(t, ruleFilePath, modifiedRuleContent)
			},
		},
		{
			name: "Scrape Config Change",
			mutate: func(t *testing.T) {
				writeFile(t, scrapeConfigFilePath, modifiedScrapeConfigContent)
			},
		},
		{
			name: "Rule File Deletion",
			mutate: func(t *testing.T) {
				require.NoError(t, os.Remove(ruleFilePath))
			},
		},
		{
			name: "Scrape Config Deletion",
			mutate: func(t *testing.T) {
				require.NoError(t, os.Remove(scrapeConfigFilePath))
			},
		},
		{
			name: "Main File Change",
			mutate: func(t *testing.T) {
				writeFile(t, yamlFilePath, `
global:
scrape_interval: 3s
rule_files:
- rule_file.yml
scrape_config_files:
- scrape_config.yml
`)
			},
		},
		{
			name: "Rule File Removed from YAML Config",
			mutate: func(t *testing.T) {
				writeFile(t, yamlFilePath, `
scrape_config_files:
- scrape_config.yml
`)
			},
		},
		{
			name: "Scrape Config Removed from YAML Config",
			mutate: func(t *testing.T) {
				writeFile(t, yamlFilePath, `
rule_files:
- rule_file.yml
`)
			},
		},
		{
			name: "Empty Rule File",
			mutate: func(t *testing.T) {
				writeFile(t, ruleFilePath, "")
			},
		},
		{
			name: "Empty Scrape Config File",
			mutate: func(t *testing.T) {
				writeFile(t, scrapeConfigFilePath, "")
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Put the fixtures back even when an assertion below aborts the
			// subtest, so one failure does not cascade into later subtests.
			defer func() {
				if err := restoreFixtures(); err != nil {
					t.Errorf("restoring fixtures: %v", err)
				}
			}()

			tc.mutate(t)

			// Checksum should change.
			require.NotEqual(t, originalChecksum, calculateChecksum(t, yamlFilePath))

			// Revert to the pristine files.
			require.NoError(t, restoreFixtures())

			// Checksum should return to the original.
			require.Equal(t, originalChecksum, calculateChecksum(t, yamlFilePath))
		})
	}
}
// calculateChecksum generates a checksum for the given YAML file path.
// It aborts the calling test if GenerateChecksum returns an error or an empty
// checksum.
func calculateChecksum(t *testing.T, yamlFilePath string) string {
	// Report assertion failures at the caller's line, not inside this helper.
	t.Helper()

	checksum, err := GenerateChecksum(yamlFilePath)
	require.NoError(t, err)
	require.NotEmpty(t, checksum)
	return checksum
}

View file

@ -8,6 +8,8 @@ global:
label_limit: 30
label_name_length_limit: 200
label_value_length_limit: 200
query_log_file: query.log
scrape_failure_log_file: fail.log
# scrape_timeout is set to the global default (10s).
external_labels:
@ -45,6 +47,9 @@ remote_write:
headers:
name: value
otlp:
promote_resource_attributes: ["k8s.cluster.name", "k8s.job.name", "k8s.namespace.name"]
remote_read:
- url: http://remote1/read
read_recent: true
@ -69,6 +74,9 @@ scrape_configs:
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
fallback_scrape_protocol: PrometheusText0.0.4
scrape_failure_log_file: fail_prom.log
file_sd_configs:
- files:
- foo/*.slow.json
@ -84,6 +92,12 @@ scrape_configs:
my: label
your: label
http_headers:
foo:
values: ["foobar"]
secrets: ["bar", "foo"]
files: ["valid_password_file"]
relabel_configs:
- source_labels: [job, __meta_dns_name]
regex: (.*)some-[regex]

View file

@ -0,0 +1,7 @@
alerting:
alertmanagers:
- scheme: http
api_version: v1
file_sd_configs:
- files:
- nonexistent_file.yml

View file

@ -1,4 +1,6 @@
# Two scrape configs with the same job names are not allowed.
global:
metric_name_validation_scheme: legacy
scrape_configs:
- job_name: prometheus
- job_name: service-x

View file

@ -1,3 +1,5 @@
global:
metric_name_validation_scheme: legacy
scrape_configs:
- job_name: prometheus
relabel_configs:

View file

@ -0,0 +1,2 @@
otlp:
keep_identifying_resource_attributes: true

View file

@ -0,0 +1,4 @@
global:
metric_name_validation_scheme: legacy
otlp:
translation_strategy: Invalid

View file

@ -0,0 +1,2 @@
otlp:
translation_strategy: NoUTF8EscapingWithSuffixes

View file

@ -0,0 +1,4 @@
global:
metric_name_validation_scheme: legacy
otlp:
translation_strategy: NoUTF8EscapingWithSuffixes

View file

@ -0,0 +1,2 @@
otlp:
promote_resource_attributes: ["k8s.cluster.name", " k8s.job.name ", "k8s.namespace.name", "k8s.job.name", ""]

View file

@ -0,0 +1,2 @@
otlp:
promote_resource_attributes: ["k8s.cluster.name", " k8s.job.name ", "k8s.namespace.name"]

View file

@ -0,0 +1,2 @@
scrape_configs:
- job_name: prometheus

View file

@ -0,0 +1,5 @@
scrape_configs:
- job_name: node
fallback_scrape_protocol: "prometheusproto"
static_configs:
- targets: ['localhost:8080']

View file

@ -0,0 +1,5 @@
scrape_configs:
- job_name: node
fallback_scrape_protocol: ["OpenMetricsText1.0.0", "PrometheusText0.0.4"]
static_configs:
- targets: ['localhost:8080']

View file

@ -0,0 +1,4 @@
global:
metric_name_validation_scheme: legacy
scrape_configs:
- job_name: prometheus

View file

@ -0,0 +1,5 @@
global:
metric_name_validation_scheme: legacy
scrape_configs:
- job_name: prometheus
metric_name_validation_scheme: utf8

View file

@ -0,0 +1,3 @@
scrape_configs:
- job_name: prometheus
metric_name_validation_scheme: legacy

View file

@ -1,82 +0,0 @@
{{/* vim: set ft=html: */}}
{{/* Navbar, should be passed . */}}
{{ define "navbar" }}
<nav class="navbar fixed-top navbar-expand-sm navbar-dark bg-dark">
<div class="container-fluid">
<!-- Brand and toggle get grouped for better mobile display -->
<div class="navbar-header">
<button type="button" class="navbar-toggler" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false" aria-controls="navbar-nav" aria-label="toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<a class="navbar-brand" href="{{ pathPrefix }}/">Prometheus</a>
</div>
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav">
<li class="nav-item"><a class="nav-link" href="{{ pathPrefix }}/alerts">Alerts</a></li>
<li class="nav-item"><a class="nav-link" href="https://www.pagerduty.com/">PagerDuty</a></li>
</ul>
</div>
</div>
</nav>
{{ end }}
{{/* LHS menu, should be passed . */}}
{{ define "menu" }}
<div class="prom_lhs_menu row">
<nav class="col-md-2 md-block bg-dark sidebar prom_lhs_menu_nav">
<div class="sidebar-sticky">
<ul class="nav flex-column">
{{ template "_menuItem" (args . "index.html.example" "Overview") }}
{{ if query "up{job='node'}" }}
{{ template "_menuItem" (args . "node.html" "Node") }}
{{ if match "^node" .Path }}
{{ if .Params.instance }}
<ul>
<li {{ if eq .Path "node-overview.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
<a class="nav-link" href="node-overview.html?instance={{ .Params.instance }}">{{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</a>
</li>
<ul>
<li {{ if eq .Path "node-cpu.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
<a class="nav-link" href="node-cpu.html?instance={{ .Params.instance }}">CPU</a>
</li>
<li {{ if eq .Path "node-disk.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
<a class="nav-link" href="node-disk.html?instance={{ .Params.instance }}">Disk</a>
</li>
</ul>
</ul>
{{ end }}
{{ end }}
{{ end }}
{{ if query "up{job='prometheus'}" }}
{{ template "_menuItem" (args . "prometheus.html" "Prometheus") }}
{{ if match "^prometheus" .Path }}
{{ if .Params.instance }}
<ul>
<li {{ if eq .Path "prometheus-overview.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
<a class="nav-link" href="prometheus-overview.html?instance={{ .Params.instance }}">{{.Params.instance }}</a>
</li>
</ul>
{{ end }}
{{ end }}
{{ end }}
</ul>
</div>
</nav>
</div>
{{ end }}
{{/* Helper, pass (args . path name) */}}
{{ define "_menuItem" }}
<li {{ if eq .arg0.Path .arg1 }} class="prom_lhs_menu_selected nav-item" {{ end }}><a class="nav-link" href="{{ .arg1 }}">{{ .arg2 }}</a></li>
{{ end }}

View file

@ -1,138 +0,0 @@
{{/* vim: set ft=html: */}}
{{/* Load Prometheus console library JS/CSS. Should go in <head> */}}
{{ define "prom_console_head" }}
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/classic/static/vendor/rickshaw/rickshaw.min.css">
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/classic/static/vendor/bootstrap-4.5.2/css/bootstrap.min.css">
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/classic/static/css/prom_console.css">
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/classic/static/vendor/bootstrap4-glyphicons/css/bootstrap-glyphicons.min.css">
<script src="{{ pathPrefix }}/classic/static/vendor/rickshaw/vendor/d3.v3.js"></script>
<script src="{{ pathPrefix }}/classic/static/vendor/rickshaw/vendor/d3.layout.min.js"></script>
<script src="{{ pathPrefix }}/classic/static/vendor/rickshaw/rickshaw.min.js"></script>
<script src="{{ pathPrefix }}/classic/static/vendor/js/jquery-3.5.1.min.js"></script>
<script src="{{ pathPrefix }}/classic/static/vendor/js/popper.min.js"></script>
<script src="{{ pathPrefix }}/classic/static/vendor/bootstrap-4.5.2/js/bootstrap.min.js"></script>
<script>
var PATH_PREFIX = "{{ pathPrefix }}";
</script>
<script src="{{ pathPrefix }}/classic/static/js/prom_console.js"></script>
{{ end }}
{{/* Top of all pages. */}}
{{ define "head" -}}
<!doctype html>
<html lang="en">
<head>
{{ template "prom_console_head" }}
</head>
<body>
{{ template "navbar" . }}
{{ template "menu" . }}
{{ end }}
{{ define "__prom_query_drilldown_noop" }}{{ . }}{{ end }}
{{ define "humanize" }}{{ humanize . }}{{ end }}
{{ define "humanizeNoSmallPrefix" }}{{ if and (lt . 1.0) (gt . -1.0) }}{{ printf "%.3g" . }}{{ else }}{{ humanize . }}{{ end }}{{ end }}
{{ define "humanize1024" }}{{ humanize1024 . }}{{ end }}
{{ define "humanizeDuration" }}{{ humanizeDuration . }}{{ end }}
{{ define "humanizePercentage" }}{{ humanizePercentage . }}{{ end }}
{{ define "humanizeTimestamp" }}{{ humanizeTimestamp . }}{{ end }}
{{ define "printf.1f" }}{{ printf "%.1f" . }}{{ end }}
{{ define "printf.3g" }}{{ printf "%.3g" . }}{{ end }}
{{/* prom_query_drilldown (args expr suffix? renderTemplate?)
Displays the result of the expression, with a link to /graph for it.
renderTemplate is the name of the template to use to render the value.
*/}}
{{ define "prom_query_drilldown" }}
{{ $expr := .arg0 }}{{ $suffix := (or .arg1 "") }}{{ $renderTemplate := (or .arg2 "__prom_query_drilldown_noop") }}
<a class="prom_query_drilldown" href="{{ pathPrefix }}{{ graphLink $expr }}">{{ with query $expr }}{{tmpl $renderTemplate ( . | first | value )}}{{ $suffix }}{{ else }}-{{ end }}</a>
{{ end }}
{{ define "prom_path" }}/consoles/{{ .Path }}?{{ range $param, $value := .Params }}{{ $param }}={{ $value }}&amp;{{ end }}{{ end }}
{{ define "prom_right_table_head" }}
<div class="prom_console_rhs">
<table class="table table-bordered table-hover table-sm">
{{ end }}
{{ define "prom_right_table_tail" }}
</table>
</div>
{{ end }}
{{/* RHS table head, pass job name. Should be used after prom_right_table_head. */}}
{{ define "prom_right_table_job_head" }}
<tr>
<th>{{ . }}</th>
<th>{{ template "prom_query_drilldown" (args (printf "sum(up{job='%s'})" .)) }} / {{ template "prom_query_drilldown" (args (printf "count(up{job='%s'})" .)) }}</th>
</tr>
<tr>
<td>CPU</td>
<td>{{ template "prom_query_drilldown" (args (printf "avg by(job)(irate(process_cpu_seconds_total{job='%s'}[5m]))" .) "s/s" "humanizeNoSmallPrefix") }}</td>
</tr>
<tr>
<td>Memory</td>
<td>{{ template "prom_query_drilldown" (args (printf "avg by(job)(process_resident_memory_bytes{job='%s'})" .) "B" "humanize1024") }}</td>
</tr>
{{ end }}
{{ define "prom_content_head" }}
<div class="prom_console_content">
<div class="container-fluid">
{{ template "prom_graph_timecontrol" . }}
{{ end }}
{{ define "prom_content_tail" }}
</div>
</div>
{{ end }}
{{ define "prom_graph_timecontrol" }}
<div class="prom_graph_timecontrol">
<div class="prom_graph_timecontrol_inner">
<div class="prom_graph_timecontrol_group ">
<button class="btn btn-light pull-left" type="button" id="prom_graph_duration_shrink" title="Shrink the time range.">
<i class="glyphicon glyphicon-minus"></i>
</button><!-- Comments between elements to remove spaces
--><input class="input pull-left align-middle" size="3" title="Time range of graph" type="text" id="prom_graph_duration"><!--
--><button class="btn btn-light pull-left" type="button" id="prom_graph_duration_grow" title="Grow the time range.">
<i class="glyphicon glyphicon-plus"></i>
</button>
</div>
<div class="prom_graph_timecontrol_group ">
<button class="btn btn-light pull-left" type="button" id="prom_graph_time_back" title="Rewind the end time.">
<i class="glyphicon glyphicon-backward"></i>
</button><!--
--><input class="input pull-left align-middle" title="End time of graph" placeholder="Until" type="text" id="prom_graph_time_end" size="16" value=""><!--
--><button class="btn btn-light pull-left" type="button" id="prom_graph_time_forward" title="Advance the end time.">
<i class="glyphicon glyphicon-forward"></i>
</button>
</div>
<div class="prom_graph_timecontrol_group ">
<div class="btn-group dropup prom_graph_timecontrol_refresh pull-left">
<button type="button" class="btn btn-light pull-left" id="prom_graph_refresh_button" title="Refresh.">
<i class="glyphicon glyphicon-repeat"></i>
<span class="icon-repeat"></span>
(<span id="prom_graph_refresh_button_value">Off</span>)
</button>
<button type="button" class="btn btn-light pull-left dropdown-toggle" data-toggle="dropdown" title="Set autorefresh." aria-haspopup="true" aria-expanded="false">
<span class="caret"></span>&nbsp;
</button>
<ul class="dropdown-menu" id="prom_graph_refresh_intervals" role="menu">
</ul>
</div>
</div>
</div>
<script>
new PromConsole.TimeControl();
</script>
</div>
{{ end }}
{{/* Bottom of all pages. */}}
{{ define "tail" }}
</body>
</html>
{{ end }}

View file

@ -1,28 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<h1>Overview</h1>
<p>These are example consoles for Prometheus.</p>
<p>These consoles expect exporters to have the following job labels:</p>
<table class="table table-sm table-striped table-bordered" style="width: 0%">
<tr>
<th>Exporter</th>
<th>Job label</th>
</tr>
<tr>
<td>Node Exporter</td>
<td><code>node</code></td>
</tr>
<tr>
<td>Prometheus</td>
<td><code>prometheus</code></td>
</tr>
</table>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -1,60 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
<tr>
<th colspan="2">CPU(s): {{ template "prom_query_drilldown" (args (printf "scalar(count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'})))" .Params.instance)) }}</th>
</tr>
{{ range printf "sum by (mode)(irate(node_cpu_seconds_total{job='node',instance='%s'}[5m])) * 100 / scalar(count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'})))" .Params.instance .Params.instance | query | sortByLabel "mode" }}
<tr>
<td>{{ .Labels.mode | title }} CPU</td>
<td>{{ .Value | printf "%.1f" }}%</td>
</tr>
{{ end }}
<tr><th colspan="2">Misc</th></tr>
<tr>
<td>Processes Running</td>
<td>{{ template "prom_query_drilldown" (args (printf "node_procs_running{job='node',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
<td>Processes Blocked</td>
<td>{{ template "prom_query_drilldown" (args (printf "node_procs_blocked{job='node',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
<td>Forks</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_forks_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }}</td>
</tr>
<tr>
<td>Context Switches</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_context_switches_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }}</td>
</tr>
<tr>
<td>Interrupts</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_intr_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }}</td>
</tr>
<tr>
<td>1m Loadavg</td>
<td>{{ template "prom_query_drilldown" (args (printf "node_load1{job='node',instance='%s'}" .Params.instance)) }}</td>
</tr>
<tr>
</tr>
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<h1>Node CPU - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</h1>
<h3>CPU Usage</h3>
<div id="cpuGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#cpuGraph"),
expr: "sum by (mode)(irate(node_cpu_seconds_total{job='node',instance='{{ .Params.instance }}',mode!='idle',mode!='iowait',mode!='steal'}[5m]))",
renderer: 'area',
max: {{ with printf "count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance | query }}{{ . | first | value }}{{ else}}undefined{{end}},
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yTitle: 'Cores'
})
</script>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -1,78 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
<tr>
<th colspan="2">Disks</th>
</tr>
{{/* One group of rows per disk device of this instance, sorted by device name: a header row followed by utilization, throughput and average read/write time. */}}
{{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s'}" .Params.instance | query | sortByLabel "device" }}
<tr>
<th colspan="2">{{ .Labels.device }}</th>
</tr>
<tr>
<td>Utilization</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_disk_io_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) * 100" .Labels.instance .Labels.device) "%" "printf.1f") }}</td>
</tr>
<tr>
<td>Throughput</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_bytes_total{job='node',instance='%s',device='%s'}[5m]) + irate(node_disk_written_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "B/s" "humanize") }}</td>
</tr>
<tr>
<td>Avg Read Time</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) / irate(node_disk_reads_completed_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "s" "humanize") }}</td>
</tr>
<tr>
<td>Avg Write Time</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_disk_write_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) / irate(node_disk_writes_completed_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "s" "humanize") }}</td>
</tr>
{{ end }}
<tr>
<th colspan="2">Filesystem Fullness</th>
</tr>
{{ define "roughlyNearZero" }}
{{ if gt .1 . }}~0{{ else }}{{ printf "%.1f" . }}{{ end }}
{{ end }}
{{ range printf "node_filesystem_size_bytes{job='node',instance='%s'}" .Params.instance | query | sortByLabel "mountpoint" }}
<tr>
<td>{{ .Labels.mountpoint }}</td>
<td>{{ template "prom_query_drilldown" (args (printf "100 - node_filesystem_avail_bytes{job='node',instance='%s',mountpoint='%s'} / node_filesystem_size_bytes{job='node'} * 100" .Labels.instance .Labels.mountpoint) "%" "roughlyNearZero") }}</td>
</tr>
{{ end }}
<tr>
</tr>
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<h1>Node Disk - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</h1>
<h3>Disk I/O Utilization</h3>
<div id="diskioGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#diskioGraph"),
expr: [
"irate(node_disk_io_time_seconds_total{job='node',instance='{{ .Params.instance }}',device!~'^(md\\\\d+$|dm-)'}[5m]) * 100",
],
min: 0,
name: '[[ device ]]',
yUnits: "%",
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yTitle: 'Disk I/O Utilization'
})
</script>
<h3>Filesystem Usage</h3>
<div id="fsGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#fsGraph"),
expr: "100 - node_filesystem_avail_bytes{job='node',instance='{{ .Params.instance }}'} / node_filesystem_size_bytes{job='node'} * 100",
min: 0,
max: 100,
name: '[[ mountpoint ]]',
yUnits: "%",
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yTitle: 'Filesystem Fullness'
})
</script>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -1,121 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
<tr><th colspan="2">Overview</th></tr>
<tr>
<td>User CPU</td>
<td>{{ template "prom_query_drilldown" (args (printf "sum(irate(node_cpu_seconds_total{job='node',instance='%s',mode='user'}[5m])) * 100 / count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance .Params.instance) "%" "printf.1f") }}</td>
</tr>
<tr>
<td>System CPU</td>
<td>{{ template "prom_query_drilldown" (args (printf "sum(irate(node_cpu_seconds_total{job='node',instance='%s',mode='system'}[5m])) * 100 / count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance .Params.instance) "%" "printf.1f") }}</td>
</tr>
<tr>
<td>Memory Total</td>
<td>{{ template "prom_query_drilldown" (args (printf "node_memory_MemTotal_bytes{job='node',instance='%s'}" .Params.instance) "B" "humanize1024") }}</td>
</tr>
<tr>
<td>Memory Free</td>
<td>{{ template "prom_query_drilldown" (args (printf "node_memory_MemFree_bytes{job='node',instance='%s'}" .Params.instance) "B" "humanize1024") }}</td>
</tr>
<tr>
<th colspan="2">Network</th>
</tr>
{{ range printf "node_network_receive_bytes_total{job='node',instance='%s',device!='lo'}" .Params.instance | query | sortByLabel "device" }}
<tr>
<td>{{ .Labels.device }} Received</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_network_receive_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device) "B/s" "humanize") }}</td>
</tr>
<tr>
<td>{{ .Labels.device }} Transmitted</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_network_transmit_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device) "B/s" "humanize") }}</td>
</tr>
{{ end }}
<tr>
<th colspan="2">Disks</th>
</tr>
{{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s',device!~'^(md\\\\d+$|dm-)'}" .Params.instance | query | sortByLabel "device" }}
<tr>
<td>{{ .Labels.device }} Utilization</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_disk_io_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) * 100" .Labels.instance .Labels.device) "%" "printf.1f") }}</td>
</tr>
{{ end }}
{{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s'}" .Params.instance | query | sortByLabel "device" }}
<tr>
<td>{{ .Labels.device }} Throughput</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_bytes_total{job='node',instance='%s',device='%s'}[5m]) + irate(node_disk_written_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "B/s" "humanize") }}</td>
</tr>
{{ end }}
<tr>
<th colspan="2">Filesystem Fullness</th>
</tr>
{{ define "roughlyNearZero" }}
{{ if gt .1 . }}~0{{ else }}{{ printf "%.1f" . }}{{ end }}
{{ end }}
{{ range printf "node_filesystem_size_bytes{job='node',instance='%s'}" .Params.instance | query | sortByLabel "mountpoint" }}
<tr>
<td>{{ .Labels.mountpoint }}</td>
<td>{{ template "prom_query_drilldown" (args (printf "100 - node_filesystem_avail_bytes{job='node',instance='%s',mountpoint='%s'} / node_filesystem_size_bytes{job='node'} * 100" .Labels.instance .Labels.mountpoint) "%" "roughlyNearZero") }}</td>
</tr>
{{ end }}
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<h1>Node Overview - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</h1>
<h3>CPU Usage</h3>
<div id="cpuGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#cpuGraph"),
expr: "sum by (mode)(irate(node_cpu_seconds_total{job='node',instance='{{ .Params.instance }}',mode!='idle',mode!='iowait',mode!='steal'}[5m]))",
renderer: 'area',
max: {{ with printf "count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance | query }}{{ . | first | value }}{{ else}}undefined{{end}},
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yTitle: 'Cores'
})
</script>
<h3>Disk I/O Utilization</h3>
<div id="diskioGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#diskioGraph"),
expr: [
"irate(node_disk_io_time_seconds_total{job='node',instance='{{ .Params.instance }}',device!~'^(md\\\\d+$|dm-)'}[5m]) * 100",
],
min: 0,
name: '[[ device ]]',
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yUnits: "%",
yTitle: 'Disk I/O Utilization'
})
</script>
{{/* Memory graph: four area series (Cached, Buffers, Used, Free) for the selected instance. "Used" is MemTotal minus Free, Buffers and Cached. All series use the *_bytes metric names, matching the rest of this page. */}}
<h3>Memory</h3>
<div id="memoryGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#memoryGraph"),
renderer: 'area',
expr: [
"node_memory_Cached_bytes{job='node',instance='{{ .Params.instance }}'}",
"node_memory_Buffers_bytes{job='node',instance='{{ .Params.instance }}'}",
"node_memory_MemTotal_bytes{job='node',instance='{{ .Params.instance }}'} - node_memory_MemFree_bytes{job='node',instance='{{.Params.instance}}'} - node_memory_Buffers_bytes{job='node',instance='{{.Params.instance}}'} - node_memory_Cached_bytes{job='node',instance='{{.Params.instance}}'}",
"node_memory_MemFree_bytes{job='node',instance='{{ .Params.instance }}'}",
],
name: ["Cached", "Buffers", "Used", "Free"],
min: 0,
yUnits: "B",
yAxisFormatter: PromConsole.NumberFormatter.humanize1024,
yHoverFormatter: PromConsole.NumberFormatter.humanize1024,
yTitle: 'Memory'
})
</script>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -1,35 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
<tr>
<th>Node</th>
<th>{{ template "prom_query_drilldown" (args "sum(up{job='node'})") }} / {{ template "prom_query_drilldown" (args "count(up{job='node'})") }}</th>
</tr>
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<h1>Node</h1>
{{/* One row per target of job "node", sorted by instance: link to its overview page, up state, CPU used and available memory. Uses the same compact table class (table-sm) as the other console tables. */}}
<table class="table table-sm table-striped table-bordered" style="width: 0%">
<tr>
<th>Node</th>
<th>Up</th>
<th>CPU<br/>Used</th>
<th>Memory<br/> Available</th>
</tr>
{{ range query "up{job='node'}" | sortByLabel "instance" }}
<tr>
<td><a href="node-overview.html?instance={{ .Labels.instance }}">{{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Labels.instance }}</a></td>
<td{{ if eq (. | value) 1.0 }}>Yes{{ else }} class="alert-danger">No{{ end }}</td>
<td>{{ template "prom_query_drilldown" (args (printf "100 * (1 - avg by(instance) (sum without(mode) (irate(node_cpu_seconds_total{job='node',mode=~'idle|iowait|steal',instance='%s'}[5m]))))" .Labels.instance) "%" "printf.1f") }}</td>
<td>{{ template "prom_query_drilldown" (args (printf "node_memory_MemFree_bytes{job='node',instance='%s'} + node_memory_Cached_bytes{job='node',instance='%s'} + node_memory_Buffers_bytes{job='node',instance='%s'}" .Labels.instance .Labels.instance .Labels.instance) "B" "humanize1024") }}</td>
</tr>
{{ else }}
<tr><td colspan=4>No nodes found.</td></tr>
{{ end }}
</table>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -1,96 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
<tr>
<th colspan="2">Overview</th>
</tr>
<tr>
<td>CPU</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(process_cpu_seconds_total{job='prometheus',instance='%s'}[5m])" .Params.instance) "s/s" "humanizeNoSmallPrefix") }}</td>
</tr>
<tr>
<td>Memory</td>
<td>{{ template "prom_query_drilldown" (args (printf "process_resident_memory_bytes{job='prometheus',instance='%s'}" .Params.instance) "B" "humanize1024") }}</td>
</tr>
<tr>
<td>Version</td>
<td>{{ with query (printf "prometheus_build_info{job='prometheus',instance='%s'}" .Params.instance) }}{{. | first | label "version"}}{{end}}</td>
</tr>
<tr>
<th colspan="2">Storage</th>
</tr>
<tr>
<td>Ingested Samples</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(prometheus_tsdb_head_samples_appended_total{job='prometheus',instance='%s'}[5m])" .Params.instance) "/s" "humanizeNoSmallPrefix") }}</td>
</tr>
<tr>
<td>Head Series</td>
<td>{{ template "prom_query_drilldown" (args (printf "prometheus_tsdb_head_series{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
<td>Blocks Loaded</td>
<td>{{ template "prom_query_drilldown" (args (printf "prometheus_tsdb_blocks_loaded{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
<th colspan="2">Rules</th>
</tr>
<tr>
<td>Evaluation Duration</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(prometheus_evaluator_duration_seconds_sum{job='prometheus',instance='%s'}[5m]) / irate(prometheus_evaluator_duration_seconds_count{job='prometheus',instance='%s'}[5m])" .Params.instance .Params.instance) "" "humanizeDuration") }}</td>
</tr>
<tr>
<td>Notification Latency</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(prometheus_notifications_latency_seconds_sum{job='prometheus',instance='%s'}[5m]) / irate(prometheus_notifications_latency_seconds_count{job='prometheus',instance='%s'}[5m])" .Params.instance .Params.instance) "" "humanizeDuration") }}</td>
</tr>
<tr>
<td>Notification Queue</td>
<td>{{ template "prom_query_drilldown" (args (printf "prometheus_notifications_queue_length{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
<th colspan="2">HTTP Server</th>
</tr>
{{ range printf "prometheus_http_request_duration_seconds_count{job='prometheus',instance='%s'}" .Params.instance | query | sortByLabel "handler" }}
<tr>
<td>{{ .Labels.handler }}</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(prometheus_http_request_duration_seconds_count{job='prometheus',instance='%s',handler='%s'}[5m])" .Labels.instance .Labels.handler) "/s" "humanizeNoSmallPrefix") }}</td>
</tr>
{{ end }}
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<div class="prom_content_div">
<h1>Prometheus Overview - {{ .Params.instance }}</h1>
<h3>Ingested Samples</h3>
<div id="samplesGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#samplesGraph"),
expr: "irate(prometheus_tsdb_head_samples_appended_total{job='prometheus',instance='{{ .Params.instance }}'}[5m])",
name: 'Ingested Samples',
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yTitle: "Samples",
yUnits: "/s",
})
</script>
<h3>HTTP Server</h3>
<div id="serverGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#serverGraph"),
expr: "irate(prometheus_http_request_duration_seconds_count{job='prometheus',instance='{{ .Params.instance }}'}[5m])",
name: '[[handler]]',
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yTitle: "Requests",
yUnits: "/s",
})
</script>
</div>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -1,34 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
<tr>
<th>Prometheus</th>
<th>{{ template "prom_query_drilldown" (args "sum(up{job='prometheus'})") }} / {{ template "prom_query_drilldown" (args "count(up{job='prometheus'})") }}</th>
</tr>
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<h1>Prometheus</h1>
{{/* One row per target of job "prometheus", sorted by instance: link to its overview page, up state, ingested samples rate and resident memory. The {{ else }} branch renders when the up query returns no series. */}}
<table class="table table-sm table-striped table-bordered" style="width: 0%">
<tr>
<th>Prometheus</th>
<th>Up</th>
<th>Ingested Samples</th>
<th>Memory</th>
</tr>
{{ range query "up{job='prometheus'}" | sortByLabel "instance" }}
<tr>
<td><a href="prometheus-overview.html?instance={{ .Labels.instance }}">{{ .Labels.instance }}</a></td>
<td {{ if eq (. | value) 1.0 }}>Yes{{ else }} class="alert-danger">No{{ end }}</td>
<td class="text-right">{{ template "prom_query_drilldown" (args (printf "irate(prometheus_tsdb_head_samples_appended_total{job='prometheus',instance='%s'}[5m])" .Labels.instance) "/s" "humanizeNoSmallPrefix") }}</td>
<td class="text-right">{{ template "prom_query_drilldown" (args (printf "process_resident_memory_bytes{job='prometheus',instance='%s'}" .Labels.instance) "B" "humanize1024")}}</td>
</tr>
{{ else }}
<tr><td colspan=4>No Prometheus instances found.</td></tr>
{{ end }}
</table>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -233,7 +233,7 @@ type Config interface {
}
type DiscovererOptions struct {
Logger log.Logger
Logger *slog.Logger
// A registerer for the Discoverer's metrics.
Registerer prometheus.Registerer

View file

@ -17,6 +17,7 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"strings"
@ -29,11 +30,11 @@ import (
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
@ -100,7 +101,7 @@ type EC2SDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
func (*EC2SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
func (*EC2SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &ec2Metrics{
refreshMetrics: rmi,
}
@ -146,9 +147,9 @@ func (c *EC2SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
// the Discoverer interface.
type EC2Discovery struct {
*refresh.Discovery
logger log.Logger
logger *slog.Logger
cfg *EC2SDConfig
ec2 *ec2.EC2
ec2 ec2iface.EC2API
// azToAZID maps this account's availability zones to their underlying AZ
// ID, e.g. eu-west-2a -> euw2-az2. Refreshes are performed sequentially, so
@ -157,14 +158,14 @@ type EC2Discovery struct {
}
// NewEC2Discovery returns a new EC2Discovery which periodically refreshes its targets.
func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) {
func NewEC2Discovery(conf *EC2SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) {
m, ok := metrics.(*ec2Metrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
d := &EC2Discovery{
logger: logger,
@ -182,7 +183,7 @@ func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, metrics discovery.Dis
return d, nil
}
func (d *EC2Discovery) ec2Client(context.Context) (*ec2.EC2, error) {
func (d *EC2Discovery) ec2Client(context.Context) (ec2iface.EC2API, error) {
if d.ec2 != nil {
return d.ec2, nil
}
@ -254,14 +255,14 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error
// Prometheus requires a reload if AWS adds a new AZ to the region.
if d.azToAZID == nil {
if err := d.refreshAZIDs(ctx); err != nil {
level.Debug(d.logger).Log(
"msg", "Unable to describe availability zones",
d.logger.Debug(
"Unable to describe availability zones",
"err", err)
}
}
input := &ec2.DescribeInstancesInput{Filters: filters}
if err := ec2Client.DescribeInstancesPagesWithContext(ctx, input, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool {
if err := ec2Client.DescribeInstancesPagesWithContext(ctx, input, func(p *ec2.DescribeInstancesOutput, _ bool) bool {
for _, r := range p.Reservations {
for _, inst := range r.Instances {
if inst.PrivateIpAddress == nil {
@ -296,8 +297,8 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error
labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone)
azID, ok := d.azToAZID[*inst.Placement.AvailabilityZone]
if !ok && d.azToAZID != nil {
level.Debug(d.logger).Log(
"msg", "Availability zone ID not found",
d.logger.Debug(
"Availability zone ID not found",
"az", *inst.Placement.AvailabilityZone)
}
labels[ec2LabelAZID] = model.LabelValue(azID)

434
discovery/aws/ec2_test.go Normal file
View file

@ -0,0 +1,434 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package aws
import (
"context"
"errors"
"testing"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/request"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
// strptr returns a pointer to a fresh copy of str, so that string literals can
// be used for the *string fields of the test fixtures.
// NOTE: helpers like this are shared by a few tests; in the future it might be
// worth moving them into a separate package.
func strptr(str string) *string {
	p := new(string)
	*p = str
	return p
}
// boolptr returns a pointer to a fresh copy of b, so that bool literals can be
// used for the *bool fields of the test fixtures.
func boolptr(b bool) *bool {
	p := new(bool)
	*p = b
	return p
}
// int64ptr returns a pointer to a fresh copy of i, so that integer literals can
// be used for the *int64 fields of the test fixtures.
func int64ptr(i int64) *int64 {
	p := new(int64)
	*p = i
	return p
}
// ec2DataStore holds the canned data that the mock EC2 client in this file
// serves to the code under test.
type ec2DataStore struct {
	// region is copied into EC2SDConfig.Region by TestEC2DiscoveryRefresh.
	region string
	// azToAZID maps availability zone names to zone IDs. The mock returns one
	// AvailabilityZone per entry and returns an error when the map is empty.
	azToAZID map[string]string
	// ownerID is set as the owner ID of the single mocked reservation.
	ownerID string
	// instances are returned as the instances of that reservation.
	instances []*ec2.Instance
}
// TestMain hands the package's test run to goleak.VerifyTestMain, which checks
// for leaked goroutines once the tests have finished.
func TestMain(m *testing.M) {
	goleak.VerifyTestMain(m)
}
// TestEC2DiscoveryRefreshAZIDs checks that refreshAZIDs stores the zone
// name -> zone ID mapping served by the mock client, and that it reports an
// error when the mock has no availability zones to return.
func TestEC2DiscoveryRefreshAZIDs(t *testing.T) {
	type testCase struct {
		name       string
		shouldFail bool
		ec2Data    *ec2DataStore
	}

	cases := []testCase{
		{
			name:       "Normal",
			shouldFail: false,
			ec2Data: &ec2DataStore{
				azToAZID: map[string]string{
					"azname-a": "azid-1",
					"azname-b": "azid-2",
					"azname-c": "azid-3",
				},
			},
		},
		{
			// An empty data store makes the mock return an error.
			name:       "HandleError",
			shouldFail: true,
			ec2Data:    &ec2DataStore{},
		},
	}

	ctx := context.Background()
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			mock := newMockEC2Client(tc.ec2Data)
			disc := &EC2Discovery{ec2: mock}

			err := disc.refreshAZIDs(ctx)
			if tc.shouldFail {
				require.Error(t, err)
				return
			}
			require.NoError(t, err)
			require.Equal(t, mock.ec2Data.azToAZID, disc.azToAZID)
		})
	}
}
// TestEC2DiscoveryRefresh feeds canned instances through the mock EC2 client
// and compares the target groups returned by EC2Discovery.refresh with the
// expected label sets. Every case runs with port 4242 and the region taken
// from its data store.
func TestEC2DiscoveryRefresh(t *testing.T) {
	ctx := context.Background()
	// iterate through the test cases
	for _, tt := range []struct {
		name string
		ec2Data *ec2DataStore
		expected []*targetgroup.Group
	}{
		{
			// An instance without a private IP address: the expected group has no targets.
			name: "NoPrivateIp",
			ec2Data: &ec2DataStore{
				region: "region-noprivateip",
				azToAZID: map[string]string{
					"azname-a": "azid-1",
					"azname-b": "azid-2",
					"azname-c": "azid-3",
				},
				instances: []*ec2.Instance{
					{
						InstanceId: strptr("instance-id-noprivateip"),
					},
				},
			},
			expected: []*targetgroup.Group{
				{
					Source: "region-noprivateip",
				},
			},
		},
		{
			// An instance with no VPC ID, subnet or network interfaces, but with
			// every other field populated.
			name: "NoVpc",
			ec2Data: &ec2DataStore{
				region: "region-novpc",
				azToAZID: map[string]string{
					"azname-a": "azid-1",
					"azname-b": "azid-2",
					"azname-c": "azid-3",
				},
				ownerID: "owner-id-novpc",
				instances: []*ec2.Instance{
					{
						// set every possible option and test them here
						Architecture: strptr("architecture-novpc"),
						ImageId: strptr("ami-novpc"),
						InstanceId: strptr("instance-id-novpc"),
						InstanceLifecycle: strptr("instance-lifecycle-novpc"),
						InstanceType: strptr("instance-type-novpc"),
						Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")},
						Platform: strptr("platform-novpc"),
						PrivateDnsName: strptr("private-dns-novpc"),
						PrivateIpAddress: strptr("1.2.3.4"),
						PublicDnsName: strptr("public-dns-novpc"),
						PublicIpAddress: strptr("42.42.42.2"),
						State: &ec2.InstanceState{Name: strptr("running")},
						// test tags once and for all; the nil tag and the tags missing a
						// key or a value have no counterpart in the expected labels below
						Tags: []*ec2.Tag{
							{Key: strptr("tag-1-key"), Value: strptr("tag-1-value")},
							{Key: strptr("tag-2-key"), Value: strptr("tag-2-value")},
							nil,
							{Value: strptr("tag-4-value")},
							{Key: strptr("tag-5-key")},
						},
					},
				},
			},
			expected: []*targetgroup.Group{
				{
					Source: "region-novpc",
					Targets: []model.LabelSet{
						{
							"__address__": model.LabelValue("1.2.3.4:4242"),
							"__meta_ec2_ami": model.LabelValue("ami-novpc"),
							"__meta_ec2_architecture": model.LabelValue("architecture-novpc"),
							"__meta_ec2_availability_zone": model.LabelValue("azname-b"),
							"__meta_ec2_availability_zone_id": model.LabelValue("azid-2"),
							"__meta_ec2_instance_id": model.LabelValue("instance-id-novpc"),
							"__meta_ec2_instance_lifecycle": model.LabelValue("instance-lifecycle-novpc"),
							"__meta_ec2_instance_type": model.LabelValue("instance-type-novpc"),
							"__meta_ec2_instance_state": model.LabelValue("running"),
							"__meta_ec2_owner_id": model.LabelValue("owner-id-novpc"),
							"__meta_ec2_platform": model.LabelValue("platform-novpc"),
							"__meta_ec2_private_dns_name": model.LabelValue("private-dns-novpc"),
							"__meta_ec2_private_ip": model.LabelValue("1.2.3.4"),
							"__meta_ec2_public_dns_name": model.LabelValue("public-dns-novpc"),
							"__meta_ec2_public_ip": model.LabelValue("42.42.42.2"),
							"__meta_ec2_region": model.LabelValue("region-novpc"),
							"__meta_ec2_tag_tag_1_key": model.LabelValue("tag-1-value"),
							"__meta_ec2_tag_tag_2_key": model.LabelValue("tag-2-value"),
						},
					},
				},
			},
		},
		{
			// A VPC instance whose network interfaces carry no IPv6 address that
			// shows up in the expected labels.
			name: "Ipv4",
			ec2Data: &ec2DataStore{
				region: "region-ipv4",
				azToAZID: map[string]string{
					"azname-a": "azid-1",
					"azname-b": "azid-2",
					"azname-c": "azid-3",
				},
				instances: []*ec2.Instance{
					{
						// just the minimum needed for the refresh to work
						ImageId: strptr("ami-ipv4"),
						InstanceId: strptr("instance-id-ipv4"),
						InstanceType: strptr("instance-type-ipv4"),
						Placement: &ec2.Placement{AvailabilityZone: strptr("azname-c")},
						PrivateIpAddress: strptr("5.6.7.8"),
						State: &ec2.InstanceState{Name: strptr("running")},
						SubnetId: strptr("azid-3"),
						VpcId: strptr("vpc-ipv4"),
						// network interfaces
						NetworkInterfaces: []*ec2.InstanceNetworkInterface{
							// interface without subnet -> should be ignored
							{
								Ipv6Addresses: []*ec2.InstanceIpv6Address{
									{
										Ipv6Address: strptr("2001:db8:1::1"),
										IsPrimaryIpv6: boolptr(true),
									},
								},
							},
							// interface with subnet, no IPv6
							{
								Ipv6Addresses: []*ec2.InstanceIpv6Address{},
								SubnetId: strptr("azid-3"),
							},
							// interface with another subnet, no IPv6
							{
								Ipv6Addresses: []*ec2.InstanceIpv6Address{},
								SubnetId: strptr("azid-1"),
							},
						},
					},
				},
			},
			expected: []*targetgroup.Group{
				{
					Source: "region-ipv4",
					Targets: []model.LabelSet{
						{
							"__address__": model.LabelValue("5.6.7.8:4242"),
							"__meta_ec2_ami": model.LabelValue("ami-ipv4"),
							"__meta_ec2_availability_zone": model.LabelValue("azname-c"),
							"__meta_ec2_availability_zone_id": model.LabelValue("azid-3"),
							"__meta_ec2_instance_id": model.LabelValue("instance-id-ipv4"),
							"__meta_ec2_instance_state": model.LabelValue("running"),
							"__meta_ec2_instance_type": model.LabelValue("instance-type-ipv4"),
							"__meta_ec2_owner_id": model.LabelValue(""),
							"__meta_ec2_primary_subnet_id": model.LabelValue("azid-3"),
							"__meta_ec2_private_ip": model.LabelValue("5.6.7.8"),
							"__meta_ec2_region": model.LabelValue("region-ipv4"),
							"__meta_ec2_subnet_id": model.LabelValue(",azid-3,azid-1,"),
							"__meta_ec2_vpc_id": model.LabelValue("vpc-ipv4"),
						},
					},
				},
			},
		},
		{
			// A VPC instance with several interfaces carrying primary and
			// non-primary IPv6 addresses at different device indexes.
			name: "Ipv6",
			ec2Data: &ec2DataStore{
				region: "region-ipv6",
				azToAZID: map[string]string{
					"azname-a": "azid-1",
					"azname-b": "azid-2",
					"azname-c": "azid-3",
				},
				instances: []*ec2.Instance{
					{
						// just the minimum needed for the refresh to work
						ImageId: strptr("ami-ipv6"),
						InstanceId: strptr("instance-id-ipv6"),
						InstanceType: strptr("instance-type-ipv6"),
						Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")},
						PrivateIpAddress: strptr("9.10.11.12"),
						State: &ec2.InstanceState{Name: strptr("running")},
						SubnetId: strptr("azid-2"),
						VpcId: strptr("vpc-ipv6"),
						// network interfaces
						NetworkInterfaces: []*ec2.InstanceNetworkInterface{
							// interface without primary IPv6
							// NOTE(review): this was labelled "index 2", but DeviceIndex below is 3,
							// the same as the third interface - confirm which value is intended.
							{
								Attachment: &ec2.InstanceNetworkInterfaceAttachment{
									DeviceIndex: int64ptr(3),
								},
								Ipv6Addresses: []*ec2.InstanceIpv6Address{
									{
										Ipv6Address: strptr("2001:db8:2::1:1"),
										IsPrimaryIpv6: boolptr(false),
									},
								},
								SubnetId: strptr("azid-2"),
							},
							// interface with primary IPv6, index 1
							{
								Attachment: &ec2.InstanceNetworkInterfaceAttachment{
									DeviceIndex: int64ptr(1),
								},
								Ipv6Addresses: []*ec2.InstanceIpv6Address{
									{
										Ipv6Address: strptr("2001:db8:2::2:1"),
										IsPrimaryIpv6: boolptr(false),
									},
									{
										Ipv6Address: strptr("2001:db8:2::2:2"),
										IsPrimaryIpv6: boolptr(true),
									},
								},
								SubnetId: strptr("azid-2"),
							},
							// interface with primary IPv6, index 3
							{
								Attachment: &ec2.InstanceNetworkInterfaceAttachment{
									DeviceIndex: int64ptr(3),
								},
								Ipv6Addresses: []*ec2.InstanceIpv6Address{
									{
										Ipv6Address: strptr("2001:db8:2::3:1"),
										IsPrimaryIpv6: boolptr(true),
									},
								},
								SubnetId: strptr("azid-1"),
							},
							// interface without primary IPv6, index 0
							{
								Attachment: &ec2.InstanceNetworkInterfaceAttachment{
									DeviceIndex: int64ptr(0),
								},
								Ipv6Addresses: []*ec2.InstanceIpv6Address{},
								SubnetId: strptr("azid-3"),
							},
						},
					},
				},
			},
			expected: []*targetgroup.Group{
				{
					Source: "region-ipv6",
					Targets: []model.LabelSet{
						{
							"__address__": model.LabelValue("9.10.11.12:4242"),
							"__meta_ec2_ami": model.LabelValue("ami-ipv6"),
							"__meta_ec2_availability_zone": model.LabelValue("azname-b"),
							"__meta_ec2_availability_zone_id": model.LabelValue("azid-2"),
							"__meta_ec2_instance_id": model.LabelValue("instance-id-ipv6"),
							"__meta_ec2_instance_state": model.LabelValue("running"),
							"__meta_ec2_instance_type": model.LabelValue("instance-type-ipv6"),
							"__meta_ec2_ipv6_addresses": model.LabelValue(",2001:db8:2::1:1,2001:db8:2::2:1,2001:db8:2::2:2,2001:db8:2::3:1,"),
							"__meta_ec2_owner_id": model.LabelValue(""),
							// the primary addresses sit at positions 1 and 3 of this list,
							// matching the DeviceIndex values of their interfaces above
							"__meta_ec2_primary_ipv6_addresses": model.LabelValue(",,2001:db8:2::2:2,,2001:db8:2::3:1,"),
							"__meta_ec2_primary_subnet_id": model.LabelValue("azid-2"),
							"__meta_ec2_private_ip": model.LabelValue("9.10.11.12"),
							"__meta_ec2_region": model.LabelValue("region-ipv6"),
							"__meta_ec2_subnet_id": model.LabelValue(",azid-2,azid-1,azid-3,"),
							"__meta_ec2_vpc_id": model.LabelValue("vpc-ipv6"),
						},
					},
				},
			},
		},
	} {
		t.Run(tt.name, func(t *testing.T) {
			client := newMockEC2Client(tt.ec2Data)
			d := &EC2Discovery{
				ec2: client,
				cfg: &EC2SDConfig{
					Port: 4242,
					Region: client.ec2Data.region,
				},
			}
			g, err := d.refresh(ctx)
			require.NoError(t, err)
			require.Equal(t, tt.expected, g)
		})
	}
}
// mockEC2Client is an EC2 client mock. It embeds ec2iface.EC2API and answers
// the calls overridden below from the ec2Data fixture it holds.
type mockEC2Client struct {
ec2iface.EC2API
// ec2Data is the fixture the mocked API calls read from.
ec2Data ec2DataStore
}
// newMockEC2Client returns a mock EC2 client that holds a copy of the
// ec2DataStore value pointed to by ec2Data.
func newMockEC2Client(ec2Data *ec2DataStore) *mockEC2Client {
	return &mockEC2Client{ec2Data: *ec2Data}
}
// DescribeAvailabilityZonesWithContext returns one availability zone per entry
// of the fixture's azToAZID map, using the key as the zone name and the value
// as the zone ID. It returns an error when the map is empty.
func (m *mockEC2Client) DescribeAvailabilityZonesWithContext(_ aws.Context, _ *ec2.DescribeAvailabilityZonesInput, _ ...request.Option) (*ec2.DescribeAvailabilityZonesOutput, error) {
	zoneIDs := m.ec2Data.azToAZID
	if len(zoneIDs) == 0 {
		return nil, errors.New("No AZs found")
	}

	zones := make([]*ec2.AvailabilityZone, 0, len(zoneIDs))
	for name, id := range zoneIDs {
		zones = append(zones, &ec2.AvailabilityZone{
			ZoneName: strptr(name),
			ZoneId:   strptr(id),
		})
	}

	return &ec2.DescribeAvailabilityZonesOutput{AvailabilityZones: zones}, nil
}
// DescribeInstancesPagesWithContext calls fn exactly once with a single page
// that holds one reservation built from the fixture's instances and owner ID,
// passing true as fn's second argument. The value returned by fn is ignored
// and the method always returns nil.
func (m *mockEC2Client) DescribeInstancesPagesWithContext(_ aws.Context, _ *ec2.DescribeInstancesInput, fn func(*ec2.DescribeInstancesOutput, bool) bool, _ ...request.Option) error {
	reservation := new(ec2.Reservation).
		SetInstances(m.ec2Data.instances).
		SetOwnerId(m.ec2Data.ownerID)
	page := new(ec2.DescribeInstancesOutput).
		SetReservations([]*ec2.Reservation{reservation})

	_ = fn(page, true)
	return nil
}

View file

@ -17,6 +17,7 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"strings"
@ -29,10 +30,10 @@ import (
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/lightsail"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
@ -82,7 +83,7 @@ type LightsailSDConfig struct {
}
// NewDiscovererMetrics implements discovery.Config.
func (*LightsailSDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
func (*LightsailSDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &lightsailMetrics{
refreshMetrics: rmi,
}
@ -130,14 +131,14 @@ type LightsailDiscovery struct {
}
// NewLightsailDiscovery returns a new LightsailDiscovery which periodically refreshes its targets.
func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) {
func NewLightsailDiscovery(conf *LightsailSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) {
m, ok := metrics.(*lightsailMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
d := &LightsailDiscovery{

View file

@ -17,6 +17,7 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"math/rand"
"net"
"net/http"
@ -35,10 +36,9 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4"
cache "github.com/Code-Hex/go-generics-cache"
"github.com/Code-Hex/go-generics-cache/policy/lru"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/promslog"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@ -70,18 +70,14 @@ const (
authMethodManagedIdentity = "ManagedIdentity"
)
var (
userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
// DefaultSDConfig is the default Azure SD configuration.
DefaultSDConfig = SDConfig{
Port: 80,
RefreshInterval: model.Duration(5 * time.Minute),
Environment: "AzurePublicCloud",
AuthenticationMethod: authMethodOAuth,
HTTPClientConfig: config_util.DefaultHTTPClientConfig,
}
)
// DefaultSDConfig is the default Azure SD configuration: port 80, a
// five-minute refresh interval, the "AzurePublicCloud" environment, the
// authMethodOAuth authentication method and the default HTTP client config.
var DefaultSDConfig = SDConfig{
Port: 80,
RefreshInterval: model.Duration(5 * time.Minute),
Environment: "AzurePublicCloud",
AuthenticationMethod: authMethodOAuth,
HTTPClientConfig: config_util.DefaultHTTPClientConfig,
}
var environments = map[string]cloud.Configuration{
"AZURECHINACLOUD": cloud.AzureChina,
@ -175,7 +171,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
type Discovery struct {
*refresh.Discovery
logger log.Logger
logger *slog.Logger
cfg *SDConfig
port int
cache *cache.Cache[string, *armnetwork.Interface]
@ -183,14 +179,14 @@ type Discovery struct {
}
// NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets.
func NewDiscovery(cfg *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(cfg *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*azureMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
l := cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5000)))
d := &Discovery{
@ -228,26 +224,26 @@ type azureClient struct {
vm *armcompute.VirtualMachinesClient
vmss *armcompute.VirtualMachineScaleSetsClient
vmssvm *armcompute.VirtualMachineScaleSetVMsClient
logger log.Logger
logger *slog.Logger
}
var _ client = &azureClient{}
// createAzureClient is a helper function for creating an Azure compute client to ARM.
func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) {
cloudConfiguration, err := CloudConfigurationFromName(cfg.Environment)
// createAzureClient is a helper method for creating an Azure compute client to ARM.
func (d *Discovery) createAzureClient() (client, error) {
cloudConfiguration, err := CloudConfigurationFromName(d.cfg.Environment)
if err != nil {
return &azureClient{}, err
}
var c azureClient
c.logger = logger
c.logger = d.logger
telemetry := policy.TelemetryOptions{
ApplicationID: userAgent,
ApplicationID: version.PrometheusUserAgent(),
}
credential, err := newCredential(cfg, policy.ClientOptions{
credential, err := newCredential(*d.cfg, policy.ClientOptions{
Cloud: cloudConfiguration,
Telemetry: telemetry,
})
@ -255,7 +251,7 @@ func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) {
return &azureClient{}, err
}
client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "azure_sd")
client, err := config_util.NewClientFromConfig(d.cfg.HTTPClientConfig, "azure_sd")
if err != nil {
return &azureClient{}, err
}
@ -267,22 +263,22 @@ func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) {
},
}
c.vm, err = armcompute.NewVirtualMachinesClient(cfg.SubscriptionID, credential, options)
c.vm, err = armcompute.NewVirtualMachinesClient(d.cfg.SubscriptionID, credential, options)
if err != nil {
return &azureClient{}, err
}
c.nic, err = armnetwork.NewInterfacesClient(cfg.SubscriptionID, credential, options)
c.nic, err = armnetwork.NewInterfacesClient(d.cfg.SubscriptionID, credential, options)
if err != nil {
return &azureClient{}, err
}
c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(cfg.SubscriptionID, credential, options)
c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(d.cfg.SubscriptionID, credential, options)
if err != nil {
return &azureClient{}, err
}
c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(cfg.SubscriptionID, credential, options)
c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(d.cfg.SubscriptionID, credential, options)
if err != nil {
return &azureClient{}, err
}
@ -337,35 +333,27 @@ type virtualMachine struct {
}
// Create a new azureResource object from an ID string.
func newAzureResourceFromID(id string, logger log.Logger) (*arm.ResourceID, error) {
func newAzureResourceFromID(id string, logger *slog.Logger) (*arm.ResourceID, error) {
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
resourceID, err := arm.ParseResourceID(id)
if err != nil {
err := fmt.Errorf("invalid ID '%s': %w", id, err)
level.Error(logger).Log("err", err)
logger.Error("Failed to parse resource ID", "err", err)
return &arm.ResourceID{}, err
}
return resourceID, nil
}
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
defer level.Debug(d.logger).Log("msg", "Azure discovery completed")
client, err := createAzureClient(*d.cfg, d.logger)
if err != nil {
d.metrics.failuresCount.Inc()
return nil, fmt.Errorf("could not create Azure client: %w", err)
}
func (d *Discovery) refreshAzureClient(ctx context.Context, client client) ([]*targetgroup.Group, error) {
machines, err := client.getVMs(ctx, d.cfg.ResourceGroup)
if err != nil {
d.metrics.failuresCount.Inc()
return nil, fmt.Errorf("could not get virtual machines: %w", err)
}
level.Debug(d.logger).Log("msg", "Found virtual machines during Azure discovery.", "count", len(machines))
d.logger.Debug("Found virtual machines during Azure discovery.", "count", len(machines))
// Load the vms managed by scale sets.
scaleSets, err := client.getScaleSets(ctx, d.cfg.ResourceGroup)
@ -418,6 +406,18 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
return []*targetgroup.Group{&tg}, nil
}
// refresh creates an Azure client from the discovery's configuration and
// delegates target discovery to refreshAzureClient. If the client cannot be
// created, the failure counter is incremented and the wrapped error is
// returned. A debug message is logged when the call returns.
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
	defer d.logger.Debug("Azure discovery completed")

	azClient, createErr := d.createAzureClient()
	if createErr != nil {
		d.metrics.failuresCount.Inc()
		return nil, fmt.Errorf("could not create Azure client: %w", createErr)
	}

	return d.refreshAzureClient(ctx, azClient)
}
func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualMachine) (model.LabelSet, error) {
r, err := newAzureResourceFromID(vm.ID, d.logger)
if err != nil {
@ -458,11 +458,10 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM
networkInterface, err = client.getVMScaleSetVMNetworkInterfaceByID(ctx, nicID, vm.ScaleSet, vm.InstanceID)
}
if err != nil {
if errors.Is(err, errorNotFound) {
level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err)
} else {
if !errors.Is(err, errorNotFound) {
return nil, err
}
d.logger.Warn("Network interface does not exist", "name", nicID, "err", err)
// Get out of this routine because we cannot continue without a network interface.
return nil, nil
}
@ -480,7 +479,7 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM
// yet support this. On deallocated machines, this value happens to be nil so it
// is a cheap and easy way to determine if a machine is allocated or not.
if networkInterface.Properties.Primary == nil {
level.Debug(d.logger).Log("msg", "Skipping deallocated virtual machine", "machine", vm.Name)
d.logger.Debug("Skipping deallocated virtual machine", "machine", vm.Name)
return nil, nil
}
@ -724,7 +723,7 @@ func (d *Discovery) addToCache(nicID string, netInt *armnetwork.Interface) {
rs := time.Duration(random) * time.Second
exptime := time.Duration(d.cfg.RefreshInterval*10) + rs
d.cache.Set(nicID, netInt, cache.WithExpiration(exptime))
level.Debug(d.logger).Log("msg", "Adding nic", "nic", nicID, "time", exptime.Seconds())
d.logger.Debug("Adding nic", "nic", nicID, "time", exptime.Seconds())
}
// getFromCache will get the network Interface for the specified nicID

View file

@ -15,19 +15,34 @@ package azure
import (
"context"
"fmt"
"log/slog"
"net/http"
"slices"
"strings"
"testing"
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
azfake "github.com/Azure/azure-sdk-for-go/sdk/azcore/fake"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
fake "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5/fake"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4"
fakenetwork "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4/fake"
cache "github.com/Code-Hex/go-generics-cache"
"github.com/Code-Hex/go-generics-cache/policy/lru"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
// defaultMockNetworkID is the network interface resource ID used by the mock
// VMs and the mock network interface responses in these tests.
const defaultMockNetworkID string = "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Network/networkInterfaces/{networkInterfaceName}"
func TestMain(m *testing.M) {
goleak.VerifyTestMain(m,
goleak.IgnoreTopFunction("github.com/Code-Hex/go-generics-cache.(*janitor).run.func1"),
@ -96,13 +111,12 @@ func TestVMToLabelSet(t *testing.T) {
vmType := "type"
location := "westeurope"
computerName := "computer_name"
networkID := "/subscriptions/00000000-0000-0000-0000-000000000000/network1"
ipAddress := "10.20.30.40"
primary := true
networkProfile := armcompute.NetworkProfile{
NetworkInterfaces: []*armcompute.NetworkInterfaceReference{
{
ID: &networkID,
ID: to.Ptr(defaultMockNetworkID),
Properties: &armcompute.NetworkInterfaceReferenceProperties{Primary: &primary},
},
},
@ -139,7 +153,7 @@ func TestVMToLabelSet(t *testing.T) {
Location: location,
OsType: "Linux",
Tags: map[string]*string{},
NetworkInterfaces: []string{networkID},
NetworkInterfaces: []string{defaultMockNetworkID},
Size: size,
}
@ -150,11 +164,12 @@ func TestVMToLabelSet(t *testing.T) {
cfg := DefaultSDConfig
d := &Discovery{
cfg: &cfg,
logger: log.NewNopLogger(),
logger: promslog.NewNopLogger(),
cache: cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5))),
}
network := armnetwork.Interface{
Name: &networkID,
Name: to.Ptr(defaultMockNetworkID),
ID: to.Ptr(defaultMockNetworkID),
Properties: &armnetwork.InterfacePropertiesFormat{
Primary: &primary,
IPConfigurations: []*armnetwork.InterfaceIPConfiguration{
@ -164,9 +179,9 @@ func TestVMToLabelSet(t *testing.T) {
},
},
}
client := &mockAzureClient{
networkInterface: &network,
}
client := createMockAzureClient(t, nil, nil, nil, network, nil)
labelSet, err := d.vmToLabelSet(context.Background(), client, actualVM)
require.NoError(t, err)
require.Len(t, labelSet, 11)
@ -475,34 +490,372 @@ func TestNewAzureResourceFromID(t *testing.T) {
}
}
// TestAzureRefresh runs Discovery.refreshAzureClient against a mock Azure
// client built by createMockAzureClient and checks that the returned target
// groups match the expected label sets for plain VMs and scale set VMs.
func TestAzureRefresh(t *testing.T) {
tests := []struct {
scenario string
vmResp []armcompute.VirtualMachinesClientListAllResponse
vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse
vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse
interfacesResp armnetwork.Interface
expectedTG []*targetgroup.Group
}{
{
scenario: "VMs, VMSS and VMSSVMs in Multiple Responses",
// Two VM list responses with two VMs each.
vmResp: []armcompute.VirtualMachinesClientListAllResponse{
{
VirtualMachineListResult: armcompute.VirtualMachineListResult{
Value: []*armcompute.VirtualMachine{
defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm1"), to.Ptr("vm1")),
defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm2"), to.Ptr("vm2")),
},
},
},
{
VirtualMachineListResult: armcompute.VirtualMachineListResult{
Value: []*armcompute.VirtualMachine{
defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm3"), to.Ptr("vm3")),
defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm4"), to.Ptr("vm4")),
},
},
},
},
// A single scale set, vmScaleSet1.
vmssResp: []armcompute.VirtualMachineScaleSetsClientListAllResponse{
{
VirtualMachineScaleSetListWithLinkResult: armcompute.VirtualMachineScaleSetListWithLinkResult{
Value: []*armcompute.VirtualMachineScaleSet{
{
ID: to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1"),
Name: to.Ptr("vmScaleSet1"),
Location: to.Ptr("australiaeast"),
Type: to.Ptr("Microsoft.Compute/virtualMachineScaleSets"),
},
},
},
},
},
// Two VMs belonging to vmScaleSet1.
vmssvmResp: []armcompute.VirtualMachineScaleSetVMsClientListResponse{
{
VirtualMachineScaleSetVMListResult: armcompute.VirtualMachineScaleSetVMListResult{
Value: []*armcompute.VirtualMachineScaleSetVM{
defaultVMSSVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm1"), to.Ptr("vmScaleSet1_vm1")),
defaultVMSSVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm2"), to.Ptr("vmScaleSet1_vm2")),
},
},
},
},
// The same interface is passed to the mock for every lookup, which is
// why all expected targets share the 10.0.0.1 private IP.
interfacesResp: armnetwork.Interface{
ID: to.Ptr(defaultMockNetworkID),
Properties: &armnetwork.InterfacePropertiesFormat{
Primary: to.Ptr(true),
IPConfigurations: []*armnetwork.InterfaceIPConfiguration{
{Properties: &armnetwork.InterfaceIPConfigurationPropertiesFormat{
PrivateIPAddress: to.Ptr("10.0.0.1"),
}},
},
},
},
// Expected targets are listed in ascending machine ID order, matching
// the sortTargetsByID call made before the comparison.
expectedTG: []*targetgroup.Group{
{
Targets: []model.LabelSet{
{
"__address__": "10.0.0.1:80",
"__meta_azure_machine_computer_name": "computer_name",
"__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm1",
"__meta_azure_machine_location": "australiaeast",
"__meta_azure_machine_name": "vm1",
"__meta_azure_machine_os_type": "Linux",
"__meta_azure_machine_private_ip": "10.0.0.1",
"__meta_azure_machine_resource_group": "{resourceGroup}",
"__meta_azure_machine_size": "size",
"__meta_azure_machine_tag_prometheus": "",
"__meta_azure_subscription_id": "",
"__meta_azure_tenant_id": "",
},
{
"__address__": "10.0.0.1:80",
"__meta_azure_machine_computer_name": "computer_name",
"__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm2",
"__meta_azure_machine_location": "australiaeast",
"__meta_azure_machine_name": "vm2",
"__meta_azure_machine_os_type": "Linux",
"__meta_azure_machine_private_ip": "10.0.0.1",
"__meta_azure_machine_resource_group": "{resourceGroup}",
"__meta_azure_machine_size": "size",
"__meta_azure_machine_tag_prometheus": "",
"__meta_azure_subscription_id": "",
"__meta_azure_tenant_id": "",
},
{
"__address__": "10.0.0.1:80",
"__meta_azure_machine_computer_name": "computer_name",
"__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm3",
"__meta_azure_machine_location": "australiaeast",
"__meta_azure_machine_name": "vm3",
"__meta_azure_machine_os_type": "Linux",
"__meta_azure_machine_private_ip": "10.0.0.1",
"__meta_azure_machine_resource_group": "{resourceGroup}",
"__meta_azure_machine_size": "size",
"__meta_azure_machine_tag_prometheus": "",
"__meta_azure_subscription_id": "",
"__meta_azure_tenant_id": "",
},
{
"__address__": "10.0.0.1:80",
"__meta_azure_machine_computer_name": "computer_name",
"__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm4",
"__meta_azure_machine_location": "australiaeast",
"__meta_azure_machine_name": "vm4",
"__meta_azure_machine_os_type": "Linux",
"__meta_azure_machine_private_ip": "10.0.0.1",
"__meta_azure_machine_resource_group": "{resourceGroup}",
"__meta_azure_machine_size": "size",
"__meta_azure_machine_tag_prometheus": "",
"__meta_azure_subscription_id": "",
"__meta_azure_tenant_id": "",
},
{
"__address__": "10.0.0.1:80",
"__meta_azure_machine_computer_name": "computer_name",
"__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm1",
"__meta_azure_machine_location": "australiaeast",
"__meta_azure_machine_name": "vmScaleSet1_vm1",
"__meta_azure_machine_os_type": "Linux",
"__meta_azure_machine_private_ip": "10.0.0.1",
"__meta_azure_machine_resource_group": "{resourceGroup}",
"__meta_azure_machine_scale_set": "vmScaleSet1",
"__meta_azure_machine_size": "size",
"__meta_azure_machine_tag_prometheus": "",
"__meta_azure_subscription_id": "",
"__meta_azure_tenant_id": "",
},
{
"__address__": "10.0.0.1:80",
"__meta_azure_machine_computer_name": "computer_name",
"__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm2",
"__meta_azure_machine_location": "australiaeast",
"__meta_azure_machine_name": "vmScaleSet1_vm2",
"__meta_azure_machine_os_type": "Linux",
"__meta_azure_machine_private_ip": "10.0.0.1",
"__meta_azure_machine_resource_group": "{resourceGroup}",
"__meta_azure_machine_scale_set": "vmScaleSet1",
"__meta_azure_machine_size": "size",
"__meta_azure_machine_tag_prometheus": "",
"__meta_azure_subscription_id": "",
"__meta_azure_tenant_id": "",
},
},
},
},
},
}
for _, tc := range tests {
t.Run(tc.scenario, func(t *testing.T) {
t.Parallel()
// NOTE(review): this is a pointer to the package-level DefaultSDConfig,
// not a copy; confirm no parallel subtest mutates it.
azureSDConfig := &DefaultSDConfig
azureClient := createMockAzureClient(t, tc.vmResp, tc.vmssResp, tc.vmssvmResp, tc.interfacesResp, nil)
reg := prometheus.NewRegistry()
refreshMetrics := discovery.NewRefreshMetrics(reg)
metrics := azureSDConfig.NewDiscovererMetrics(reg, refreshMetrics)
// A nil logger is passed to NewDiscovery here.
sd, err := NewDiscovery(azureSDConfig, nil, metrics)
require.NoError(t, err)
tg, err := sd.refreshAzureClient(context.Background(), azureClient)
require.NoError(t, err)
// Sort by machine ID before comparing; presumably the discovery does not
// guarantee target order (TODO confirm).
sortTargetsByID(tg[0].Targets)
require.Equal(t, tc.expectedTG, tg)
})
}
}
type mockAzureClient struct {
networkInterface *armnetwork.Interface
azureClient
}
var _ client = &mockAzureClient{}
func createMockAzureClient(t *testing.T, vmResp []armcompute.VirtualMachinesClientListAllResponse, vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse, vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse, interfaceResp armnetwork.Interface, logger *slog.Logger) client {
t.Helper()
mockVMServer := defaultMockVMServer(vmResp)
mockVMSSServer := defaultMockVMSSServer(vmssResp)
mockVMScaleSetVMServer := defaultMockVMSSVMServer(vmssvmResp)
mockInterfaceServer := defaultMockInterfaceServer(interfaceResp)
func (*mockAzureClient) getVMs(ctx context.Context, resourceGroup string) ([]virtualMachine, error) {
return nil, nil
}
vmClient, err := armcompute.NewVirtualMachinesClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{
ClientOptions: azcore.ClientOptions{
Transport: fake.NewVirtualMachinesServerTransport(&mockVMServer),
},
})
require.NoError(t, err)
func (*mockAzureClient) getScaleSets(ctx context.Context, resourceGroup string) ([]armcompute.VirtualMachineScaleSet, error) {
return nil, nil
}
vmssClient, err := armcompute.NewVirtualMachineScaleSetsClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{
ClientOptions: azcore.ClientOptions{
Transport: fake.NewVirtualMachineScaleSetsServerTransport(&mockVMSSServer),
},
})
require.NoError(t, err)
func (*mockAzureClient) getScaleSetVMs(ctx context.Context, scaleSet armcompute.VirtualMachineScaleSet) ([]virtualMachine, error) {
return nil, nil
}
vmssvmClient, err := armcompute.NewVirtualMachineScaleSetVMsClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{
ClientOptions: azcore.ClientOptions{
Transport: fake.NewVirtualMachineScaleSetVMsServerTransport(&mockVMScaleSetVMServer),
},
})
require.NoError(t, err)
func (m *mockAzureClient) getVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) {
if networkInterfaceID == "" {
return nil, fmt.Errorf("parameter networkInterfaceID cannot be empty")
interfacesClient, err := armnetwork.NewInterfacesClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{
ClientOptions: azcore.ClientOptions{
Transport: fakenetwork.NewInterfacesServerTransport(&mockInterfaceServer),
},
})
require.NoError(t, err)
return &mockAzureClient{
azureClient: azureClient{
vm: vmClient,
vmss: vmssClient,
vmssvm: vmssvmClient,
nic: interfacesClient,
logger: logger,
},
}
return m.networkInterface, nil
}
func (m *mockAzureClient) getVMScaleSetVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID, scaleSetName, instanceID string) (*armnetwork.Interface, error) {
if scaleSetName == "" {
return nil, fmt.Errorf("parameter virtualMachineScaleSetName cannot be empty")
func defaultMockInterfaceServer(interfaceResp armnetwork.Interface) fakenetwork.InterfacesServer {
return fakenetwork.InterfacesServer{
Get: func(_ context.Context, _, _ string, _ *armnetwork.InterfacesClientGetOptions) (resp azfake.Responder[armnetwork.InterfacesClientGetResponse], errResp azfake.ErrorResponder) {
resp.SetResponse(http.StatusOK, armnetwork.InterfacesClientGetResponse{Interface: interfaceResp}, nil)
return
},
GetVirtualMachineScaleSetNetworkInterface: func(_ context.Context, _, _, _, _ string, _ *armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceOptions) (resp azfake.Responder[armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceResponse], errResp azfake.ErrorResponder) {
resp.SetResponse(http.StatusOK, armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceResponse{Interface: interfaceResp}, nil)
return
},
}
return m.networkInterface, nil
}
// defaultMockVMServer returns a fake VirtualMachinesServer whose ListAll pager
// adds every element of vmResp, in order, as an HTTP 200 page.
func defaultMockVMServer(vmResp []armcompute.VirtualMachinesClientListAllResponse) fake.VirtualMachinesServer {
	listAll := func(_ *armcompute.VirtualMachinesClientListAllOptions) azfake.PagerResponder[armcompute.VirtualMachinesClientListAllResponse] {
		var pager azfake.PagerResponder[armcompute.VirtualMachinesClientListAllResponse]
		for i := range vmResp {
			pager.AddPage(http.StatusOK, vmResp[i], nil)
		}
		return pager
	}

	return fake.VirtualMachinesServer{NewListAllPager: listAll}
}
// defaultMockVMSSServer returns a fake VirtualMachineScaleSetsServer whose
// ListAll pager adds every element of vmssResp, in order, as an HTTP 200 page.
func defaultMockVMSSServer(vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse) fake.VirtualMachineScaleSetsServer {
	listAll := func(_ *armcompute.VirtualMachineScaleSetsClientListAllOptions) azfake.PagerResponder[armcompute.VirtualMachineScaleSetsClientListAllResponse] {
		var pager azfake.PagerResponder[armcompute.VirtualMachineScaleSetsClientListAllResponse]
		for i := range vmssResp {
			pager.AddPage(http.StatusOK, vmssResp[i], nil)
		}
		return pager
	}

	return fake.VirtualMachineScaleSetsServer{NewListAllPager: listAll}
}
// defaultMockVMSSVMServer returns a fake VirtualMachineScaleSetVMsServer whose
// List pager ignores its arguments and adds every element of vmssvmResp, in
// order, as an HTTP 200 page.
func defaultMockVMSSVMServer(vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse) fake.VirtualMachineScaleSetVMsServer {
	list := func(_, _ string, _ *armcompute.VirtualMachineScaleSetVMsClientListOptions) azfake.PagerResponder[armcompute.VirtualMachineScaleSetVMsClientListResponse] {
		var pager azfake.PagerResponder[armcompute.VirtualMachineScaleSetVMsClientListResponse]
		for i := range vmssvmResp {
			pager.AddPage(http.StatusOK, vmssvmResp[i], nil)
		}
		return pager
	}

	return fake.VirtualMachineScaleSetVMsServer{NewListPager: list}
}
// defaultVMWithIDAndName builds a test VM with the given ID and name. The VM
// is located in "australiaeast", has Linux as its OS disk type, computer name
// "computer_name", size "size", one network interface referencing
// defaultMockNetworkID and a "prometheus" tag with an empty value. A nil id or
// name is replaced by the built-in testVM default.
func defaultVMWithIDAndName(id, name *string) *armcompute.VirtualMachine {
	if id == nil {
		id = to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/testVM")
	}
	if name == nil {
		name = to.Ptr("testVM")
	}

	nics := []*armcompute.NetworkInterfaceReference{
		{ID: to.Ptr(defaultMockNetworkID)},
	}
	props := &armcompute.VirtualMachineProperties{
		OSProfile: &armcompute.OSProfile{ComputerName: to.Ptr("computer_name")},
		StorageProfile: &armcompute.StorageProfile{
			OSDisk: &armcompute.OSDisk{OSType: to.Ptr(armcompute.OperatingSystemTypesLinux)},
		},
		NetworkProfile: &armcompute.NetworkProfile{NetworkInterfaces: nics},
		HardwareProfile: &armcompute.HardwareProfile{
			VMSize: to.Ptr(armcompute.VirtualMachineSizeTypes("size")),
		},
	}

	return &armcompute.VirtualMachine{
		ID:         id,
		Name:       name,
		Type:       to.Ptr("Microsoft.Compute/virtualMachines"),
		Location:   to.Ptr("australiaeast"),
		Properties: props,
		Tags:       map[string]*string{"prometheus": to.Ptr("")},
	}
}
// defaultVMSSVMWithIDAndName builds a test scale set VM with the given ID and
// name. The VM has instance ID "123", is located in "australiaeast", has Linux
// as its OS disk type, computer name "computer_name", size "size", one network
// interface referencing defaultMockNetworkID and a "prometheus" tag with an
// empty value. A nil id or name is replaced by the built-in testVM default.
func defaultVMSSVMWithIDAndName(id, name *string) *armcompute.VirtualMachineScaleSetVM {
	if id == nil {
		id = to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/testVMScaleSet/virtualMachines/testVM")
	}
	if name == nil {
		name = to.Ptr("testVM")
	}

	nics := []*armcompute.NetworkInterfaceReference{
		{ID: to.Ptr(defaultMockNetworkID)},
	}
	props := &armcompute.VirtualMachineScaleSetVMProperties{
		OSProfile: &armcompute.OSProfile{ComputerName: to.Ptr("computer_name")},
		StorageProfile: &armcompute.StorageProfile{
			OSDisk: &armcompute.OSDisk{OSType: to.Ptr(armcompute.OperatingSystemTypesLinux)},
		},
		NetworkProfile: &armcompute.NetworkProfile{NetworkInterfaces: nics},
		HardwareProfile: &armcompute.HardwareProfile{
			VMSize: to.Ptr(armcompute.VirtualMachineSizeTypes("size")),
		},
	}

	return &armcompute.VirtualMachineScaleSetVM{
		ID:         id,
		Name:       name,
		Type:       to.Ptr("Microsoft.Compute/virtualMachines"),
		InstanceID: to.Ptr("123"),
		Location:   to.Ptr("australiaeast"),
		Properties: props,
		Tags:       map[string]*string{"prometheus": to.Ptr("")},
	}
}
// sortTargetsByID sorts targets in place in ascending order of their
// "__meta_azure_machine_id" label value.
func sortTargetsByID(targets []model.LabelSet) {
	const idLabel = "__meta_azure_machine_id"

	byMachineID := func(a, b model.LabelSet) int {
		return strings.Compare(string(a[idLabel]), string(b[idLabel]))
	}
	slices.SortFunc(targets, byMachineID)
}

View file

@ -17,17 +17,18 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"slices"
"strconv"
"strings"
"time"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
consul "github.com/hashicorp/consul/api"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
@ -113,8 +114,11 @@ type SDConfig struct {
Services []string `yaml:"services,omitempty"`
// A list of tags used to filter instances inside a service. Services must contain all tags in the list.
ServiceTags []string `yaml:"tags,omitempty"`
// Desired node metadata.
// Desired node metadata. As of Consul 1.14, consider `filter` instead.
NodeMeta map[string]string `yaml:"node_meta,omitempty"`
// Consul filter string
// See https://www.consul.io/api-docs/catalog#filtering-1, for syntax
Filter string `yaml:"filter,omitempty"`
HTTPClientConfig config.HTTPClientConfig `yaml:",inline"`
}
@ -174,22 +178,23 @@ type Discovery struct {
watchedServices []string // Set of services which will be discovered.
watchedTags []string // Tags used to filter instances of a service.
watchedNodeMeta map[string]string
watchedFilter string
allowStale bool
refreshInterval time.Duration
finalizer func()
logger log.Logger
logger *slog.Logger
metrics *consulMetrics
}
// NewDiscovery returns a new Discovery for the given config.
func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*consulMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
return nil, errors.New("invalid discovery metrics type")
}
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
wrapper, err := config.NewClientFromConfig(conf.HTTPClientConfig, "consul_sd", config.WithIdleConnTimeout(2*watchTimeout))
@ -218,6 +223,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere
watchedServices: conf.Services,
watchedTags: conf.ServiceTags,
watchedNodeMeta: conf.NodeMeta,
watchedFilter: conf.Filter,
allowStale: conf.AllowStale,
refreshInterval: time.Duration(conf.RefreshInterval),
clientDatacenter: conf.Datacenter,
@ -236,22 +242,17 @@ func (d *Discovery) shouldWatch(name string, tags []string) bool {
return d.shouldWatchFromName(name) && d.shouldWatchFromTags(tags)
}
// shouldWatchFromName returns whether the service of the given name should be
// watched based on its name.
//
// When d.watchedServices is empty there is no fixed set of services, so every
// name is accepted. Otherwise the name must be an exact match for one of the
// entries in d.watchedServices.
func (d *Discovery) shouldWatchFromName(name string) bool {
	// If there's no fixed set of watched services, we watch everything.
	if len(d.watchedServices) == 0 {
		return true
	}
	return slices.Contains(d.watchedServices, name)
}
// shouldWatch returns whether the service of the given name should be watched based on its tags.
// shouldWatchFromTags returns whether the service of the given name should be watched based on its tags.
// This gets called when the user doesn't specify a list of services in order to avoid watching
// *all* services. Details in https://github.com/prometheus/prometheus/pull/3814
func (d *Discovery) shouldWatchFromTags(tags []string) bool {
@ -282,7 +283,7 @@ func (d *Discovery) getDatacenter() error {
info, err := d.client.Agent().Self()
if err != nil {
level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err)
d.logger.Error("Error retrieving datacenter name", "err", err)
d.metrics.rpcFailuresCount.Inc()
return err
}
@ -290,12 +291,12 @@ func (d *Discovery) getDatacenter() error {
dc, ok := info["Config"]["Datacenter"].(string)
if !ok {
err := fmt.Errorf("invalid value '%v' for Config.Datacenter", info["Config"]["Datacenter"])
level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err)
d.logger.Error("Error retrieving datacenter name", "err", err)
return err
}
d.clientDatacenter = dc
d.logger = log.With(d.logger, "datacenter", dc)
d.logger = d.logger.With("datacenter", dc)
return nil
}
@ -361,13 +362,14 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
// entire list of services.
func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.Group, lastIndex *uint64, services map[string]func()) {
catalog := d.client.Catalog()
level.Debug(d.logger).Log("msg", "Watching services", "tags", strings.Join(d.watchedTags, ","))
d.logger.Debug("Watching services", "tags", strings.Join(d.watchedTags, ","), "filter", d.watchedFilter)
opts := &consul.QueryOptions{
WaitIndex: *lastIndex,
WaitTime: watchTimeout,
AllowStale: d.allowStale,
NodeMeta: d.watchedNodeMeta,
Filter: d.watchedFilter,
}
t0 := time.Now()
srvs, meta, err := catalog.Services(opts.WithContext(ctx))
@ -382,7 +384,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
}
if err != nil {
level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err)
d.logger.Error("Error refreshing service list", "err", err)
d.metrics.rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return
@ -445,7 +447,7 @@ type consulService struct {
discovery *Discovery
client *consul.Client
tagSeparator string
logger log.Logger
logger *slog.Logger
rpcFailuresCount prometheus.Counter
serviceRPCDuration prometheus.Observer
}
@ -490,7 +492,7 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G
// Get updates for a service.
func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Group, health *consul.Health, lastIndex *uint64) {
level.Debug(srv.logger).Log("msg", "Watching service", "service", srv.name, "tags", strings.Join(srv.tags, ","))
srv.logger.Debug("Watching service", "service", srv.name, "tags", strings.Join(srv.tags, ","))
opts := &consul.QueryOptions{
WaitIndex: *lastIndex,
@ -513,7 +515,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr
}
if err != nil {
level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err)
srv.logger.Error("Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err)
srv.rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return

View file

@ -21,10 +21,10 @@ import (
"testing"
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"gopkg.in/yaml.v2"
@ -252,6 +252,8 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) {
case "/v1/catalog/services?index=1&wait=120000ms":
time.Sleep(5 * time.Second)
response = ServicesTestAnswer
case "/v1/catalog/services?filter=NodeMeta.rack_name+%3D%3D+%222304%22&index=1&wait=120000ms":
response = ServicesTestAnswer
default:
t.Errorf("Unhandled consul call: %s", r.URL)
}
@ -270,7 +272,7 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) {
}
func newDiscovery(t *testing.T, config *SDConfig) *Discovery {
logger := log.NewNopLogger()
logger := promslog.NewNopLogger()
metrics := NewTestMetrics(t, config, prometheus.NewRegistry())
@ -369,6 +371,27 @@ func TestAllOptions(t *testing.T) {
<-ch
}
// TestFilterOption watches the test service while restricting results through
// the Filter parameter, here a node-meta expression on rack_name.
func TestFilterOption(t *testing.T) {
	stub, config := newServer(t)
	defer stub.Close()

	config.Services = []string{"test"}
	config.Filter = `NodeMeta.rack_name == "2304"`
	config.Token = "fake-token"

	d := newDiscovery(t, config)

	ctx, cancel := context.WithCancel(context.Background())
	ch := make(chan []*targetgroup.Group)
	go func() {
		d.Run(ctx, ch)
		// Closing ch signals that Run has returned.
		close(ch)
	}()
	checkOneTarget(t, <-ch)
	cancel()
	// Receive once more after cancelling, as TestAllOptions does, so the test
	// does not return while the Run goroutine is still active (normally this
	// receive completes because Run returned and ch was closed).
	<-ch
}
func TestGetDatacenterShouldReturnError(t *testing.T) {
for _, tc := range []struct {
handler func(http.ResponseWriter, *http.Request)
@ -376,14 +399,14 @@ func TestGetDatacenterShouldReturnError(t *testing.T) {
}{
{
// Define a handler that will return status 500.
handler: func(w http.ResponseWriter, r *http.Request) {
handler: func(w http.ResponseWriter, _ *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
},
errMessage: "Unexpected response code: 500 ()",
},
{
// Define a handler that will return incorrect response.
handler: func(w http.ResponseWriter, r *http.Request) {
handler: func(w http.ResponseWriter, _ *http.Request) {
w.Write([]byte(`{"Config": {"Not-Datacenter": "test-dc"}}`))
},
errMessage: "invalid value '<nil>' for Config.Datacenter",
@ -407,7 +430,7 @@ func TestGetDatacenterShouldReturnError(t *testing.T) {
err = d.getDatacenter()
// An error should be returned.
require.Equal(t, tc.errMessage, err.Error())
require.EqualError(t, err, tc.errMessage)
// Should still be empty.
require.Equal(t, "", d.clientDatacenter)
}

View file

@ -31,7 +31,7 @@ type consulMetrics struct {
metricRegisterer discovery.MetricRegisterer
}
func newDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
func newDiscovererMetrics(reg prometheus.Registerer, _ discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
m := &consulMetrics{
rpcFailuresCount: prometheus.NewCounter(
prometheus.CounterOpts{

View file

@ -15,7 +15,9 @@ package digitalocean
import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"net/http"
"strconv"
@ -23,7 +25,6 @@ import (
"time"
"github.com/digitalocean/godo"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -64,7 +65,7 @@ func init() {
}
// NewDiscovererMetrics implements discovery.Config.
func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
func (*SDConfig) NewDiscovererMetrics(_ prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
return &digitaloceanMetrics{
refreshMetrics: rmi,
}
@ -111,10 +112,10 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*digitaloceanMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
return nil, errors.New("invalid discovery metrics type")
}
d := &Discovery{
@ -131,7 +132,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere
Transport: rt,
Timeout: time.Duration(conf.RefreshInterval),
},
godo.SetUserAgent(fmt.Sprintf("Prometheus/%s", version.Version)),
godo.SetUserAgent(version.PrometheusUserAgent()),
)
if err != nil {
return nil, fmt.Errorf("error setting up digital ocean agent: %w", err)

View file

@ -19,9 +19,9 @@ import (
"net/url"
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery"
@ -57,7 +57,7 @@ func TestDigitalOceanSDRefresh(t *testing.T) {
defer metrics.Unregister()
defer refreshMetrics.Unregister()
d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
endpoint, err := url.Parse(sdmock.Mock.Endpoint())
require.NoError(t, err)

View file

@ -13,7 +13,7 @@
package discovery
// NoopDiscovererMetrics is a dummy metrics struct for service discovery
// mechanisms that don't have any metrics.
type NoopDiscovererMetrics struct{}
var _ DiscovererMetrics = (*NoopDiscovererMetrics)(nil)

View file

@ -15,9 +15,9 @@ package discovery
import (
"context"
"log/slog"
"reflect"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@ -39,7 +39,7 @@ type Discoverer interface {
Run(ctx context.Context, up chan<- []*targetgroup.Group)
}
// Internal metrics of service discovery mechanisms.
// DiscovererMetrics are internal metrics of service discovery mechanisms.
type DiscovererMetrics interface {
Register() error
Unregister()
@ -47,7 +47,7 @@ type DiscovererMetrics interface {
// DiscovererOptions provides options for a Discoverer.
type DiscovererOptions struct {
Logger log.Logger
Logger *slog.Logger
Metrics DiscovererMetrics
@ -56,7 +56,7 @@ type DiscovererOptions struct {
HTTPClientOptions []config.HTTPClientOption
}
// Metrics used by the "refresh" package.
// RefreshMetrics are used by the "refresh" package.
// We define them here in the "discovery" package in order to avoid a cyclic dependency between
// "discovery" and "refresh".
type RefreshMetrics struct {
@ -64,17 +64,18 @@ type RefreshMetrics struct {
Duration prometheus.Observer
}
// RefreshMetricsInstantiator instantiates the metrics used by the "refresh" package.
type RefreshMetricsInstantiator interface {
	// Instantiate returns the RefreshMetrics for mech.
	// NOTE(review): mech is presumably the name of the service discovery
	// mechanism; confirm against the implementations.
	Instantiate(mech string) *RefreshMetrics
}
// An interface for registering, unregistering, and instantiating metrics for the "refresh" package.
// Refresh metrics are registered and unregistered outside of the service discovery mechanism.
// This is so that the same metrics can be reused across different service discovery mechanisms.
// To manage refresh metrics inside the SD mechanism, we'd need to use const labels which are
// specific to that SD. However, doing so would also expose too many unused metrics on
// the Prometheus /metrics endpoint.
// RefreshMetricsManager is an interface for registering, unregistering, and
// instantiating metrics for the "refresh" package. Refresh metrics are
// registered and unregistered outside of the service discovery mechanism. This
// is so that the same metrics can be reused across different service discovery
// mechanisms. To manage refresh metrics inside the SD mechanism, we'd need to
// use const labels which are specific to that SD. However, doing so would also
// expose too many unused metrics on the Prometheus /metrics endpoint.
type RefreshMetricsManager interface {
DiscovererMetrics
RefreshMetricsInstantiator
@ -108,7 +109,7 @@ func (c *Configs) SetDirectory(dir string) {
// UnmarshalYAML implements yaml.Unmarshaler.
func (c *Configs) UnmarshalYAML(unmarshal func(interface{}) error) error {
cfgTyp := getConfigType(configsType)
cfgTyp := reflect.StructOf(configFields)
cfgPtr := reflect.New(cfgTyp)
cfgVal := cfgPtr.Elem()
@ -123,7 +124,7 @@ func (c *Configs) UnmarshalYAML(unmarshal func(interface{}) error) error {
// MarshalYAML implements yaml.Marshaler.
func (c Configs) MarshalYAML() (interface{}, error) {
cfgTyp := getConfigType(configsType)
cfgTyp := reflect.StructOf(configFields)
cfgPtr := reflect.New(cfgTyp)
cfgVal := cfgPtr.Elem()
@ -145,7 +146,8 @@ func (c StaticConfig) NewDiscoverer(DiscovererOptions) (Discoverer, error) {
return staticDiscoverer(c), nil
}
// NewDiscovererMetrics hands back a NoopDiscovererMetrics, since this service
// discovery mechanism needs no metrics. Both arguments are ignored.
func (StaticConfig) NewDiscovererMetrics(_ prometheus.Registerer, _ RefreshMetricsInstantiator) DiscovererMetrics {
	return new(NoopDiscovererMetrics)
}

Some files were not shown because too many files have changed in this diff Show more