Merge branch 'main' into nhcb-scrape-impl

# Conflicts:
#	config/config.go
#	scrape/scrape.go
This commit is contained in:
György Krajcsovits 2024-09-25 13:43:57 +02:00
commit 71fd2d93a9
528 changed files with 79200 additions and 63099 deletions

56
.github/stale.yml vendored
View file

@ -1,56 +0,0 @@
# Configuration for probot-stale - https://github.com/probot/stale
# Number of days of inactivity before an Issue or Pull Request becomes stale
daysUntilStale: 60
# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
daysUntilClose: false
# Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled)
onlyLabels: []
# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
exemptLabels:
- keepalive
# Set to true to ignore issues in a project (defaults to false)
exemptProjects: false
# Set to true to ignore issues in a milestone (defaults to false)
exemptMilestones: false
# Set to true to ignore issues with an assignee (defaults to false)
exemptAssignees: false
# Label to use when marking as stale
staleLabel: stale
# Comment to post when marking as stale. Set to `false` to disable
markComment: false
# Comment to post when removing the stale label.
# unmarkComment: >
# Your comment here.
# Comment to post when closing a stale Issue or Pull Request.
# closeComment: >
# Your comment here.
# Limit the number of actions per hour, from 1-30. Default is 30
limitPerRun: 30
# Limit to only `issues` or `pulls`
only: pulls
# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
# pulls:
# daysUntilStale: 30
# markComment: >
# This pull request has been automatically marked as stale because it has not had
# recent activity. It will be closed if no further activity occurs. Thank you
# for your contributions.
# issues:
# exemptLabels:
# - confirmed

View file

@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: bufbuild/buf-setup-action@dde0b9351db90fbf78e345f41a57de8514bf1091 # v1.32.2
- uses: bufbuild/buf-setup-action@54abbed4fe8d8d45173eca4798b0c39a53a7b658 # v1.39.0
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1

View file

@ -13,7 +13,7 @@ jobs:
if: github.repository_owner == 'prometheus'
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: bufbuild/buf-setup-action@dde0b9351db90fbf78e345f41a57de8514bf1091 # v1.32.2
- uses: bufbuild/buf-setup-action@54abbed4fe8d8d45173eca4798b0c39a53a7b658 # v1.39.0
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1

View file

@ -11,12 +11,14 @@ jobs:
container:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
image: quay.io/prometheus/golang-builder:1.22-base
image: quay.io/prometheus/golang-builder:1.23-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/setup_environment
- run: make GOOPTS=--tags=stringlabels GO_ONLY=1 SKIP_GOLANGCI_LINT=1
with:
enable_npm: true
- run: make GOOPTS=--tags=stringlabels GO_ONLY=1 SKIP_GOLANGCI_LINT=1 test-flags=""
- run: go test --tags=stringlabels ./tsdb/ -test.tsdb-isolation=false
- run: make -C documentation/examples/remote_storage
- run: make -C documentation/examples
@ -25,10 +27,10 @@ jobs:
name: More Go tests
runs-on: ubuntu-latest
container:
image: quay.io/prometheus/golang-builder:1.22-base
image: quay.io/prometheus/golang-builder:1.23-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/setup_environment
- run: go test --tags=dedupelabels ./...
- run: GOARCH=386 go test ./cmd/prometheus
@ -39,9 +41,12 @@ jobs:
test_go_oldest:
name: Go tests with previous Go version
runs-on: ubuntu-latest
env:
# Enforce the Go version.
GOTOOLCHAIN: local
container:
# The go version in this image should be N-1 wrt test_go.
image: quay.io/prometheus/golang-builder:1.21-base
image: quay.io/prometheus/golang-builder:1.22-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- run: make build
@ -54,11 +59,11 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
container:
image: quay.io/prometheus/golang-builder:1.22-base
image: quay.io/prometheus/golang-builder:1.23-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/setup_environment
with:
enable_go: false
@ -75,9 +80,9 @@ jobs:
runs-on: windows-latest
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1
- uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
with:
go-version: 1.22.x
go-version: 1.23.x
- run: |
$TestTargets = go list ./... | Where-Object { $_ -NotMatch "(github.com/prometheus/prometheus/discovery.*|github.com/prometheus/prometheus/config|github.com/prometheus/prometheus/web)"}
go test $TestTargets -vet=off -v
@ -89,7 +94,7 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
container:
image: quay.io/prometheus/golang-builder:1.22-base
image: quay.io/prometheus/golang-builder:1.23-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- run: go install ./cmd/promtool/.
@ -107,6 +112,8 @@ jobs:
if: |
!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
&&
!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
&&
!(github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-'))
&&
!(github.event_name == 'push' && github.event.ref == 'refs/heads/main')
@ -115,7 +122,7 @@ jobs:
thread: [ 0, 1, 2 ]
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/build
with:
promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386"
@ -127,6 +134,8 @@ jobs:
if: |
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
||
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
||
(github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-'))
||
(github.event_name == 'push' && github.event.ref == 'refs/heads/main')
@ -138,11 +147,23 @@ jobs:
# should also be updated.
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/build
with:
parallelism: 12
thread: ${{ matrix.thread }}
build_all_status:
name: Report status of build Prometheus for all architectures
runs-on: ubuntu-latest
needs: [build_all]
if: github.event_name == 'pull_request' && startsWith(github.event.pull_request.base.ref, 'release-')
steps:
- name: Successful build
if: ${{ !(contains(needs.*.result, 'failure')) && !(contains(needs.*.result, 'cancelled')) }}
run: exit 0
- name: Failing or cancelled build
if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
run: exit 1
check_generated_parser:
name: Check generated parser
runs-on: ubuntu-latest
@ -150,10 +171,10 @@ jobs:
- name: Checkout repository
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- name: Install Go
uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1
uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
with:
cache: false
go-version: 1.22.x
go-version: 1.23.x
- name: Run goyacc and check for diff
run: make install-goyacc check-generated-parser
golangci:
@ -163,18 +184,18 @@ jobs:
- name: Checkout repository
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- name: Install Go
uses: actions/setup-go@cdcb36043654635271a94b9a6d1392de5bb323a7 # v5.0.1
uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
with:
go-version: 1.22.x
go-version: 1.23.x
- name: Install snmp_exporter/generator dependencies
run: sudo apt-get update && sudo apt-get -y install libsnmp-dev
if: github.repository == 'prometheus/snmp_exporter'
- name: Lint
uses: golangci/golangci-lint-action@a4f60bb28d35aeee14e6880718e0c85ff1882e64 # v6.0.1
uses: golangci/golangci-lint-action@aaa42aa0628b4ae2578232a66b541047968fac86 # v6.1.0
with:
args: --verbose
# Make sure to sync this with Makefile.common and scripts/golangci-lint.yml.
version: v1.59.1
version: v1.60.2
fuzzing:
uses: ./.github/workflows/fuzzing.yml
if: github.event_name == 'pull_request'
@ -188,7 +209,7 @@ jobs:
if: github.event_name == 'push' && github.event.ref == 'refs/heads/main'
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/publish_main
with:
docker_hub_login: ${{ secrets.docker_hub_login }}
@ -199,10 +220,13 @@ jobs:
name: Publish release artefacts
runs-on: ubuntu-latest
needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
if: |
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
||
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/publish_release
with:
docker_hub_login: ${{ secrets.docker_hub_login }}
@ -217,9 +241,9 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- name: Install nodejs
uses: actions/setup-node@60edb5dd545a775178f52524783378180af0d1f8 # v4.0.2
uses: actions/setup-node@1e60f620b9541d16bece96c5465dc8ee9832be0b # v4.0.3
with:
node-version-file: "web/ui/.nvmrc"
registry-url: "https://registry.npmjs.org"
@ -230,17 +254,26 @@ jobs:
restore-keys: |
${{ runner.os }}-node-
- name: Check libraries version
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
run: ./scripts/ui_release.sh --check-package "$(echo ${{ github.ref_name }}|sed s/v2/v0/)"
if: |
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
||
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
run: ./scripts/ui_release.sh --check-package "$(./scripts/get_module_version.sh ${{ github.ref_name }})"
- name: build
run: make assets
- name: Copy files before publishing libs
run: ./scripts/ui_release.sh --copy
- name: Publish dry-run libraries
if: "!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))"
if: |
!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
&&
!(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
run: ./scripts/ui_release.sh --publish dry-run
- name: Publish libraries
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
if: |
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.'))
||
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
run: ./scripts/ui_release.sh --publish
env:
# The setup-node action writes an .npmrc file with this env variable

View file

@ -27,12 +27,12 @@ jobs:
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- name: Initialize CodeQL
uses: github/codeql-action/init@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff # v3.25.8
uses: github/codeql-action/init@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6
with:
languages: ${{ matrix.language }}
- name: Autobuild
uses: github/codeql-action/autobuild@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff # v3.25.8
uses: github/codeql-action/autobuild@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff # v3.25.8
uses: github/codeql-action/analyze@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6

View file

@ -21,7 +21,7 @@ jobs:
fuzz-seconds: 600
dry-run: false
- name: Upload Crash
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
if: failure() && steps.build.outcome == 'success'
with:
name: artifacts

View file

@ -26,7 +26,7 @@ jobs:
persist-credentials: false
- name: "Run analysis"
uses: ossf/scorecard-action@dc50aa9510b46c811795eb24b2f1ba02a914e534 # tag=v2.3.3
uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # tag=v2.4.0
with:
results_file: results.sarif
results_format: sarif
@ -37,7 +37,7 @@ jobs:
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # tag=v4.3.3
uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # tag=v4.4.0
with:
name: SARIF file
path: results.sarif
@ -45,6 +45,6 @@ jobs:
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff # tag=v3.25.8
uses: github/codeql-action/upload-sarif@4dd16135b69a43b6c8efb853346f8437d92d3c93 # tag=v3.26.6
with:
sarif_file: results.sarif

31
.github/workflows/stale.yml vendored Normal file
View file

@ -0,0 +1,31 @@
name: Stale Check
on:
workflow_dispatch: {}
schedule:
- cron: '16 22 * * *'
permissions:
issues: write
pull-requests: write
jobs:
stale:
if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
runs-on: ubuntu-latest
steps:
- uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
# opt out of defaults to avoid marking issues as stale and closing them
# https://github.com/actions/stale#days-before-close
# https://github.com/actions/stale#days-before-stale
days-before-stale: -1
days-before-close: -1
# Setting it to empty string to skip comments.
# https://github.com/actions/stale#stale-pr-message
# https://github.com/actions/stale#stale-issue-message
stale-pr-message: ''
stale-issue-message: ''
operations-per-run: 30
# override days-before-stale, for only marking the pull requests as stale
days-before-pr-stale: 60
stale-pr-label: stale
exempt-pr-labels: keepalive

2
.gitignore vendored
View file

@ -22,7 +22,7 @@ benchmark.txt
/documentation/examples/remote_storage/example_write_adapter/example_write_adapter
npm_licenses.tar.bz2
/web/ui/static/react
/web/ui/static
/vendor
/.build

View file

@ -1,12 +1,5 @@
run:
timeout: 15m
skip-files:
# Skip autogenerated files.
- ^.*\.(pb|y)\.go$
skip-dirs:
# Copied it from a different source
- storage/remote/otlptranslator/prometheusremotewrite
- storage/remote/otlptranslator/prometheus
output:
sort-results: true
@ -32,8 +25,34 @@ linters:
- loggercheck
issues:
max-issues-per-linter: 0
max-same-issues: 0
# The default exclusions are too aggressive. For one, they
# essentially disable any linting on doc comments. We disable
# default exclusions here and add exclusions fitting our codebase
# further down.
exclude-use-default: false
exclude-files:
# Skip autogenerated files.
- ^.*\.(pb|y)\.go$
exclude-dirs:
# Copied it from a different source.
- storage/remote/otlptranslator/prometheusremotewrite
- storage/remote/otlptranslator/prometheus
exclude-rules:
- linters:
- errcheck
# Taken from the default exclusions (that are otherwise disabled above).
text: Error return value of .((os\.)?std(out|err)\..*|.*Close|.*Flush|os\.Remove(All)?|.*print(f|ln)?|os\.(Un)?Setenv). is not checked
- linters:
- govet
# We use many Seek methods that do not follow the usual pattern.
text: "stdmethods: method Seek.* should have signature Seek"
- linters:
- revive
# We have stopped at some point to write doc comments on exported symbols.
# TODO(beorn7): Maybe we should enforce this again? There are ~500 offenders right now.
text: exported (.+) should have comment( \(or a comment on this block\))? or be unexported
- linters:
- gocritic
text: "appendAssign"
@ -94,15 +113,14 @@ linters-settings:
errorf: false
revive:
# By default, revive will enable only the linting rules that are named in the configuration file.
# So, it's needed to explicitly set in configuration all required rules.
# The following configuration enables all the rules from the defaults.toml
# https://github.com/mgechev/revive/blob/master/defaults.toml
# So, it's needed to explicitly enable all required rules here.
rules:
# https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md
- name: blank-imports
- name: comment-spacings
- name: context-as-argument
arguments:
# allow functions with test or bench signatures
# Allow functions with test or bench signatures.
- allowTypesBefore: "*testing.T,testing.TB"
- name: context-keys-type
- name: dot-imports
@ -118,6 +136,8 @@ linters-settings:
- name: increment-decrement
- name: indent-error-flow
- name: package-comments
# TODO(beorn7): Currently, we have a lot of missing package doc comments. Maybe we should have them.
disabled: true
- name: range
- name: receiver-naming
- name: redefines-builtin-id

View file

@ -1,7 +1,7 @@
go:
# Whenever the Go version is updated here,
# .github/workflows should also be updated.
version: 1.22
version: 1.23
repository:
path: github.com/prometheus/prometheus
build:
@ -28,8 +28,6 @@ tarball:
# Whenever there are new files to include in the tarball,
# remember to make sure the new files will be generated after `make build`.
files:
- consoles
- console_libraries
- documentation/examples/prometheus.yml
- LICENSE
- NOTICE

View file

@ -2,6 +2,119 @@
## unreleased
* [CHANGE] `holt_winters` is now called `double_exponential_smoothing` and moves behind the [experimental-promql-functions feature flag](https://prometheus.io/docs/prometheus/latest/feature_flags/#experimental-promql-functions). #14930
* [BUGFIX] PromQL: Only return "possible non-counter" annotation when `rate` returns points. #14910
## 3.0.0-beta.0 / 2024-09-05
Release 3.0.0-beta.0 includes new features such as a brand new UI and UTF-8 support enabled by default. As a new major version, several breaking changes are introduced. The breaking changes are mainly around the removal of deprecated feature flags and CLI arguments, and the full list can be found below. Most users should be able to try this release out of the box without any configuration changes.
As is traditional with a beta release, we do **not** recommend users install 3.0.0-beta on critical production systems, but we do want everyone to test it out and find bugs.
* [CHANGE] UI: The old web UI has been replaced by a completely new one that is less cluttered and adds a few new features (PromLens-style tree view, better metrics explorer, "Explain" tab). However, it is still missing some features of the old UI (notably, exemplar display and heatmaps). To switch back to the old UI, you can use the feature flag `--enable-feature=old-ui` for the time being. #14872
* [CHANGE] PromQL: Range selectors and the lookback delta are now left-open, i.e. a sample coinciding with the lower time limit is excluded rather than included. #13904
* [CHANGE] Kubernetes SD: Remove support for `discovery.k8s.io/v1beta1` API version of EndpointSlice. This version is no longer served as of Kubernetes v1.25. #14365
* [CHANGE] Kubernetes SD: Remove support for `networking.k8s.io/v1beta1` API version of Ingress. This version is no longer served as of Kubernetes v1.22. #14365
* [CHANGE] UTF-8: Enable UTF-8 support by default. Prometheus now allows all UTF-8 characters in metric and label names. The corresponding `utf8-name` feature flag has been removed. #14705
* [CHANGE] Console: Remove example files for the console feature. Users can continue using the console feature by supplying their own JavaScript and templates. #14807
* [CHANGE] SD: Enable the new service discovery manager by default. This SD manager does not restart unchanged discoveries upon reloading. This makes reloads faster and reduces pressure on service discoveries' sources. The corresponding `new-service-discovery-manager` feature flag has been removed. #14770
* [CHANGE] Agent mode has been promoted to stable. The feature flag `agent` has been removed. To run Prometheus in Agent mode, use the new `--agent` cmdline arg instead. #14747
* [CHANGE] Remove deprecated `remote-write-receiver`,`promql-at-modifier`, and `promql-negative-offset` feature flags. #13456, #14526
* [CHANGE] Remove deprecated `storage.tsdb.allow-overlapping-blocks`, `alertmanager.timeout`, and `storage.tsdb.retention` flags. #14640, #14643
* [ENHANCEMENT] Move AM discovery page from "Monitoring status" to "Server status". #14875
* [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029
## 2.55.0-rc.0 / 2024-09-20
* [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727
* [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. #14769
* [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817
* [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815
* [FEATURE] Scraping: `scrape_failure_log_file` option to log failures to a file. #14734
* [FEATURE] OTLP receiver: Optional promotion of resource attributes to series labels. #14200
* [FEATURE] Remote-Write: Support Google Cloud Monitoring authorization. #14346
* [FEATURE] Promtool: `tsdb create-blocks` new option to add labels. #14403
* [FEATURE] Promtool: `promtool test` adds `--junit` flag to format results. #14506
* [ENHANCEMENT] OTLP receiver: Warn on exponential histograms with zero count and non-zero sum. #14706
* [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612
* [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379
* [ENHANCEMENT] Remote-Write: Don't reshard if we haven't successfully sent a sample since last update. #14450
* [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477
* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655
* [ENHANCEMENT] PromQL: Clarify error message logged when Go runtime panic occurs during query evaluation. #14621
* [ENHANCEMENT] PromQL: Use Kahan summation for better accuracy in `avg` and `avg_over_time`. #14413
* [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816
* [ENHANCEMENT] API: Support multiple listening addresses. #14665
* [ENHANCEMENT] TSDB: Backward compatibility with upcoming index v3. #14934
* [PERF] TSDB: Query in-order and out-of-order series together. #14354, #14693, #14714, #14831, #14874, #14948
* [PERF] TSDB: Streamline reading of overlapping out-of-order head chunks. #14729
* [BUGFIX] SD: Fix dropping targets (with feature flag `new-service-discovery-manager`). #13147
* [BUGFIX] SD: Stop storing stale targets (with feature flag `new-service-discovery-manager`). #13622
* [BUGFIX] Scraping: exemplars could be dropped in protobuf scraping. #14810
* [BUGFIX] Remote-Write: fix metadata sending for experimental Remote-Write V2. #14766
* [BUGFIX] Remote-Write: Return 4xx not 5xx when timeseries has duplicate label. #14716
* [BUGFIX] Experimental Native Histograms: many fixes for incorrect results, panics, warnings. #14513, #14575, #14598, #14609, #14611, #14771, #14821
* [BUGFIX] TSDB: Only count unknown record types in `record_decode_failures_total` metric. #14042
## 2.54.1 / 2024-08-27
* [BUGFIX] Scraping: allow multiple samples on same series, with explicit timestamps. #14685
* [BUGFIX] Docker SD: fix crash in `match_first_network` mode when container is reconnected to a new network. #14654
* [BUGFIX] PromQL: fix experimental native histograms getting corrupted due to vector selector bug in range queries. #14538
* [BUGFIX] PromQL: fix experimental native histogram counter reset detection on stale samples. #14514
* [BUGFIX] PromQL: fix native histograms getting corrupted due to vector selector bug in range queries. #14605
## 2.54.0 / 2024-08-09
Release 2.54 brings a release candidate of a major new version of [Remote Write: 2.0](https://prometheus.io/docs/specs/remote_write_spec_2_0/).
This is experimental at this time and may still change.
Remote-write v2 is enabled by default, but can be disabled via feature-flag `web.remote-write-receiver.accepted-protobuf-messages`.
* [CHANGE] Remote-Write: `highest_timestamp_in_seconds` and `queue_highest_sent_timestamp_seconds` metrics now initialized to 0. #14437
* [CHANGE] API: Split warnings from info annotations in API response. #14327
* [FEATURE] Remote-Write: Version 2.0 experimental, plus metadata in WAL via feature flag `metadata-wal-records` (defaults on). #14395,#14427,#14444
* [FEATURE] PromQL: add limitk() and limit_ratio() aggregation operators. #12503
* [ENHANCEMENT] PromQL: Accept underscores in literal numbers, e.g. 1_000_000 for 1 million. #12821
* [ENHANCEMENT] PromQL: float literal numbers and durations are now interchangeable (experimental). Example: `time() - my_timestamp > 10m`. #9138
* [ENHANCEMENT] PromQL: use Kahan summation for sum(). #14074,#14362
* [ENHANCEMENT] PromQL (experimental native histograms): Optimize `histogram_count` and `histogram_sum` functions. #14097
* [ENHANCEMENT] TSDB: Better support for out-of-order experimental native histogram samples. #14438
* [ENHANCEMENT] TSDB: Optimise seek within index. #14393
* [ENHANCEMENT] TSDB: Optimise deletion of stale series. #14307
* [ENHANCEMENT] TSDB: Reduce locking to optimise adding and removing series. #13286,#14286
* [ENHANCEMENT] TSDB: Small optimisation: streamline special handling for out-of-order data. #14396,#14584
* [ENHANCEMENT] Regexps: Optimize patterns with multiple prefixes. #13843,#14368
* [ENHANCEMENT] Regexps: Optimize patterns containing multiple literal strings. #14173
* [ENHANCEMENT] AWS SD: expose Primary IPv6 addresses as __meta_ec2_primary_ipv6_addresses. #14156
* [ENHANCEMENT] Docker SD: add MatchFirstNetwork for containers with multiple networks. #10490
* [ENHANCEMENT] OpenStack SD: Use `flavor.original_name` if available. #14312
* [ENHANCEMENT] UI (experimental native histograms): more accurate representation. #13680,#14430
* [ENHANCEMENT] Agent: `out_of_order_time_window` config option now applies to agent. #14094
* [ENHANCEMENT] Notifier: Send any outstanding Alertmanager notifications when shutting down. #14290
* [ENHANCEMENT] Rules: Add label-matcher support to Rules API. #10194
* [ENHANCEMENT] HTTP API: Add url to message logged on error while sending response. #14209
* [BUGFIX] TSDB: Exclude OOO chunks mapped after compaction starts (introduced by #14396). #14584
* [BUGFIX] CLI: escape `|` characters when generating docs. #14420
* [BUGFIX] PromQL (experimental native histograms): Fix some binary operators between native histogram values. #14454
* [BUGFIX] TSDB: LabelNames API could fail during compaction. #14279
* [BUGFIX] TSDB: Fix rare issue where pending OOO read can be left dangling if creating querier fails. #14341
* [BUGFIX] TSDB: fix check for context cancellation in LabelNamesFor. #14302
* [BUGFIX] Rules: Fix rare panic on reload. #14366
* [BUGFIX] Config: In YAML marshalling, do not output a regexp field if it was never set. #14004
* [BUGFIX] Remote-Write: reject samples with future timestamps. #14304
* [BUGFIX] Remote-Write: Fix data corruption in remote write if max_sample_age is applied. #14078
* [BUGFIX] Notifier: Fix Alertmanager discovery not updating under heavy load. #14174
* [BUGFIX] Regexes: some Unicode characters were not matched by case-insensitive comparison. #14170,#14299
* [BUGFIX] Remote-Read: Resolve occasional segmentation fault on query. #14515
## 2.53.1 / 2024-07-10
Fix a bug which would drop samples in remote-write if the sending flow stalled
for longer than it takes to write one "WAL segment". How long this takes depends on the size
of your Prometheus; as a rough guide with 10 million series it is about 2-3 minutes.
* [BUGFIX] Remote-write: stop dropping samples in catch-up #14446
## 2.53.0 / 2024-06-16
This release changes the default for GOGC, the Go runtime control for the trade-off between excess memory use and CPU usage. We have found that Prometheus operates with minimal additional CPU usage, but greatly reduced memory by adjusting the upstream Go default from 100 to 75.
@ -40,7 +153,7 @@ This release changes the default for GOGC, the Go runtime control for the trade-
* [ENHANCEMENT] TSDB: Pause regular block compactions if the head needs to be compacted (prioritize head as it increases memory consumption). #13754
* [ENHANCEMENT] Observability: Improved logging during signal handling termination. #13772
* [ENHANCEMENT] Observability: All log lines for drop series use "num_dropped" key consistently. #13823
* [ENHANCEMENT] Observability: Log chunk snapshot and mmaped chunk replay duration during WAL replay. #13838
* [ENHANCEMENT] Observability: Log chunk snapshot and mmapped chunk replay duration during WAL replay. #13838
* [ENHANCEMENT] Observability: Log if the block is being created from WBL during compaction. #13846
* [BUGFIX] PromQL: Fix inaccurate sample number statistic when querying histograms. #13667
* [BUGFIX] PromQL: Fix `histogram_stddev` and `histogram_stdvar` for cases where the histogram has negative buckets. #13852
@ -577,7 +690,7 @@ The binaries published with this release are built with Go1.17.8 to avoid [CVE-2
## 2.33.0 / 2022-01-29
* [CHANGE] PromQL: Promote negative offset and `@` modifer to stable features. #10121
* [CHANGE] PromQL: Promote negative offset and `@` modifier to stable features. #10121
* [CHANGE] Web: Promote remote-write-receiver to stable. #10119
* [FEATURE] Config: Add `stripPort` template function. #10002
* [FEATURE] Promtool: Add cardinality analysis to `check metrics`, enabled by flag `--extended`. #10045
@ -814,7 +927,7 @@ This vulnerability has been reported by Aaron Devaney from MDSec.
* [ENHANCEMENT] Templating: Enable parsing strings in `humanize` functions. #8682
* [BUGFIX] UI: Provide errors instead of blank page on TSDB Status Page. #8654 #8659
* [BUGFIX] TSDB: Do not panic when writing very large records to the WAL. #8790
* [BUGFIX] TSDB: Avoid panic when mmaped memory is referenced after the file is closed. #8723
* [BUGFIX] TSDB: Avoid panic when mmapped memory is referenced after the file is closed. #8723
* [BUGFIX] Scaleway Discovery: Fix nil pointer dereference. #8737
* [BUGFIX] Consul Discovery: Restart no longer required after config update with no targets. #8766
@ -1740,7 +1853,7 @@ information, read the announcement blog post and migration guide.
## 1.7.0 / 2017-06-06
* [CHANGE] Compress remote storage requests and responses with unframed/raw snappy.
* [CHANGE] Properly ellide secrets in config.
* [CHANGE] Properly elide secrets in config.
* [FEATURE] Add OpenStack service discovery.
* [FEATURE] Add ability to limit Kubernetes service discovery to certain namespaces.
* [FEATURE] Add metric for discovered number of Alertmanagers.

View file

@ -8,21 +8,16 @@ ARG OS="linux"
COPY .build/${OS}-${ARCH}/prometheus /bin/prometheus
COPY .build/${OS}-${ARCH}/promtool /bin/promtool
COPY documentation/examples/prometheus.yml /etc/prometheus/prometheus.yml
COPY console_libraries/ /usr/share/prometheus/console_libraries/
COPY consoles/ /usr/share/prometheus/consoles/
COPY LICENSE /LICENSE
COPY NOTICE /NOTICE
COPY npm_licenses.tar.bz2 /npm_licenses.tar.bz2
WORKDIR /prometheus
RUN ln -s /usr/share/prometheus/console_libraries /usr/share/prometheus/consoles/ /etc/prometheus/ && \
chown -R nobody:nobody /etc/prometheus /prometheus
RUN chown -R nobody:nobody /etc/prometheus /prometheus
USER nobody
EXPOSE 9090
VOLUME [ "/prometheus" ]
ENTRYPOINT [ "/bin/prometheus" ]
CMD [ "--config.file=/etc/prometheus/prometheus.yml", \
"--storage.tsdb.path=/prometheus", \
"--web.console.libraries=/usr/share/prometheus/console_libraries", \
"--web.console.templates=/usr/share/prometheus/consoles" ]
"--storage.tsdb.path=/prometheus" ]

View file

@ -13,13 +13,12 @@ Maintainers for specific parts of the codebase:
* `k8s`: Frederic Branczyk (<fbranczyk@gmail.com> / @brancz)
* `documentation`
* `prometheus-mixin`: Matthias Loibl (<mail@matthiasloibl.com> / @metalmatze)
* `model/histogram` and other code related to native histograms: Björn Rabenstein (<beorn@grafana.com> / @beorn7),
* `model/histogram` and other code related to native histograms: Björn Rabenstein (<beorn@grafana.com> / @beorn7),
George Krajcsovits (<gyorgy.krajcsovits@grafana.com> / @krajorama)
* `storage`
* `remote`: Callum Styan (<callumstyan@gmail.com> / @cstyan), Bartłomiej Płotka (<bwplotka@gmail.com> / @bwplotka), Tom Wilkie (tom.wilkie@gmail.com / @tomwilkie), Nicolás Pazos ( <npazosmendez@gmail.com> / @npazosmendez), Alex Greenbank ( <alex.greenbank@grafana.com> / @alexgreenbank)
* `otlptranslator`: Arve Knudsen (<arve.knudsen@gmail.com> / @aknuds1), Jesús Vázquez (<jesus.vazquez@grafana.com> / @jesusvazquez)
* `tsdb`: Ganesh Vernekar (<ganesh@grafana.com> / @codesome), Bartłomiej Płotka (<bwplotka@gmail.com> / @bwplotka), Jesús Vázquez (<jesus.vazquez@grafana.com> / @jesusvazquez)
* `agent`: Robert Fratto (<robert.fratto@grafana.com> / @rfratto)
* `web`
* `ui`: Julius Volz (<julius.volz@gmail.com> / @juliusv)
* `module`: Augustin Husson (<husson.augustin@gmail.com> @nexucis)

View file

@ -30,6 +30,11 @@ include Makefile.common
DOCKER_IMAGE_NAME ?= prometheus
# Only build UI if PREBUILT_ASSETS_STATIC_DIR is not set
ifdef PREBUILT_ASSETS_STATIC_DIR
SKIP_UI_BUILD = true
endif
.PHONY: update-npm-deps
update-npm-deps:
@echo ">> updating npm dependencies"
@ -42,13 +47,17 @@ upgrade-npm-deps:
.PHONY: ui-bump-version
ui-bump-version:
version=$$(sed s/2/0/ < VERSION) && ./scripts/ui_release.sh --bump-version "$${version}"
version=$$(./scripts/get_module_version.sh) && ./scripts/ui_release.sh --bump-version "$${version}"
cd web/ui && npm install
git add "./web/ui/package-lock.json" "./**/package.json"
.PHONY: ui-install
ui-install:
cd $(UI_PATH) && npm install
# The old React app has been separated from the npm workspaces setup to avoid
# issues with conflicting dependencies. This is a temporary solution until the
# new Mantine-based UI is fully integrated and the old app can be removed.
cd $(UI_PATH)/react-app && npm install
.PHONY: ui-build
ui-build:
@ -65,10 +74,30 @@ ui-test:
.PHONY: ui-lint
ui-lint:
cd $(UI_PATH) && npm run lint
# The old React app has been separated from the npm workspaces setup to avoid
# issues with conflicting dependencies. This is a temporary solution until the
# new Mantine-based UI is fully integrated and the old app can be removed.
cd $(UI_PATH)/react-app && npm run lint
.PHONY: assets
ifndef SKIP_UI_BUILD
assets: ui-install ui-build
.PHONY: npm_licenses
npm_licenses: ui-install
@echo ">> bundling npm licenses"
rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses
ln -s . npm_licenses
find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=-
rm -f npm_licenses
else
assets:
@echo '>> skipping assets build, pre-built assets provided'
npm_licenses:
@echo '>> skipping assets npm licenses, pre-built assets provided'
endif
.PHONY: assets-compress
assets-compress: assets
@echo '>> compressing assets'
@ -117,14 +146,6 @@ else
test: check-generated-parser common-test ui-build-module ui-test ui-lint check-go-mod-version
endif
.PHONY: npm_licenses
npm_licenses: ui-install
@echo ">> bundling npm licenses"
rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses
ln -s . npm_licenses
find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=-
rm -f npm_licenses
.PHONY: tarball
tarball: npm_licenses common-tarball

View file

@ -61,7 +61,7 @@ PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_
SKIP_GOLANGCI_LINT :=
GOLANGCI_LINT :=
GOLANGCI_LINT_OPTS ?=
GOLANGCI_LINT_VERSION ?= v1.59.1
GOLANGCI_LINT_VERSION ?= v1.60.2
# golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64.
# windows isn't included here because of the path separator being different.
ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))
@ -275,3 +275,9 @@ $(1)_precheck:
exit 1; \
fi
endef
govulncheck: install-govulncheck
govulncheck ./...
install-govulncheck:
command -v govulncheck > /dev/null || go install golang.org/x/vuln/cmd/govulncheck@latest

View file

@ -12,9 +12,10 @@ examples and guides.</p>
[![Docker Pulls](https://img.shields.io/docker/pulls/prom/prometheus.svg?maxAge=604800)][hub]
[![Go Report Card](https://goreportcard.com/badge/github.com/prometheus/prometheus)](https://goreportcard.com/report/github.com/prometheus/prometheus)
[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/486/badge)](https://bestpractices.coreinfrastructure.org/projects/486)
[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/prometheus/prometheus/badge)](https://securityscorecards.dev/viewer/?uri=github.com/prometheus/prometheus)
[![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/prometheus/badge)](https://clomonitor.io/projects/cncf/prometheus)
[![Gitpod ready-to-code](https://img.shields.io/badge/Gitpod-ready--to--code-blue?logo=gitpod)](https://gitpod.io/#https://github.com/prometheus/prometheus)
[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/prometheus.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:prometheus)
[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/prometheus/prometheus/badge)](https://securityscorecards.dev/viewer/?uri=github.com/prometheus/prometheus)
</div>
@ -114,7 +115,7 @@ The Makefile provides several targets:
Prometheus is bundled with many service discovery plugins.
When building Prometheus from source, you can edit the [plugins.yml](./plugins.yml)
file to disable some service discoveries. The file is a yaml-formated list of go
file to disable some service discoveries. The file is a yaml-formatted list of go
import path that will be built into the Prometheus binary.
After you have changed the file, you

View file

@ -57,7 +57,9 @@ Release cadence of first pre-releases being cut is 6 weeks.
| v2.50 | 2024-01-16 | Augustin Husson (GitHub: @nexucis) |
| v2.51 | 2024-03-07 | Bryan Boreham (GitHub: @bboreham) |
| v2.52 | 2024-04-22 | Arthur Silva Sens (GitHub: @ArthurSens) |
| v2.53 | 2024-06-03 | George Krajcsovits (GitHub: @krajorama) |
| v2.53 LTS | 2024-06-03 | George Krajcsovits (GitHub: @krajorama) |
| v2.54 | 2024-07-17 | Bryan Boreham (GitHub: @bboreham) |
| v2.55 | 2024-09-17 | Bryan Boreham (GitHub: @bboreham) |
If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.
@ -186,7 +188,7 @@ the Prometheus server, we use major version zero releases for the libraries.
Tag the new library release via the following commands:
```bash
tag="v$(sed s/2/0/ < VERSION)"
tag="v$(./scripts/get_module_version.sh)"
git tag -s "${tag}" -m "${tag}"
git push origin "${tag}"
```

48
SECURITY-INSIGHTS.yml Normal file
View file

@ -0,0 +1,48 @@
header:
schema-version: '1.0.0'
expiration-date: '2025-07-30T01:00:00.000Z'
last-updated: '2024-07-30'
last-reviewed: '2024-07-30'
project-url: https://github.com/prometheus/prometheus
changelog: https://github.com/prometheus/prometheus/blob/main/CHANGELOG.md
license: https://github.com/prometheus/prometheus/blob/main/LICENSE
project-lifecycle:
status: active
bug-fixes-only: false
core-maintainers:
- https://github.com/prometheus/prometheus/blob/main/MAINTAINERS.md
contribution-policy:
accepts-pull-requests: true
accepts-automated-pull-requests: true
dependencies:
third-party-packages: true
dependencies-lists:
- https://github.com/prometheus/prometheus/blob/main/go.mod
- https://github.com/prometheus/prometheus/blob/main/web/ui/package.json
env-dependencies-policy:
policy-url: https://github.com/prometheus/prometheus/blob/main/CONTRIBUTING.md#dependency-management
distribution-points:
- https://github.com/prometheus/prometheus/releases
documentation:
- https://prometheus.io/docs/introduction/overview/
security-contacts:
- type: email
value: prometheus-team@googlegroups.com
security-testing:
- tool-type: sca
tool-name: Dependabot
tool-version: latest
integration:
ad-hoc: false
ci: true
before-release: true
- tool-type: sast
tool-name: CodeQL
tool-version: latest
integration:
ad-hoc: false
ci: true
before-release: true
vulnerability-reporting:
accepts-vulnerability-reports: true
security-policy: https://github.com/prometheus/prometheus/security/policy

View file

@ -1 +1 @@
2.53.0
3.0.0-beta.0

View file

@ -58,8 +58,6 @@ import (
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/legacymanager"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/model/exemplar"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
@ -103,6 +101,8 @@ var (
)
func init() {
// This can be removed when the default validation scheme in common is updated.
model.NameValidationScheme = model.UTF8Validation
prometheus.MustRegister(versioncollector.NewCollector(strings.ReplaceAll(appName, "-", "_")))
var err error
@ -152,13 +152,16 @@ type flagConfig struct {
queryConcurrency int
queryMaxSamples int
RemoteFlushDeadline model.Duration
nameEscapingScheme string
enableAutoReload bool
autoReloadInterval model.Duration
featureList []string
memlimitRatio float64
// These options are extracted from featureList
// for ease of use.
enableExpandExternalLabels bool
enableNewSDManager bool
enablePerStepStats bool
enableAutoGOMAXPROCS bool
enableAutoGOMEMLIMIT bool
@ -168,6 +171,8 @@ type flagConfig struct {
corsRegexString string
promlogConfig promlog.Config
promqlEnableDelayedNameRemoval bool
}
// setFeatureListOptions sets the corresponding options from the featureList.
@ -176,9 +181,6 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
opts := strings.Split(f, ",")
for _, o := range opts {
switch o {
case "remote-write-receiver":
c.web.EnableRemoteWriteReceiver = true
level.Warn(logger).Log("msg", "Remote write receiver enabled via feature flag remote-write-receiver. This is DEPRECATED. Use --web.enable-remote-write-receiver.")
case "otlp-write-receiver":
c.web.EnableOTLPWriteReceiver = true
level.Info(logger).Log("msg", "Experimental OTLP write receiver enabled")
@ -194,18 +196,21 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
case "extra-scrape-metrics":
c.scrape.ExtraMetrics = true
level.Info(logger).Log("msg", "Experimental additional scrape metrics enabled")
case "new-service-discovery-manager":
c.enableNewSDManager = true
level.Info(logger).Log("msg", "Experimental service discovery manager")
case "agent":
agentMode = true
level.Info(logger).Log("msg", "Experimental agent mode enabled.")
case "metadata-wal-records":
c.scrape.AppendMetadata = true
level.Info(logger).Log("msg", "Experimental metadata records in WAL enabled, required for remote write 2.0")
case "promql-per-step-stats":
c.enablePerStepStats = true
level.Info(logger).Log("msg", "Experimental per-step statistics reporting")
case "auto-gomaxprocs":
c.enableAutoGOMAXPROCS = true
level.Info(logger).Log("msg", "Automatically set GOMAXPROCS to match Linux container CPU quota")
case "auto-reload-config":
c.enableAutoReload = true
if s := time.Duration(c.autoReloadInterval).Seconds(); s > 0 && s < 1 {
c.autoReloadInterval, _ = model.ParseDuration("1s")
}
level.Info(logger).Log("msg", fmt.Sprintf("Enabled automatic configuration file reloading. Checking for configuration changes every %s.", c.autoReloadInterval))
case "auto-gomemlimit":
c.enableAutoGOMEMLIMIT = true
level.Info(logger).Log("msg", "Automatically set GOMEMLIMIT to match Linux container or system memory limit")
@ -225,16 +230,26 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "ooo-native-histograms":
c.tsdb.EnableOOONativeHistograms = true
level.Info(logger).Log("msg", "Experimental out-of-order native histogram ingestion enabled. This will only take effect if OutOfOrderTimeWindow is > 0 and if EnableNativeHistograms = true")
case "created-timestamp-zero-ingestion":
c.scrape.EnableCreatedTimestampZeroIngestion = true
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
level.Info(logger).Log("msg", "Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "delayed-compaction":
c.tsdb.EnableDelayedCompaction = true
level.Info(logger).Log("msg", "Experimental delayed compaction is enabled.")
case "promql-delayed-name-removal":
c.promqlEnableDelayedNameRemoval = true
level.Info(logger).Log("msg", "Experimental PromQL delayed name removal enabled.")
case "":
continue
case "promql-at-modifier", "promql-negative-offset":
level.Warn(logger).Log("msg", "This option for --enable-feature is now permanently enabled and therefore a no-op.", "option", o)
case "old-ui":
c.web.UseOldUI = true
level.Info(logger).Log("msg", "Serving previous version of the Prometheus web UI.")
default:
level.Warn(logger).Log("msg", "Unknown option for --enable-feature", "option", o)
}
@ -250,11 +265,6 @@ func main() {
runtime.SetMutexProfileFraction(20)
}
var (
oldFlagRetentionDuration model.Duration
newFlagRetentionDuration model.Duration
)
// Unregister the default GoCollector, and reregister with our defaults.
if prometheus.Unregister(collectors.NewGoCollector()) {
prometheus.MustRegister(
@ -287,8 +297,11 @@ func main() {
a.Flag("config.file", "Prometheus configuration file path.").
Default("prometheus.yml").StringVar(&cfg.configFile)
a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry.").
Default("0.0.0.0:9090").StringVar(&cfg.web.ListenAddress)
a.Flag("config.auto-reload-interval", "Specifies the interval for checking and automatically reloading the Prometheus configuration file upon detecting changes.").
Default("30s").SetValue(&cfg.autoReloadInterval)
a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry. Can be repeated.").
Default("0.0.0.0:9090").StringsVar(&cfg.web.ListenAddresses)
a.Flag("auto-gomemlimit.ratio", "The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory").
Default("0.9").FloatVar(&cfg.memlimitRatio)
@ -302,7 +315,7 @@ func main() {
"Maximum duration before timing out read of the request, and closing idle connections.").
Default("5m").SetValue(&cfg.webTimeout)
a.Flag("web.max-connections", "Maximum number of simultaneous connections.").
a.Flag("web.max-connections", "Maximum number of simultaneous connections across all listeners.").
Default("512").IntVar(&cfg.web.MaxConnections)
a.Flag("web.external-url",
@ -322,9 +335,15 @@ func main() {
a.Flag("web.enable-admin-api", "Enable API endpoints for admin control actions.").
Default("false").BoolVar(&cfg.web.EnableAdminAPI)
// TODO(bwplotka): Consider allowing those remote receive flags to be changed in config.
// See https://github.com/prometheus/prometheus/issues/14410
a.Flag("web.enable-remote-write-receiver", "Enable API endpoint accepting remote write requests.").
Default("false").BoolVar(&cfg.web.EnableRemoteWriteReceiver)
supportedRemoteWriteProtoMsgs := config.RemoteWriteProtoMsgs{config.RemoteWriteProtoMsgV1, config.RemoteWriteProtoMsgV2}
a.Flag("web.remote-write-receiver.accepted-protobuf-messages", fmt.Sprintf("List of the remote write protobuf messages to accept when receiving the remote writes. Supported values: %v", supportedRemoteWriteProtoMsgs.String())).
Default(supportedRemoteWriteProtoMsgs.Strings()...).SetValue(rwProtoMsgFlagValue(&cfg.web.AcceptRemoteWriteProtoMsgs))
a.Flag("web.console.templates", "Path to the console template directory, available at /consoles.").
Default("consoles").StringVar(&cfg.web.ConsoleTemplatesPath)
@ -355,11 +374,8 @@ func main() {
"Size at which to split the tsdb WAL segment files. Example: 100MB").
Hidden().PlaceHolder("<bytes>").BytesVar(&cfg.tsdb.WALSegmentSize)
serverOnlyFlag(a, "storage.tsdb.retention", "[DEPRECATED] How long to retain samples in storage. This flag has been deprecated, use \"storage.tsdb.retention.time\" instead.").
SetValue(&oldFlagRetentionDuration)
serverOnlyFlag(a, "storage.tsdb.retention.time", "How long to retain samples in storage. When this flag is set it overrides \"storage.tsdb.retention\". If neither this flag nor \"storage.tsdb.retention\" nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+". Units Supported: y, w, d, h, m, s, ms.").
SetValue(&newFlagRetentionDuration)
serverOnlyFlag(a, "storage.tsdb.retention.time", "How long to retain samples in storage. If neither this flag nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+". Units Supported: y, w, d, h, m, s, ms.").
SetValue(&cfg.tsdb.RetentionDuration)
serverOnlyFlag(a, "storage.tsdb.retention.size", "Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: \"512MB\". Based on powers-of-2, so 1KB is 1024B.").
BytesVar(&cfg.tsdb.MaxBytes)
@ -367,11 +383,6 @@ func main() {
serverOnlyFlag(a, "storage.tsdb.no-lockfile", "Do not create lockfile in data directory.").
Default("false").BoolVar(&cfg.tsdb.NoLockfile)
// TODO: Remove in Prometheus 3.0.
var b bool
serverOnlyFlag(a, "storage.tsdb.allow-overlapping-blocks", "[DEPRECATED] This flag has no effect. Overlapping blocks are enabled by default now.").
Default("true").Hidden().BoolVar(&b)
serverOnlyFlag(a, "storage.tsdb.wal-compression", "Compress the tsdb WAL.").
Hidden().Default("true").BoolVar(&cfg.tsdb.WALCompression)
@ -448,9 +459,6 @@ func main() {
serverOnlyFlag(a, "alertmanager.drain-notification-queue-on-shutdown", "Send any outstanding Alertmanager notifications when shutting down. If false, any outstanding Alertmanager notifications will be dropped when shutting down.").
Default("true").BoolVar(&cfg.notifier.DrainOnShutdown)
// TODO: Remove in Prometheus 3.0.
alertmanagerTimeout := a.Flag("alertmanager.timeout", "[DEPRECATED] This flag has no effect.").Hidden().String()
serverOnlyFlag(a, "query.lookback-delta", "The maximum lookback duration for retrieving metrics during expression evaluations and federation.").
Default("5m").SetValue(&cfg.lookbackDelta)
@ -466,9 +474,11 @@ func main() {
a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
Default("").StringsVar(&cfg.featureList)
a.Flag("agent", "Run Prometheus in 'Agent mode'.").BoolVar(&agentMode)
promlogflag.AddFlags(a, &cfg.promlogConfig)
a.Flag("write-documentation", "Generate command line documentation. Internal use.").Hidden().Action(func(ctx *kingpin.ParseContext) error {
@ -494,6 +504,15 @@ func main() {
os.Exit(1)
}
if cfg.nameEscapingScheme != "" {
scheme, err := model.ToEscapingScheme(cfg.nameEscapingScheme)
if err != nil {
fmt.Fprintf(os.Stderr, `Invalid name escaping scheme: %q; Needs to be one of "values", "underscores", or "dots"`, cfg.nameEscapingScheme)
os.Exit(1)
}
model.NameEscapingScheme = scheme
}
if agentMode && len(serverOnlyFlags) > 0 {
fmt.Fprintf(os.Stderr, "The following flag(s) can not be used in agent mode: %q", serverOnlyFlags)
os.Exit(3)
@ -514,7 +533,7 @@ func main() {
localStoragePath = cfg.agentStoragePath
}
cfg.web.ExternalURL, err = computeExternalURL(cfg.prometheusURL, cfg.web.ListenAddress)
cfg.web.ExternalURL, err = computeExternalURL(cfg.prometheusURL, cfg.web.ListenAddresses[0])
if err != nil {
fmt.Fprintln(os.Stderr, fmt.Errorf("parse external URL %q: %w", cfg.prometheusURL, err))
os.Exit(2)
@ -526,10 +545,6 @@ func main() {
os.Exit(2)
}
if *alertmanagerTimeout != "" {
level.Warn(logger).Log("msg", "The flag --alertmanager.timeout has no effect and will be removed in the future.")
}
// Throw error for invalid config before starting other components.
var cfgFile *config.Config
if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, false, log.NewNopLogger()); err != nil {
@ -576,17 +591,6 @@ func main() {
cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/")
if !agentMode {
// Time retention settings.
if oldFlagRetentionDuration != 0 {
level.Warn(logger).Log("deprecation_notice", "'storage.tsdb.retention' flag is deprecated use 'storage.tsdb.retention.time' instead.")
cfg.tsdb.RetentionDuration = oldFlagRetentionDuration
}
// When the new flag is set it takes precedence.
if newFlagRetentionDuration != 0 {
cfg.tsdb.RetentionDuration = newFlagRetentionDuration
}
if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 {
cfg.tsdb.RetentionDuration = defaultRetentionDuration
level.Info(logger).Log("msg", "No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration)
@ -646,7 +650,7 @@ func main() {
var (
localStorage = &readyStorage{stats: tsdb.NewDBStats()}
scraper = &readyScrapeManager{}
remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper)
remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper, cfg.scrape.AppendMetadata)
fanoutStorage = storage.NewFanout(logger, localStorage, remoteStorage)
)
@ -658,8 +662,8 @@ func main() {
ctxScrape, cancelScrape = context.WithCancel(context.Background())
ctxNotify, cancelNotify = context.WithCancel(context.Background())
discoveryManagerScrape discoveryManager
discoveryManagerNotify discoveryManager
discoveryManagerScrape *discovery.Manager
discoveryManagerNotify *discovery.Manager
)
// Kubernetes client metrics are used by Kubernetes SD.
@ -679,47 +683,22 @@ func main() {
os.Exit(1)
}
if cfg.enableNewSDManager {
{
discMgr := discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape"))
if discMgr == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager scrape")
os.Exit(1)
}
discoveryManagerScrape = discMgr
}
discoveryManagerScrape = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape"))
if discoveryManagerScrape == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager scrape")
os.Exit(1)
}
{
discMgr := discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify"))
if discMgr == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager notify")
os.Exit(1)
}
discoveryManagerNotify = discMgr
}
} else {
{
discMgr := legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, legacymanager.Name("scrape"))
if discMgr == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager scrape")
os.Exit(1)
}
discoveryManagerScrape = discMgr
}
{
discMgr := legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, legacymanager.Name("notify"))
if discMgr == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager notify")
os.Exit(1)
}
discoveryManagerNotify = discMgr
}
discoveryManagerNotify = discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify"))
if discoveryManagerNotify == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager notify")
os.Exit(1)
}
scrapeManager, err := scrape.NewManager(
&cfg.scrape,
log.With(logger, "component", "scrape manager"),
func(s string) (log.Logger, error) { return logging.NewJSONFileLogger(s) },
fanoutStorage,
prometheus.DefaultRegisterer,
)
@ -769,9 +748,10 @@ func main() {
NoStepSubqueryIntervalFn: noStepSubqueryInterval.Get,
// EnableAtModifier and EnableNegativeOffset have to be
// always on for regular PromQL as of Prometheus v2.33.
EnableAtModifier: true,
EnableNegativeOffset: true,
EnablePerStepStats: cfg.enablePerStepStats,
EnableAtModifier: true,
EnableNegativeOffset: true,
EnablePerStepStats: cfg.enablePerStepStats,
EnableDelayedNameRemoval: cfg.promqlEnableDelayedNameRemoval,
}
queryEngine = promql.NewEngine(opts)
@ -960,15 +940,21 @@ func main() {
})
}
listener, err := webHandler.Listener()
listeners, err := webHandler.Listeners()
if err != nil {
level.Error(logger).Log("msg", "Unable to start web listener", "err", err)
level.Error(logger).Log("msg", "Unable to start web listeners", "err", err)
if err := queryEngine.Close(); err != nil {
level.Warn(logger).Log("msg", "Closing query engine failed", "err", err)
}
os.Exit(1)
}
err = toolkit_web.Validate(*webConfig)
if err != nil {
level.Error(logger).Log("msg", "Unable to validate web configuration file", "err", err)
if err := queryEngine.Close(); err != nil {
level.Warn(logger).Log("msg", "Closing query engine failed", "err", err)
}
os.Exit(1)
}
@ -990,11 +976,14 @@ func main() {
case <-cancel:
reloadReady.Close()
}
if err := queryEngine.Close(); err != nil {
level.Warn(logger).Log("msg", "Closing query engine failed", "err", err)
}
return nil
},
func(err error) {
close(cancel)
webHandler.SetReady(false)
webHandler.SetReady(web.Stopping)
},
)
}
@ -1084,6 +1073,15 @@ func main() {
hup := make(chan os.Signal, 1)
signal.Notify(hup, syscall.SIGHUP)
cancel := make(chan struct{})
var checksum string
if cfg.enableAutoReload {
checksum, err = config.GenerateChecksum(cfg.configFile)
if err != nil {
level.Error(logger).Log("msg", "Failed to generate initial checksum for configuration file", "err", err)
}
}
g.Add(
func() error {
<-reloadReady.C
@ -1093,6 +1091,12 @@ func main() {
case <-hup:
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
level.Error(logger).Log("msg", "Error reloading config", "err", err)
} else if cfg.enableAutoReload {
if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil {
checksum = currentChecksum
} else {
level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err)
}
}
case rc := <-webHandler.Reload():
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
@ -1100,6 +1104,30 @@ func main() {
rc <- err
} else {
rc <- nil
if cfg.enableAutoReload {
if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil {
checksum = currentChecksum
} else {
level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err)
}
}
}
case <-time.Tick(time.Duration(cfg.autoReloadInterval)):
if !cfg.enableAutoReload {
continue
}
currentChecksum, err := config.GenerateChecksum(cfg.configFile)
if err != nil {
level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err)
} else if currentChecksum == checksum {
continue
}
level.Info(logger).Log("msg", "Configuration file change detected, reloading the configuration.")
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
level.Error(logger).Log("msg", "Error reloading config", "err", err)
} else {
checksum = currentChecksum
}
case <-cancel:
return nil
@ -1132,7 +1160,7 @@ func main() {
reloadReady.Close()
webHandler.SetReady(true)
webHandler.SetReady(web.Ready)
level.Info(logger).Log("msg", "Server is ready to receive web requests.")
<-cancel
return nil
@ -1257,7 +1285,7 @@ func main() {
// Web handler.
g.Add(
func() error {
if err := webHandler.Run(ctxWeb, listener, *webConfig); err != nil {
if err := webHandler.Run(ctxWeb, listeners, *webConfig); err != nil {
return fmt.Errorf("error starting web server: %w", err)
}
return nil
@ -1706,6 +1734,9 @@ type tsdbOptions struct {
MaxExemplars int64
EnableMemorySnapshotOnShutdown bool
EnableNativeHistograms bool
EnableDelayedCompaction bool
EnableOverlappingCompaction bool
EnableOOONativeHistograms bool
}
func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
@ -1725,8 +1756,10 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
MaxExemplars: opts.MaxExemplars,
EnableMemorySnapshotOnShutdown: opts.EnableMemorySnapshotOnShutdown,
EnableNativeHistograms: opts.EnableNativeHistograms,
EnableOOONativeHistograms: opts.EnableOOONativeHistograms,
OutOfOrderTimeWindow: opts.OutOfOrderTimeWindow,
EnableOverlappingCompaction: true,
EnableDelayedCompaction: opts.EnableDelayedCompaction,
EnableOverlappingCompaction: opts.EnableOverlappingCompaction,
}
}
@ -1759,11 +1792,38 @@ func (opts agentOptions) ToAgentOptions(outOfOrderTimeWindow int64) agent.Option
}
}
// discoveryManager interfaces the discovery manager. This is used to keep using
// the manager that restarts SD's on reload for a few releases until we feel
// the new manager can be enabled for all users.
type discoveryManager interface {
ApplyConfig(cfg map[string]discovery.Configs) error
Run() error
SyncCh() <-chan map[string][]*targetgroup.Group
// rwProtoMsgFlagParser is a custom parser for config.RemoteWriteProtoMsg enum.
type rwProtoMsgFlagParser struct {
msgs *[]config.RemoteWriteProtoMsg
}
func rwProtoMsgFlagValue(msgs *[]config.RemoteWriteProtoMsg) kingpin.Value {
return &rwProtoMsgFlagParser{msgs: msgs}
}
// IsCumulative is used by kingpin to tell if it's an array or not.
func (p *rwProtoMsgFlagParser) IsCumulative() bool {
return true
}
func (p *rwProtoMsgFlagParser) String() string {
ss := make([]string, 0, len(*p.msgs))
for _, t := range *p.msgs {
ss = append(ss, string(t))
}
return strings.Join(ss, ",")
}
func (p *rwProtoMsgFlagParser) Set(opt string) error {
t := config.RemoteWriteProtoMsg(opt)
if err := t.Validate(); err != nil {
return err
}
for _, prev := range *p.msgs {
if prev == t {
return fmt.Errorf("duplicated %v flag value, got %v already", t, *p.msgs)
}
}
*p.msgs = append(*p.msgs, t)
return nil
}

View file

@ -30,16 +30,23 @@ import (
"testing"
"time"
"github.com/alecthomas/kingpin/v2"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/notifier"
"github.com/prometheus/prometheus/rules"
)
func init() {
// This can be removed when the default validation scheme in common is updated.
model.NameValidationScheme = model.UTF8Validation
}
const startupTime = 10 * time.Second
var (
@ -346,7 +353,7 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames
}
func TestAgentSuccessfulStartup(t *testing.T) {
prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig)
prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig)
require.NoError(t, prom.Start())
actualExitStatus := 0
@ -364,7 +371,7 @@ func TestAgentSuccessfulStartup(t *testing.T) {
}
func TestAgentFailedStartupWithServerFlag(t *testing.T) {
prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
prom := exec.Command(promPath, "-test.main", "--agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
output := bytes.Buffer{}
prom.Stderr = &output
@ -391,7 +398,7 @@ func TestAgentFailedStartupWithServerFlag(t *testing.T) {
}
func TestAgentFailedStartupWithInvalidConfig(t *testing.T) {
prom := exec.Command(promPath, "-test.main", "--enable-feature=agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
require.NoError(t, prom.Start())
actualExitStatus := 0
@ -429,7 +436,7 @@ func TestModeSpecificFlags(t *testing.T) {
args := []string{"-test.main", tc.arg, t.TempDir(), "--web.listen-address=0.0.0.0:0"}
if tc.mode == "agent" {
args = append(args, "--enable-feature=agent", "--config.file="+agentConfig)
args = append(args, "--agent", "--config.file="+agentConfig)
} else {
args = append(args, "--config.file="+promConfig)
}
@ -499,3 +506,65 @@ func TestDocumentation(t *testing.T) {
require.Equal(t, string(expectedContent), generatedContent, "Generated content does not match documentation. Hint: run `make cli-documentation`.")
}
func TestRwProtoMsgFlagParser(t *testing.T) {
defaultOpts := config.RemoteWriteProtoMsgs{
config.RemoteWriteProtoMsgV1, config.RemoteWriteProtoMsgV2,
}
for _, tcase := range []struct {
args []string
expected []config.RemoteWriteProtoMsg
expectedErr error
}{
{
args: nil,
expected: defaultOpts,
},
{
args: []string{"--test-proto-msgs", "test"},
expectedErr: errors.New("unknown remote write protobuf message test, supported: prometheus.WriteRequest, io.prometheus.write.v2.Request"),
},
{
args: []string{"--test-proto-msgs", "io.prometheus.write.v2.Request"},
expected: config.RemoteWriteProtoMsgs{config.RemoteWriteProtoMsgV2},
},
{
args: []string{
"--test-proto-msgs", "io.prometheus.write.v2.Request",
"--test-proto-msgs", "io.prometheus.write.v2.Request",
},
expectedErr: errors.New("duplicated io.prometheus.write.v2.Request flag value, got [io.prometheus.write.v2.Request] already"),
},
{
args: []string{
"--test-proto-msgs", "io.prometheus.write.v2.Request",
"--test-proto-msgs", "prometheus.WriteRequest",
},
expected: config.RemoteWriteProtoMsgs{config.RemoteWriteProtoMsgV2, config.RemoteWriteProtoMsgV1},
},
{
args: []string{
"--test-proto-msgs", "io.prometheus.write.v2.Request",
"--test-proto-msgs", "prometheus.WriteRequest",
"--test-proto-msgs", "io.prometheus.write.v2.Request",
},
expectedErr: errors.New("duplicated io.prometheus.write.v2.Request flag value, got [io.prometheus.write.v2.Request prometheus.WriteRequest] already"),
},
} {
t.Run(strings.Join(tcase.args, ","), func(t *testing.T) {
a := kingpin.New("test", "")
var opt []config.RemoteWriteProtoMsg
a.Flag("test-proto-msgs", "").Default(defaultOpts.Strings()...).SetValue(rwProtoMsgFlagValue(&opt))
_, err := a.Parse(tcase.args)
if tcase.expectedErr != nil {
require.Error(t, err)
require.Equal(t, tcase.expectedErr, err)
} else {
require.NoError(t, err)
require.Equal(t, tcase.expected, opt)
}
})
}
}

View file

@ -0,0 +1,193 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"bytes"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"os"
"os/exec"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/require"
"go.uber.org/atomic"
"github.com/prometheus/prometheus/util/testutil"
)
func TestScrapeFailureLogFile(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
// Tracks the number of requests made to the mock server.
var requestCount atomic.Int32
// Starts a server that always returns HTTP 500 errors.
mockServerAddress := startGarbageServer(t, &requestCount)
// Create a temporary directory for Prometheus configuration and logs.
tempDir := t.TempDir()
// Define file paths for the scrape failure log and Prometheus configuration.
// Like other files, the scrape failure log file should be relative to the
// config file. Therefore, we split the name we put in the file and the full
// path used to check the content of the file.
scrapeFailureLogFileName := "scrape_failure.log"
scrapeFailureLogFile := filepath.Join(tempDir, scrapeFailureLogFileName)
promConfigFile := filepath.Join(tempDir, "prometheus.yml")
// Step 1: Set up an initial Prometheus configuration that globally
// specifies a scrape failure log file.
promConfig := fmt.Sprintf(`
global:
scrape_interval: 500ms
scrape_failure_log_file: %s
scrape_configs:
- job_name: 'test_job'
static_configs:
- targets: ['%s']
`, scrapeFailureLogFileName, mockServerAddress)
err := os.WriteFile(promConfigFile, []byte(promConfig), 0o644)
require.NoError(t, err, "Failed to write Prometheus configuration file")
// Start Prometheus with the generated configuration and a random port, enabling the lifecycle API.
port := testutil.RandomUnprivilegedPort(t)
params := []string{
"-test.main",
"--config.file=" + promConfigFile,
"--storage.tsdb.path=" + filepath.Join(tempDir, "data"),
fmt.Sprintf("--web.listen-address=127.0.0.1:%d", port),
"--web.enable-lifecycle",
}
prometheusProcess := exec.Command(promPath, params...)
prometheusProcess.Stdout = os.Stdout
prometheusProcess.Stderr = os.Stderr
err = prometheusProcess.Start()
require.NoError(t, err, "Failed to start Prometheus")
defer prometheusProcess.Process.Kill()
// Wait until the mock server receives at least two requests from Prometheus.
require.Eventually(t, func() bool {
return requestCount.Load() >= 2
}, 30*time.Second, 500*time.Millisecond, "Expected at least two requests to the mock server")
// Verify that the scrape failures have been logged to the specified file.
content, err := os.ReadFile(scrapeFailureLogFile)
require.NoError(t, err, "Failed to read scrape failure log")
require.Contains(t, string(content), "server returned HTTP status 500 Internal Server Error", "Expected scrape failure log entry not found")
// Step 2: Update the Prometheus configuration to remove the scrape failure
// log file setting.
promConfig = fmt.Sprintf(`
global:
scrape_interval: 1s
scrape_configs:
- job_name: 'test_job'
static_configs:
- targets: ['%s']
`, mockServerAddress)
err = os.WriteFile(promConfigFile, []byte(promConfig), 0o644)
require.NoError(t, err, "Failed to update Prometheus configuration file")
// Reload Prometheus with the updated configuration.
reloadPrometheus(t, port)
// Count the number of lines in the scrape failure log file before any
// further requests.
preReloadLogLineCount := countLinesInFile(scrapeFailureLogFile)
// Wait for at least two more requests to the mock server to ensure
// Prometheus continues scraping.
requestsBeforeReload := requestCount.Load()
require.Eventually(t, func() bool {
return requestCount.Load() >= requestsBeforeReload+2
}, 30*time.Second, 500*time.Millisecond, "Expected two more requests to the mock server after configuration reload")
// Ensure that no new lines were added to the scrape failure log file after
// the configuration change.
require.Equal(t, preReloadLogLineCount, countLinesInFile(scrapeFailureLogFile), "No new lines should be added to the scrape failure log file after removing the log setting")
// Step 3: Re-add the scrape failure log file setting, but this time under
// scrape_configs, and reload Prometheus.
promConfig = fmt.Sprintf(`
global:
scrape_interval: 1s
scrape_configs:
- job_name: 'test_job'
scrape_failure_log_file: %s
static_configs:
- targets: ['%s']
`, scrapeFailureLogFileName, mockServerAddress)
err = os.WriteFile(promConfigFile, []byte(promConfig), 0o644)
require.NoError(t, err, "Failed to update Prometheus configuration file")
// Reload Prometheus with the updated configuration.
reloadPrometheus(t, port)
// Wait for at least two more requests to the mock server and verify that
// new log entries are created.
postReloadLogLineCount := countLinesInFile(scrapeFailureLogFile)
requestsBeforeReAddingLog := requestCount.Load()
require.Eventually(t, func() bool {
return requestCount.Load() >= requestsBeforeReAddingLog+2
}, 30*time.Second, 500*time.Millisecond, "Expected two additional requests after re-adding the log setting")
// Confirm that new lines were added to the scrape failure log file.
require.Greater(t, countLinesInFile(scrapeFailureLogFile), postReloadLogLineCount, "New lines should be added to the scrape failure log file after re-adding the log setting")
}
// reloadPrometheus sends a reload request to the Prometheus server to apply
// updated configurations.
func reloadPrometheus(t *testing.T, port int) {
resp, err := http.Post(fmt.Sprintf("http://127.0.0.1:%d/-/reload", port), "", nil)
require.NoError(t, err, "Failed to reload Prometheus")
require.Equal(t, http.StatusOK, resp.StatusCode, "Unexpected status code when reloading Prometheus")
}
// startGarbageServer sets up a mock server that returns a 500 Internal Server Error
// for all requests. It also increments the request count each time it's hit.
func startGarbageServer(t *testing.T, requestCount *atomic.Int32) string {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
requestCount.Inc()
w.WriteHeader(http.StatusInternalServerError)
}))
t.Cleanup(server.Close)
parsedURL, err := url.Parse(server.URL)
require.NoError(t, err, "Failed to parse mock server URL")
return parsedURL.Host
}
// countLinesInFile counts and returns the number of lines in the specified file.
func countLinesInFile(filePath string) int {
data, err := os.ReadFile(filePath)
if err != nil {
return 0 // Return 0 if the file doesn't exist or can't be read.
}
return bytes.Count(data, []byte{'\n'})
}

View file

@ -85,7 +85,7 @@ func getCompatibleBlockDuration(maxBlockDuration int64) int64 {
return blockDuration
}
func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool) (returnErr error) {
func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool, customLabels map[string]string) (returnErr error) {
blockDuration := getCompatibleBlockDuration(maxBlockDuration)
mint = blockDuration * (mint / blockDuration)
@ -102,6 +102,8 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
nextSampleTs int64 = math.MaxInt64
)
lb := labels.NewBuilder(labels.EmptyLabels())
for t := mint; t <= maxt; t += blockDuration {
tsUpper := t + blockDuration
if nextSampleTs != math.MaxInt64 && nextSampleTs >= tsUpper {
@ -162,7 +164,13 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
l := labels.Labels{}
p.Metric(&l)
if _, err := app.Append(0, l, *ts, v); err != nil {
lb.Reset(l)
for name, value := range customLabels {
lb.Set(name, value)
}
lbls := lb.Labels()
if _, err := app.Append(0, lbls, *ts, v); err != nil {
return fmt.Errorf("add sample: %w", err)
}
@ -221,13 +229,13 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
return nil
}
func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) (err error) {
func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration, customLabels map[string]string) (err error) {
p := textparse.NewOpenMetricsParser(input, nil) // Don't need a SymbolTable to get max and min timestamps.
maxt, mint, err := getMinAndMaxTimestamps(p)
if err != nil {
return fmt.Errorf("getting min and max timestamp: %w", err)
}
if err = createBlocks(input, mint, maxt, int64(maxBlockDuration/time.Millisecond), maxSamplesInAppender, outputDir, humanReadable, quiet); err != nil {
if err = createBlocks(input, mint, maxt, int64(maxBlockDuration/time.Millisecond), maxSamplesInAppender, outputDir, humanReadable, quiet, customLabels); err != nil {
return fmt.Errorf("block creation: %w", err)
}
return nil

View file

@ -92,6 +92,7 @@ func TestBackfill(t *testing.T) {
Description string
MaxSamplesInAppender int
MaxBlockDuration time.Duration
Labels map[string]string
Expected struct {
MinTime int64
MaxTime int64
@ -636,6 +637,49 @@ http_requests_total{code="400"} 1024 7199
},
},
},
{
ToParse: `# HELP http_requests_total The total number of HTTP requests.
# TYPE http_requests_total counter
http_requests_total{code="200"} 1 1624463088.000
http_requests_total{code="200"} 2 1629503088.000
http_requests_total{code="200"} 3 1629863088.000
# EOF
`,
IsOk: true,
Description: "Sample with external labels.",
MaxSamplesInAppender: 5000,
MaxBlockDuration: 2048 * time.Hour,
Labels: map[string]string{"cluster_id": "123", "org_id": "999"},
Expected: struct {
MinTime int64
MaxTime int64
NumBlocks int
BlockDuration int64
Samples []backfillSample
}{
MinTime: 1624463088000,
MaxTime: 1629863088000,
NumBlocks: 2,
BlockDuration: int64(1458 * time.Hour / time.Millisecond),
Samples: []backfillSample{
{
Timestamp: 1624463088000,
Value: 1,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"),
},
{
Timestamp: 1629503088000,
Value: 2,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"),
},
{
Timestamp: 1629863088000,
Value: 3,
Labels: labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"),
},
},
},
},
{
ToParse: `# HELP rpc_duration_seconds A summary of the RPC duration in seconds.
# TYPE rpc_duration_seconds summary
@ -689,7 +733,7 @@ after_eof 1 2
outputDir := t.TempDir()
err := backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false, test.MaxBlockDuration)
err := backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false, test.MaxBlockDuration, test.Labels)
if !test.IsOk {
require.Error(t, err, test.Description)

View file

@ -62,6 +62,11 @@ import (
"github.com/prometheus/prometheus/util/documentcli"
)
func init() {
// This can be removed when the default validation scheme in common is updated.
model.NameValidationScheme = model.UTF8Validation
}
const (
successExitCode = 0
failureExitCode = 1
@ -204,6 +209,7 @@ func main() {
pushMetricsHeaders := pushMetricsCmd.Flag("header", "Prometheus remote write header.").StringMap()
testCmd := app.Command("test", "Unit testing.")
junitOutFile := testCmd.Flag("junit", "File path to store JUnit XML test results.").OpenFile(os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
testRulesCmd := testCmd.Command("rules", "Unit tests for rules.")
testRulesRun := testRulesCmd.Flag("run", "If set, will only run test groups whose names match the regular expression. Can be specified multiple times.").Strings()
testRulesFiles := testRulesCmd.Arg(
@ -235,14 +241,14 @@ func main() {
tsdbDumpCmd := tsdbCmd.Command("dump", "Dump samples from a TSDB.")
dumpPath := tsdbDumpCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
dumpSandboxDirRoot := tsdbDumpCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String()
dumpSandboxDirRoot := tsdbDumpCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory will be created, this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end.").String()
dumpMinTime := tsdbDumpCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64()
dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64()
dumpMatch := tsdbDumpCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings()
tsdbDumpOpenMetricsCmd := tsdbCmd.Command("dump-openmetrics", "[Experimental] Dump samples from a TSDB into OpenMetrics text format, excluding native histograms and staleness markers, which are not representable in OpenMetrics.")
dumpOpenMetricsPath := tsdbDumpOpenMetricsCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String()
dumpOpenMetricsSandboxDirRoot := tsdbDumpOpenMetricsCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String()
dumpOpenMetricsSandboxDirRoot := tsdbDumpOpenMetricsCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory will be created, this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end.").String()
dumpOpenMetricsMinTime := tsdbDumpOpenMetricsCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64()
dumpOpenMetricsMaxTime := tsdbDumpOpenMetricsCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64()
dumpOpenMetricsMatch := tsdbDumpOpenMetricsCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings()
@ -252,6 +258,7 @@ func main() {
importQuiet := importCmd.Flag("quiet", "Do not print created blocks.").Short('q').Bool()
maxBlockDuration := importCmd.Flag("max-block-duration", "Maximum duration created blocks may span. Anything less than 2h is ignored.").Hidden().PlaceHolder("<duration>").Duration()
openMetricsImportCmd := importCmd.Command("openmetrics", "Import samples from OpenMetrics input and produce TSDB blocks. Please refer to the storage docs for more details.")
openMetricsLabels := openMetricsImportCmd.Flag("label", "Label to attach to metrics. Can be specified multiple times. Example --label=label_name=label_value").StringMap()
importFilePath := openMetricsImportCmd.Arg("input file", "OpenMetrics file to read samples from.").Required().String()
importDBPath := openMetricsImportCmd.Arg("output directory", "Output directory for generated blocks.").Default(defaultDBPath).String()
importRulesCmd := importCmd.Command("rules", "Create blocks of data for new recording rules.")
@ -323,8 +330,6 @@ func main() {
noDefaultScrapePort = true
case "":
continue
case "promql-at-modifier", "promql-negative-offset":
fmt.Printf(" WARNING: Option for --enable-feature is a no-op after promotion to a stable feature: %q\n", o)
default:
fmt.Printf(" WARNING: Unknown option for --enable-feature: %q\n", o)
}
@ -378,7 +383,11 @@ func main() {
os.Exit(QueryLabels(serverURL, httpRoundTripper, *queryLabelsMatch, *queryLabelsName, *queryLabelsBegin, *queryLabelsEnd, p))
case testRulesCmd.FullCommand():
os.Exit(RulesUnitTest(
results := io.Discard
if *junitOutFile != nil {
results = *junitOutFile
}
os.Exit(RulesUnitTestResult(results,
promqltest.LazyLoaderOpts{
EnableAtModifier: true,
EnableNegativeOffset: true,
@ -403,7 +412,7 @@ func main() {
os.Exit(checkErr(dumpSamples(ctx, *dumpOpenMetricsPath, *dumpOpenMetricsSandboxDirRoot, *dumpOpenMetricsMinTime, *dumpOpenMetricsMaxTime, *dumpOpenMetricsMatch, formatSeriesSetOpenMetrics)))
// TODO(aSquare14): Work on adding support for custom block size.
case openMetricsImportCmd.FullCommand():
os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration))
os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration, *openMetricsLabels))
case importRulesCmd.FullCommand():
os.Exit(checkErr(importRules(serverURL, httpRoundTripper, *importRulesStart, *importRulesEnd, *importRulesOutputDir, *importRulesEvalInterval, *maxBlockDuration, *importRulesFiles...)))
@ -466,7 +475,7 @@ func (ls lintConfig) lintDuplicateRules() bool {
return ls.all || ls.duplicateRules
}
// Check server status - healthy & ready.
// CheckServerStatus - healthy & ready.
func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper http.RoundTripper) error {
if serverURL.Scheme == "" {
serverURL.Scheme = "http"

View file

@ -31,12 +31,19 @@ import (
"testing"
"time"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/rulefmt"
"github.com/prometheus/prometheus/promql/promqltest"
)
func init() {
// This can be removed when the default validation scheme in common is updated.
model.NameValidationScheme = model.UTF8Validation
}
var promtoolPath = os.Args[0]
func TestMain(m *testing.M) {
@ -549,3 +556,46 @@ func TestCheckRulesWithRuleFiles(t *testing.T) {
require.Equal(t, lintErrExitCode, exitCode, "")
})
}
func TestTSDBDumpCommand(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
storage := promqltest.LoadedStorage(t, `
load 1m
metric{foo="bar"} 1 2 3
`)
t.Cleanup(func() { storage.Close() })
for _, c := range []struct {
name string
subCmd string
sandboxDirRoot string
}{
{
name: "dump",
subCmd: "dump",
},
{
name: "dump with sandbox dir root",
subCmd: "dump",
sandboxDirRoot: t.TempDir(),
},
{
name: "dump-openmetrics",
subCmd: "dump-openmetrics",
},
{
name: "dump-openmetrics with sandbox dir root",
subCmd: "dump-openmetrics",
sandboxDirRoot: t.TempDir(),
},
} {
t.Run(c.name, func(t *testing.T) {
args := []string{"-test.main", "tsdb", c.subCmd, storage.Dir()}
cmd := exec.Command(promtoolPath, args...)
require.NoError(t, cmd.Run())
})
}
}

View file

@ -31,7 +31,7 @@ import (
"github.com/prometheus/prometheus/util/fmtutil"
)
// Push metrics to a prometheus remote write (for testing purpose only).
// PushMetrics to a prometheus remote write (for testing purpose only).
func PushMetrics(url *url.URL, roundTripper http.RoundTripper, headers map[string]string, timeout time.Duration, labels map[string]string, files ...string) int {
addressURL, err := url.Parse(url.String())
if err != nil {
@ -101,6 +101,7 @@ func PushMetrics(url *url.URL, roundTripper http.RoundTripper, headers map[strin
return successExitCode
}
// TODO(bwplotka): Add PRW 2.0 support.
func parseAndPushMetrics(client *remote.Client, data []byte, labels map[string]string) bool {
metricsData, err := fmtutil.MetricTextToWriteRequest(bytes.NewReader(data), labels)
if err != nil {
@ -116,7 +117,7 @@ func parseAndPushMetrics(client *remote.Client, data []byte, labels map[string]s
// Encode the request body into snappy encoding.
compressed := snappy.Encode(nil, raw)
err = client.Store(context.Background(), compressed, 0)
_, err = client.Store(context.Background(), compressed, 0)
if err != nil {
fmt.Fprintln(os.Stderr, " FAILED:", err)
return false

View file

@ -69,13 +69,13 @@ tests:
eval_time: 2m
exp_samples:
- labels: "test_histogram_repeat"
histogram: "{{count:2 sum:3 buckets:[2]}}"
histogram: "{{count:2 sum:3 counter_reset_hint:not_reset buckets:[2]}}"
- expr: test_histogram_increase
eval_time: 2m
exp_samples:
- labels: "test_histogram_increase"
histogram: "{{count:4 sum:5.6 buckets:[4]}}"
histogram: "{{count:4 sum:5.6 counter_reset_hint:not_reset buckets:[4]}}"
# Ensure a value is stale as soon as it is marked as such.
- expr: test_stale
@ -89,11 +89,11 @@ tests:
# Ensure lookback delta is respected, when a value is missing.
- expr: timestamp(test_missing)
eval_time: 5m
eval_time: 4m59s
exp_samples:
- value: 0
- expr: timestamp(test_missing)
eval_time: 5m1s
eval_time: 5m
exp_samples: []
# Minimal test case to check edge case of a single sample.
@ -113,7 +113,7 @@ tests:
- expr: count_over_time(fixed_data[1h])
eval_time: 1h
exp_samples:
- value: 61
- value: 60
- expr: timestamp(fixed_data)
eval_time: 1h
exp_samples:
@ -183,7 +183,7 @@ tests:
- expr: job:test:count_over_time1m
eval_time: 1m
exp_samples:
- value: 61
- value: 60
labels: 'job:test:count_over_time1m{job="test"}'
- expr: timestamp(job:test:count_over_time1m)
eval_time: 1m10s
@ -194,7 +194,7 @@ tests:
- expr: job:test:count_over_time1m
eval_time: 2m
exp_samples:
- value: 61
- value: 60
labels: 'job:test:count_over_time1m{job="test"}'
- expr: timestamp(job:test:count_over_time1m)
eval_time: 2m59s999ms

View file

@ -367,25 +367,25 @@ func printBlocks(blocks []tsdb.BlockReader, writeHeader, humanReadable bool) {
fmt.Fprintf(tw,
"%v\t%v\t%v\t%v\t%v\t%v\t%v\t%v\n",
meta.ULID,
getFormatedTime(meta.MinTime, humanReadable),
getFormatedTime(meta.MaxTime, humanReadable),
getFormattedTime(meta.MinTime, humanReadable),
getFormattedTime(meta.MaxTime, humanReadable),
time.Duration(meta.MaxTime-meta.MinTime)*time.Millisecond,
meta.Stats.NumSamples,
meta.Stats.NumChunks,
meta.Stats.NumSeries,
getFormatedBytes(b.Size(), humanReadable),
getFormattedBytes(b.Size(), humanReadable),
)
}
}
func getFormatedTime(timestamp int64, humanReadable bool) string {
func getFormattedTime(timestamp int64, humanReadable bool) string {
if humanReadable {
return time.Unix(timestamp/1000, 0).UTC().String()
}
return strconv.FormatInt(timestamp, 10)
}
func getFormatedBytes(bytes int64, humanReadable bool) string {
func getFormattedBytes(bytes int64, humanReadable bool) string {
if humanReadable {
return units.Base2Bytes(bytes).String()
}
@ -823,7 +823,7 @@ func checkErr(err error) int {
return 0
}
func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) int {
func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration, customLabels map[string]string) int {
inputFile, err := fileutil.OpenMmapFile(path)
if err != nil {
return checkErr(err)
@ -834,7 +834,7 @@ func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxB
return checkErr(fmt.Errorf("create output dir: %w", err))
}
return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration))
return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration, customLabels))
}
func displayHistogram(dataType string, datas []int, total int) {
@ -866,16 +866,16 @@ func displayHistogram(dataType string, datas []int, total int) {
fmt.Println()
}
func generateBucket(min, max int) (start, end, step int) {
s := (max - min) / 10
func generateBucket(minVal, maxVal int) (start, end, step int) {
s := (maxVal - minVal) / 10
step = 10
for step < s && step <= 10000 {
step *= 10
}
start = min - min%step
end = max - max%step + step
start = minVal - minVal%step
end = maxVal - maxVal%step + step
return
}

View file

@ -20,6 +20,7 @@ import (
"math"
"os"
"runtime"
"slices"
"strings"
"testing"
"time"
@ -54,7 +55,7 @@ func TestGenerateBucket(t *testing.T) {
}
// getDumpedSamples dumps samples and returns them.
func getDumpedSamples(t *testing.T, path string, mint, maxt int64, match []string, formatter SeriesSetFormatter) string {
func getDumpedSamples(t *testing.T, databasePath, sandboxDirRoot string, mint, maxt int64, match []string, formatter SeriesSetFormatter) string {
t.Helper()
oldStdout := os.Stdout
@ -63,8 +64,8 @@ func getDumpedSamples(t *testing.T, path string, mint, maxt int64, match []strin
err := dumpSamples(
context.Background(),
path,
t.TempDir(),
databasePath,
sandboxDirRoot,
mint,
maxt,
match,
@ -95,13 +96,15 @@ func TestTSDBDump(t *testing.T) {
heavy_metric{foo="bar"} 5 4 3 2 1
heavy_metric{foo="foo"} 5 4 3 2 1
`)
t.Cleanup(func() { storage.Close() })
tests := []struct {
name string
mint int64
maxt int64
match []string
expectedDump string
name string
mint int64
maxt int64
sandboxDirRoot string
match []string
expectedDump string
}{
{
name: "default match",
@ -110,6 +113,14 @@ func TestTSDBDump(t *testing.T) {
match: []string{"{__name__=~'(?s:.*)'}"},
expectedDump: "testdata/dump-test-1.prom",
},
{
name: "default match with sandbox dir root set",
mint: math.MinInt64,
maxt: math.MaxInt64,
sandboxDirRoot: t.TempDir(),
match: []string{"{__name__=~'(?s:.*)'}"},
expectedDump: "testdata/dump-test-1.prom",
},
{
name: "same matcher twice",
mint: math.MinInt64,
@ -148,28 +159,51 @@ func TestTSDBDump(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.mint, tt.maxt, tt.match, formatSeriesSet)
dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.sandboxDirRoot, tt.mint, tt.maxt, tt.match, formatSeriesSet)
expectedMetrics, err := os.ReadFile(tt.expectedDump)
require.NoError(t, err)
expectedMetrics = normalizeNewLine(expectedMetrics)
// even though in case of one matcher samples are not sorted, the order in the cases above should stay the same.
require.Equal(t, string(expectedMetrics), dumpedMetrics)
// Sort both, because Prometheus does not guarantee the output order.
require.Equal(t, sortLines(string(expectedMetrics)), sortLines(dumpedMetrics))
})
}
}
func sortLines(buf string) string {
lines := strings.Split(buf, "\n")
slices.Sort(lines)
return strings.Join(lines, "\n")
}
func TestTSDBDumpOpenMetrics(t *testing.T) {
storage := promqltest.LoadedStorage(t, `
load 1m
my_counter{foo="bar", baz="abc"} 1 2 3 4 5
my_gauge{bar="foo", abc="baz"} 9 8 0 4 7
`)
t.Cleanup(func() { storage.Close() })
expectedMetrics, err := os.ReadFile("testdata/dump-openmetrics-test.prom")
require.NoError(t, err)
expectedMetrics = normalizeNewLine(expectedMetrics)
dumpedMetrics := getDumpedSamples(t, storage.Dir(), math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
require.Equal(t, string(expectedMetrics), dumpedMetrics)
tests := []struct {
name string
sandboxDirRoot string
}{
{
name: "default match",
},
{
name: "default match with sandbox dir root set",
sandboxDirRoot: t.TempDir(),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
expectedMetrics, err := os.ReadFile("testdata/dump-openmetrics-test.prom")
require.NoError(t, err)
expectedMetrics = normalizeNewLine(expectedMetrics)
dumpedMetrics := getDumpedSamples(t, storage.Dir(), tt.sandboxDirRoot, math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
require.Equal(t, sortLines(string(expectedMetrics)), sortLines(dumpedMetrics))
})
}
}
func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) {
@ -179,7 +213,7 @@ func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) {
dbDir := t.TempDir()
// Import samples from OM format
err = backfill(5000, initialMetrics, dbDir, false, false, 2*time.Hour)
err = backfill(5000, initialMetrics, dbDir, false, false, 2*time.Hour, map[string]string{})
require.NoError(t, err)
db, err := tsdb.Open(dbDir, nil, nil, tsdb.DefaultOptions(), nil)
require.NoError(t, err)
@ -188,7 +222,7 @@ func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) {
})
// Dump the blocks into OM format
dumpedMetrics := getDumpedSamples(t, dbDir, math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
dumpedMetrics := getDumpedSamples(t, dbDir, "", math.MinInt64, math.MaxInt64, []string{"{__name__=~'(?s:.*)'}"}, formatSeriesSetOpenMetrics)
// Should get back the initial metrics.
require.Equal(t, string(initialMetrics), dumpedMetrics)

View file

@ -18,6 +18,7 @@ import (
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"sort"
@ -29,9 +30,10 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/grafana/regexp"
"github.com/nsf/jsondiff"
"github.com/prometheus/common/model"
"gopkg.in/yaml.v2"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/promql"
@ -39,12 +41,18 @@ import (
"github.com/prometheus/prometheus/promql/promqltest"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/util/junitxml"
)
// RulesUnitTest does unit testing of rules based on the unit testing files provided.
// More info about the file format can be found in the docs.
func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int {
return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, files...)
}
func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int {
failed := false
junit := &junitxml.JUnitXML{}
var run *regexp.Regexp
if runStrings != nil {
@ -52,7 +60,7 @@ func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, dif
}
for _, f := range files {
if errs := ruleUnitTest(f, queryOpts, run, diffFlag); errs != nil {
if errs := ruleUnitTest(f, queryOpts, run, diffFlag, junit.Suite(f)); errs != nil {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
@ -64,25 +72,30 @@ func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, dif
}
fmt.Println()
}
err := junit.WriteXML(results)
if err != nil {
fmt.Fprintf(os.Stderr, "failed to write JUnit XML: %s\n", err)
}
if failed {
return failureExitCode
}
return successExitCode
}
func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool) []error {
fmt.Println("Unit Testing: ", filename)
func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool, ts *junitxml.TestSuite) []error {
b, err := os.ReadFile(filename)
if err != nil {
ts.Abort(err)
return []error{err}
}
var unitTestInp unitTestFile
if err := yaml.UnmarshalStrict(b, &unitTestInp); err != nil {
ts.Abort(err)
return []error{err}
}
if err := resolveAndGlobFilepaths(filepath.Dir(filename), &unitTestInp); err != nil {
ts.Abort(err)
return []error{err}
}
@ -91,29 +104,38 @@ func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *reg
}
evalInterval := time.Duration(unitTestInp.EvaluationInterval)
ts.Settime(time.Now().Format("2006-01-02T15:04:05"))
// Giving number for groups mentioned in the file for ordering.
// Lower number group should be evaluated before higher number group.
groupOrderMap := make(map[string]int)
for i, gn := range unitTestInp.GroupEvalOrder {
if _, ok := groupOrderMap[gn]; ok {
return []error{fmt.Errorf("group name repeated in evaluation order: %s", gn)}
err := fmt.Errorf("group name repeated in evaluation order: %s", gn)
ts.Abort(err)
return []error{err}
}
groupOrderMap[gn] = i
}
// Testing.
var errs []error
for _, t := range unitTestInp.Tests {
for i, t := range unitTestInp.Tests {
if !matchesRun(t.TestGroupName, run) {
continue
}
testname := t.TestGroupName
if testname == "" {
testname = fmt.Sprintf("unnamed#%d", i)
}
tc := ts.Case(testname)
if t.Interval == 0 {
t.Interval = unitTestInp.EvaluationInterval
}
ers := t.test(evalInterval, groupOrderMap, queryOpts, diffFlag, unitTestInp.RuleFiles...)
if ers != nil {
for _, e := range ers {
tc.Fail(e.Error())
}
errs = append(errs, ers...)
}
}

View file

@ -14,11 +14,15 @@
package main
import (
"bytes"
"encoding/xml"
"fmt"
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/promql/promqltest"
"github.com/prometheus/prometheus/util/junitxml"
)
func TestRulesUnitTest(t *testing.T) {
@ -125,13 +129,59 @@ func TestRulesUnitTest(t *testing.T) {
want: 0,
},
}
reuseFiles := []string{}
reuseCount := [2]int{}
for _, tt := range tests {
if (tt.queryOpts == promqltest.LazyLoaderOpts{
EnableNegativeOffset: true,
} || tt.queryOpts == promqltest.LazyLoaderOpts{
EnableAtModifier: true,
}) {
reuseFiles = append(reuseFiles, tt.args.files...)
reuseCount[tt.want] += len(tt.args.files)
}
t.Run(tt.name, func(t *testing.T) {
if got := RulesUnitTest(tt.queryOpts, nil, false, tt.args.files...); got != tt.want {
t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want)
}
})
}
t.Run("Junit xml output ", func(t *testing.T) {
var buf bytes.Buffer
if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, reuseFiles...); got != 1 {
t.Errorf("RulesUnitTestResults() = %v, want 1", got)
}
var test junitxml.JUnitXML
output := buf.Bytes()
err := xml.Unmarshal(output, &test)
if err != nil {
fmt.Println("error in decoding XML:", err)
return
}
var total int
var passes int
var failures int
var cases int
total = len(test.Suites)
if total != len(reuseFiles) {
t.Errorf("JUnit output had %d testsuite elements; expected %d\n", total, len(reuseFiles))
}
for _, i := range test.Suites {
if i.FailureCount == 0 {
passes++
} else {
failures++
}
cases += len(i.Cases)
}
if total != passes+failures {
t.Errorf("JUnit output mismatch: Total testsuites (%d) does not equal the sum of passes (%d) and failures (%d).", total, passes, failures)
}
if cases < total {
t.Errorf("JUnit output had %d suites without test cases\n", total-cases)
}
})
}
func TestRulesUnitTestRun(t *testing.T) {

View file

@ -37,6 +37,7 @@ import (
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/relabel"
"github.com/prometheus/prometheus/storage/remote/azuread"
"github.com/prometheus/prometheus/storage/remote/googleiam"
)
var (
@ -66,6 +67,11 @@ var (
}
)
const (
LegacyValidationConfig = "legacy"
UTF8ValidationConfig = "utf8"
)
// Load parses the YAML input s into a Config.
func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, error) {
cfg := &Config{}
@ -180,6 +186,7 @@ var (
// DefaultRemoteWriteConfig is the default remote write configuration.
DefaultRemoteWriteConfig = RemoteWriteConfig{
RemoteTimeout: model.Duration(30 * time.Second),
ProtobufMessage: RemoteWriteProtoMsgV1,
QueueConfig: DefaultQueueConfig,
MetadataConfig: DefaultMetadataConfig,
HTTPClientConfig: config.DefaultHTTPClientConfig,
@ -214,6 +221,7 @@ var (
// DefaultRemoteReadConfig is the default remote read configuration.
DefaultRemoteReadConfig = RemoteReadConfig{
RemoteTimeout: model.Duration(1 * time.Minute),
ChunkedReadLimit: DefaultChunkedReadLimit,
HTTPClientConfig: config.DefaultHTTPClientConfig,
FilterExternalLabels: true,
}
@ -226,6 +234,9 @@ var (
DefaultExemplarsConfig = ExemplarsConfig{
MaxExemplars: 100000,
}
// DefaultOTLPConfig is the default OTLP configuration.
DefaultOTLPConfig = OTLPConfig{}
)
// Config is the top-level configuration for Prometheus's config files.
@ -241,6 +252,7 @@ type Config struct {
RemoteWriteConfigs []*RemoteWriteConfig `yaml:"remote_write,omitempty"`
RemoteReadConfigs []*RemoteReadConfig `yaml:"remote_read,omitempty"`
OTLPConfig OTLPConfig `yaml:"otlp,omitempty"`
}
// SetDirectory joins any relative file paths with dir.
@ -279,7 +291,7 @@ func (c *Config) GetScrapeConfigs() ([]*ScrapeConfig, error) {
jobNames := map[string]string{}
for i, scfg := range c.ScrapeConfigs {
// We do these checks for library users that would not call Validate in
// We do these checks for library users that would not call validate in
// Unmarshal.
if err := scfg.Validate(c.GlobalConfig); err != nil {
return nil, err
@ -417,6 +429,8 @@ type GlobalConfig struct {
RuleQueryOffset model.Duration `yaml:"rule_query_offset,omitempty"`
// File to which PromQL queries are logged.
QueryLogFile string `yaml:"query_log_file,omitempty"`
// File to which scrape failures are logged.
ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"`
// The labels to add to any timeseries that this Prometheus instance scrapes.
ExternalLabels labels.Labels `yaml:"external_labels,omitempty"`
// An uncompressed response body larger than this many bytes will cause the
@ -440,6 +454,8 @@ type GlobalConfig struct {
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
// Allow UTF8 Metric and Label Names.
MetricNameValidationScheme string `yaml:"metric_name_validation_scheme,omitempty"`
}
// ScrapeProtocol represents supported protocol for scraping metrics.
@ -465,6 +481,7 @@ var (
PrometheusText0_0_4 ScrapeProtocol = "PrometheusText0.0.4"
OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1"
OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0"
UTF8NamesHeader string = model.EscapingKey + "=" + model.AllowUTF8
ScrapeProtocolsHeaders = map[ScrapeProtocol]string{
PrometheusProto: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited",
@ -514,6 +531,7 @@ func validateAcceptScrapeProtocols(sps []ScrapeProtocol) error {
// SetDirectory joins any relative file paths with dir.
func (c *GlobalConfig) SetDirectory(dir string) {
c.QueryLogFile = config.JoinDir(dir, c.QueryLogFile)
c.ScrapeFailureLogFile = config.JoinDir(dir, c.ScrapeFailureLogFile)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -576,6 +594,7 @@ func (c *GlobalConfig) isZero() bool {
c.EvaluationInterval == 0 &&
c.RuleQueryOffset == 0 &&
c.QueryLogFile == "" &&
c.ScrapeFailureLogFile == "" &&
c.ScrapeProtocols == nil
}
@ -619,6 +638,8 @@ type ScrapeConfig struct {
ScrapeClassicHistograms bool `yaml:"scrape_classic_histograms,omitempty"`
// Whether to convert a scraped classic histogram into a native histogram with custom buckets.
ConvertClassicHistograms bool `yaml:"convert_classic_histograms,omitempty"`
// File to which scrape failures are logged.
ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"`
// The HTTP resource path on which to fetch metrics from targets.
MetricsPath string `yaml:"metrics_path,omitempty"`
// The URL scheme with which to fetch metrics from targets.
@ -652,6 +673,8 @@ type ScrapeConfig struct {
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
// Allow UTF8 Metric and Label Names.
MetricNameValidationScheme string `yaml:"metric_name_validation_scheme,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types.
@ -669,6 +692,7 @@ type ScrapeConfig struct {
func (c *ScrapeConfig) SetDirectory(dir string) {
c.ServiceDiscoveryConfigs.SetDirectory(dir)
c.HTTPClientConfig.SetDirectory(dir)
c.ScrapeFailureLogFile = config.JoinDir(dir, c.ScrapeFailureLogFile)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -750,6 +774,9 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
if c.KeepDroppedTargets == 0 {
c.KeepDroppedTargets = globalConfig.KeepDroppedTargets
}
if c.ScrapeFailureLogFile == "" {
c.ScrapeFailureLogFile = globalConfig.ScrapeFailureLogFile
}
if c.ScrapeProtocols == nil {
c.ScrapeProtocols = globalConfig.ScrapeProtocols
@ -758,6 +785,19 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName)
}
switch globalConfig.MetricNameValidationScheme {
case LegacyValidationConfig:
case "", UTF8ValidationConfig:
if model.NameValidationScheme != model.UTF8Validation {
panic("utf8 name validation requested but model.NameValidationScheme is not set to UTF8")
}
default:
return fmt.Errorf("unknown name validation method specified, must be either 'legacy' or 'utf8', got %s", globalConfig.MetricNameValidationScheme)
}
if c.MetricNameValidationScheme == "" {
c.MetricNameValidationScheme = globalConfig.MetricNameValidationScheme
}
return nil
}
@ -1057,6 +1097,50 @@ func CheckTargetAddress(address model.LabelValue) error {
return nil
}
// RemoteWriteProtoMsg represents the known protobuf message for the remote write
// 1.0 and 2.0 specs.
type RemoteWriteProtoMsg string
// Validate returns error if the given reference for the protobuf message is not supported.
func (s RemoteWriteProtoMsg) Validate() error {
switch s {
case RemoteWriteProtoMsgV1, RemoteWriteProtoMsgV2:
return nil
default:
return fmt.Errorf("unknown remote write protobuf message %v, supported: %v", s, RemoteWriteProtoMsgs{RemoteWriteProtoMsgV1, RemoteWriteProtoMsgV2}.String())
}
}
type RemoteWriteProtoMsgs []RemoteWriteProtoMsg
func (m RemoteWriteProtoMsgs) Strings() []string {
ret := make([]string, 0, len(m))
for _, typ := range m {
ret = append(ret, string(typ))
}
return ret
}
func (m RemoteWriteProtoMsgs) String() string {
return strings.Join(m.Strings(), ", ")
}
var (
// RemoteWriteProtoMsgV1 represents the `prometheus.WriteRequest` protobuf
// message introduced in the https://prometheus.io/docs/specs/remote_write_spec/,
// which will eventually be deprecated.
//
// NOTE: This string is used for both HTTP header values and config value, so don't change
// this reference.
RemoteWriteProtoMsgV1 RemoteWriteProtoMsg = "prometheus.WriteRequest"
// RemoteWriteProtoMsgV2 represents the `io.prometheus.write.v2.Request` protobuf
// message introduced in https://prometheus.io/docs/specs/remote_write_spec_2_0/
//
// NOTE: This string is used for both HTTP header values and config value, so don't change
// this reference.
RemoteWriteProtoMsgV2 RemoteWriteProtoMsg = "io.prometheus.write.v2.Request"
)
// RemoteWriteConfig is the configuration for writing to remote storage.
type RemoteWriteConfig struct {
URL *config.URL `yaml:"url"`
@ -1066,6 +1150,9 @@ type RemoteWriteConfig struct {
Name string `yaml:"name,omitempty"`
SendExemplars bool `yaml:"send_exemplars,omitempty"`
SendNativeHistograms bool `yaml:"send_native_histograms,omitempty"`
// ProtobufMessage specifies the protobuf message to use against the remote
// receiver as specified in https://prometheus.io/docs/specs/remote_write_spec_2_0/
ProtobufMessage RemoteWriteProtoMsg `yaml:"protobuf_message,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types.
@ -1074,6 +1161,7 @@ type RemoteWriteConfig struct {
MetadataConfig MetadataConfig `yaml:"metadata_config,omitempty"`
SigV4Config *sigv4.SigV4Config `yaml:"sigv4,omitempty"`
AzureADConfig *azuread.AzureADConfig `yaml:"azuread,omitempty"`
GoogleIAMConfig *googleiam.Config `yaml:"google_iam,omitempty"`
}
// SetDirectory joins any relative file paths with dir.
@ -1100,6 +1188,10 @@ func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
return err
}
if err := c.ProtobufMessage.Validate(); err != nil {
return fmt.Errorf("invalid protobuf_message value: %w", err)
}
// The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer.
// We cannot make it a pointer as the parser panics for inlined pointer structs.
// Thus we just do its validation here.
@ -1107,17 +1199,33 @@ func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
return err
}
httpClientConfigAuthEnabled := c.HTTPClientConfig.BasicAuth != nil ||
c.HTTPClientConfig.Authorization != nil || c.HTTPClientConfig.OAuth2 != nil
return validateAuthConfigs(c)
}
if httpClientConfigAuthEnabled && (c.SigV4Config != nil || c.AzureADConfig != nil) {
return fmt.Errorf("at most one of basic_auth, authorization, oauth2, sigv4, & azuread must be configured")
// validateAuthConfigs validates that at most one of basic_auth, authorization, oauth2, sigv4, azuread or google_iam must be configured.
func validateAuthConfigs(c *RemoteWriteConfig) error {
var authConfigured []string
if c.HTTPClientConfig.BasicAuth != nil {
authConfigured = append(authConfigured, "basic_auth")
}
if c.SigV4Config != nil && c.AzureADConfig != nil {
return fmt.Errorf("at most one of basic_auth, authorization, oauth2, sigv4, & azuread must be configured")
if c.HTTPClientConfig.Authorization != nil {
authConfigured = append(authConfigured, "authorization")
}
if c.HTTPClientConfig.OAuth2 != nil {
authConfigured = append(authConfigured, "oauth2")
}
if c.SigV4Config != nil {
authConfigured = append(authConfigured, "sigv4")
}
if c.AzureADConfig != nil {
authConfigured = append(authConfigured, "azuread")
}
if c.GoogleIAMConfig != nil {
authConfigured = append(authConfigured, "google_iam")
}
if len(authConfigured) > 1 {
return fmt.Errorf("at most one of basic_auth, authorization, oauth2, sigv4, azuread or google_iam must be configured. Currently configured: %v", authConfigured)
}
return nil
}
@ -1136,7 +1244,7 @@ func validateHeadersForTracing(headers map[string]string) error {
func validateHeaders(headers map[string]string) error {
for header := range headers {
if strings.ToLower(header) == "authorization" {
return errors.New("authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, or azuread parameter")
return errors.New("authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, azuread or google_iam parameter")
}
if _, ok := reservedHeaders[strings.ToLower(header)]; ok {
return fmt.Errorf("%s is a reserved header. It must not be changed", header)
@ -1184,13 +1292,20 @@ type MetadataConfig struct {
MaxSamplesPerSend int `yaml:"max_samples_per_send,omitempty"`
}
const (
// DefaultChunkedReadLimit is the default value for the maximum size of the protobuf frame client allows.
// 50MB is the default. This is equivalent to ~100k full XOR chunks and average labelset.
DefaultChunkedReadLimit = 5e+7
)
// RemoteReadConfig is the configuration for reading from remote storage.
type RemoteReadConfig struct {
URL *config.URL `yaml:"url"`
RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"`
Headers map[string]string `yaml:"headers,omitempty"`
ReadRecent bool `yaml:"read_recent,omitempty"`
Name string `yaml:"name,omitempty"`
URL *config.URL `yaml:"url"`
RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"`
ChunkedReadLimit uint64 `yaml:"chunked_read_limit,omitempty"`
Headers map[string]string `yaml:"headers,omitempty"`
ReadRecent bool `yaml:"read_recent,omitempty"`
Name string `yaml:"name,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types.
@ -1255,3 +1370,35 @@ func getGoGCEnv() int {
}
return DefaultRuntimeConfig.GoGC
}
// OTLPConfig is the configuration for writing to the OTLP endpoint.
type OTLPConfig struct {
PromoteResourceAttributes []string `yaml:"promote_resource_attributes,omitempty"`
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (c *OTLPConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
*c = DefaultOTLPConfig
type plain OTLPConfig
if err := unmarshal((*plain)(c)); err != nil {
return err
}
seen := map[string]struct{}{}
var err error
for i, attr := range c.PromoteResourceAttributes {
attr = strings.TrimSpace(attr)
if attr == "" {
err = errors.Join(err, fmt.Errorf("empty promoted OTel resource attribute"))
continue
}
if _, exists := seen[attr]; exists {
err = errors.Join(err, fmt.Errorf("duplicated promoted OTel resource attribute %q", attr))
continue
}
seen[attr] = struct{}{}
c.PromoteResourceAttributes[i] = attr
}
return err
}

View file

@ -16,6 +16,7 @@ package config
import (
"crypto/tls"
"encoding/json"
"fmt"
"net/url"
"os"
"path/filepath"
@ -61,6 +62,11 @@ import (
"github.com/prometheus/prometheus/util/testutil"
)
func init() {
// This can be removed when the default validation scheme in common is updated.
model.NameValidationScheme = model.UTF8Validation
}
func mustParseURL(u string) *config.URL {
parsed, err := url.Parse(u)
if err != nil {
@ -77,14 +83,16 @@ const (
globLabelNameLengthLimit = 200
globLabelValueLengthLimit = 200
globalGoGC = 42
globScrapeFailureLogFile = "testdata/fail.log"
)
var expectedConf = &Config{
GlobalConfig: GlobalConfig{
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EvaluationInterval: model.Duration(30 * time.Second),
QueryLogFile: "",
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EvaluationInterval: model.Duration(30 * time.Second),
QueryLogFile: "testdata/query.log",
ScrapeFailureLogFile: globScrapeFailureLogFile,
ExternalLabels: labels.FromStrings("foo", "bar", "monitor", "codelab"),
@ -108,9 +116,10 @@ var expectedConf = &Config{
RemoteWriteConfigs: []*RemoteWriteConfig{
{
URL: mustParseURL("http://remote1/push"),
RemoteTimeout: model.Duration(30 * time.Second),
Name: "drop_expensive",
URL: mustParseURL("http://remote1/push"),
ProtobufMessage: RemoteWriteProtoMsgV1,
RemoteTimeout: model.Duration(30 * time.Second),
Name: "drop_expensive",
WriteRelabelConfigs: []*relabel.Config{
{
SourceLabels: model.LabelNames{"__name__"},
@ -137,11 +146,12 @@ var expectedConf = &Config{
},
},
{
URL: mustParseURL("http://remote2/push"),
RemoteTimeout: model.Duration(30 * time.Second),
QueueConfig: DefaultQueueConfig,
MetadataConfig: DefaultMetadataConfig,
Name: "rw_tls",
URL: mustParseURL("http://remote2/push"),
ProtobufMessage: RemoteWriteProtoMsgV2,
RemoteTimeout: model.Duration(30 * time.Second),
QueueConfig: DefaultQueueConfig,
MetadataConfig: DefaultMetadataConfig,
Name: "rw_tls",
HTTPClientConfig: config.HTTPClientConfig{
TLSConfig: config.TLSConfig{
CertFile: filepath.FromSlash("testdata/valid_cert_file"),
@ -154,12 +164,19 @@ var expectedConf = &Config{
},
},
OTLPConfig: OTLPConfig{
PromoteResourceAttributes: []string{
"k8s.cluster.name", "k8s.job.name", "k8s.namespace.name",
},
},
RemoteReadConfigs: []*RemoteReadConfig{
{
URL: mustParseURL("http://remote1/read"),
RemoteTimeout: model.Duration(1 * time.Minute),
ReadRecent: true,
Name: "default",
URL: mustParseURL("http://remote1/read"),
RemoteTimeout: model.Duration(1 * time.Minute),
ChunkedReadLimit: DefaultChunkedReadLimit,
ReadRecent: true,
Name: "default",
HTTPClientConfig: config.HTTPClientConfig{
FollowRedirects: true,
EnableHTTP2: false,
@ -169,6 +186,7 @@ var expectedConf = &Config{
{
URL: mustParseURL("http://remote3/read"),
RemoteTimeout: model.Duration(1 * time.Minute),
ChunkedReadLimit: DefaultChunkedReadLimit,
ReadRecent: false,
Name: "read_special",
RequiredMatchers: model.LabelSet{"job": "special"},
@ -200,6 +218,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: "testdata/fail_prom.log",
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -214,6 +233,15 @@ var expectedConf = &Config{
TLSConfig: config.TLSConfig{
MinVersion: config.TLSVersion(tls.VersionTLS10),
},
HTTPHeaders: &config.Headers{
Headers: map[string]config.Header{
"foo": {
Values: []string{"foobar"},
Secrets: []config.Secret{"bar", "foo"},
Files: []string{filepath.FromSlash("testdata/valid_password_file")},
},
},
},
},
ServiceDiscoveryConfigs: discovery.Configs{
@ -303,6 +331,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: 210,
LabelValueLengthLimit: 210,
ScrapeProtocols: []ScrapeProtocol{PrometheusText0_0_4},
ScrapeFailureLogFile: globScrapeFailureLogFile,
HTTPClientConfig: config.HTTPClientConfig{
BasicAuth: &config.BasicAuth{
@ -400,6 +429,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -455,6 +485,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: "/metrics",
Scheme: "http",
@ -488,6 +519,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -527,6 +559,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -566,6 +599,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -595,6 +629,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -632,6 +667,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -666,6 +702,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -707,6 +744,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -738,6 +776,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -772,6 +811,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -799,6 +839,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -829,6 +870,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: "/federate",
Scheme: DefaultScrapeConfig.Scheme,
@ -859,6 +901,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -889,6 +932,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -916,6 +960,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -951,6 +996,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -985,6 +1031,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1016,6 +1063,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1046,6 +1094,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1080,6 +1129,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1117,6 +1167,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1173,6 +1224,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1200,6 +1252,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
@ -1238,6 +1291,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
@ -1282,6 +1336,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1317,6 +1372,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
@ -1346,6 +1402,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1378,6 +1435,7 @@ var expectedConf = &Config{
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: globScrapeFailureLogFile,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1469,6 +1527,26 @@ func TestRemoteWriteRetryOnRateLimit(t *testing.T) {
require.False(t, got.RemoteWriteConfigs[1].QueueConfig.RetryOnRateLimit)
}
func TestOTLPSanitizeResourceAttributes(t *testing.T) {
t.Run("good config", func(t *testing.T) {
want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml"), false, false, log.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
require.NoError(t, err)
var got Config
require.NoError(t, yaml.UnmarshalStrict(out, &got))
require.Equal(t, []string{"k8s.cluster.name", "k8s.job.name", "k8s.namespace.name"}, got.OTLPConfig.PromoteResourceAttributes)
})
t.Run("bad config", func(t *testing.T) {
_, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml"), false, false, log.NewNopLogger())
require.ErrorContains(t, err, `duplicated promoted OTel resource attribute "k8s.job.name"`)
require.ErrorContains(t, err, `empty promoted OTel resource attribute`)
})
}
func TestLoadConfig(t *testing.T) {
// Parse a valid file that sets a global scrape timeout. This tests whether parsing
// an overwritten default field in the global config permanently changes the default.
@ -1501,7 +1579,7 @@ func TestElideSecrets(t *testing.T) {
yamlConfig := string(config)
matches := secretRe.FindAllStringIndex(yamlConfig, -1)
require.Len(t, matches, 22, "wrong number of secret matches found")
require.Len(t, matches, 24, "wrong number of secret matches found")
require.NotContains(t, yamlConfig, "mysecret",
"yaml marshal reveals authentication credentials.")
}
@ -1798,7 +1876,11 @@ var expectedErrors = []struct {
},
{
filename: "remote_write_authorization_header.bad.yml",
errMsg: `authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, or azuread parameter`,
errMsg: `authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, azuread or google_iam parameter`,
},
{
filename: "remote_write_wrong_msg.bad.yml",
errMsg: `invalid protobuf_message value: unknown remote write protobuf message io.prometheus.writet.v2.Request, supported: prometheus.WriteRequest, io.prometheus.write.v2.Request`,
},
{
filename: "remote_write_url_missing.bad.yml",
@ -2007,6 +2089,10 @@ var expectedErrors = []struct {
}
func TestBadConfigs(t *testing.T) {
model.NameValidationScheme = model.LegacyValidation
defer func() {
model.NameValidationScheme = model.UTF8Validation
}()
for _, ee := range expectedErrors {
_, err := LoadFile("testdata/"+ee.filename, false, false, log.NewNopLogger())
require.Error(t, err, "%s", ee.filename)
@ -2016,6 +2102,10 @@ func TestBadConfigs(t *testing.T) {
}
func TestBadStaticConfigsJSON(t *testing.T) {
model.NameValidationScheme = model.LegacyValidation
defer func() {
model.NameValidationScheme = model.UTF8Validation
}()
content, err := os.ReadFile("testdata/static_config.bad.json")
require.NoError(t, err)
var tg targetgroup.Group
@ -2024,6 +2114,10 @@ func TestBadStaticConfigsJSON(t *testing.T) {
}
func TestBadStaticConfigsYML(t *testing.T) {
model.NameValidationScheme = model.LegacyValidation
defer func() {
model.NameValidationScheme = model.UTF8Validation
}()
content, err := os.ReadFile("testdata/static_config.bad.yml")
require.NoError(t, err)
var tg targetgroup.Group
@ -2268,3 +2362,52 @@ func TestScrapeConfigDisableCompression(t *testing.T) {
require.False(t, got.ScrapeConfigs[0].EnableCompression)
}
func TestScrapeConfigNameValidationSettings(t *testing.T) {
model.NameValidationScheme = model.UTF8Validation
defer func() {
model.NameValidationScheme = model.LegacyValidation
}()
tests := []struct {
name string
inputFile string
expectScheme string
}{
{
name: "blank config implies default",
inputFile: "scrape_config_default_validation_mode",
expectScheme: "",
},
{
name: "global setting implies local settings",
inputFile: "scrape_config_global_validation_mode",
expectScheme: "legacy",
},
{
name: "local setting",
inputFile: "scrape_config_local_validation_mode",
expectScheme: "legacy",
},
{
name: "local setting overrides global setting",
inputFile: "scrape_config_local_global_validation_mode",
expectScheme: "utf8",
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, false, log.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
require.NoError(t, err)
got := &Config{}
require.NoError(t, yaml.UnmarshalStrict(out, got))
require.Equal(t, tc.expectScheme, got.ScrapeConfigs[0].MetricNameValidationScheme)
})
}
}

92
config/reload.go Normal file
View file

@ -0,0 +1,92 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package config
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"os"
"path/filepath"
"gopkg.in/yaml.v2"
)
type ExternalFilesConfig struct {
RuleFiles []string `yaml:"rule_files"`
ScrapeConfigFiles []string `yaml:"scrape_config_files"`
}
// GenerateChecksum generates a checksum of the YAML file and the files it references.
func GenerateChecksum(yamlFilePath string) (string, error) {
hash := sha256.New()
yamlContent, err := os.ReadFile(yamlFilePath)
if err != nil {
return "", fmt.Errorf("error reading YAML file: %w", err)
}
_, err = hash.Write(yamlContent)
if err != nil {
return "", fmt.Errorf("error writing YAML file to hash: %w", err)
}
var config ExternalFilesConfig
if err := yaml.Unmarshal(yamlContent, &config); err != nil {
return "", fmt.Errorf("error unmarshalling YAML: %w", err)
}
dir := filepath.Dir(yamlFilePath)
for i, file := range config.RuleFiles {
config.RuleFiles[i] = filepath.Join(dir, file)
}
for i, file := range config.ScrapeConfigFiles {
config.ScrapeConfigFiles[i] = filepath.Join(dir, file)
}
files := map[string][]string{
"r": config.RuleFiles, // "r" for rule files
"s": config.ScrapeConfigFiles, // "s" for scrape config files
}
for _, prefix := range []string{"r", "s"} {
for _, pattern := range files[prefix] {
matchingFiles, err := filepath.Glob(pattern)
if err != nil {
return "", fmt.Errorf("error finding files with pattern %q: %w", pattern, err)
}
for _, file := range matchingFiles {
// Write prefix to the hash ("r" or "s") followed by \0, then
// the file path.
_, err = hash.Write([]byte(prefix + "\x00" + file + "\x00"))
if err != nil {
return "", fmt.Errorf("error writing %q path to hash: %w", file, err)
}
// Read and hash the content of the file.
content, err := os.ReadFile(file)
if err != nil {
return "", fmt.Errorf("error reading file %s: %w", file, err)
}
_, err = hash.Write(append(content, []byte("\x00")...))
if err != nil {
return "", fmt.Errorf("error writing %q content to hash: %w", file, err)
}
}
}
}
return hex.EncodeToString(hash.Sum(nil)), nil
}

222
config/reload_test.go Normal file
View file

@ -0,0 +1,222 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package config
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/require"
)
func TestGenerateChecksum(t *testing.T) {
tmpDir := t.TempDir()
// Define paths for the temporary files.
yamlFilePath := filepath.Join(tmpDir, "test.yml")
ruleFilePath := filepath.Join(tmpDir, "rule_file.yml")
scrapeConfigFilePath := filepath.Join(tmpDir, "scrape_config.yml")
// Define initial and modified content for the files.
originalRuleContent := "groups:\n- name: example\n rules:\n - alert: ExampleAlert"
modifiedRuleContent := "groups:\n- name: example\n rules:\n - alert: ModifiedAlert"
originalScrapeConfigContent := "scrape_configs:\n- job_name: example"
modifiedScrapeConfigContent := "scrape_configs:\n- job_name: modified_example"
// Define YAML content referencing the rule and scrape config files.
yamlContent := `
rule_files:
- rule_file.yml
scrape_config_files:
- scrape_config.yml
`
// Write initial content to files.
require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644))
require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644))
require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644))
// Generate the original checksum.
originalChecksum := calculateChecksum(t, yamlFilePath)
t.Run("Rule File Change", func(t *testing.T) {
// Modify the rule file.
require.NoError(t, os.WriteFile(ruleFilePath, []byte(modifiedRuleContent), 0o644))
// Checksum should change.
modifiedChecksum := calculateChecksum(t, yamlFilePath)
require.NotEqual(t, originalChecksum, modifiedChecksum)
// Revert the rule file.
require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644))
// Checksum should return to the original.
revertedChecksum := calculateChecksum(t, yamlFilePath)
require.Equal(t, originalChecksum, revertedChecksum)
})
t.Run("Scrape Config Change", func(t *testing.T) {
// Modify the scrape config file.
require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(modifiedScrapeConfigContent), 0o644))
// Checksum should change.
modifiedChecksum := calculateChecksum(t, yamlFilePath)
require.NotEqual(t, originalChecksum, modifiedChecksum)
// Revert the scrape config file.
require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644))
// Checksum should return to the original.
revertedChecksum := calculateChecksum(t, yamlFilePath)
require.Equal(t, originalChecksum, revertedChecksum)
})
t.Run("Rule File Deletion", func(t *testing.T) {
// Delete the rule file.
require.NoError(t, os.Remove(ruleFilePath))
// Checksum should change.
deletedChecksum := calculateChecksum(t, yamlFilePath)
require.NotEqual(t, originalChecksum, deletedChecksum)
// Restore the rule file.
require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644))
// Checksum should return to the original.
revertedChecksum := calculateChecksum(t, yamlFilePath)
require.Equal(t, originalChecksum, revertedChecksum)
})
t.Run("Scrape Config Deletion", func(t *testing.T) {
// Delete the scrape config file.
require.NoError(t, os.Remove(scrapeConfigFilePath))
// Checksum should change.
deletedChecksum := calculateChecksum(t, yamlFilePath)
require.NotEqual(t, originalChecksum, deletedChecksum)
// Restore the scrape config file.
require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644))
// Checksum should return to the original.
revertedChecksum := calculateChecksum(t, yamlFilePath)
require.Equal(t, originalChecksum, revertedChecksum)
})
t.Run("Main File Change", func(t *testing.T) {
// Modify the main YAML file.
modifiedYamlContent := `
global:
scrape_interval: 3s
rule_files:
- rule_file.yml
scrape_config_files:
- scrape_config.yml
`
require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644))
// Checksum should change.
modifiedChecksum := calculateChecksum(t, yamlFilePath)
require.NotEqual(t, originalChecksum, modifiedChecksum)
// Revert the main YAML file.
require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644))
// Checksum should return to the original.
revertedChecksum := calculateChecksum(t, yamlFilePath)
require.Equal(t, originalChecksum, revertedChecksum)
})
t.Run("Rule File Removed from YAML Config", func(t *testing.T) {
// Modify the YAML content to remove the rule file.
modifiedYamlContent := `
scrape_config_files:
- scrape_config.yml
`
require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644))
// Checksum should change.
modifiedChecksum := calculateChecksum(t, yamlFilePath)
require.NotEqual(t, originalChecksum, modifiedChecksum)
// Revert the YAML content.
require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644))
// Checksum should return to the original.
revertedChecksum := calculateChecksum(t, yamlFilePath)
require.Equal(t, originalChecksum, revertedChecksum)
})
t.Run("Scrape Config Removed from YAML Config", func(t *testing.T) {
// Modify the YAML content to remove the scrape config file.
modifiedYamlContent := `
rule_files:
- rule_file.yml
`
require.NoError(t, os.WriteFile(yamlFilePath, []byte(modifiedYamlContent), 0o644))
// Checksum should change.
modifiedChecksum := calculateChecksum(t, yamlFilePath)
require.NotEqual(t, originalChecksum, modifiedChecksum)
// Revert the YAML content.
require.NoError(t, os.WriteFile(yamlFilePath, []byte(yamlContent), 0o644))
// Checksum should return to the original.
revertedChecksum := calculateChecksum(t, yamlFilePath)
require.Equal(t, originalChecksum, revertedChecksum)
})
t.Run("Empty Rule File", func(t *testing.T) {
// Write an empty rule file.
require.NoError(t, os.WriteFile(ruleFilePath, []byte(""), 0o644))
// Checksum should change.
emptyChecksum := calculateChecksum(t, yamlFilePath)
require.NotEqual(t, originalChecksum, emptyChecksum)
// Restore the rule file.
require.NoError(t, os.WriteFile(ruleFilePath, []byte(originalRuleContent), 0o644))
// Checksum should return to the original.
revertedChecksum := calculateChecksum(t, yamlFilePath)
require.Equal(t, originalChecksum, revertedChecksum)
})
t.Run("Empty Scrape Config File", func(t *testing.T) {
// Write an empty scrape config file.
require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(""), 0o644))
// Checksum should change.
emptyChecksum := calculateChecksum(t, yamlFilePath)
require.NotEqual(t, originalChecksum, emptyChecksum)
// Restore the scrape config file.
require.NoError(t, os.WriteFile(scrapeConfigFilePath, []byte(originalScrapeConfigContent), 0o644))
// Checksum should return to the original.
revertedChecksum := calculateChecksum(t, yamlFilePath)
require.Equal(t, originalChecksum, revertedChecksum)
})
}
// calculateChecksum generates a checksum for the given YAML file path.
func calculateChecksum(t *testing.T, yamlFilePath string) string {
checksum, err := GenerateChecksum(yamlFilePath)
require.NoError(t, err)
require.NotEmpty(t, checksum)
return checksum
}

View file

@ -8,6 +8,8 @@ global:
label_limit: 30
label_name_length_limit: 200
label_value_length_limit: 200
query_log_file: query.log
scrape_failure_log_file: fail.log
# scrape_timeout is set to the global default (10s).
external_labels:
@ -37,6 +39,7 @@ remote_write:
key_file: valid_key_file
- url: http://remote2/push
protobuf_message: io.prometheus.write.v2.Request
name: rw_tls
tls_config:
cert_file: valid_cert_file
@ -44,6 +47,9 @@ remote_write:
headers:
name: value
otlp:
promote_resource_attributes: ["k8s.cluster.name", "k8s.job.name", "k8s.namespace.name"]
remote_read:
- url: http://remote1/read
read_recent: true
@ -68,6 +74,7 @@ scrape_configs:
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
scrape_failure_log_file: fail_prom.log
file_sd_configs:
- files:
- foo/*.slow.json
@ -83,6 +90,12 @@ scrape_configs:
my: label
your: label
http_headers:
foo:
values: ["foobar"]
secrets: ["bar", "foo"]
files: ["valid_password_file"]
relabel_configs:
- source_labels: [job, __meta_dns_name]
regex: (.*)some-[regex]

View file

@ -1,4 +1,6 @@
# Two scrape configs with the same job names are not allowed.
global:
metric_name_validation_scheme: legacy
scrape_configs:
- job_name: prometheus
- job_name: service-x

View file

@ -1,3 +1,5 @@
global:
metric_name_validation_scheme: legacy
scrape_configs:
- job_name: prometheus
relabel_configs:

View file

@ -0,0 +1,2 @@
otlp:
promote_resource_attributes: ["k8s.cluster.name", " k8s.job.name ", "k8s.namespace.name", "k8s.job.name", ""]

View file

@ -0,0 +1,2 @@
otlp:
promote_resource_attributes: ["k8s.cluster.name", " k8s.job.name ", "k8s.namespace.name"]

View file

@ -0,0 +1,3 @@
remote_write:
- url: localhost:9090
protobuf_message: io.prometheus.writet.v2.Request # typo in 'write"

View file

@ -0,0 +1,2 @@
scrape_configs:
- job_name: prometheus

View file

@ -0,0 +1,4 @@
global:
metric_name_validation_scheme: legacy
scrape_configs:
- job_name: prometheus

View file

@ -0,0 +1,5 @@
global:
metric_name_validation_scheme: legacy
scrape_configs:
- job_name: prometheus
metric_name_validation_scheme: utf8

View file

@ -0,0 +1,3 @@
scrape_configs:
- job_name: prometheus
metric_name_validation_scheme: legacy

View file

@ -1,82 +0,0 @@
{{/* vim: set ft=html: */}}
{{/* Navbar, should be passed . */}}
{{ define "navbar" }}
<nav class="navbar fixed-top navbar-expand-sm navbar-dark bg-dark">
<div class="container-fluid">
<!-- Brand and toggle get grouped for better mobile display -->
<div class="navbar-header">
<button type="button" class="navbar-toggler" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false" aria-controls="navbar-nav" aria-label="toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<a class="navbar-brand" href="{{ pathPrefix }}/">Prometheus</a>
</div>
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav">
<li class="nav-item"><a class="nav-link" href="{{ pathPrefix }}/alerts">Alerts</a></li>
<li class="nav-item"><a class="nav-link" href="https://www.pagerduty.com/">PagerDuty</a></li>
</ul>
</div>
</div>
</nav>
{{ end }}
{{/* LHS menu, should be passed . */}}
{{ define "menu" }}
<div class="prom_lhs_menu row">
<nav class="col-md-2 md-block bg-dark sidebar prom_lhs_menu_nav">
<div class="sidebar-sticky">
<ul class="nav flex-column">
{{ template "_menuItem" (args . "index.html.example" "Overview") }}
{{ if query "up{job='node'}" }}
{{ template "_menuItem" (args . "node.html" "Node") }}
{{ if match "^node" .Path }}
{{ if .Params.instance }}
<ul>
<li {{ if eq .Path "node-overview.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
<a class="nav-link" href="node-overview.html?instance={{ .Params.instance }}">{{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</a>
</li>
<ul>
<li {{ if eq .Path "node-cpu.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
<a class="nav-link" href="node-cpu.html?instance={{ .Params.instance }}">CPU</a>
</li>
<li {{ if eq .Path "node-disk.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
<a class="nav-link" href="node-disk.html?instance={{ .Params.instance }}">Disk</a>
</li>
</ul>
</ul>
{{ end }}
{{ end }}
{{ end }}
{{ if query "up{job='prometheus'}" }}
{{ template "_menuItem" (args . "prometheus.html" "Prometheus") }}
{{ if match "^prometheus" .Path }}
{{ if .Params.instance }}
<ul>
<li {{ if eq .Path "prometheus-overview.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
<a class="nav-link" href="prometheus-overview.html?instance={{ .Params.instance }}">{{.Params.instance }}</a>
</li>
</ul>
{{ end }}
{{ end }}
{{ end }}
</ul>
</div>
</nav>
</div>
{{ end }}
{{/* Helper, pass (args . path name) */}}
{{ define "_menuItem" }}
<li {{ if eq .arg0.Path .arg1 }} class="prom_lhs_menu_selected nav-item" {{ end }}><a class="nav-link" href="{{ .arg1 }}">{{ .arg2 }}</a></li>
{{ end }}

View file

@ -1,138 +0,0 @@
{{/* vim: set ft=html: */}}
{{/* Load Prometheus console library JS/CSS. Should go in <head> */}}
{{ define "prom_console_head" }}
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/classic/static/vendor/rickshaw/rickshaw.min.css">
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/classic/static/vendor/bootstrap-4.5.2/css/bootstrap.min.css">
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/classic/static/css/prom_console.css">
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/classic/static/vendor/bootstrap4-glyphicons/css/bootstrap-glyphicons.min.css">
<script src="{{ pathPrefix }}/classic/static/vendor/rickshaw/vendor/d3.v3.js"></script>
<script src="{{ pathPrefix }}/classic/static/vendor/rickshaw/vendor/d3.layout.min.js"></script>
<script src="{{ pathPrefix }}/classic/static/vendor/rickshaw/rickshaw.min.js"></script>
<script src="{{ pathPrefix }}/classic/static/vendor/js/jquery-3.5.1.min.js"></script>
<script src="{{ pathPrefix }}/classic/static/vendor/js/popper.min.js"></script>
<script src="{{ pathPrefix }}/classic/static/vendor/bootstrap-4.5.2/js/bootstrap.min.js"></script>
<script>
var PATH_PREFIX = "{{ pathPrefix }}";
</script>
<script src="{{ pathPrefix }}/classic/static/js/prom_console.js"></script>
{{ end }}
{{/* Top of all pages. */}}
{{ define "head" -}}
<!doctype html>
<html lang="en">
<head>
{{ template "prom_console_head" }}
</head>
<body>
{{ template "navbar" . }}
{{ template "menu" . }}
{{ end }}
{{ define "__prom_query_drilldown_noop" }}{{ . }}{{ end }}
{{ define "humanize" }}{{ humanize . }}{{ end }}
{{ define "humanizeNoSmallPrefix" }}{{ if and (lt . 1.0) (gt . -1.0) }}{{ printf "%.3g" . }}{{ else }}{{ humanize . }}{{ end }}{{ end }}
{{ define "humanize1024" }}{{ humanize1024 . }}{{ end }}
{{ define "humanizeDuration" }}{{ humanizeDuration . }}{{ end }}
{{ define "humanizePercentage" }}{{ humanizePercentage . }}{{ end }}
{{ define "humanizeTimestamp" }}{{ humanizeTimestamp . }}{{ end }}
{{ define "printf.1f" }}{{ printf "%.1f" . }}{{ end }}
{{ define "printf.3g" }}{{ printf "%.3g" . }}{{ end }}
{{/* prom_query_drilldown (args expr suffix? renderTemplate?)
Displays the result of the expression, with a link to /graph for it.
renderTemplate is the name of the template to use to render the value.
*/}}
{{ define "prom_query_drilldown" }}
{{ $expr := .arg0 }}{{ $suffix := (or .arg1 "") }}{{ $renderTemplate := (or .arg2 "__prom_query_drilldown_noop") }}
<a class="prom_query_drilldown" href="{{ pathPrefix }}{{ graphLink $expr }}">{{ with query $expr }}{{tmpl $renderTemplate ( . | first | value )}}{{ $suffix }}{{ else }}-{{ end }}</a>
{{ end }}
{{ define "prom_path" }}/consoles/{{ .Path }}?{{ range $param, $value := .Params }}{{ $param }}={{ $value }}&amp;{{ end }}{{ end }}"
{{ define "prom_right_table_head" }}
<div class="prom_console_rhs">
<table class="table table-bordered table-hover table-sm">
{{ end }}
{{ define "prom_right_table_tail" }}
</table>
</div>
{{ end }}
{{/* RHS table head, pass job name. Should be used after prom_right_table_head. */}}
{{ define "prom_right_table_job_head" }}
<tr>
<th>{{ . }}</th>
<th>{{ template "prom_query_drilldown" (args (printf "sum(up{job='%s'})" .)) }} / {{ template "prom_query_drilldown" (args (printf "count(up{job='%s'})" .)) }}</th>
</tr>
<tr>
<td>CPU</td>
<td>{{ template "prom_query_drilldown" (args (printf "avg by(job)(irate(process_cpu_seconds_total{job='%s'}[5m]))" .) "s/s" "humanizeNoSmallPrefix") }}</td>
</tr>
<tr>
<td>Memory</td>
<td>{{ template "prom_query_drilldown" (args (printf "avg by(job)(process_resident_memory_bytes{job='%s'})" .) "B" "humanize1024") }}</td>
</tr>
{{ end }}
{{ define "prom_content_head" }}
<div class="prom_console_content">
<div class="container-fluid">
{{ template "prom_graph_timecontrol" . }}
{{ end }}
{{ define "prom_content_tail" }}
</div>
</div>
{{ end }}
{{ define "prom_graph_timecontrol" }}
<div class="prom_graph_timecontrol">
<div class="prom_graph_timecontrol_inner">
<div class="prom_graph_timecontrol_group ">
<button class="btn btn-light pull-left" type="button" id="prom_graph_duration_shrink" title="Shrink the time range.">
<i class="glyphicon glyphicon-minus"></i>
</button><!-- Comments between elements to remove spaces
--><input class="input pull-left align-middle" size="3" title="Time range of graph" type="text" id="prom_graph_duration"><!--
--><button class="btn btn-light pull-left" type="button" id="prom_graph_duration_grow" title="Grow the time range.">
<i class="glyphicon glyphicon-plus"></i>
</button>
</div>
<div class="prom_graph_timecontrol_group ">
<button class="btn btn-light pull-left" type="button" id="prom_graph_time_back" title="Rewind the end time.">
<i class="glyphicon glyphicon-backward"></i>
</button><!--
--><input class="input pull-left align-middle" title="End time of graph" placeholder="Until" type="text" id="prom_graph_time_end" size="16" value=""><!--
--><button class="btn btn-light pull-left" type="button" id="prom_graph_time_forward" title="Advance the end time.">
<i class="glyphicon glyphicon-forward"></i>
</button>
</div>
<div class="prom_graph_timecontrol_group ">
<div class="btn-group dropup prom_graph_timecontrol_refresh pull-left">
<button type="button" class="btn btn-light pull-left" id="prom_graph_refresh_button" title="Refresh.">
<i class="glyphicon glyphicon-repeat"></i>
<span class="icon-repeat"></span>
(<span id="prom_graph_refresh_button_value">Off</span>)
</button>
<button type="button" class="btn btn-light pull-left dropdown-toggle" data-toggle="dropdown" title="Set autorefresh."aria-haspopup="true" aria-expanded="false">
<span class="caret"></span>&nbsp;
</button>
<ul class="dropdown-menu" id="prom_graph_refresh_intervals" role="menu">
</ul>
</div>
</div>
</div>
<script>
new PromConsole.TimeControl();
</script>
</div>
{{ end }}
{{/* Bottom of all pages. */}}
{{ define "tail" }}
</body>
</html>
{{ end }}

View file

@ -1,28 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<h1>Overview</h1>
<p>These are example consoles for Prometheus.</p>
<p>These consoles expect exporters to have the following job labels:</p>
<table class="table table-sm table-striped table-bordered" style="width: 0%">
<tr>
<th>Exporter</th>
<th>Job label</th>
</tr>
<tr>
<td>Node Exporter</td>
<td><code>node</code></td>
</tr>
<tr>
<td>Prometheus</td>
<td><code>prometheus</code></td>
</tr>
</table>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -1,60 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
<tr>
<th colspan="2">CPU(s): {{ template "prom_query_drilldown" (args (printf "scalar(count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'})))" .Params.instance)) }}</th>
</tr>
{{ range printf "sum by (mode)(irate(node_cpu_seconds_total{job='node',instance='%s'}[5m])) * 100 / scalar(count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'})))" .Params.instance .Params.instance | query | sortByLabel "mode" }}
<tr>
<td>{{ .Labels.mode | title }} CPU</td>
<td>{{ .Value | printf "%.1f" }}%</td>
</tr>
{{ end }}
<tr><th colspan="2">Misc</th></tr>
<tr>
<td>Processes Running</td>
<td>{{ template "prom_query_drilldown" (args (printf "node_procs_running{job='node',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
<td>Processes Blocked</td>
<td>{{ template "prom_query_drilldown" (args (printf "node_procs_blocked{job='node',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
<td>Forks</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_forks_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }}</td>
</tr>
<tr>
<td>Context Switches</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_context_switches_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }}</td>
</tr>
<tr>
<td>Interrupts</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_intr_total{job='node',instance='%s'}[5m])" .Params.instance) "/s" "humanize") }}</td>
</tr>
<tr>
<td>1m Loadavg</td>
<td>{{ template "prom_query_drilldown" (args (printf "node_load1{job='node',instance='%s'}" .Params.instance)) }}</td>
</tr>
<tr>
</tr>
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<h1>Node CPU - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</h1>
<h3>CPU Usage</h3>
<div id="cpuGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#cpuGraph"),
expr: "sum by (mode)(irate(node_cpu_seconds_total{job='node',instance='{{ .Params.instance }}',mode!='idle',mode!='iowait',mode!='steal'}[5m]))",
renderer: 'area',
max: {{ with printf "count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance | query }}{{ . | first | value }}{{ else}}undefined{{end}},
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yTitle: 'Cores'
})
</script>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -1,78 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
<tr>
<th colspan="2">Disks</th>
</tr>
{{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s'}" .Params.instance | query | sortByLabel "device" }}
<th colspan="2">{{ .Labels.device }}</th>
<tr>
<td>Utilization</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_disk_io_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) * 100" .Labels.instance .Labels.device) "%" "printf.1f") }}</td>
</tr>
<tr>
<td>Throughput</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_bytes_total{job='node',instance='%s',device='%s'}[5m]) + irate(node_disk_written_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "B/s" "humanize") }}</td>
</tr>
<tr>
<td>Avg Read Time</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) / irate(node_disk_reads_completed_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "s" "humanize") }}</td>
</tr>
<tr>
<td>Avg Write Time</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_disk_write_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) / irate(node_disk_writes_completed_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "s" "humanize") }}</td>
</tr>
{{ end }}
<tr>
<th colspan="2">Filesystem Fullness</th>
</tr>
{{ define "roughlyNearZero" }}
{{ if gt .1 . }}~0{{ else }}{{ printf "%.1f" . }}{{ end }}
{{ end }}
{{ range printf "node_filesystem_size_bytes{job='node',instance='%s'}" .Params.instance | query | sortByLabel "mountpoint" }}
<tr>
<td>{{ .Labels.mountpoint }}</td>
<td>{{ template "prom_query_drilldown" (args (printf "100 - node_filesystem_avail_bytes{job='node',instance='%s',mountpoint='%s'} / node_filesystem_size_bytes{job='node'} * 100" .Labels.instance .Labels.mountpoint) "%" "roughlyNearZero") }}</td>
</tr>
{{ end }}
<tr>
</tr>
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<h1>Node Disk - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</h1>
<h3>Disk I/O Utilization</h3>
<div id="diskioGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#diskioGraph"),
expr: [
"irate(node_disk_io_time_seconds_total{job='node',instance='{{ .Params.instance }}',device!~'^(md\\\\d+$|dm-)'}[5m]) * 100",
],
min: 0,
name: '[[ device ]]',
yUnits: "%",
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yTitle: 'Disk I/O Utilization'
})
</script>
<h3>Filesystem Usage</h3>
<div id="fsGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#fsGraph"),
expr: "100 - node_filesystem_avail_bytes{job='node',instance='{{ .Params.instance }}'} / node_filesystem_size_bytes{job='node'} * 100",
min: 0,
max: 100,
name: '[[ mountpoint ]]',
yUnits: "%",
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yTitle: 'Filesystem Fullness'
})
</script>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -1,121 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
<tr><th colspan="2">Overview</th></tr>
<tr>
<td>User CPU</td>
<td>{{ template "prom_query_drilldown" (args (printf "sum(irate(node_cpu_seconds_total{job='node',instance='%s',mode='user'}[5m])) * 100 / count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance .Params.instance) "%" "printf.1f") }}</td>
</tr>
<tr>
<td>System CPU</td>
<td>{{ template "prom_query_drilldown" (args (printf "sum(irate(node_cpu_seconds_total{job='node',instance='%s',mode='system'}[5m])) * 100 / count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance .Params.instance) "%" "printf.1f") }}</td>
</tr>
<tr>
<td>Memory Total</td>
<td>{{ template "prom_query_drilldown" (args (printf "node_memory_MemTotal_bytes{job='node',instance='%s'}" .Params.instance) "B" "humanize1024") }}</td>
</tr>
<tr>
<td>Memory Free</td>
<td>{{ template "prom_query_drilldown" (args (printf "node_memory_MemFree_bytes{job='node',instance='%s'}" .Params.instance) "B" "humanize1024") }}</td>
</tr>
<tr>
<th colspan="2">Network</th>
</tr>
{{ range printf "node_network_receive_bytes_total{job='node',instance='%s',device!='lo'}" .Params.instance | query | sortByLabel "device" }}
<tr>
<td>{{ .Labels.device }} Received</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_network_receive_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device) "B/s" "humanize") }}</td>
</tr>
<tr>
<td>{{ .Labels.device }} Transmitted</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_network_transmit_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device) "B/s" "humanize") }}</td>
</tr>
{{ end }}
<tr>
<th colspan="2">Disks</th>
</tr>
{{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s',device!~'^(md\\\\d+$|dm-)'}" .Params.instance | query | sortByLabel "device" }}
<tr>
<td>{{ .Labels.device }} Utilization</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_disk_io_time_seconds_total{job='node',instance='%s',device='%s'}[5m]) * 100" .Labels.instance .Labels.device) "%" "printf.1f") }}</td>
</tr>
{{ end }}
{{ range printf "node_disk_io_time_seconds_total{job='node',instance='%s'}" .Params.instance | query | sortByLabel "device" }}
<tr>
<td>{{ .Labels.device }} Throughput</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(node_disk_read_bytes_total{job='node',instance='%s',device='%s'}[5m]) + irate(node_disk_written_bytes_total{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device .Labels.instance .Labels.device) "B/s" "humanize") }}</td>
</tr>
{{ end }}
<tr>
<th colspan="2">Filesystem Fullness</th>
</tr>
{{ define "roughlyNearZero" }}
{{ if gt .1 . }}~0{{ else }}{{ printf "%.1f" . }}{{ end }}
{{ end }}
{{ range printf "node_filesystem_size_bytes{job='node',instance='%s'}" .Params.instance | query | sortByLabel "mountpoint" }}
<tr>
<td>{{ .Labels.mountpoint }}</td>
<td>{{ template "prom_query_drilldown" (args (printf "100 - node_filesystem_avail_bytes{job='node',instance='%s',mountpoint='%s'} / node_filesystem_size_bytes{job='node'} * 100" .Labels.instance .Labels.mountpoint) "%" "roughlyNearZero") }}</td>
</tr>
{{ end }}
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<h1>Node Overview - {{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</h1>
<h3>CPU Usage</h3>
<div id="cpuGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#cpuGraph"),
expr: "sum by (mode)(irate(node_cpu_seconds_total{job='node',instance='{{ .Params.instance }}',mode!='idle',mode!='iowait',mode!='steal'}[5m]))",
renderer: 'area',
max: {{ with printf "count(count by (cpu)(node_cpu_seconds_total{job='node',instance='%s'}))" .Params.instance | query }}{{ . | first | value }}{{ else}}undefined{{end}},
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yTitle: 'Cores'
})
</script>
<h3>Disk I/O Utilization</h3>
<div id="diskioGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#diskioGraph"),
expr: [
"irate(node_disk_io_time_seconds_total{job='node',instance='{{ .Params.instance }}',device!~'^(md\\\\d+$|dm-)'}[5m]) * 100",
],
min: 0,
name: '[[ device ]]',
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yUnits: "%",
yTitle: 'Disk I/O Utilization'
})
</script>
<h3>Memory</h3>
<div id="memoryGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#memoryGraph"),
renderer: 'area',
expr: [
"node_memory_Cached_bytes{job='node',instance='{{ .Params.instance }}'}",
"node_memory_Buffers_bytes{job='node',instance='{{ .Params.instance }}'}",
"node_memory_MemTotal_bytes{job='node',instance='{{ .Params.instance }}'} - node_memory_MemFree_bytes{job='node',instance='{{.Params.instance}}'} - node_memory_Buffers_bytes{job='node',instance='{{.Params.instance}}'} - node_memory_Cached_bytes{job='node',instance='{{.Params.instance}}'}",
"node_memory_MemFree{job='node',instance='{{ .Params.instance }}'}",
],
name: ["Cached", "Buffers", "Used", "Free"],
min: 0,
yUnits: "B",
yAxisFormatter: PromConsole.NumberFormatter.humanize1024,
yHoverFormatter: PromConsole.NumberFormatter.humanize1024,
yTitle: 'Memory'
})
</script>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -1,35 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
<tr>
<th>Node</th>
<th>{{ template "prom_query_drilldown" (args "sum(up{job='node'})") }} / {{ template "prom_query_drilldown" (args "count(up{job='node'})") }}</th>
</tr>
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<h1>Node</h1>
<table class="table table-condensed table-striped table-bordered" style="width: 0%">
<tr>
<th>Node</th>
<th>Up</th>
<th>CPU<br/>Used</th>
<th>Memory<br/> Available</th>
</tr>
{{ range query "up{job='node'}" | sortByLabel "instance" }}
<tr>
<td><a href="node-overview.html?instance={{ .Labels.instance }}">{{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Labels.instance }}</a></td>
<td{{ if eq (. | value) 1.0 }}>Yes{{ else }} class="alert-danger">No{{ end }}</td>
<td>{{ template "prom_query_drilldown" (args (printf "100 * (1 - avg by(instance) (sum without(mode) (irate(node_cpu_seconds_total{job='node',mode=~'idle|iowait|steal',instance='%s'}[5m]))))" .Labels.instance) "%" "printf.1f") }}</td>
<td>{{ template "prom_query_drilldown" (args (printf "node_memory_MemFree_bytes{job='node',instance='%s'} + node_memory_Cached_bytes{job='node',instance='%s'} + node_memory_Buffers_bytes{job='node',instance='%s'}" .Labels.instance .Labels.instance .Labels.instance) "B" "humanize1024") }}</td>
</tr>
{{ else }}
<tr><td colspan=4>No nodes found.</td></tr>
{{ end }}
</table>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -1,96 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
<tr>
<th colspan="2">Overview</th>
</tr>
<tr>
<td>CPU</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(process_cpu_seconds_total{job='prometheus',instance='%s'}[5m])" .Params.instance) "s/s" "humanizeNoSmallPrefix") }}</td>
</tr>
<tr>
<td>Memory</td>
<td>{{ template "prom_query_drilldown" (args (printf "process_resident_memory_bytes{job='prometheus',instance='%s'}" .Params.instance) "B" "humanize1024") }}</td>
</tr>
<tr>
<td>Version</td>
<td>{{ with query (printf "prometheus_build_info{job='prometheus',instance='%s'}" .Params.instance) }}{{. | first | label "version"}}{{end}}</td>
</tr>
<tr>
<th colspan="2">Storage</th>
</tr>
<tr>
<td>Ingested Samples</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(prometheus_tsdb_head_samples_appended_total{job='prometheus',instance='%s'}[5m])" .Params.instance) "/s" "humanizeNoSmallPrefix") }}</td>
</tr>
<tr>
<td>Head Series</td>
<td>{{ template "prom_query_drilldown" (args (printf "prometheus_tsdb_head_series{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
<td>Blocks Loaded</td>
<td>{{ template "prom_query_drilldown" (args (printf "prometheus_tsdb_blocks_loaded{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
<th colspan="2">Rules</th>
</tr>
<tr>
<td>Evaluation Duration</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(prometheus_evaluator_duration_seconds_sum{job='prometheus',instance='%s'}[5m]) / irate(prometheus_evaluator_duration_seconds_count{job='prometheus',instance='%s'}[5m])" .Params.instance .Params.instance) "" "humanizeDuration") }}</td>
</tr>
<tr>
<td>Notification Latency</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(prometheus_notifications_latency_seconds_sum{job='prometheus',instance='%s'}[5m]) / irate(prometheus_notifications_latency_seconds_count{job='prometheus',instance='%s'}[5m])" .Params.instance .Params.instance) "" "humanizeDuration") }}</td>
</tr>
<tr>
<td>Notification Queue</td>
<td>{{ template "prom_query_drilldown" (args (printf "prometheus_notifications_queue_length{job='prometheus',instance='%s'}" .Params.instance) "" "humanize") }}</td>
</tr>
<tr>
<th colspan="2">HTTP Server</th>
</tr>
{{ range printf "prometheus_http_request_duration_seconds_count{job='prometheus',instance='%s'}" .Params.instance | query | sortByLabel "handler" }}
<tr>
<td>{{ .Labels.handler }}</td>
<td>{{ template "prom_query_drilldown" (args (printf "irate(prometheus_http_request_duration_seconds_count{job='prometheus',instance='%s',handler='%s'}[5m])" .Labels.instance .Labels.handler) "/s" "humanizeNoSmallPrefix") }}</td>
</tr>
{{ end }}
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<div class="prom_content_div">
<h1>Prometheus Overview - {{ .Params.instance }}</h1>
<h3>Ingested Samples</h3>
<div id="samplesGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#samplesGraph"),
expr: "irate(prometheus_tsdb_head_samples_appended_total{job='prometheus',instance='{{ .Params.instance }}'}[5m])",
name: 'Ingested Samples',
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yTitle: "Samples",
yUnits: "/s",
})
</script>
<h3>HTTP Server</h3>
<div id="serverGraph"></div>
<script>
new PromConsole.Graph({
node: document.querySelector("#serverGraph"),
expr: "irate(prometheus_http_request_duration_seconds_count{job='prometheus',instance='{{ .Params.instance }}'}[5m])",
name: '[[handler]]',
yAxisFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yHoverFormatter: PromConsole.NumberFormatter.humanizeNoSmallPrefix,
yTitle: "Requests",
yUnits: "/s",
})
</script>
</div>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -1,34 +0,0 @@
{{ template "head" . }}
{{ template "prom_right_table_head" }}
<tr>
<th>Prometheus</th>
<th>{{ template "prom_query_drilldown" (args "sum(up{job='prometheus'})") }} / {{ template "prom_query_drilldown" (args "count(up{job='prometheus'})") }}</th>
</tr>
{{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }}
<h1>Prometheus</h1>
<table class="table table-sm table-striped table-bordered" style="width: 0%">
<tr>
<th>Prometheus</th>
<th>Up</th>
<th>Ingested Samples</th>
<th>Memory</th>
</tr>
{{ range query "up{job='prometheus'}" | sortByLabel "instance" }}
<tr>
<td><a href="prometheus-overview.html?instance={{ .Labels.instance }}">{{ .Labels.instance }}</a></td>
<td {{ if eq (. | value) 1.0 }}>Yes{{ else }} class="alert-danger">No{{ end }}</td>
<td class="text-right">{{ template "prom_query_drilldown" (args (printf "irate(prometheus_tsdb_head_samples_appended_total{job='prometheus',instance='%s'}[5m])" .Labels.instance) "/s" "humanizeNoSmallPrefix") }}</td>
<td class="text-right">{{ template "prom_query_drilldown" (args (printf "process_resident_memory_bytes{job='prometheus',instance='%s'}" .Labels.instance) "B" "humanize1024")}}</td>
</tr>
{{ else }}
<tr><td colspan=4>No devices found.</td></tr>
{{ end }}
</table>
{{ template "prom_content_tail" . }}
{{ template "tail" }}

View file

@ -13,7 +13,7 @@
package discovery
// Create a dummy metrics struct, because this SD doesn't have any metrics.
// NoopDiscovererMetrics creates a dummy metrics struct, because this SD doesn't have any metrics.
type NoopDiscovererMetrics struct{}
var _ DiscovererMetrics = (*NoopDiscovererMetrics)(nil)

View file

@ -39,7 +39,7 @@ type Discoverer interface {
Run(ctx context.Context, up chan<- []*targetgroup.Group)
}
// Internal metrics of service discovery mechanisms.
// DiscovererMetrics are internal metrics of service discovery mechanisms.
type DiscovererMetrics interface {
Register() error
Unregister()
@ -56,7 +56,7 @@ type DiscovererOptions struct {
HTTPClientOptions []config.HTTPClientOption
}
// Metrics used by the "refresh" package.
// RefreshMetrics are used by the "refresh" package.
// We define them here in the "discovery" package in order to avoid a cyclic dependency between
// "discovery" and "refresh".
type RefreshMetrics struct {
@ -64,17 +64,18 @@ type RefreshMetrics struct {
Duration prometheus.Observer
}
// Instantiate the metrics used by the "refresh" package.
// RefreshMetricsInstantiator instantiates the metrics used by the "refresh" package.
type RefreshMetricsInstantiator interface {
Instantiate(mech string) *RefreshMetrics
}
// An interface for registering, unregistering, and instantiating metrics for the "refresh" package.
// Refresh metrics are registered and unregistered outside of the service discovery mechanism.
// This is so that the same metrics can be reused across different service discovery mechanisms.
// To manage refresh metrics inside the SD mechanism, we'd need to use const labels which are
// specific to that SD. However, doing so would also expose too many unused metrics on
// the Prometheus /metrics endpoint.
// RefreshMetricsManager is an interface for registering, unregistering, and
// instantiating metrics for the "refresh" package. Refresh metrics are
// registered and unregistered outside of the service discovery mechanism. This
// is so that the same metrics can be reused across different service discovery
// mechanisms. To manage refresh metrics inside the SD mechanism, we'd need to
// use const labels which are specific to that SD. However, doing so would also
// expose too many unused metrics on the Prometheus /metrics endpoint.
type RefreshMetricsManager interface {
DiscovererMetrics
RefreshMetricsInstantiator
@ -145,7 +146,8 @@ func (c StaticConfig) NewDiscoverer(DiscovererOptions) (Discoverer, error) {
return staticDiscoverer(c), nil
}
// No metrics are needed for this service discovery mechanism.
// NewDiscovererMetrics returns NoopDiscovererMetrics because no metrics are
// needed for this service discovery mechanism.
func (c StaticConfig) NewDiscovererMetrics(prometheus.Registerer, RefreshMetricsInstantiator) DiscovererMetrics {
return &NoopDiscovererMetrics{}
}

View file

@ -97,7 +97,6 @@ func fetchApps(ctx context.Context, server string, client *http.Client) (*Applic
resp.Body.Close()
}()
//nolint:usestdlibvars
if resp.StatusCode/100 != 2 {
return nil, fmt.Errorf("non 2xx status '%d' response during eureka service discovery", resp.StatusCode)
}

View file

@ -87,7 +87,6 @@ func (d *robotDiscovery) refresh(context.Context) ([]*targetgroup.Group, error)
resp.Body.Close()
}()
//nolint:usestdlibvars
if resp.StatusCode/100 != 2 {
return nil, fmt.Errorf("non 2xx status '%d' response during hetzner service discovery with role robot", resp.StatusCode)
}

View file

@ -970,7 +970,7 @@ func TestEndpointsDiscoveryEmptyPodStatus(t *testing.T) {
}.Run(t)
}
// TestEndpointsUpdatePod makes sure that Endpoints discovery detects underlying Pods changes.
// TestEndpointsDiscoveryUpdatePod makes sure that Endpoints discovery detects underlying Pods changes.
// See https://github.com/prometheus/prometheus/issues/11305 for more details.
func TestEndpointsDiscoveryUpdatePod(t *testing.T) {
pod := &v1.Pod{

View file

@ -26,7 +26,6 @@ import (
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
v1 "k8s.io/api/discovery/v1"
"k8s.io/api/discovery/v1beta1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
@ -103,9 +102,9 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod
return
}
// TODO(brancz): use cache.Indexer to index endpoints by
// disv1beta1.LabelServiceName so this operation doesn't have to
// iterate over all endpoint objects.
// TODO(brancz): use cache.Indexer to index endpointslices by
// LabelServiceName so this operation doesn't have to iterate over all
// endpoint objects.
for _, obj := range e.endpointSliceStore.List() {
esa, err := e.getEndpointSliceAdaptor(obj)
if err != nil {
@ -241,8 +240,6 @@ func (e *EndpointSlice) getEndpointSliceAdaptor(o interface{}) (endpointSliceAda
switch endpointSlice := o.(type) {
case *v1.EndpointSlice:
return newEndpointSliceAdaptorFromV1(endpointSlice), nil
case *v1beta1.EndpointSlice:
return newEndpointSliceAdaptorFromV1beta1(endpointSlice), nil
default:
return nil, fmt.Errorf("received unexpected object: %v", o)
}

View file

@ -16,7 +16,6 @@ package kubernetes
import (
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/discovery/v1"
"k8s.io/api/discovery/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
@ -109,59 +108,6 @@ func (e *endpointSliceAdaptorV1) labelServiceName() string {
return v1.LabelServiceName
}
// Adaptor for k8s.io/api/discovery/v1beta1.
type endpointSliceAdaptorV1Beta1 struct {
endpointSlice *v1beta1.EndpointSlice
}
func newEndpointSliceAdaptorFromV1beta1(endpointSlice *v1beta1.EndpointSlice) endpointSliceAdaptor {
return &endpointSliceAdaptorV1Beta1{endpointSlice: endpointSlice}
}
func (e *endpointSliceAdaptorV1Beta1) get() interface{} {
return e.endpointSlice
}
func (e *endpointSliceAdaptorV1Beta1) getObjectMeta() metav1.ObjectMeta {
return e.endpointSlice.ObjectMeta
}
func (e *endpointSliceAdaptorV1Beta1) name() string {
return e.endpointSlice.Name
}
func (e *endpointSliceAdaptorV1Beta1) namespace() string {
return e.endpointSlice.Namespace
}
func (e *endpointSliceAdaptorV1Beta1) addressType() string {
return string(e.endpointSlice.AddressType)
}
func (e *endpointSliceAdaptorV1Beta1) endpoints() []endpointSliceEndpointAdaptor {
eps := make([]endpointSliceEndpointAdaptor, 0, len(e.endpointSlice.Endpoints))
for i := 0; i < len(e.endpointSlice.Endpoints); i++ {
eps = append(eps, newEndpointSliceEndpointAdaptorFromV1beta1(e.endpointSlice.Endpoints[i]))
}
return eps
}
func (e *endpointSliceAdaptorV1Beta1) ports() []endpointSlicePortAdaptor {
ports := make([]endpointSlicePortAdaptor, 0, len(e.endpointSlice.Ports))
for i := 0; i < len(e.endpointSlice.Ports); i++ {
ports = append(ports, newEndpointSlicePortAdaptorFromV1beta1(e.endpointSlice.Ports[i]))
}
return ports
}
func (e *endpointSliceAdaptorV1Beta1) labels() map[string]string {
return e.endpointSlice.Labels
}
func (e *endpointSliceAdaptorV1Beta1) labelServiceName() string {
return v1beta1.LabelServiceName
}
type endpointSliceEndpointAdaptorV1 struct {
endpoint v1.Endpoint
}
@ -218,62 +164,6 @@ func (e *endpointSliceEndpointConditionsAdaptorV1) terminating() *bool {
return e.endpointConditions.Terminating
}
type endpointSliceEndpointAdaptorV1beta1 struct {
endpoint v1beta1.Endpoint
}
func newEndpointSliceEndpointAdaptorFromV1beta1(endpoint v1beta1.Endpoint) endpointSliceEndpointAdaptor {
return &endpointSliceEndpointAdaptorV1beta1{endpoint: endpoint}
}
func (e *endpointSliceEndpointAdaptorV1beta1) addresses() []string {
return e.endpoint.Addresses
}
func (e *endpointSliceEndpointAdaptorV1beta1) hostname() *string {
return e.endpoint.Hostname
}
func (e *endpointSliceEndpointAdaptorV1beta1) nodename() *string {
return e.endpoint.NodeName
}
func (e *endpointSliceEndpointAdaptorV1beta1) zone() *string {
return nil
}
func (e *endpointSliceEndpointAdaptorV1beta1) conditions() endpointSliceEndpointConditionsAdaptor {
return newEndpointSliceEndpointConditionsAdaptorFromV1beta1(e.endpoint.Conditions)
}
func (e *endpointSliceEndpointAdaptorV1beta1) targetRef() *corev1.ObjectReference {
return e.endpoint.TargetRef
}
func (e *endpointSliceEndpointAdaptorV1beta1) topology() map[string]string {
return e.endpoint.Topology
}
type endpointSliceEndpointConditionsAdaptorV1beta1 struct {
endpointConditions v1beta1.EndpointConditions
}
func newEndpointSliceEndpointConditionsAdaptorFromV1beta1(endpointConditions v1beta1.EndpointConditions) endpointSliceEndpointConditionsAdaptor {
return &endpointSliceEndpointConditionsAdaptorV1beta1{endpointConditions: endpointConditions}
}
func (e *endpointSliceEndpointConditionsAdaptorV1beta1) ready() *bool {
return e.endpointConditions.Ready
}
func (e *endpointSliceEndpointConditionsAdaptorV1beta1) serving() *bool {
return e.endpointConditions.Serving
}
func (e *endpointSliceEndpointConditionsAdaptorV1beta1) terminating() *bool {
return e.endpointConditions.Terminating
}
type endpointSlicePortAdaptorV1 struct {
endpointPort v1.EndpointPort
}
@ -298,28 +188,3 @@ func (e *endpointSlicePortAdaptorV1) protocol() *string {
func (e *endpointSlicePortAdaptorV1) appProtocol() *string {
return e.endpointPort.AppProtocol
}
type endpointSlicePortAdaptorV1beta1 struct {
endpointPort v1beta1.EndpointPort
}
func newEndpointSlicePortAdaptorFromV1beta1(port v1beta1.EndpointPort) endpointSlicePortAdaptor {
return &endpointSlicePortAdaptorV1beta1{endpointPort: port}
}
func (e *endpointSlicePortAdaptorV1beta1) name() *string {
return e.endpointPort.Name
}
func (e *endpointSlicePortAdaptorV1beta1) port() *int32 {
return e.endpointPort.Port
}
func (e *endpointSlicePortAdaptorV1beta1) protocol() *string {
val := string(*e.endpointPort.Protocol)
return &val
}
func (e *endpointSlicePortAdaptorV1beta1) appProtocol() *string {
return e.endpointPort.AppProtocol
}

View file

@ -18,7 +18,6 @@ import (
"github.com/stretchr/testify/require"
v1 "k8s.io/api/discovery/v1"
"k8s.io/api/discovery/v1beta1"
)
func Test_EndpointSliceAdaptor_v1(t *testing.T) {
@ -48,31 +47,3 @@ func Test_EndpointSliceAdaptor_v1(t *testing.T) {
require.Equal(t, endpointSlice.Ports[i].AppProtocol, portAdaptor.appProtocol())
}
}
func Test_EndpointSliceAdaptor_v1beta1(t *testing.T) {
endpointSlice := makeEndpointSliceV1beta1()
adaptor := newEndpointSliceAdaptorFromV1beta1(endpointSlice)
require.Equal(t, endpointSlice.ObjectMeta.Name, adaptor.name())
require.Equal(t, endpointSlice.ObjectMeta.Namespace, adaptor.namespace())
require.Equal(t, endpointSlice.AddressType, v1beta1.AddressType(adaptor.addressType()))
require.Equal(t, endpointSlice.Labels, adaptor.labels())
require.Equal(t, "testendpoints", endpointSlice.Labels[v1beta1.LabelServiceName])
for i, endpointAdaptor := range adaptor.endpoints() {
require.Equal(t, endpointSlice.Endpoints[i].Addresses, endpointAdaptor.addresses())
require.Equal(t, endpointSlice.Endpoints[i].Hostname, endpointAdaptor.hostname())
require.Equal(t, endpointSlice.Endpoints[i].Conditions.Ready, endpointAdaptor.conditions().ready())
require.Equal(t, endpointSlice.Endpoints[i].Conditions.Serving, endpointAdaptor.conditions().serving())
require.Equal(t, endpointSlice.Endpoints[i].Conditions.Terminating, endpointAdaptor.conditions().terminating())
require.Equal(t, endpointSlice.Endpoints[i].TargetRef, endpointAdaptor.targetRef())
require.Equal(t, endpointSlice.Endpoints[i].Topology, endpointAdaptor.topology())
}
for i, portAdaptor := range adaptor.ports() {
require.Equal(t, endpointSlice.Ports[i].Name, portAdaptor.name())
require.Equal(t, endpointSlice.Ports[i].Port, portAdaptor.port())
require.EqualValues(t, endpointSlice.Ports[i].Protocol, portAdaptor.protocol())
require.Equal(t, endpointSlice.Ports[i].AppProtocol, portAdaptor.appProtocol())
}
}

View file

@ -21,7 +21,6 @@ import (
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/discovery/v1"
"k8s.io/api/discovery/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
@ -114,62 +113,8 @@ func makeEndpointSliceV1() *v1.EndpointSlice {
}
}
func makeEndpointSliceV1beta1() *v1beta1.EndpointSlice {
return &v1beta1.EndpointSlice{
ObjectMeta: metav1.ObjectMeta{
Name: "testendpoints",
Namespace: "default",
Labels: map[string]string{
v1beta1.LabelServiceName: "testendpoints",
},
Annotations: map[string]string{
"test.annotation": "test",
},
},
AddressType: v1beta1.AddressTypeIPv4,
Ports: []v1beta1.EndpointPort{
{
Name: strptr("testport"),
Port: int32ptr(9000),
Protocol: protocolptr(corev1.ProtocolTCP),
},
},
Endpoints: []v1beta1.Endpoint{
{
Addresses: []string{"1.2.3.4"},
Hostname: strptr("testendpoint1"),
}, {
Addresses: []string{"2.3.4.5"},
Conditions: v1beta1.EndpointConditions{
Ready: boolptr(true),
Serving: boolptr(true),
Terminating: boolptr(false),
},
}, {
Addresses: []string{"3.4.5.6"},
Conditions: v1beta1.EndpointConditions{
Ready: boolptr(false),
Serving: boolptr(true),
Terminating: boolptr(true),
},
}, {
Addresses: []string{"4.5.6.7"},
Conditions: v1beta1.EndpointConditions{
Ready: boolptr(true),
Serving: boolptr(true),
Terminating: boolptr(false),
},
TargetRef: &corev1.ObjectReference{
Kind: "Node",
Name: "barbaz",
},
},
},
}
}
func TestEndpointSliceDiscoveryBeforeRun(t *testing.T) {
n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.25.0")
n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}})
k8sDiscoveryTest{
discovery: n,
@ -249,71 +194,6 @@ func TestEndpointSliceDiscoveryBeforeRun(t *testing.T) {
}.Run(t)
}
func TestEndpointSliceDiscoveryBeforeRunV1beta1(t *testing.T) {
n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "1.20.0")
k8sDiscoveryTest{
discovery: n,
beforeRun: func() {
obj := makeEndpointSliceV1beta1()
c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{})
},
expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{
"endpointslice/default/testendpoints": {
Targets: []model.LabelSet{
{
"__address__": "1.2.3.4:9000",
"__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "2.3.4.5:9000",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "3.4.5.6:9000",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
},
Source: "endpointslice/default/testendpoints",
},
},
}.Run(t)
}
func TestEndpointSliceDiscoveryAdd(t *testing.T) {
obj := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
@ -353,25 +233,25 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) {
PodIP: "1.2.3.4",
},
}
n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.20.0", obj)
n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, obj)
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := &v1beta1.EndpointSlice{
obj := &v1.EndpointSlice{
ObjectMeta: metav1.ObjectMeta{
Name: "testendpoints",
Namespace: "default",
},
AddressType: v1beta1.AddressTypeIPv4,
Ports: []v1beta1.EndpointPort{
AddressType: v1.AddressTypeIPv4,
Ports: []v1.EndpointPort{
{
Name: strptr("testport"),
Port: int32ptr(9000),
Protocol: protocolptr(corev1.ProtocolTCP),
},
},
Endpoints: []v1beta1.Endpoint{
Endpoints: []v1.Endpoint{
{
Addresses: []string{"4.3.2.1"},
TargetRef: &corev1.ObjectReference{
@ -379,13 +259,13 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) {
Name: "testpod",
Namespace: "default",
},
Conditions: v1beta1.EndpointConditions{
Conditions: v1.EndpointConditions{
Ready: boolptr(false),
},
},
},
}
c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{})
c.DiscoveryV1().EndpointSlices(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{})
},
expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{
@ -440,118 +320,34 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) {
}
func TestEndpointSliceDiscoveryDelete(t *testing.T) {
n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1())
n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1())
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makeEndpointSliceV1()
c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Delete(context.Background(), obj.Name, metav1.DeleteOptions{})
c.DiscoveryV1().EndpointSlices(obj.Namespace).Delete(context.Background(), obj.Name, metav1.DeleteOptions{})
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{
"endpointslice/default/testendpoints": {
Source: "endpointslice/default/testendpoints",
Targets: []model.LabelSet{
{
"__address__": "1.2.3.4:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "",
"__meta_kubernetes_endpointslice_address_target_name": "",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1",
"__meta_kubernetes_endpointslice_endpoint_node_name": "foobar",
"__meta_kubernetes_endpointslice_endpoint_topology_present_topology": "true",
"__meta_kubernetes_endpointslice_endpoint_topology_topology": "value",
"__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "2.3.4.5:9000",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1b",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "3.4.5.6:9000",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true",
"__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1c",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
},
Labels: map[model.LabelName]model.LabelValue{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
"__meta_kubernetes_namespace": "default",
},
},
},
}.Run(t)
}
func TestEndpointSliceDiscoveryUpdate(t *testing.T) {
n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1())
n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1())
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := &v1beta1.EndpointSlice{
ObjectMeta: metav1.ObjectMeta{
Name: "testendpoints",
Namespace: "default",
},
AddressType: v1beta1.AddressTypeIPv4,
Ports: []v1beta1.EndpointPort{
{
Name: strptr("testport"),
Port: int32ptr(9000),
Protocol: protocolptr(corev1.ProtocolTCP),
},
},
Endpoints: []v1beta1.Endpoint{
{
Addresses: []string{"1.2.3.4"},
Hostname: strptr("testendpoint1"),
}, {
Addresses: []string{"2.3.4.5"},
Conditions: v1beta1.EndpointConditions{
Ready: boolptr(true),
},
},
},
}
c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{})
obj := makeEndpointSliceV1()
obj.ObjectMeta.Labels = nil
obj.ObjectMeta.Annotations = nil
obj.Endpoints = obj.Endpoints[0:2]
c.DiscoveryV1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{})
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{
@ -586,39 +382,11 @@ func TestEndpointSliceDiscoveryUpdate(t *testing.T) {
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "3.4.5.6:9000",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true",
"__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1c",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_namespace": "default",
},
},
},
@ -626,85 +394,18 @@ func TestEndpointSliceDiscoveryUpdate(t *testing.T) {
}
func TestEndpointSliceDiscoveryEmptyEndpoints(t *testing.T) {
n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1())
n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1())
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := &v1beta1.EndpointSlice{
ObjectMeta: metav1.ObjectMeta{
Name: "testendpoints",
Namespace: "default",
},
AddressType: v1beta1.AddressTypeIPv4,
Ports: []v1beta1.EndpointPort{
{
Name: strptr("testport"),
Port: int32ptr(9000),
Protocol: protocolptr(corev1.ProtocolTCP),
},
},
Endpoints: []v1beta1.Endpoint{},
}
c.DiscoveryV1beta1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{})
obj := makeEndpointSliceV1()
obj.Endpoints = []v1.Endpoint{}
c.DiscoveryV1().EndpointSlices(obj.Namespace).Update(context.Background(), obj, metav1.UpdateOptions{})
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{
"endpointslice/default/testendpoints": {
Targets: []model.LabelSet{
{
"__address__": "1.2.3.4:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "",
"__meta_kubernetes_endpointslice_address_target_name": "",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_endpoint_hostname": "testendpoint1",
"__meta_kubernetes_endpointslice_endpoint_node_name": "foobar",
"__meta_kubernetes_endpointslice_endpoint_topology_present_topology": "true",
"__meta_kubernetes_endpointslice_endpoint_topology_topology": "value",
"__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "2.3.4.5:9000",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1b",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "3.4.5.6:9000",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "false",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "true",
"__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1c",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_endpoint_zone": "us-east-1a",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
@ -721,7 +422,7 @@ func TestEndpointSliceDiscoveryEmptyEndpoints(t *testing.T) {
}
func TestEndpointSliceDiscoveryWithService(t *testing.T) {
n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1())
n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1())
k8sDiscoveryTest{
discovery: n,
@ -813,7 +514,7 @@ func TestEndpointSliceDiscoveryWithService(t *testing.T) {
}
func TestEndpointSliceDiscoveryWithServiceUpdate(t *testing.T) {
n, c := makeDiscoveryWithVersion(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, "v1.21.0", makeEndpointSliceV1())
n, c := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{Names: []string{"default"}}, makeEndpointSliceV1())
k8sDiscoveryTest{
discovery: n,

View file

@ -24,7 +24,6 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
v1 "k8s.io/api/networking/v1"
"k8s.io/api/networking/v1beta1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
@ -127,8 +126,6 @@ func (i *Ingress) process(ctx context.Context, ch chan<- []*targetgroup.Group) b
switch ingress := o.(type) {
case *v1.Ingress:
ia = newIngressAdaptorFromV1(ingress)
case *v1beta1.Ingress:
ia = newIngressAdaptorFromV1beta1(ingress)
default:
level.Error(i.logger).Log("msg", "converting to Ingress object failed", "err",
fmt.Errorf("received unexpected object: %v", o))

View file

@ -15,7 +15,6 @@ package kubernetes
import (
v1 "k8s.io/api/networking/v1"
"k8s.io/api/networking/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
@ -89,56 +88,3 @@ func (i *ingressRuleAdaptorV1) paths() []string {
}
func (i *ingressRuleAdaptorV1) host() string { return i.rule.Host }
// Adaptor for networking.k8s.io/v1beta1.
type ingressAdaptorV1Beta1 struct {
ingress *v1beta1.Ingress
}
func newIngressAdaptorFromV1beta1(ingress *v1beta1.Ingress) ingressAdaptor {
return &ingressAdaptorV1Beta1{ingress: ingress}
}
func (i *ingressAdaptorV1Beta1) getObjectMeta() metav1.ObjectMeta { return i.ingress.ObjectMeta }
func (i *ingressAdaptorV1Beta1) name() string { return i.ingress.Name }
func (i *ingressAdaptorV1Beta1) namespace() string { return i.ingress.Namespace }
func (i *ingressAdaptorV1Beta1) labels() map[string]string { return i.ingress.Labels }
func (i *ingressAdaptorV1Beta1) annotations() map[string]string { return i.ingress.Annotations }
func (i *ingressAdaptorV1Beta1) ingressClassName() *string { return i.ingress.Spec.IngressClassName }
func (i *ingressAdaptorV1Beta1) tlsHosts() []string {
var hosts []string
for _, tls := range i.ingress.Spec.TLS {
hosts = append(hosts, tls.Hosts...)
}
return hosts
}
func (i *ingressAdaptorV1Beta1) rules() []ingressRuleAdaptor {
var rules []ingressRuleAdaptor
for _, rule := range i.ingress.Spec.Rules {
rules = append(rules, newIngressRuleAdaptorFromV1Beta1(rule))
}
return rules
}
type ingressRuleAdaptorV1Beta1 struct {
rule v1beta1.IngressRule
}
func newIngressRuleAdaptorFromV1Beta1(rule v1beta1.IngressRule) ingressRuleAdaptor {
return &ingressRuleAdaptorV1Beta1{rule: rule}
}
func (i *ingressRuleAdaptorV1Beta1) paths() []string {
rv := i.rule.IngressRuleValue
if rv.HTTP == nil {
return nil
}
paths := make([]string, len(rv.HTTP.Paths))
for n, p := range rv.HTTP.Paths {
paths[n] = p.Path
}
return paths
}
func (i *ingressRuleAdaptorV1Beta1) host() string { return i.rule.Host }

View file

@ -20,7 +20,6 @@ import (
"github.com/prometheus/common/model"
v1 "k8s.io/api/networking/v1"
"k8s.io/api/networking/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/prometheus/prometheus/discovery/targetgroup"
@ -89,60 +88,6 @@ func makeIngress(tls TLSMode) *v1.Ingress {
return ret
}
func makeIngressV1beta1(tls TLSMode) *v1beta1.Ingress {
ret := &v1beta1.Ingress{
ObjectMeta: metav1.ObjectMeta{
Name: "testingress",
Namespace: "default",
Labels: map[string]string{"test/label": "testvalue"},
Annotations: map[string]string{"test/annotation": "testannotationvalue"},
},
Spec: v1beta1.IngressSpec{
IngressClassName: classString("testclass"),
TLS: nil,
Rules: []v1beta1.IngressRule{
{
Host: "example.com",
IngressRuleValue: v1beta1.IngressRuleValue{
HTTP: &v1beta1.HTTPIngressRuleValue{
Paths: []v1beta1.HTTPIngressPath{
{Path: "/"},
{Path: "/foo"},
},
},
},
},
{
// No backend config, ignored
Host: "nobackend.example.com",
IngressRuleValue: v1beta1.IngressRuleValue{
HTTP: &v1beta1.HTTPIngressRuleValue{},
},
},
{
Host: "test.example.com",
IngressRuleValue: v1beta1.IngressRuleValue{
HTTP: &v1beta1.HTTPIngressRuleValue{
Paths: []v1beta1.HTTPIngressPath{{}},
},
},
},
},
},
}
switch tls {
case TLSYes:
ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: []string{"example.com", "test.example.com"}}}
case TLSMixed:
ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: []string{"example.com"}}}
case TLSWildcard:
ret.Spec.TLS = []v1beta1.IngressTLS{{Hosts: []string{"*.example.com"}}}
}
return ret
}
func classString(v string) *string {
return &v
}
@ -212,20 +157,6 @@ func TestIngressDiscoveryAdd(t *testing.T) {
}.Run(t)
}
func TestIngressDiscoveryAddV1beta1(t *testing.T) {
n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}, "v1.18.0")
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makeIngressV1beta1(TLSNo)
c.NetworkingV1beta1().Ingresses("default").Create(context.Background(), obj, metav1.CreateOptions{})
},
expectedMaxItems: 1,
expectedRes: expectedTargetGroups("default", TLSNo),
}.Run(t)
}
func TestIngressDiscoveryAddTLS(t *testing.T) {
n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})
@ -240,20 +171,6 @@ func TestIngressDiscoveryAddTLS(t *testing.T) {
}.Run(t)
}
func TestIngressDiscoveryAddTLSV1beta1(t *testing.T) {
n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}, "v1.18.0")
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makeIngressV1beta1(TLSYes)
c.NetworkingV1beta1().Ingresses("default").Create(context.Background(), obj, metav1.CreateOptions{})
},
expectedMaxItems: 1,
expectedRes: expectedTargetGroups("default", TLSYes),
}.Run(t)
}
func TestIngressDiscoveryAddMixed(t *testing.T) {
n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})
@ -268,20 +185,6 @@ func TestIngressDiscoveryAddMixed(t *testing.T) {
}.Run(t)
}
func TestIngressDiscoveryAddMixedV1beta1(t *testing.T) {
n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}, "v1.18.0")
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makeIngressV1beta1(TLSMixed)
c.NetworkingV1beta1().Ingresses("default").Create(context.Background(), obj, metav1.CreateOptions{})
},
expectedMaxItems: 1,
expectedRes: expectedTargetGroups("default", TLSMixed),
}.Run(t)
}
func TestIngressDiscoveryNamespaces(t *testing.T) {
n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"ns1", "ns2"}})
@ -303,27 +206,6 @@ func TestIngressDiscoveryNamespaces(t *testing.T) {
}.Run(t)
}
func TestIngressDiscoveryNamespacesV1beta1(t *testing.T) {
n, c := makeDiscoveryWithVersion(RoleIngress, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}, "v1.18.0")
expected := expectedTargetGroups("ns1", TLSNo)
for k, v := range expectedTargetGroups("ns2", TLSNo) {
expected[k] = v
}
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
for _, ns := range []string{"ns1", "ns2"} {
obj := makeIngressV1beta1(TLSNo)
obj.Namespace = ns
c.NetworkingV1beta1().Ingresses(obj.Namespace).Create(context.Background(), obj, metav1.CreateOptions{})
}
},
expectedMaxItems: 2,
expectedRes: expected,
}.Run(t)
}
func TestIngressDiscoveryOwnNamespace(t *testing.T) {
n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{IncludeOwnNamespace: true})

View file

@ -25,8 +25,6 @@ import (
"github.com/prometheus/prometheus/util/strutil"
disv1beta1 "k8s.io/api/discovery/v1beta1"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
@ -36,12 +34,10 @@ import (
apiv1 "k8s.io/api/core/v1"
disv1 "k8s.io/api/discovery/v1"
networkv1 "k8s.io/api/networking/v1"
"k8s.io/api/networking/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
utilversion "k8s.io/apimachinery/pkg/util/version"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
@ -401,55 +397,22 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
switch d.role {
case RoleEndpointSlice:
// Check "networking.k8s.io/v1" availability with retries.
// If "v1" is not available, use "networking.k8s.io/v1beta1" for backward compatibility
var v1Supported bool
if retryOnError(ctx, 10*time.Second,
func() (err error) {
v1Supported, err = checkDiscoveryV1Supported(d.client)
if err != nil {
level.Error(d.logger).Log("msg", "Failed to check networking.k8s.io/v1 availability", "err", err)
}
return err
},
) {
d.Unlock()
return
}
for _, namespace := range namespaces {
var informer cache.SharedIndexInformer
if v1Supported {
e := d.client.DiscoveryV1().EndpointSlices(namespace)
elw := &cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
options.FieldSelector = d.selectors.endpointslice.field
options.LabelSelector = d.selectors.endpointslice.label
return e.List(ctx, options)
},
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
options.FieldSelector = d.selectors.endpointslice.field
options.LabelSelector = d.selectors.endpointslice.label
return e.Watch(ctx, options)
},
}
informer = d.newEndpointSlicesByNodeInformer(elw, &disv1.EndpointSlice{})
} else {
e := d.client.DiscoveryV1beta1().EndpointSlices(namespace)
elw := &cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
options.FieldSelector = d.selectors.endpointslice.field
options.LabelSelector = d.selectors.endpointslice.label
return e.List(ctx, options)
},
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
options.FieldSelector = d.selectors.endpointslice.field
options.LabelSelector = d.selectors.endpointslice.label
return e.Watch(ctx, options)
},
}
informer = d.newEndpointSlicesByNodeInformer(elw, &disv1beta1.EndpointSlice{})
e := d.client.DiscoveryV1().EndpointSlices(namespace)
elw := &cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
options.FieldSelector = d.selectors.endpointslice.field
options.LabelSelector = d.selectors.endpointslice.label
return e.List(ctx, options)
},
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
options.FieldSelector = d.selectors.endpointslice.field
options.LabelSelector = d.selectors.endpointslice.label
return e.Watch(ctx, options)
},
}
informer = d.newEndpointSlicesByNodeInformer(elw, &disv1.EndpointSlice{})
s := d.client.CoreV1().Services(namespace)
slw := &cache.ListWatch{
@ -609,55 +572,22 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
go svc.informer.Run(ctx.Done())
}
case RoleIngress:
// Check "networking.k8s.io/v1" availability with retries.
// If "v1" is not available, use "networking.k8s.io/v1beta1" for backward compatibility
var v1Supported bool
if retryOnError(ctx, 10*time.Second,
func() (err error) {
v1Supported, err = checkNetworkingV1Supported(d.client)
if err != nil {
level.Error(d.logger).Log("msg", "Failed to check networking.k8s.io/v1 availability", "err", err)
}
return err
},
) {
d.Unlock()
return
}
for _, namespace := range namespaces {
var informer cache.SharedInformer
if v1Supported {
i := d.client.NetworkingV1().Ingresses(namespace)
ilw := &cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
options.FieldSelector = d.selectors.ingress.field
options.LabelSelector = d.selectors.ingress.label
return i.List(ctx, options)
},
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
options.FieldSelector = d.selectors.ingress.field
options.LabelSelector = d.selectors.ingress.label
return i.Watch(ctx, options)
},
}
informer = d.mustNewSharedInformer(ilw, &networkv1.Ingress{}, resyncDisabled)
} else {
i := d.client.NetworkingV1beta1().Ingresses(namespace)
ilw := &cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
options.FieldSelector = d.selectors.ingress.field
options.LabelSelector = d.selectors.ingress.label
return i.List(ctx, options)
},
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
options.FieldSelector = d.selectors.ingress.field
options.LabelSelector = d.selectors.ingress.label
return i.Watch(ctx, options)
},
}
informer = d.mustNewSharedInformer(ilw, &v1beta1.Ingress{}, resyncDisabled)
i := d.client.NetworkingV1().Ingresses(namespace)
ilw := &cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
options.FieldSelector = d.selectors.ingress.field
options.LabelSelector = d.selectors.ingress.label
return i.List(ctx, options)
},
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
options.FieldSelector = d.selectors.ingress.field
options.LabelSelector = d.selectors.ingress.label
return i.Watch(ctx, options)
},
}
informer = d.mustNewSharedInformer(ilw, &networkv1.Ingress{}, resyncDisabled)
ingress := NewIngress(
log.With(d.logger, "role", "ingress"),
informer,
@ -720,20 +650,6 @@ func retryOnError(ctx context.Context, interval time.Duration, f func() error) (
}
}
func checkNetworkingV1Supported(client kubernetes.Interface) (bool, error) {
k8sVer, err := client.Discovery().ServerVersion()
if err != nil {
return false, err
}
semVer, err := utilversion.ParseSemantic(k8sVer.String())
if err != nil {
return false, err
}
// networking.k8s.io/v1 is available since Kubernetes v1.19
// https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.19.md
return semVer.Major() >= 1 && semVer.Minor() >= 19, nil
}
func (d *Discovery) newNodeInformer(ctx context.Context) cache.SharedInformer {
nlw := &cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
@ -834,19 +750,6 @@ func (d *Discovery) newEndpointSlicesByNodeInformer(plw *cache.ListWatch, object
}
}
}
case *disv1beta1.EndpointSlice:
for _, target := range e.Endpoints {
if target.TargetRef != nil {
switch target.TargetRef.Kind {
case "Pod":
if target.NodeName != nil {
nodes = append(nodes, *target.NodeName)
}
case "Node":
nodes = append(nodes, target.TargetRef.Name)
}
}
}
default:
return nil, fmt.Errorf("object is not an endpointslice")
}
@ -882,21 +785,6 @@ func (d *Discovery) mustNewSharedIndexInformer(lw cache.ListerWatcher, exampleOb
return informer
}
func checkDiscoveryV1Supported(client kubernetes.Interface) (bool, error) {
k8sVer, err := client.Discovery().ServerVersion()
if err != nil {
return false, err
}
semVer, err := utilversion.ParseSemantic(k8sVer.String())
if err != nil {
return false, err
}
// The discovery.k8s.io/v1beta1 API version of EndpointSlice will no longer be served in v1.25.
// discovery.k8s.io/v1 is available since Kubernetes v1.21
// https://kubernetes.io/docs/reference/using-api/deprecation-guide/#v1-25
return semVer.Major() >= 1 && semVer.Minor() >= 21, nil
}
func addObjectMetaLabels(labelSet model.LabelSet, objectMeta metav1.ObjectMeta, role Role) {
labelSet[model.LabelName(metaLabelPrefix+string(role)+"_name")] = lv(objectMeta.Name)

View file

@ -46,7 +46,7 @@ func TestMain(m *testing.M) {
// makeDiscovery creates a kubernetes.Discovery instance for testing.
func makeDiscovery(role Role, nsDiscovery NamespaceDiscovery, objects ...runtime.Object) (*Discovery, kubernetes.Interface) {
return makeDiscoveryWithVersion(role, nsDiscovery, "v1.22.0", objects...)
return makeDiscoveryWithVersion(role, nsDiscovery, "v1.25.0", objects...)
}
// makeDiscoveryWithVersion creates a kubernetes.Discovery instance with the specified kubernetes version for testing.
@ -154,7 +154,7 @@ func (d k8sDiscoveryTest) Run(t *testing.T) {
// readResultWithTimeout reads all targetgroups from channel with timeout.
// It merges targetgroups by source and sends the result to result channel.
func readResultWithTimeout(t *testing.T, ctx context.Context, ch <-chan []*targetgroup.Group, max int, stopAfter time.Duration, resChan chan<- map[string]*targetgroup.Group) {
func readResultWithTimeout(t *testing.T, ctx context.Context, ch <-chan []*targetgroup.Group, maxGroups int, stopAfter time.Duration, resChan chan<- map[string]*targetgroup.Group) {
res := make(map[string]*targetgroup.Group)
timeout := time.After(stopAfter)
Loop:
@ -167,7 +167,7 @@ Loop:
}
res[tg.Source] = tg
}
if len(res) == max {
if len(res) == maxGroups {
// Reached max target groups we may get, break fast.
break Loop
}
@ -175,10 +175,10 @@ Loop:
// Because we use queue, an object that is created then
// deleted or updated may be processed only once.
// So possibly we may skip events, timed out here.
t.Logf("timed out, got %d (max: %d) items, some events are skipped", len(res), max)
t.Logf("timed out, got %d (max: %d) items, some events are skipped", len(res), maxGroups)
break Loop
case <-ctx.Done():
t.Logf("stopped, got %d (max: %d) items", len(res), max)
t.Logf("stopped, got %d (max: %d) items", len(res), maxGroups)
break Loop
}
}
@ -285,40 +285,6 @@ func TestRetryOnError(t *testing.T) {
}
}
func TestCheckNetworkingV1Supported(t *testing.T) {
tests := []struct {
version string
wantSupported bool
wantErr bool
}{
{version: "v1.18.0", wantSupported: false, wantErr: false},
{version: "v1.18.1", wantSupported: false, wantErr: false},
// networking v1 is supported since Kubernetes v1.19
{version: "v1.19.0", wantSupported: true, wantErr: false},
{version: "v1.20.0-beta.2", wantSupported: true, wantErr: false},
// error patterns
{version: "", wantSupported: false, wantErr: true},
{version: "<>", wantSupported: false, wantErr: true},
}
for _, tc := range tests {
tc := tc
t.Run(tc.version, func(t *testing.T) {
clientset := fake.NewSimpleClientset()
fakeDiscovery, _ := clientset.Discovery().(*fakediscovery.FakeDiscovery)
fakeDiscovery.FakedServerVersion = &version.Info{GitVersion: tc.version}
supported, err := checkNetworkingV1Supported(clientset)
if tc.wantErr {
require.Error(t, err)
} else {
require.NoError(t, err)
}
require.Equal(t, tc.wantSupported, supported)
})
}
}
func TestFailuresCountMetric(t *testing.T) {
tests := []struct {
role Role

View file

@ -1,332 +0,0 @@
// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package legacymanager
import (
"context"
"fmt"
"reflect"
"sync"
"time"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
type poolKey struct {
setName string
provider string
}
// provider holds a Discoverer instance, its configuration and its subscribers.
type provider struct {
name string
d discovery.Discoverer
subs []string
config interface{}
}
// NewManager is the Discovery Manager constructor.
func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, sdMetrics map[string]discovery.DiscovererMetrics, options ...func(*Manager)) *Manager {
if logger == nil {
logger = log.NewNopLogger()
}
mgr := &Manager{
logger: logger,
syncCh: make(chan map[string][]*targetgroup.Group),
targets: make(map[poolKey]map[string]*targetgroup.Group),
discoverCancel: []context.CancelFunc{},
ctx: ctx,
updatert: 5 * time.Second,
triggerSend: make(chan struct{}, 1),
registerer: registerer,
sdMetrics: sdMetrics,
}
for _, option := range options {
option(mgr)
}
// Register the metrics.
// We have to do this after setting all options, so that the name of the Manager is set.
if metrics, err := discovery.NewManagerMetrics(registerer, mgr.name); err == nil {
mgr.metrics = metrics
} else {
level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err)
return nil
}
return mgr
}
// Name sets the name of the manager.
func Name(n string) func(*Manager) {
return func(m *Manager) {
m.mtx.Lock()
defer m.mtx.Unlock()
m.name = n
}
}
// Manager maintains a set of discovery providers and sends each update to a map channel.
// Targets are grouped by the target set name.
type Manager struct {
logger log.Logger
name string
mtx sync.RWMutex
ctx context.Context
discoverCancel []context.CancelFunc
// Some Discoverers(eg. k8s) send only the updates for a given target group
// so we use map[tg.Source]*targetgroup.Group to know which group to update.
targets map[poolKey]map[string]*targetgroup.Group
// providers keeps track of SD providers.
providers []*provider
// The sync channel sends the updates as a map where the key is the job value from the scrape config.
syncCh chan map[string][]*targetgroup.Group
// How long to wait before sending updates to the channel. The variable
// should only be modified in unit tests.
updatert time.Duration
// The triggerSend channel signals to the manager that new updates have been received from providers.
triggerSend chan struct{}
// A registerer for all service discovery metrics.
registerer prometheus.Registerer
metrics *discovery.Metrics
sdMetrics map[string]discovery.DiscovererMetrics
}
// Run starts the background processing.
func (m *Manager) Run() error {
go m.sender()
<-m.ctx.Done()
m.cancelDiscoverers()
return m.ctx.Err()
}
// SyncCh returns a read only channel used by all the clients to receive target updates.
func (m *Manager) SyncCh() <-chan map[string][]*targetgroup.Group {
return m.syncCh
}
// ApplyConfig removes all running discovery providers and starts new ones using the provided config.
func (m *Manager) ApplyConfig(cfg map[string]discovery.Configs) error {
m.mtx.Lock()
defer m.mtx.Unlock()
for pk := range m.targets {
if _, ok := cfg[pk.setName]; !ok {
m.metrics.DiscoveredTargets.DeleteLabelValues(m.name, pk.setName)
}
}
m.cancelDiscoverers()
m.targets = make(map[poolKey]map[string]*targetgroup.Group)
m.providers = nil
m.discoverCancel = nil
failedCount := 0
for name, scfg := range cfg {
failedCount += m.registerProviders(scfg, name)
m.metrics.DiscoveredTargets.WithLabelValues(name).Set(0)
}
m.metrics.FailedConfigs.Set(float64(failedCount))
for _, prov := range m.providers {
m.startProvider(m.ctx, prov)
}
return nil
}
// StartCustomProvider is used for sdtool. Only use this if you know what you're doing.
func (m *Manager) StartCustomProvider(ctx context.Context, name string, worker discovery.Discoverer) {
p := &provider{
name: name,
d: worker,
subs: []string{name},
}
m.providers = append(m.providers, p)
m.startProvider(ctx, p)
}
func (m *Manager) startProvider(ctx context.Context, p *provider) {
level.Debug(m.logger).Log("msg", "Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs))
ctx, cancel := context.WithCancel(ctx)
updates := make(chan []*targetgroup.Group)
m.discoverCancel = append(m.discoverCancel, cancel)
go p.d.Run(ctx, updates)
go m.updater(ctx, p, updates)
}
func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targetgroup.Group) {
for {
select {
case <-ctx.Done():
return
case tgs, ok := <-updates:
m.metrics.ReceivedUpdates.Inc()
if !ok {
level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name)
return
}
for _, s := range p.subs {
m.updateGroup(poolKey{setName: s, provider: p.name}, tgs)
}
select {
case m.triggerSend <- struct{}{}:
default:
}
}
}
}
func (m *Manager) sender() {
ticker := time.NewTicker(m.updatert)
defer ticker.Stop()
for {
select {
case <-m.ctx.Done():
return
case <-ticker.C: // Some discoverers send updates too often so we throttle these with the ticker.
select {
case <-m.triggerSend:
m.metrics.SentUpdates.Inc()
select {
case m.syncCh <- m.allGroups():
default:
m.metrics.DelayedUpdates.Inc()
level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle")
select {
case m.triggerSend <- struct{}{}:
default:
}
}
default:
}
}
}
}
func (m *Manager) cancelDiscoverers() {
for _, c := range m.discoverCancel {
c()
}
}
func (m *Manager) updateGroup(poolKey poolKey, tgs []*targetgroup.Group) {
m.mtx.Lock()
defer m.mtx.Unlock()
if _, ok := m.targets[poolKey]; !ok {
m.targets[poolKey] = make(map[string]*targetgroup.Group)
}
for _, tg := range tgs {
if tg != nil { // Some Discoverers send nil target group so need to check for it to avoid panics.
m.targets[poolKey][tg.Source] = tg
}
}
}
func (m *Manager) allGroups() map[string][]*targetgroup.Group {
m.mtx.RLock()
defer m.mtx.RUnlock()
tSets := map[string][]*targetgroup.Group{}
n := map[string]int{}
for pkey, tsets := range m.targets {
for _, tg := range tsets {
// Even if the target group 'tg' is empty we still need to send it to the 'Scrape manager'
// to signal that it needs to stop all scrape loops for this target set.
tSets[pkey.setName] = append(tSets[pkey.setName], tg)
n[pkey.setName] += len(tg.Targets)
}
}
for setName, v := range n {
m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v))
}
return tSets
}
// registerProviders returns a number of failed SD config.
func (m *Manager) registerProviders(cfgs discovery.Configs, setName string) int {
var (
failed int
added bool
)
add := func(cfg discovery.Config) {
for _, p := range m.providers {
if reflect.DeepEqual(cfg, p.config) {
p.subs = append(p.subs, setName)
added = true
return
}
}
typ := cfg.Name()
d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{
Logger: log.With(m.logger, "discovery", typ, "config", setName),
Metrics: m.sdMetrics[typ],
})
if err != nil {
level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName)
failed++
return
}
m.providers = append(m.providers, &provider{
name: fmt.Sprintf("%s/%d", typ, len(m.providers)),
d: d,
config: cfg,
subs: []string{setName},
})
added = true
}
for _, cfg := range cfgs {
add(cfg)
}
if !added {
// Add an empty target group to force the refresh of the corresponding
// scrape pool and to notify the receiver that this target set has no
// current targets.
// It can happen because the combined set of SD configurations is empty
// or because we fail to instantiate all the SD configurations.
add(discovery.StaticConfig{{}})
}
return failed
}
// StaticProvider holds a list of target groups that never change.
type StaticProvider struct {
TargetGroups []*targetgroup.Group
}
// Run implements the Worker interface.
func (sd *StaticProvider) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
// We still have to consider that the consumer exits right away in which case
// the context will be canceled.
select {
case ch <- sd.TargetGroups:
case <-ctx.Done():
}
close(ch)
}

File diff suppressed because it is too large Load diff

View file

@ -1,261 +0,0 @@
// Copyright 2020 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package legacymanager
import (
"errors"
"fmt"
"reflect"
"sort"
"strconv"
"strings"
"sync"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
const (
configFieldPrefix = "AUTO_DISCOVERY_"
staticConfigsKey = "static_configs"
staticConfigsFieldName = configFieldPrefix + staticConfigsKey
)
var (
configNames = make(map[string]discovery.Config)
configFieldNames = make(map[reflect.Type]string)
configFields []reflect.StructField
configTypesMu sync.Mutex
configTypes = make(map[reflect.Type]reflect.Type)
emptyStructType = reflect.TypeOf(struct{}{})
configsType = reflect.TypeOf(discovery.Configs{})
)
// RegisterConfig registers the given Config type for YAML marshaling and unmarshaling.
func RegisterConfig(config discovery.Config) {
registerConfig(config.Name()+"_sd_configs", reflect.TypeOf(config), config)
}
func init() {
// N.B.: static_configs is the only Config type implemented by default.
// All other types are registered at init by their implementing packages.
elemTyp := reflect.TypeOf(&targetgroup.Group{})
registerConfig(staticConfigsKey, elemTyp, discovery.StaticConfig{})
}
func registerConfig(yamlKey string, elemType reflect.Type, config discovery.Config) {
name := config.Name()
if _, ok := configNames[name]; ok {
panic(fmt.Sprintf("discovery: Config named %q is already registered", name))
}
configNames[name] = config
fieldName := configFieldPrefix + yamlKey // Field must be exported.
configFieldNames[elemType] = fieldName
// Insert fields in sorted order.
i := sort.Search(len(configFields), func(k int) bool {
return fieldName < configFields[k].Name
})
configFields = append(configFields, reflect.StructField{}) // Add empty field at end.
copy(configFields[i+1:], configFields[i:]) // Shift fields to the right.
configFields[i] = reflect.StructField{ // Write new field in place.
Name: fieldName,
Type: reflect.SliceOf(elemType),
Tag: reflect.StructTag(`yaml:"` + yamlKey + `,omitempty"`),
}
}
func getConfigType(out reflect.Type) reflect.Type {
configTypesMu.Lock()
defer configTypesMu.Unlock()
if typ, ok := configTypes[out]; ok {
return typ
}
// Initial exported fields map one-to-one.
var fields []reflect.StructField
for i, n := 0, out.NumField(); i < n; i++ {
switch field := out.Field(i); {
case field.PkgPath == "" && field.Type != configsType:
fields = append(fields, field)
default:
fields = append(fields, reflect.StructField{
Name: "_" + field.Name, // Field must be unexported.
PkgPath: out.PkgPath(),
Type: emptyStructType,
})
}
}
// Append extra config fields on the end.
fields = append(fields, configFields...)
typ := reflect.StructOf(fields)
configTypes[out] = typ
return typ
}
// UnmarshalYAMLWithInlineConfigs helps implement yaml.Unmarshal for structs
// that have a Configs field that should be inlined.
func UnmarshalYAMLWithInlineConfigs(out interface{}, unmarshal func(interface{}) error) error {
outVal := reflect.ValueOf(out)
if outVal.Kind() != reflect.Ptr {
return fmt.Errorf("discovery: can only unmarshal into a struct pointer: %T", out)
}
outVal = outVal.Elem()
if outVal.Kind() != reflect.Struct {
return fmt.Errorf("discovery: can only unmarshal into a struct pointer: %T", out)
}
outTyp := outVal.Type()
cfgTyp := getConfigType(outTyp)
cfgPtr := reflect.New(cfgTyp)
cfgVal := cfgPtr.Elem()
// Copy shared fields (defaults) to dynamic value.
var configs *discovery.Configs
for i, n := 0, outVal.NumField(); i < n; i++ {
if outTyp.Field(i).Type == configsType {
configs = outVal.Field(i).Addr().Interface().(*discovery.Configs)
continue
}
if cfgTyp.Field(i).PkgPath != "" {
continue // Field is unexported: ignore.
}
cfgVal.Field(i).Set(outVal.Field(i))
}
if configs == nil {
return fmt.Errorf("discovery: Configs field not found in type: %T", out)
}
// Unmarshal into dynamic value.
if err := unmarshal(cfgPtr.Interface()); err != nil {
return replaceYAMLTypeError(err, cfgTyp, outTyp)
}
// Copy shared fields from dynamic value.
for i, n := 0, outVal.NumField(); i < n; i++ {
if cfgTyp.Field(i).PkgPath != "" {
continue // Field is unexported: ignore.
}
outVal.Field(i).Set(cfgVal.Field(i))
}
var err error
*configs, err = readConfigs(cfgVal, outVal.NumField())
return err
}
func readConfigs(structVal reflect.Value, startField int) (discovery.Configs, error) {
var (
configs discovery.Configs
targets []*targetgroup.Group
)
for i, n := startField, structVal.NumField(); i < n; i++ {
field := structVal.Field(i)
if field.Kind() != reflect.Slice {
panic("discovery: internal error: field is not a slice")
}
for k := 0; k < field.Len(); k++ {
val := field.Index(k)
if val.IsZero() || (val.Kind() == reflect.Ptr && val.Elem().IsZero()) {
key := configFieldNames[field.Type().Elem()]
key = strings.TrimPrefix(key, configFieldPrefix)
return nil, fmt.Errorf("empty or null section in %s", key)
}
switch c := val.Interface().(type) {
case *targetgroup.Group:
// Add index to the static config target groups for unique identification
// within scrape pool.
c.Source = strconv.Itoa(len(targets))
// Coalesce multiple static configs into a single static config.
targets = append(targets, c)
case discovery.Config:
configs = append(configs, c)
default:
panic("discovery: internal error: slice element is not a Config")
}
}
}
if len(targets) > 0 {
configs = append(configs, discovery.StaticConfig(targets))
}
return configs, nil
}
// MarshalYAMLWithInlineConfigs helps implement yaml.Marshal for structs
// that have a Configs field that should be inlined.
func MarshalYAMLWithInlineConfigs(in interface{}) (interface{}, error) {
inVal := reflect.ValueOf(in)
for inVal.Kind() == reflect.Ptr {
inVal = inVal.Elem()
}
inTyp := inVal.Type()
cfgTyp := getConfigType(inTyp)
cfgPtr := reflect.New(cfgTyp)
cfgVal := cfgPtr.Elem()
// Copy shared fields to dynamic value.
var configs *discovery.Configs
for i, n := 0, inTyp.NumField(); i < n; i++ {
if inTyp.Field(i).Type == configsType {
configs = inVal.Field(i).Addr().Interface().(*discovery.Configs)
}
if cfgTyp.Field(i).PkgPath != "" {
continue // Field is unexported: ignore.
}
cfgVal.Field(i).Set(inVal.Field(i))
}
if configs == nil {
return nil, fmt.Errorf("discovery: Configs field not found in type: %T", in)
}
if err := writeConfigs(cfgVal, *configs); err != nil {
return nil, err
}
return cfgPtr.Interface(), nil
}
func writeConfigs(structVal reflect.Value, configs discovery.Configs) error {
targets := structVal.FieldByName(staticConfigsFieldName).Addr().Interface().(*[]*targetgroup.Group)
for _, c := range configs {
if sc, ok := c.(discovery.StaticConfig); ok {
*targets = append(*targets, sc...)
continue
}
fieldName, ok := configFieldNames[reflect.TypeOf(c)]
if !ok {
return fmt.Errorf("discovery: cannot marshal unregistered Config type: %T", c)
}
field := structVal.FieldByName(fieldName)
field.Set(reflect.Append(field, reflect.ValueOf(c)))
}
return nil
}
func replaceYAMLTypeError(err error, oldTyp, newTyp reflect.Type) error {
var e *yaml.TypeError
if errors.As(err, &e) {
oldStr := oldTyp.String()
newStr := newTyp.String()
for i, s := range e.Errors {
e.Errors[i] = strings.ReplaceAll(s, oldStr, newStr)
}
}
return err
}

View file

@ -64,7 +64,7 @@ func (p *Provider) Config() interface{} {
return p.config
}
// Registers the metrics needed for SD mechanisms.
// CreateAndRegisterSDMetrics registers the metrics needed for SD mechanisms.
// Does not register the metrics for the Discovery Manager.
// TODO(ptodev): Add ability to unregister the metrics?
func CreateAndRegisterSDMetrics(reg prometheus.Registerer) (map[string]DiscovererMetrics, error) {
@ -212,9 +212,7 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {
m.metrics.FailedConfigs.Set(float64(failedCount))
var (
wg sync.WaitGroup
// keep shows if we keep any providers after reload.
keep bool
wg sync.WaitGroup
newProviders []*Provider
)
for _, prov := range m.providers {
@ -228,13 +226,12 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {
continue
}
newProviders = append(newProviders, prov)
// refTargets keeps reference targets used to populate new subs' targets
// refTargets keeps reference targets used to populate new subs' targets as they should be the same.
var refTargets map[string]*targetgroup.Group
prov.mu.Lock()
m.targetsMtx.Lock()
for s := range prov.subs {
keep = true
refTargets = m.targets[poolKey{s, prov.name}]
// Remove obsolete subs' targets.
if _, ok := prov.newSubs[s]; !ok {
@ -267,7 +264,9 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {
// While startProvider does pull the trigger, it may take some time to do so, therefore
// we pull the trigger as soon as possible so that downstream managers can populate their state.
// See https://github.com/prometheus/prometheus/pull/8639 for details.
if keep {
// This also helps making the downstream managers drop stale targets as soon as possible.
// See https://github.com/prometheus/prometheus/pull/13147 for details.
if len(m.providers) > 0 {
select {
case m.triggerSend <- struct{}{}:
default:
@ -288,7 +287,9 @@ func (m *Manager) StartCustomProvider(ctx context.Context, name string, worker D
name: {},
},
}
m.mtx.Lock()
m.providers = append(m.providers, p)
m.mtx.Unlock()
m.startProvider(ctx, p)
}
@ -393,8 +394,16 @@ func (m *Manager) updateGroup(poolKey poolKey, tgs []*targetgroup.Group) {
m.targets[poolKey] = make(map[string]*targetgroup.Group)
}
for _, tg := range tgs {
if tg != nil { // Some Discoverers send nil target group so need to check for it to avoid panics.
// Some Discoverers send nil target group so need to check for it to avoid panics.
if tg == nil {
continue
}
if len(tg.Targets) > 0 {
m.targets[poolKey][tg.Source] = tg
} else {
// The target group is empty, drop the corresponding entry to avoid leaks.
// In case the group yielded targets before, allGroups() will take care of making consumers drop them.
delete(m.targets[poolKey], tg.Source)
}
}
}
@ -403,19 +412,33 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
tSets := map[string][]*targetgroup.Group{}
n := map[string]int{}
m.mtx.RLock()
m.targetsMtx.Lock()
defer m.targetsMtx.Unlock()
for pkey, tsets := range m.targets {
for _, tg := range tsets {
// Even if the target group 'tg' is empty we still need to send it to the 'Scrape manager'
// to signal that it needs to stop all scrape loops for this target set.
tSets[pkey.setName] = append(tSets[pkey.setName], tg)
n[pkey.setName] += len(tg.Targets)
for _, p := range m.providers {
p.mu.RLock()
for s := range p.subs {
// Send empty lists for subs without any targets to make sure old stale targets are dropped by consumers.
// See: https://github.com/prometheus/prometheus/issues/12858 for details.
if _, ok := tSets[s]; !ok {
tSets[s] = []*targetgroup.Group{}
n[s] = 0
}
if tsets, ok := m.targets[poolKey{s, p.name}]; ok {
for _, tg := range tsets {
tSets[s] = append(tSets[s], tg)
n[s] += len(tg.Targets)
}
}
}
p.mu.RUnlock()
}
m.targetsMtx.Unlock()
m.mtx.RUnlock()
for setName, v := range n {
m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v))
}
return tSets
}

View file

@ -939,11 +939,13 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) {
discoveryManager.ApplyConfig(c)
// Original targets should be present as soon as possible.
// An empty list should be sent for prometheus2 to drop any stale targets
syncedTargets = <-discoveryManager.SyncCh()
mu.Unlock()
require.Len(t, syncedTargets, 1)
require.Len(t, syncedTargets, 2)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Len(t, syncedTargets["prometheus"], 1)
require.Empty(t, syncedTargets["prometheus2"])
// prometheus2 configs should be ready on second sync.
syncedTargets = <-discoveryManager.SyncCh()
@ -1049,8 +1051,8 @@ func TestDiscovererConfigs(t *testing.T) {
}
// TestTargetSetRecreatesEmptyStaticConfigs ensures that reloading a config file after
// removing all targets from the static_configs sends an update with empty targetGroups.
// This is required to signal the receiver that this target set has no current targets.
// removing all targets from the static_configs cleans the corresponding targetGroups entries to avoid leaks and sends an empty update.
// The update is required to signal the consumers that the previous targets should be dropped.
func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
@ -1083,16 +1085,14 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
require.Len(t, discoveryManager.targets, 1)
p = pk("static", "prometheus", 1)
targetGroups, ok := discoveryManager.targets[p]
require.True(t, ok, "'%v' should be present in target groups", p)
group, ok := targetGroups[""]
require.True(t, ok, "missing '' key in target groups %v", targetGroups)
require.Empty(t, group.Targets, "Invalid number of targets.")
require.Len(t, syncedTargets, 1)
require.Len(t, syncedTargets["prometheus"], 1)
require.Nil(t, syncedTargets["prometheus"][0].Labels)
require.True(t, ok, "'%v' should be present in targets", p)
// Otherwise the targetGroups will leak, see https://github.com/prometheus/prometheus/issues/12436.
require.Empty(t, targetGroups, 0, "'%v' should no longer have any associated target groups", p)
require.Len(t, syncedTargets, 1, "an update with no targetGroups should still be sent.")
require.Empty(t, syncedTargets["prometheus"], 0)
}
func TestIdenticalConfigurationsAreCoalesced(t *testing.T) {
@ -1275,6 +1275,7 @@ func TestCoordinationWithReceiver(t *testing.T) {
Targets: []model.LabelSet{{"__instance__": "1"}},
},
},
"mock1": {},
},
},
{

View file

@ -17,7 +17,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
// Metric vectors for the "refresh" package.
// RefreshMetricsVecs are metric vectors for the "refresh" package.
// We define them here in the "discovery" package in order to avoid a cyclic dependency between
// "discovery" and "refresh".
type RefreshMetricsVecs struct {

View file

@ -19,6 +19,7 @@ import (
"net"
"net/http"
"net/url"
"sort"
"strconv"
"time"
@ -251,28 +252,26 @@ func (d *DockerDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er
}
if d.matchFirstNetwork && len(networks) > 1 {
// Match user defined network
if containerNetworkMode.IsUserDefined() {
networkMode := string(containerNetworkMode)
networks = map[string]*network.EndpointSettings{networkMode: networks[networkMode]}
} else {
// Get first network if container network mode has "none" value.
// This case appears under certain condition:
// 1. Container created with network set to "--net=none".
// 2. Disconnect network "none".
// 3. Reconnect network with user defined networks.
var first string
for k, n := range networks {
if n != nil {
first = k
break
}
// Sort networks by name and take first non-nil network.
keys := make([]string, 0, len(networks))
for k, n := range networks {
if n != nil {
keys = append(keys, k)
}
networks = map[string]*network.EndpointSettings{first: networks[first]}
}
if len(keys) > 0 {
sort.Strings(keys)
firstNetworkMode := keys[0]
firstNetwork := networks[firstNetworkMode]
networks = map[string]*network.EndpointSettings{firstNetworkMode: firstNetwork}
}
}
for _, n := range networks {
if n == nil {
continue
}
var added bool
for _, p := range c.Ports {

View file

@ -60,9 +60,9 @@ host: %s
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Len(t, tg.Targets, 6)
require.Len(t, tg.Targets, 8)
for i, lbls := range []model.LabelSet{
expected := []model.LabelSet{
{
"__address__": "172.19.0.2:9100",
"__meta_docker_container_id": "c301b928faceb1a18fe379f6bc178727ef920bb30b0f9b8592b32b36255a0eca",
@ -163,7 +163,43 @@ host: %s
"__meta_docker_network_scope": "local",
"__meta_docker_port_private": "9104",
},
} {
{
"__address__": "172.20.0.3:3306",
"__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
"__meta_docker_container_label_com_docker_compose_project": "dockersd",
"__meta_docker_container_label_com_docker_compose_service": "mysql",
"__meta_docker_container_label_com_docker_compose_version": "2.2.2",
"__meta_docker_container_name": "/dockersd_multi_networks",
"__meta_docker_container_network_mode": "dockersd_private_none",
"__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
"__meta_docker_network_ingress": "false",
"__meta_docker_network_internal": "false",
"__meta_docker_network_ip": "172.20.0.3",
"__meta_docker_network_name": "dockersd_private",
"__meta_docker_network_scope": "local",
"__meta_docker_port_private": "3306",
},
{
"__address__": "172.20.0.3:33060",
"__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
"__meta_docker_container_label_com_docker_compose_project": "dockersd",
"__meta_docker_container_label_com_docker_compose_service": "mysql",
"__meta_docker_container_label_com_docker_compose_version": "2.2.2",
"__meta_docker_container_name": "/dockersd_multi_networks",
"__meta_docker_container_network_mode": "dockersd_private_none",
"__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
"__meta_docker_network_ingress": "false",
"__meta_docker_network_internal": "false",
"__meta_docker_network_ip": "172.20.0.3",
"__meta_docker_network_name": "dockersd_private",
"__meta_docker_network_scope": "local",
"__meta_docker_port_private": "33060",
},
}
sortFunc(expected)
sortFunc(tg.Targets)
for i, lbls := range expected {
t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) {
require.Equal(t, lbls, tg.Targets[i])
})
@ -202,13 +238,8 @@ host: %s
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Len(t, tg.Targets, 9)
require.Len(t, tg.Targets, 13)
sortFunc := func(labelSets []model.LabelSet) {
sort.Slice(labelSets, func(i, j int) bool {
return labelSets[i]["__address__"] < labelSets[j]["__address__"]
})
}
expected := []model.LabelSet{
{
"__address__": "172.19.0.2:9100",
@ -359,6 +390,70 @@ host: %s
"__meta_docker_network_scope": "local",
"__meta_docker_port_private": "9104",
},
{
"__address__": "172.20.0.3:3306",
"__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
"__meta_docker_container_label_com_docker_compose_project": "dockersd",
"__meta_docker_container_label_com_docker_compose_service": "mysql",
"__meta_docker_container_label_com_docker_compose_version": "2.2.2",
"__meta_docker_container_name": "/dockersd_multi_networks",
"__meta_docker_container_network_mode": "dockersd_private_none",
"__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
"__meta_docker_network_ingress": "false",
"__meta_docker_network_internal": "false",
"__meta_docker_network_ip": "172.20.0.3",
"__meta_docker_network_name": "dockersd_private",
"__meta_docker_network_scope": "local",
"__meta_docker_port_private": "3306",
},
{
"__address__": "172.20.0.3:33060",
"__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
"__meta_docker_container_label_com_docker_compose_project": "dockersd",
"__meta_docker_container_label_com_docker_compose_service": "mysql",
"__meta_docker_container_label_com_docker_compose_version": "2.2.2",
"__meta_docker_container_name": "/dockersd_multi_networks",
"__meta_docker_container_network_mode": "dockersd_private_none",
"__meta_docker_network_id": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
"__meta_docker_network_ingress": "false",
"__meta_docker_network_internal": "false",
"__meta_docker_network_ip": "172.20.0.3",
"__meta_docker_network_name": "dockersd_private",
"__meta_docker_network_scope": "local",
"__meta_docker_port_private": "33060",
},
{
"__address__": "172.21.0.3:3306",
"__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
"__meta_docker_container_label_com_docker_compose_project": "dockersd",
"__meta_docker_container_label_com_docker_compose_service": "mysql",
"__meta_docker_container_label_com_docker_compose_version": "2.2.2",
"__meta_docker_container_name": "/dockersd_multi_networks",
"__meta_docker_container_network_mode": "dockersd_private_none",
"__meta_docker_network_id": "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8",
"__meta_docker_network_ingress": "false",
"__meta_docker_network_internal": "false",
"__meta_docker_network_ip": "172.21.0.3",
"__meta_docker_network_name": "dockersd_private1",
"__meta_docker_network_scope": "local",
"__meta_docker_port_private": "3306",
},
{
"__address__": "172.21.0.3:33060",
"__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
"__meta_docker_container_label_com_docker_compose_project": "dockersd",
"__meta_docker_container_label_com_docker_compose_service": "mysql",
"__meta_docker_container_label_com_docker_compose_version": "2.2.2",
"__meta_docker_container_name": "/dockersd_multi_networks",
"__meta_docker_container_network_mode": "dockersd_private_none",
"__meta_docker_network_id": "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8",
"__meta_docker_network_ingress": "false",
"__meta_docker_network_internal": "false",
"__meta_docker_network_ip": "172.21.0.3",
"__meta_docker_network_name": "dockersd_private1",
"__meta_docker_network_scope": "local",
"__meta_docker_port_private": "33060",
},
}
sortFunc(expected)
@ -370,3 +465,9 @@ host: %s
})
}
}
func sortFunc(labelSets []model.LabelSet) {
sort.Slice(labelSets, func(i, j int) bool {
return labelSets[i]["__address__"] < labelSets[j]["__address__"]
})
}

View file

@ -98,7 +98,7 @@ func (m *SDMock) SetupHandlers() {
if len(query) == 2 {
h := sha1.New()
h.Write([]byte(query[1]))
// Avoing long filenames for Windows.
// Avoiding long filenames for Windows.
f += "__" + base64.URLEncoding.EncodeToString(h.Sum(nil))[:10]
}
}

View file

@ -17,7 +17,7 @@ import (
"context"
"strconv"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/network"
"github.com/docker/docker/client"
"github.com/prometheus/prometheus/util/strutil"
@ -34,7 +34,7 @@ const (
)
func getNetworksLabels(ctx context.Context, client *client.Client, labelPrefix string) (map[string]map[string]string, error) {
networks, err := client.NetworkList(ctx, types.NetworkListOptions{})
networks, err := client.NetworkList(ctx, network.ListOptions{})
if err != nil {
return nil, err
}

View file

@ -228,5 +228,74 @@
"Networks": {}
},
"Mounts": []
},
{
"Id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
"Names": [
"/dockersd_multi_networks"
],
"Image": "mysql:5.7.29",
"ImageID": "sha256:16ae2f4625ba63a250462bedeece422e741de9f0caf3b1d89fd5b257aca80cd1",
"Command": "mysqld",
"Created": 1616273136,
"Ports": [
{
"PrivatePort": 3306,
"Type": "tcp"
},
{
"PrivatePort": 33060,
"Type": "tcp"
}
],
"Labels": {
"com.docker.compose.project": "dockersd",
"com.docker.compose.service": "mysql",
"com.docker.compose.version": "2.2.2"
},
"State": "running",
"Status": "Up 40 seconds",
"HostConfig": {
"NetworkMode": "dockersd_private_none"
},
"NetworkSettings": {
"Networks": {
"dockersd_private": {
"IPAMConfig": null,
"Links": null,
"Aliases": null,
"NetworkID": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
"EndpointID": "972d6807997369605ace863af58de6cb90c787a5bf2ffc4105662d393ae539b7",
"Gateway": "172.20.0.1",
"IPAddress": "172.20.0.3",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "02:42:ac:14:00:02",
"DriverOpts": null
},
"dockersd_private1": {
"IPAMConfig": {},
"Links": null,
"Aliases": [
"mysql",
"mysql",
"f9ade4b83199"
],
"NetworkID": "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8",
"EndpointID": "91a98405344ee1cb7d977cafabe634837876651544b32da20a5e0155868e6f5f",
"Gateway": "172.21.0.1",
"IPAddress": "172.21.0.3",
"IPPrefixLen": 24,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "02:42:ac:15:00:02",
"DriverOpts": null
}
}
},
"Mounts": []
}
]

View file

@ -224,7 +224,7 @@
"Args": [
"--config.file=/etc/prometheus/prometheus.yml",
"--storage.tsdb.path=/prometheus",
"--storage.tsdb.retention=24h"
"--storage.tsdb.retention.time=24h"
],
"Privileges": {
"CredentialSpec": null,

View file

@ -973,7 +973,7 @@
"Args": [
"--config.file=/etc/prometheus/prometheus.yml",
"--storage.tsdb.path=/prometheus",
"--storage.tsdb.retention=24h"
"--storage.tsdb.retention.time=24h"
],
"Privileges": {
"CredentialSpec": null,

View file

@ -19,8 +19,8 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
// A utility to be used by implementations of discovery.Discoverer
// which need to manage the lifetime of their metrics.
// MetricRegisterer is used by implementations of discovery.Discoverer that need
// to manage the lifetime of their metrics.
type MetricRegisterer interface {
RegisterMetrics() error
UnregisterMetrics()
@ -34,7 +34,7 @@ type metricRegistererImpl struct {
var _ MetricRegisterer = &metricRegistererImpl{}
// Creates an instance of a MetricRegisterer.
// NewMetricRegisterer creates an instance of a MetricRegisterer.
// Typically called inside the implementation of the NewDiscoverer() method.
func NewMetricRegisterer(reg prometheus.Registerer, metrics []prometheus.Collector) MetricRegisterer {
return &metricRegistererImpl{

View file

@ -41,10 +41,10 @@ const (
uyuniMetaLabelPrefix = model.MetaLabelPrefix + "uyuni_"
uyuniLabelMinionHostname = uyuniMetaLabelPrefix + "minion_hostname"
uyuniLabelPrimaryFQDN = uyuniMetaLabelPrefix + "primary_fqdn"
uyuniLablelSystemID = uyuniMetaLabelPrefix + "system_id"
uyuniLablelGroups = uyuniMetaLabelPrefix + "groups"
uyuniLablelEndpointName = uyuniMetaLabelPrefix + "endpoint_name"
uyuniLablelExporter = uyuniMetaLabelPrefix + "exporter"
uyuniLabelSystemID = uyuniMetaLabelPrefix + "system_id"
uyuniLabelGroups = uyuniMetaLabelPrefix + "groups"
uyuniLabelEndpointName = uyuniMetaLabelPrefix + "endpoint_name"
uyuniLabelExporter = uyuniMetaLabelPrefix + "exporter"
uyuniLabelProxyModule = uyuniMetaLabelPrefix + "proxy_module"
uyuniLabelMetricsPath = uyuniMetaLabelPrefix + "metrics_path"
uyuniLabelScheme = uyuniMetaLabelPrefix + "scheme"
@ -270,10 +270,10 @@ func (d *Discovery) getEndpointLabels(
model.AddressLabel: model.LabelValue(addr),
uyuniLabelMinionHostname: model.LabelValue(networkInfo.Hostname),
uyuniLabelPrimaryFQDN: model.LabelValue(networkInfo.PrimaryFQDN),
uyuniLablelSystemID: model.LabelValue(strconv.Itoa(endpoint.SystemID)),
uyuniLablelGroups: model.LabelValue(strings.Join(managedGroupNames, d.separator)),
uyuniLablelEndpointName: model.LabelValue(endpoint.EndpointName),
uyuniLablelExporter: model.LabelValue(endpoint.ExporterName),
uyuniLabelSystemID: model.LabelValue(strconv.Itoa(endpoint.SystemID)),
uyuniLabelGroups: model.LabelValue(strings.Join(managedGroupNames, d.separator)),
uyuniLabelEndpointName: model.LabelValue(endpoint.EndpointName),
uyuniLabelExporter: model.LabelValue(endpoint.ExporterName),
uyuniLabelProxyModule: model.LabelValue(endpoint.Module),
uyuniLabelMetricsPath: model.LabelValue(endpoint.Path),
uyuniLabelScheme: model.LabelValue(scheme),

View file

@ -15,24 +15,25 @@ The Prometheus monitoring server
| <code class="text-nowrap">-h</code>, <code class="text-nowrap">--help</code> | Show context-sensitive help (also try --help-long and --help-man). | |
| <code class="text-nowrap">--version</code> | Show application version. | |
| <code class="text-nowrap">--config.file</code> | Prometheus configuration file path. | `prometheus.yml` |
| <code class="text-nowrap">--web.listen-address</code> | Address to listen on for UI, API, and telemetry. | `0.0.0.0:9090` |
| <code class="text-nowrap">--config.auto-reload-interval</code> | Specifies the interval for checking and automatically reloading the Prometheus configuration file upon detecting changes. | `30s` |
| <code class="text-nowrap">--web.listen-address</code> <code class="text-nowrap">...<code class="text-nowrap"> | Address to listen on for UI, API, and telemetry. Can be repeated. | `0.0.0.0:9090` |
| <code class="text-nowrap">--auto-gomemlimit.ratio</code> | The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory | `0.9` |
| <code class="text-nowrap">--web.config.file</code> | [EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. | |
| <code class="text-nowrap">--web.read-timeout</code> | Maximum duration before timing out read of the request, and closing idle connections. | `5m` |
| <code class="text-nowrap">--web.max-connections</code> | Maximum number of simultaneous connections. | `512` |
| <code class="text-nowrap">--web.max-connections</code> | Maximum number of simultaneous connections across all listeners. | `512` |
| <code class="text-nowrap">--web.external-url</code> | The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically. | |
| <code class="text-nowrap">--web.route-prefix</code> | Prefix for the internal routes of web endpoints. Defaults to path of --web.external-url. | |
| <code class="text-nowrap">--web.user-assets</code> | Path to static asset directory, available at /user. | |
| <code class="text-nowrap">--web.enable-lifecycle</code> | Enable shutdown and reload via HTTP request. | `false` |
| <code class="text-nowrap">--web.enable-admin-api</code> | Enable API endpoints for admin control actions. | `false` |
| <code class="text-nowrap">--web.enable-remote-write-receiver</code> | Enable API endpoint accepting remote write requests. | `false` |
| <code class="text-nowrap">--web.remote-write-receiver.accepted-protobuf-messages</code> | List of the remote write protobuf messages to accept when receiving the remote writes. Supported values: prometheus.WriteRequest, io.prometheus.write.v2.Request | `prometheus.WriteRequest` |
| <code class="text-nowrap">--web.console.templates</code> | Path to the console template directory, available at /consoles. | `consoles` |
| <code class="text-nowrap">--web.console.libraries</code> | Path to the console library directory. | `console_libraries` |
| <code class="text-nowrap">--web.page-title</code> | Document title of Prometheus instance. | `Prometheus Time Series Collection and Processing Server` |
| <code class="text-nowrap">--web.cors.origin</code> | Regex for CORS origin. It is fully anchored. Example: 'https?://(domain1|domain2)\.com' | `.*` |
| <code class="text-nowrap">--web.cors.origin</code> | Regex for CORS origin. It is fully anchored. Example: 'https?://(domain1\|domain2)\.com' | `.*` |
| <code class="text-nowrap">--storage.tsdb.path</code> | Base path for metrics storage. Use with server mode only. | `data/` |
| <code class="text-nowrap">--storage.tsdb.retention</code> | [DEPRECATED] How long to retain samples in storage. This flag has been deprecated, use "storage.tsdb.retention.time" instead. Use with server mode only. | |
| <code class="text-nowrap">--storage.tsdb.retention.time</code> | How long to retain samples in storage. When this flag is set it overrides "storage.tsdb.retention". If neither this flag nor "storage.tsdb.retention" nor "storage.tsdb.retention.size" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. Use with server mode only. | |
| <code class="text-nowrap">--storage.tsdb.retention.time</code> | How long to retain samples in storage. If neither this flag nor "storage.tsdb.retention.size" is set, the retention time defaults to 15d. Units Supported: y, w, d, h, m, s, ms. Use with server mode only. | |
| <code class="text-nowrap">--storage.tsdb.retention.size</code> | Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Use with server mode only. | |
| <code class="text-nowrap">--storage.tsdb.no-lockfile</code> | Do not create lockfile in data directory. Use with server mode only. | `false` |
| <code class="text-nowrap">--storage.tsdb.head-chunks-write-queue-size</code> | Size of the queue through which head chunks are written to the disk to be m-mapped, 0 disables the queue completely. Experimental. Use with server mode only. | `0` |
@ -55,7 +56,8 @@ The Prometheus monitoring server
| <code class="text-nowrap">--query.timeout</code> | Maximum time a query may take before being aborted. Use with server mode only. | `2m` |
| <code class="text-nowrap">--query.max-concurrency</code> | Maximum number of queries executed concurrently. Use with server mode only. | `20` |
| <code class="text-nowrap">--query.max-samples</code> | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` |
| <code class="text-nowrap">--enable-feature</code> | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
| <code class="text-nowrap">--enable-feature</code> <code class="text-nowrap">...<code class="text-nowrap"> | Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. | |
| <code class="text-nowrap">--agent</code> | Run Prometheus in 'Agent mode'. | |
| <code class="text-nowrap">--log.level</code> | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` |
| <code class="text-nowrap">--log.format</code> | Output format of log messages. One of: [logfmt, json] | `logfmt` |

View file

@ -15,7 +15,7 @@ Tooling for the Prometheus monitoring system.
| <code class="text-nowrap">-h</code>, <code class="text-nowrap">--help</code> | Show context-sensitive help (also try --help-long and --help-man). |
| <code class="text-nowrap">--version</code> | Show application version. |
| <code class="text-nowrap">--experimental</code> | Enable experimental commands. |
| <code class="text-nowrap">--enable-feature</code> | Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details. |
| <code class="text-nowrap">--enable-feature</code> <code class="text-nowrap">...<code class="text-nowrap"> | Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details. |
@ -281,7 +281,7 @@ Run series query.
| Flag | Description |
| --- | --- |
| <code class="text-nowrap">--match</code> | Series selector. Can be specified multiple times. |
| <code class="text-nowrap">--match</code> <code class="text-nowrap">...<code class="text-nowrap"> | Series selector. Can be specified multiple times. |
| <code class="text-nowrap">--start</code> | Start time (RFC3339 or Unix timestamp). |
| <code class="text-nowrap">--end</code> | End time (RFC3339 or Unix timestamp). |
@ -309,7 +309,7 @@ Run labels query.
| --- | --- |
| <code class="text-nowrap">--start</code> | Start time (RFC3339 or Unix timestamp). |
| <code class="text-nowrap">--end</code> | End time (RFC3339 or Unix timestamp). |
| <code class="text-nowrap">--match</code> | Series selector. Can be specified multiple times. |
| <code class="text-nowrap">--match</code> <code class="text-nowrap">...<code class="text-nowrap"> | Series selector. Can be specified multiple times. |
@ -338,7 +338,7 @@ Run queries against your Prometheus to analyze the usage pattern of certain metr
| <code class="text-nowrap">--type</code> | Type of metric: histogram. | |
| <code class="text-nowrap">--duration</code> | Time frame to analyze. | `1h` |
| <code class="text-nowrap">--time</code> | Query time (RFC3339 or Unix timestamp), defaults to now. | |
| <code class="text-nowrap">--match</code> | Series selector. Can be specified multiple times. | |
| <code class="text-nowrap">--match</code> <code class="text-nowrap">...<code class="text-nowrap"> | Series selector. Can be specified multiple times. | |
@ -442,6 +442,15 @@ Unit testing.
#### Flags
| Flag | Description |
| --- | --- |
| <code class="text-nowrap">--junit</code> | File path to store JUnit XML test results. |
##### `promtool test rules`
Unit tests for rules.
@ -452,7 +461,7 @@ Unit tests for rules.
| Flag | Description | Default |
| --- | --- | --- |
| <code class="text-nowrap">--run</code> | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | |
| <code class="text-nowrap">--run</code> <code class="text-nowrap">...<code class="text-nowrap"> | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. | |
| <code class="text-nowrap">--diff</code> | [Experimental] Print colored differential output between expected & received output. | `false` |
@ -566,10 +575,10 @@ Dump samples from a TSDB.
| Flag | Description | Default |
| --- | --- | --- |
| <code class="text-nowrap">--sandbox-dir-root</code> | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` |
| <code class="text-nowrap">--sandbox-dir-root</code> | Root directory where a sandbox directory will be created, this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end. | |
| <code class="text-nowrap">--min-time</code> | Minimum timestamp to dump. | `-9223372036854775808` |
| <code class="text-nowrap">--max-time</code> | Maximum timestamp to dump. | `9223372036854775807` |
| <code class="text-nowrap">--match</code> | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |
| <code class="text-nowrap">--match</code> <code class="text-nowrap">...<code class="text-nowrap"> | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |
@ -593,10 +602,10 @@ Dump samples from a TSDB.
| Flag | Description | Default |
| --- | --- | --- |
| <code class="text-nowrap">--sandbox-dir-root</code> | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` |
| <code class="text-nowrap">--sandbox-dir-root</code> | Root directory where a sandbox directory will be created, this sandbox is used in case WAL replay generates chunks (default is the database path). The sandbox is cleaned up at the end. | |
| <code class="text-nowrap">--min-time</code> | Minimum timestamp to dump. | `-9223372036854775808` |
| <code class="text-nowrap">--max-time</code> | Maximum timestamp to dump. | `9223372036854775807` |
| <code class="text-nowrap">--match</code> | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |
| <code class="text-nowrap">--match</code> <code class="text-nowrap">...<code class="text-nowrap"> | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |
@ -632,6 +641,15 @@ Import samples from OpenMetrics input and produce TSDB blocks. Please refer to t
###### Flags
| Flag | Description |
| --- | --- |
| <code class="text-nowrap">--label</code> | Label to attach to metrics. Can be specified multiple times. Example --label=label_name=label_value |
###### Arguments
| Argument | Description | Default | Required |

File diff suppressed because it is too large Load diff

View file

@ -92,7 +92,7 @@ series: <string>
#
# Native histogram notation:
# Native histograms can be used instead of floating point numbers using the following notation:
# {{schema:1 sum:-0.3 count:3.1 z_bucket:7.1 z_bucket_w:0.05 buckets:[5.1 10 7] offset:-3 n_buckets:[4.1 5] n_offset:-5}}
# {{schema:1 sum:-0.3 count:3.1 z_bucket:7.1 z_bucket_w:0.05 buckets:[5.1 10 7] offset:-3 n_buckets:[4.1 5] n_offset:-5 counter_reset_hint:gauge}}
# Native histograms support the same expanding notation as floating point numbers, i.e. 'axn', 'a+bxn' and 'a-bxn'.
# All properties are optional and default to 0. The order is not important. The following properties are supported:
# - schema (int):
@ -119,6 +119,8 @@ series: <string>
# Observation counts in negative buckets. Each represents an absolute count.
# - n_offset (int):
# The starting index of the first entry in the negative buckets.
# - counter_reset_hint (one of 'unknown', 'reset', 'not_reset' or 'gauge')
# The counter reset hint associated with this histogram. Defaults to 'unknown' if not set.
values: <string>
```

View file

@ -20,14 +20,6 @@ values according to the values of the current environment variables. References
to undefined variables are replaced by the empty string.
The `$` character can be escaped by using `$$`.
## Remote Write Receiver
`--enable-feature=remote-write-receiver`
The remote write receiver allows Prometheus to accept remote write requests from other Prometheus servers. More details can be found [here](storage.md#overview).
Activating the remote write receiver via a feature flag is deprecated. Use `--web.enable-remote-write-receiver` instead. This feature flag will be ignored in future versions of Prometheus.
## Exemplars storage
`--enable-feature=exemplar-storage`
@ -55,30 +47,6 @@ When enabled, for each instance scrape, Prometheus stores a sample in the follow
to find out how close they are to reaching the limit with `scrape_samples_post_metric_relabeling / scrape_sample_limit`. Note that `scrape_sample_limit` can be zero if there is no limit configured, which means that the query above can return `+Inf` for targets with no limit (as we divide by zero). If you want to query only for targets that do have a sample limit use this query: `scrape_samples_post_metric_relabeling / (scrape_sample_limit > 0)`.
- `scrape_body_size_bytes`. The uncompressed size of the most recent scrape response, if successful. Scrapes failing because `body_size_limit` is exceeded report `-1`, other scrape failures report `0`.
## New service discovery manager
`--enable-feature=new-service-discovery-manager`
When enabled, Prometheus uses a new service discovery manager that does not
restart unchanged discoveries upon reloading. This makes reloads faster and reduces
pressure on service discoveries' sources.
Users are encouraged to test the new service discovery manager and report any
issues upstream.
In future releases, this new service discovery manager will become the default and
this feature flag will be ignored.
## Prometheus agent
`--enable-feature=agent`
When enabled, Prometheus runs in agent mode. The agent mode is limited to
discovery, scrape and remote write.
This is useful when you do not need to query the Prometheus data locally, but
only from a central [remote endpoint](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage).
## Per-step stats
`--enable-feature=promql-per-step-stats`
@ -193,15 +161,16 @@ This should **only** be applied to metrics that currently produce such labels.
`--enable-feature=otlp-write-receiver`
The OTLP receiver allows Prometheus to accept [OpenTelemetry](https://opentelemetry.io/) metrics writes.
Prometheus is best used as a Pull based system, and staleness, `up` metric, and other Pull enabled features
Prometheus is best used as a Pull based system, and staleness, `up` metric, and other Pull enabled features
won't work when you push OTLP metrics.
## Experimental PromQL functions
`--enable-feature=promql-experimental-functions`
Enables PromQL functions that are considered experimental and whose name or
semantics could change.
Enables PromQL functions that are considered experimental. These functions
might change their name, syntax, or semantics. They might also get removed
entirely.
## Created Timestamps Zero Injection
@ -224,3 +193,54 @@ When the `concurrent-rule-eval` feature flag is enabled, rules without any depen
This has the potential to improve rule group evaluation latency and resource utilization at the expense of adding more concurrent query load.
The number of concurrent rule evaluations can be configured with `--rules.max-concurrent-rule-evals`, which is set to `4` by default.
## Serve old Prometheus UI
Fall back to serving the old (Prometheus 2.x) web UI instead of the new UI. The new UI that was released as part of Prometheus 3.0 is a complete rewrite and aims to be cleaner, less cluttered, and more modern under the hood. However, it is not fully feature complete and battle-tested yet, so some users may still prefer using the old UI.
`--enable-feature=old-ui`
## Metadata WAL Records
`--enable-feature=metadata-wal-records`
When enabled, Prometheus will store metadata in-memory and keep track of
metadata changes as WAL records on a per-series basis.
This must be used if
you are also using remote write 2.0 as it will only gather metadata from the WAL.
## Delay compaction start time
`--enable-feature=delayed-compaction`
A random offset, up to `10%` of the chunk range, is added to the Head compaction start time. This assists Prometheus instances in avoiding simultaneous compactions and reduces the load on shared resources.
Only auto Head compactions and the operations directly resulting from them are subject to this delay.
In the event of multiple consecutive Head compactions being possible, only the first compaction experiences this delay.
Note that during this delay, the Head continues its usual operations, which include serving and appending series.
Despite the delay in compaction, the blocks produced are time-aligned in the same manner as they would be if the delay was not in place.
## Delay __name__ label removal for PromQL engine
`--enable-feature=promql-delayed-name-removal`
When enabled, Prometheus will change the way in which the `__name__` label is removed from PromQL query results (for functions and expressions for which this is necessary). Specifically, it will delay the removal to the last step of the query evaluation, instead of every time an expression or function creating derived metrics is evaluated.
This allows optionally preserving the `__name__` label via the `label_replace` and `label_join` functions, and helps prevent the "vector cannot contain metrics with the same labelset" error, which can happen when applying a regex-matcher to the `__name__` label.
## Auto Reload Config
`--enable-feature=auto-reload-config`
When enabled, Prometheus will automatically reload its configuration file at a
specified interval. The interval is defined by the
`--config.auto-reload-interval` flag, which defaults to `30s`.
Configuration reloads are triggered by detecting changes in the checksum of the
main configuration file or any referenced files, such as rule and scrape
configurations. To ensure consistency and avoid issues during reloads, it's
recommended to update these files atomically.

View file

@ -25,8 +25,10 @@ Other non-`2xx` codes may be returned for errors occurring before the API
endpoint is reached.
An array of warnings may be returned if there are errors that do
not inhibit the request execution. All of the data that was successfully
collected will be returned in the data field.
not inhibit the request execution. An additional array of info-level
annotations may be returned for potential query issues that may or may
not be false positives. All of the data that was successfully collected
will be returned in the data field.
The JSON response envelope format is as follows:
@ -40,9 +42,11 @@ The JSON response envelope format is as follows:
"errorType": "<string>",
"error": "<string>",
// Only if there were warnings while executing the request.
// Only set if there were warnings while executing the request.
// There will still be data in the data field.
"warnings": ["<string>"]
"warnings": ["<string>"],
// Only set if there were info-level annnotations while executing the request.
"infos": ["<string>"]
}
```
@ -235,6 +239,75 @@ $ curl 'http://localhost:9090/api/v1/format_query?query=foo/bar'
}
```
## Parsing a PromQL expressions into a abstract syntax tree (AST)
This endpoint is **experimental** and might change in the future. It is currently only meant to be used by Prometheus' own web UI, and the endpoint name and exact format returned may change from one Prometheus version to another. It may also be removed again in case it is no longer needed by the UI.
The following endpoint parses a PromQL expression and returns it as a JSON-formatted AST (abstract syntax tree) representation:
```
GET /api/v1/parse_query
POST /api/v1/parse_query
```
URL query parameters:
- `query=<string>`: Prometheus expression query string.
You can URL-encode these parameters directly in the request body by using the `POST` method and
`Content-Type: application/x-www-form-urlencoded` header. This is useful when specifying a large
query that may breach server-side URL character limits.
The `data` section of the query result is a string containing the AST of the parsed query expression.
The following example parses the expression `foo/bar`:
```json
$ curl 'http://localhost:9090/api/v1/parse_query?query=foo/bar'
{
"data" : {
"bool" : false,
"lhs" : {
"matchers" : [
{
"name" : "__name__",
"type" : "=",
"value" : "foo"
}
],
"name" : "foo",
"offset" : 0,
"startOrEnd" : null,
"timestamp" : null,
"type" : "vectorSelector"
},
"matching" : {
"card" : "one-to-one",
"include" : [],
"labels" : [],
"on" : false
},
"op" : "/",
"rhs" : {
"matchers" : [
{
"name" : "__name__",
"type" : "=",
"value" : "bar"
}
],
"name" : "bar",
"offset" : 0,
"startOrEnd" : null,
"timestamp" : null,
"type" : "vectorSelector"
},
"type" : "binaryExpr"
},
"status" : "success"
}
```
## Querying metadata
Prometheus offers a set of API endpoints to query metadata about series and their labels.
@ -256,7 +329,7 @@ URL query parameters:
series to return. At least one `match[]` argument must be provided.
- `start=<rfc3339 | unix_timestamp>`: Start timestamp.
- `end=<rfc3339 | unix_timestamp>`: End timestamp.
- `limit=<number>`: Maximum number of returned series. Optional.
- `limit=<number>`: Maximum number of returned series. Optional. 0 means disabled.
You can URL-encode these parameters directly in the request body by using the `POST` method and
`Content-Type: application/x-www-form-urlencoded` header. This is useful when specifying a large
@ -307,7 +380,7 @@ URL query parameters:
- `end=<rfc3339 | unix_timestamp>`: End timestamp. Optional.
- `match[]=<series_selector>`: Repeated series selector argument that selects the
series from which to read the label names. Optional.
- `limit=<number>`: Maximum number of returned series. Optional.
- `limit=<number>`: Maximum number of returned series. Optional. 0 means disabled.
The `data` section of the JSON response is a list of string label names.
@ -358,7 +431,7 @@ URL query parameters:
- `end=<rfc3339 | unix_timestamp>`: End timestamp. Optional.
- `match[]=<series_selector>`: Repeated series selector argument that selects the
series from which to read the label values. Optional.
- `limit=<number>`: Maximum number of returned series. Optional.
- `limit=<number>`: Maximum number of returned series. Optional. 0 means disabled.
The `data` section of the JSON response is a list of string label values.
@ -452,7 +525,7 @@ raw numbers.
The keys `"histogram"` and `"histograms"` only show up if the experimental
native histograms are present in the response. Their placeholder `<histogram>`
is explained in detail in its own section below.
is explained in detail in its own section below.
### Range vectors
@ -470,7 +543,7 @@ Range vectors are returned as result type `matrix`. The corresponding
]
```
Each series could have the `"values"` key, or the `"histograms"` key, or both.
Each series could have the `"values"` key, or the `"histograms"` key, or both.
For a given timestamp, there will only be one sample of either float or histogram type.
Series are returned sorted by `metric`. Functions such as [`sort`](functions.md#sort)
@ -690,6 +763,7 @@ URL query parameters:
- `rule_group[]=<string>`: only return rules with the given rule group name. If the parameter is repeated, rules with any of the provided rule group names are returned. When the parameter is absent or empty, no filtering is done.
- `file[]=<string>`: only return rules with the given filepath. If the parameter is repeated, rules with any of the provided filepaths are returned. When the parameter is absent or empty, no filtering is done.
- `exclude_alerts=<bool>`: only return rules, do not return active alerts.
- `match[]=<label_selector>`: only return rules that have configured labels that satisfy the label selectors. If the parameter is repeated, rules that match any of the sets of label selectors are returned. Note that matching is on the labels in the definition of each rule, not on the values after template expansion (for alerting rules). Optional.
```json
$ curl http://localhost:9090/api/v1/rules
@ -1309,7 +1383,7 @@ endpoint is `/api/v1/write`. Find more details [here](../storage.md#overview).
## OTLP Receiver
Prometheus can be configured as a receiver for the OTLP Metrics protocol. This
Prometheus can be configured as a receiver for the OTLP Metrics protocol. This
is not considered an efficient way of ingesting samples. Use it
with caution for specific low-volume use cases. It is not suitable for
replacing the ingestion via scraping.

View file

@ -8,9 +8,15 @@ sort_rank: 1
Prometheus provides a functional query language called PromQL (Prometheus Query
Language) that lets the user select and aggregate time series data in real
time. The result of an expression can either be shown as a graph, viewed as
tabular data in Prometheus's expression browser, or consumed by external
systems via the [HTTP API](api.md).
time.
When you send a query request to Prometheus, it can be an _instant query_, evaluated at one point in time,
or a _range query_ at equally-spaced steps between a start and an end time. PromQL works exactly the same
in each cases; the range query is just like an instant query run multiple times at different timestamps.
In the Prometheus UI, the "Table" tab is for instant queries and the "Graph" tab is for range queries.
Other programs can fetch the result of a PromQL expression via the [HTTP API](api.md).
## Examples
@ -35,7 +41,7 @@ vector is the only type which can be graphed.
_Notes about the experimental native histograms:_
* Ingesting native histograms has to be enabled via a [feature
flag](../../feature_flags.md#native-histograms).
flag](../feature_flags.md#native-histograms).
* Once native histograms have been ingested into the TSDB (and even after
disabling the feature flag again), both instant vectors and range vectors may
now contain samples that aren't simple floating point numbers (float samples)
@ -81,12 +87,20 @@ Examples:
0x8f
-Inf
NaN
As of version 2.54, float literals can also be represented using the syntax of time durations, where the time duration is converted into a float value corresponding to the number of seconds the time duration represents. This is an experimental feature and might still change.
Examples:
1s # Equivalent to 1.0
2m # Equivalent to 120.0
1ms # Equivalent to 0.001
## Time series selectors
Time series selectors are responsible for selecting the times series and raw or inferred sample timestamps and values.
Time series *selectors* are not to be confused with higher level concept of instant and range *queries* that can execute the time series *selectors*. A higher level instant query would evaluate the given selector at one point in time, however the range query would evaluate the selector at multiple different times in between a minimum and maximum timestamp at regular steps.
These are the basic building-blocks that instruct PromQL what data to fetch.
### Instant vector selectors
@ -189,12 +203,12 @@ Range vector literals work like instant vector literals, except that they
select a range of samples back from the current instant. Syntactically, a [time
duration](#time-durations) is appended in square brackets (`[]`) at the end of
a vector selector to specify how far back in time values should be fetched for
each resulting range vector element. The range is a closed interval,
i.e. samples with timestamps coinciding with either boundary of the range are
still included in the selection.
each resulting range vector element. The range is a left-open and right-closed interval,
i.e. samples with timestamps coinciding with the left boundary of the range are excluded from the selection,
while samples coinciding with the right boundary of the range are included in the selection.
In this example, we select all the values we have recorded within the last 5
minutes for all time series that have the metric name `http_requests_total` and
In this example, we select all the values recorded less than 5m ago for all time series
that have the metric name `http_requests_total` and
a `job` label set to `prometheus`:
http_requests_total{job="prometheus"}[5m]
@ -224,6 +238,15 @@ Here are some examples of valid time durations:
5m
10s
As of version 2.54, time durations can also be represented using the syntax of float literals, implying the number of seconds of the time duration. This is an experimental feature and might still change.
Examples:
1.0 # Equivalent to 1s
0.001 # Equivalent to 1ms
120 # Equivalent to 2m
### Offset modifier
The `offset` modifier allows changing the time offset for individual
@ -335,7 +358,7 @@ independently of the actual present time series data. This is mainly to support
cases like aggregation (`sum`, `avg`, and so on), where multiple aggregated
time series do not precisely align in time. Because of their independence,
Prometheus needs to assign a value at those timestamps for each relevant time
series. It does so by taking the newest sample before this timestamp within the lookback period.
series. It does so by taking the newest sample that is less than the lookback period ago.
The lookback period is 5 minutes by default.
If a target scrape or rule evaluation no longer returns a sample for a time

View file

@ -95,3 +95,13 @@ Assuming this metric contains one time series per running instance, you could
count the number of running instances per application like this:
count by (app) (instance_cpu_time_ns)
If we are exploring some metrics for their labels, to e.g. be able to aggregate
over some of them, we could use the following:
limitk(10, app_foo_metric_bar)
Alternatively, if we wanted the returned timeseries to be more evenly sampled,
we could use the following to get approximately 10% of them:
limit_ratio(0.1, app_foo_metric_bar)

View file

@ -98,8 +98,9 @@ vector.
clamps the sample values of all elements in `v` to have a lower limit of `min` and an upper limit of `max`.
Special cases:
- Return an empty vector if `min > max`
- Return `NaN` if `min` or `max` is `NaN`
* Return an empty vector if `min > max`
* Return `NaN` if `min` or `max` is `NaN`
## `clamp_max()`
@ -325,45 +326,70 @@ With native histograms, aggregating everything works as usual without any `by` c
histogram_quantile(0.9, sum(rate(http_request_duration_seconds[10m])))
The `histogram_quantile()` function interpolates quantile values by
assuming a linear distribution within a bucket.
In the (common) case that a quantile value does not coincide with a bucket
boundary, the `histogram_quantile()` function interpolates the quantile value
within the bucket the quantile value falls into. For classic histograms, for
native histograms with custom bucket boundaries, and for the zero bucket of
other native histograms, it assumes a uniform distribution of observations
within the bucket (also called _linear interpolation_). For the
non-zero-buckets of native histograms with a standard exponential bucketing
schema, the interpolation is done under the assumption that the samples within
the bucket are distributed in a way that they would uniformly populate the
buckets in a hypothetical histogram with higher resolution. (This is also
called _exponential interpolation_.)
If `b` has 0 observations, `NaN` is returned. For φ < 0, `-Inf` is
returned. For φ > 1, `+Inf` is returned. For φ = `NaN`, `NaN` is returned.
The following is only relevant for classic histograms: If `b` contains
fewer than two buckets, `NaN` is returned. The highest bucket must have an
upper bound of `+Inf`. (Otherwise, `NaN` is returned.) If a quantile is located
in the highest bucket, the upper bound of the second highest bucket is
returned. A lower limit of the lowest bucket is assumed to be 0 if the upper
bound of that bucket is greater than
0. In that case, the usual linear interpolation is applied within that
bucket. Otherwise, the upper bound of the lowest bucket is returned for
quantiles located in the lowest bucket.
Special cases for classic histograms:
You can use `histogram_quantile(0, v instant-vector)` to get the estimated minimum value stored in
a histogram.
* If `b` contains fewer than two buckets, `NaN` is returned.
* The highest bucket must have an upper bound of `+Inf`. (Otherwise, `NaN` is
returned.)
* If a quantile is located in the highest bucket, the upper bound of the second
highest bucket is returned.
* The lower limit of the lowest bucket is assumed to be 0 if the upper bound of
that bucket is greater than 0. In that case, the usual linear interpolation
is applied within that bucket. Otherwise, the upper bound of the lowest
bucket is returned for quantiles located in the lowest bucket.
You can use `histogram_quantile(1, v instant-vector)` to get the estimated maximum value stored in
a histogram.
Special cases for native histograms (relevant for the exact interpolation
happening within the zero bucket):
Buckets of classic histograms are cumulative. Therefore, the following should always be the case:
* A zero bucket with finite width is assumed to contain no negative
observations if the histogram has observations in positive buckets, but none
in negative buckets.
* A zero bucket with finite width is assumed to contain no positive
observations if the histogram has observations in negative buckets, but none
in positive buckets.
- The counts in the buckets are monotonically increasing (strictly non-decreasing).
- A lack of observations between the upper limits of two consecutive buckets results in equal counts
in those two buckets.
You can use `histogram_quantile(0, v instant-vector)` to get the estimated
minimum value stored in a histogram.
However, floating point precision issues (e.g. small discrepancies introduced by computing of buckets
with `sum(rate(...))`) or invalid data might violate these assumptions. In that case,
`histogram_quantile` would be unable to return meaningful results. To mitigate the issue,
`histogram_quantile` assumes that tiny relative differences between consecutive buckets are happening
because of floating point precision errors and ignores them. (The threshold to ignore a difference
between two buckets is a trillionth (1e-12) of the sum of both buckets.) Furthermore, if there are
non-monotonic bucket counts even after this adjustment, they are increased to the value of the
previous buckets to enforce monotonicity. The latter is evidence for an actual issue with the input
data and is therefore flagged with an informational annotation reading `input to histogram_quantile
needed to be fixed for monotonicity`. If you encounter this annotation, you should find and remove
the source of the invalid data.
You can use `histogram_quantile(1, v instant-vector)` to get the estimated
maximum value stored in a histogram.
Buckets of classic histograms are cumulative. Therefore, the following should
always be the case:
* The counts in the buckets are monotonically increasing (strictly
non-decreasing).
* A lack of observations between the upper limits of two consecutive buckets
results in equal counts in those two buckets.
However, floating point precision issues (e.g. small discrepancies introduced
by computing of buckets with `sum(rate(...))`) or invalid data might violate
these assumptions. In that case, `histogram_quantile` would be unable to return
meaningful results. To mitigate the issue, `histogram_quantile` assumes that
tiny relative differences between consecutive buckets are happening because of
floating point precision errors and ignores them. (The threshold to ignore a
difference between two buckets is a trillionth (1e-12) of the sum of both
buckets.) Furthermore, if there are non-monotonic bucket counts even after this
adjustment, they are increased to the value of the previous buckets to enforce
monotonicity. The latter is evidence for an actual issue with the input data
and is therefore flagged with an informational annotation reading `input to
histogram_quantile needed to be fixed for monotonicity`. If you encounter this
annotation, you should find and remove the source of the invalid data.
## `histogram_stddev()` and `histogram_stdvar()`
@ -379,15 +405,22 @@ do not show up in the returned vector.
Similarly, `histogram_stdvar(v instant-vector)` returns the estimated standard
variance of observations in a native histogram.
## `holt_winters()`
## `double_exponential_smoothing()`
`holt_winters(v range-vector, sf scalar, tf scalar)` produces a smoothed value
**This function has to be enabled via the [feature flag](../feature_flags.md#experimental-promql-functions) `--enable-feature=promql-experimental-functions`.**
`double_exponential_smoothing(v range-vector, sf scalar, tf scalar)` produces a smoothed value
for time series based on the range in `v`. The lower the smoothing factor `sf`,
the more importance is given to old data. The higher the trend factor `tf`, the
more trends in the data is considered. Both `sf` and `tf` must be between 0 and
1.
For additional details, refer to [NIST Engineering Statistics Handbook](https://www.itl.nist.gov/div898/handbook/pmc/section4/pmc433.htm).
In Prometheus V2 this function was called `holt_winters`. This caused confusion
since the Holt-Winters method usually refers to triple exponential smoothing.
Double exponential smoothing as implemented here is also referred to as "Holt
Linear".
`holt_winters` should only be used with gauges.
`double_exponential_smoothing` should only be used with gauges.
## `hour()`
@ -616,9 +649,9 @@ Like `sort`, `sort_desc` only affects the results of instant queries, as range q
## `sort_by_label()`
**This function has to be enabled via the [feature flag](../feature_flags/) `--enable-feature=promql-experimental-functions`.**
**This function has to be enabled via the [feature flag](../feature_flags.md#experimental-promql-functions) `--enable-feature=promql-experimental-functions`.**
`sort_by_label(v instant-vector, label string, ...)` returns vector elements sorted by their label values and sample value in case of label values being equal, in ascending order.
`sort_by_label(v instant-vector, label string, ...)` returns vector elements sorted by the values of the given labels in ascending order. In case these label values are equal, elements are sorted by their full label sets.
Please note that the sort by label functions only affect the results of instant queries, as range query results always have a fixed output ordering.
@ -626,7 +659,7 @@ This function uses [natural sort order](https://en.wikipedia.org/wiki/Natural_so
## `sort_by_label_desc()`
**This function has to be enabled via the [feature flag](../feature_flags/) `--enable-feature=promql-experimental-functions`.**
**This function has to be enabled via the [feature flag](../feature_flags.md#experimental-promql-functions) `--enable-feature=promql-experimental-functions`.**
Same as `sort_by_label`, but sorts in descending order.
@ -675,7 +708,7 @@ over time and return an instant vector with per-series aggregation results:
* `last_over_time(range-vector)`: the most recent point value in the specified interval.
* `present_over_time(range-vector)`: the value 1 for any series in the specified interval.
If the [feature flag](../feature_flags/)
If the [feature flag](../feature_flags.md#experimental-promql-functions)
`--enable-feature=promql-experimental-functions` is set, the following
additional functions are available:
@ -692,21 +725,21 @@ ignore histogram samples.
The trigonometric functions work in radians:
- `acos(v instant-vector)`: calculates the arccosine of all elements in `v` ([special cases](https://pkg.go.dev/math#Acos)).
- `acosh(v instant-vector)`: calculates the inverse hyperbolic cosine of all elements in `v` ([special cases](https://pkg.go.dev/math#Acosh)).
- `asin(v instant-vector)`: calculates the arcsine of all elements in `v` ([special cases](https://pkg.go.dev/math#Asin)).
- `asinh(v instant-vector)`: calculates the inverse hyperbolic sine of all elements in `v` ([special cases](https://pkg.go.dev/math#Asinh)).
- `atan(v instant-vector)`: calculates the arctangent of all elements in `v` ([special cases](https://pkg.go.dev/math#Atan)).
- `atanh(v instant-vector)`: calculates the inverse hyperbolic tangent of all elements in `v` ([special cases](https://pkg.go.dev/math#Atanh)).
- `cos(v instant-vector)`: calculates the cosine of all elements in `v` ([special cases](https://pkg.go.dev/math#Cos)).
- `cosh(v instant-vector)`: calculates the hyperbolic cosine of all elements in `v` ([special cases](https://pkg.go.dev/math#Cosh)).
- `sin(v instant-vector)`: calculates the sine of all elements in `v` ([special cases](https://pkg.go.dev/math#Sin)).
- `sinh(v instant-vector)`: calculates the hyperbolic sine of all elements in `v` ([special cases](https://pkg.go.dev/math#Sinh)).
- `tan(v instant-vector)`: calculates the tangent of all elements in `v` ([special cases](https://pkg.go.dev/math#Tan)).
- `tanh(v instant-vector)`: calculates the hyperbolic tangent of all elements in `v` ([special cases](https://pkg.go.dev/math#Tanh)).
* `acos(v instant-vector)`: calculates the arccosine of all elements in `v` ([special cases](https://pkg.go.dev/math#Acos)).
* `acosh(v instant-vector)`: calculates the inverse hyperbolic cosine of all elements in `v` ([special cases](https://pkg.go.dev/math#Acosh)).
* `asin(v instant-vector)`: calculates the arcsine of all elements in `v` ([special cases](https://pkg.go.dev/math#Asin)).
* `asinh(v instant-vector)`: calculates the inverse hyperbolic sine of all elements in `v` ([special cases](https://pkg.go.dev/math#Asinh)).
* `atan(v instant-vector)`: calculates the arctangent of all elements in `v` ([special cases](https://pkg.go.dev/math#Atan)).
* `atanh(v instant-vector)`: calculates the inverse hyperbolic tangent of all elements in `v` ([special cases](https://pkg.go.dev/math#Atanh)).
* `cos(v instant-vector)`: calculates the cosine of all elements in `v` ([special cases](https://pkg.go.dev/math#Cos)).
* `cosh(v instant-vector)`: calculates the hyperbolic cosine of all elements in `v` ([special cases](https://pkg.go.dev/math#Cosh)).
* `sin(v instant-vector)`: calculates the sine of all elements in `v` ([special cases](https://pkg.go.dev/math#Sin)).
* `sinh(v instant-vector)`: calculates the hyperbolic sine of all elements in `v` ([special cases](https://pkg.go.dev/math#Sinh)).
* `tan(v instant-vector)`: calculates the tangent of all elements in `v` ([special cases](https://pkg.go.dev/math#Tan)).
* `tanh(v instant-vector)`: calculates the hyperbolic tangent of all elements in `v` ([special cases](https://pkg.go.dev/math#Tanh)).
The following are useful for converting between degrees and radians:
- `deg(v instant-vector)`: converts radians to degrees for all elements in `v`.
- `pi()`: returns pi.
- `rad(v instant-vector)`: converts degrees to radians for all elements in `v`.
* `deg(v instant-vector)`: converts radians to degrees for all elements in `v`.
* `pi()`: returns pi.
* `rad(v instant-vector)`: converts degrees to radians for all elements in `v`.

View file

@ -230,6 +230,8 @@ vector of fewer elements with aggregated values:
* `bottomk` (smallest k elements by sample value)
* `topk` (largest k elements by sample value)
* `quantile` (calculate φ-quantile (0 ≤ φ ≤ 1) over dimensions)
* `limitk` (sample n elements)
* `limit_ratio` (sample elements with approximately 𝑟 ratio if `𝑟 > 0`, and the complement of such samples if `𝑟 = -(1.0 - 𝑟)`)
These operators can either be used to aggregate over **all** label dimensions
or preserve distinct dimensions by including a `without` or `by` clause. These
@ -249,8 +251,8 @@ all other labels are preserved in the output. `by` does the opposite and drops
labels that are not listed in the `by` clause, even if their label values are
identical between all elements of the vector.
`parameter` is only required for `count_values`, `quantile`, `topk` and
`bottomk`.
`parameter` is only required for `count_values`, `quantile`, `topk`,
`bottomk`, `limitk` and `limit_ratio`.
`count_values` outputs one time series per unique sample value. Each series has
an additional label. The name of that label is given by the aggregation
@ -261,11 +263,16 @@ time series is the number of times that sample value was present.
the input samples, including the original labels, are returned in the result
vector. `by` and `without` are only used to bucket the input vector.
`limitk` and `limit_ratio` also return a subset of the input samples,
including the original labels in the result vector, these are experimental
operators that must be enabled with `--enable-feature=promql-experimental-functions`.
`quantile` calculates the φ-quantile, the value that ranks at number φ*N among
the N metric values of the dimensions aggregated over. φ is provided as the
aggregation parameter. For example, `quantile(0.5, ...)` calculates the median,
`quantile(0.95, ...)` the 95th percentile. For φ = `NaN`, `NaN` is returned. For φ < 0, `-Inf` is returned. For φ > 1, `+Inf` is returned.
Example:
If the metric `http_requests_total` had time series that fan out by
@ -291,6 +298,33 @@ To get the 5 largest HTTP requests counts across all instances we could write:
topk(5, http_requests_total)
To sample 10 timeseries, for example to inspect labels and their values, we
could write:
limitk(10, http_requests_total)
To deterministically sample approximately 10% of timeseries we could write:
limit_ratio(0.1, http_requests_total)
Given that `limit_ratio()` implements a deterministic sampling algorithm (based
on labels' hash), you can get the _complement_ of the above samples, i.e.
approximately 90%, but precisely those not returned by `limit_ratio(0.1, ...)`
with:
limit_ratio(-0.9, http_requests_total)
You can also use this feature to e.g. verify that `avg()` is a representative
aggregation for your samples' values, by checking that the difference between
averaging two samples' subsets is "small" when compared to the standard
deviation.
abs(
avg(limit_ratio(0.5, http_requests_total))
-
avg(limit_ratio(-0.5, http_requests_total))
) <= bool stddev(http_requests_total)
## Binary operator precedence
The following list shows the precedence of binary operators in Prometheus, from

View file

@ -87,10 +87,9 @@ or 31 days, whichever is smaller.
Prometheus has several flags that configure local storage. The most important are:
- `--storage.tsdb.path`: Where Prometheus writes its database. Defaults to `data/`.
- `--storage.tsdb.retention.time`: How long to retain samples in storage. When this flag is
set, it overrides `storage.tsdb.retention`. If neither this flag nor `storage.tsdb.retention`
nor `storage.tsdb.retention.size` is set, the retention time defaults to `15d`.
Supported units: y, w, d, h, m, s, ms.
- `--storage.tsdb.retention.time`: How long to retain samples in storage. If neither
this flag nor `storage.tsdb.retention.size` is set, the retention time defaults to
`15d`. Supported units: y, w, d, h, m, s, ms.
- `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain.
The oldest data will be removed first. Defaults to `0` or disabled. Units supported:
B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Only
@ -98,7 +97,6 @@ Prometheus has several flags that configure local storage. The most important ar
chunks are counted in the total size. So the minimum requirement for the disk is the
peak space taken by the `wal` (the WAL and Checkpoint) and `chunks_head`
(m-mapped Head chunks) directory combined (peaks every 2 hours).
- `--storage.tsdb.retention`: Deprecated in favor of `storage.tsdb.retention.time`.
- `--storage.tsdb.wal-compression`: Enables compression of the write-ahead log (WAL).
Depending on your data, you can expect the WAL size to be halved with little extra
cpu load. This flag was introduced in 2.11.0 and enabled by default in 2.20.0.
@ -137,6 +135,18 @@ will be used.
Expired block cleanup happens in the background. It may take up to two hours
to remove expired blocks. Blocks must be fully expired before they are removed.
## Right-Sizing Retention Size
If you are utilizing `storage.tsdb.retention.size` to set a size limit, you
will want to consider the right size for this value relative to the storage you
have allocated for Prometheus. It is wise to reduce the retention size to provide
a buffer, ensuring that older entries will be removed before the allocated storage
for Prometheus becomes full.
At present, we recommend setting the retention size to, at most, 80-85% of your
allocated Prometheus disk space. This increases the likelihood that older entries
will be removed prior to hitting any disk limitations.
## Remote storage integrations
Prometheus's local storage is limited to a single node's scalability and durability.
@ -145,31 +155,27 @@ a set of interfaces that allow integrating with remote storage systems.
### Overview
Prometheus integrates with remote storage systems in three ways:
Prometheus integrates with remote storage systems in four ways:
- Prometheus can write samples that it ingests to a remote URL in a standardized format.
- Prometheus can receive samples from other Prometheus servers in a standardized format.
- Prometheus can read (back) sample data from a remote URL in a standardized format.
- Prometheus can write samples that it ingests to a remote URL in a [Remote Write format](https://prometheus.io/docs/specs/remote_write_spec_2_0/).
- Prometheus can receive samples from other clients in a [Remote Write format](https://prometheus.io/docs/specs/remote_write_spec_2_0/).
- Prometheus can read (back) sample data from a remote URL in a [Remote Read format](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto#L31).
- Prometheus can return sample data requested by clients in a [Remote Read format](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto#L31).
![Remote read and write architecture](images/remote_integrations.png)
The read and write protocols both use a snappy-compressed protocol buffer encoding over
HTTP. The protocols are not considered as stable APIs yet and may change to use gRPC
over HTTP/2 in the future, when all hops between Prometheus and the remote storage can
safely be assumed to support HTTP/2.
The remote read and write protocols both use a snappy-compressed protocol buffer encoding over
HTTP. The read protocol is not yet considered as stable API.
For details on configuring remote storage integrations in Prometheus, see the
The write protocol has a [stable specification for 1.0 version](https://prometheus.io/docs/specs/remote_write_spec/)
and [experimental specification for 2.0 version](https://prometheus.io/docs/specs/remote_write_spec_2_0/),
both supported by Prometheus server.
For details on configuring remote storage integrations in Prometheus as a client, see the
[remote write](configuration/configuration.md#remote_write) and
[remote read](configuration/configuration.md#remote_read) sections of the Prometheus
configuration documentation.
The built-in remote write receiver can be enabled by setting the
`--web.enable-remote-write-receiver` command line flag. When enabled,
the remote write receiver endpoint is `/api/v1/write`.
For details on the request and response messages, see the
[remote storage protocol buffer definitions](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto).
Note that on the read path, Prometheus only fetches raw series data for a set of
label selectors and time ranges from the remote end. All PromQL evaluation on the
raw data still happens in Prometheus itself. This means that remote read queries
@ -177,6 +183,11 @@ have some scalability limit, since all necessary data needs to be loaded into th
querying Prometheus server first and then processed there. However, supporting
fully distributed evaluation of PromQL was deemed infeasible for the time being.
Prometheus also serves both protocols. The built-in remote write receiver can be enabled
by setting the `--web.enable-remote-write-receiver` command line flag. When enabled,
the remote write receiver endpoint is `/api/v1/write`. The remote read endpoint is
available on [`/api/v1/read`](https://prometheus.io/docs/prometheus/latest/querying/remote_read_api/).
### Existing integrations
To learn more about existing integrations with remote storage systems, see the

View file

@ -1,4 +1,4 @@
# An example scrape configuration for running Prometheus with Ovhcloud.
# An example scrape configuration for running Prometheus with OVHcloud.
scrape_configs:
- job_name: 'ovhcloud'
ovhcloud_sd_configs:

View file

@ -7,6 +7,7 @@ To use it:
```
go build
./example_write_adapter
```
@ -15,10 +16,19 @@ go build
```yaml
remote_write:
- url: "http://localhost:1234/receive"
protobuf_message: "io.prometheus.write.v2.Request"
```
Then start Prometheus:
or for the eventually deprecated Remote Write 1.0 message:
```yaml
remote_write:
- url: "http://localhost:1234/receive"
protobuf_message: "prometheus.WriteRequest"
```
Then start Prometheus (in separate terminal):
```
./prometheus
./prometheus --enable-feature=metadata-wal-records
```

View file

@ -18,44 +18,103 @@ import (
"log"
"net/http"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/prompb"
writev2 "github.com/prometheus/prometheus/prompb/io/prometheus/write/v2"
"github.com/prometheus/prometheus/storage/remote"
)
func main() {
http.HandleFunc("/receive", func(w http.ResponseWriter, r *http.Request) {
req, err := remote.DecodeWriteRequest(r.Body)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
enc := r.Header.Get("Content-Encoding")
if enc == "" {
http.Error(w, "missing Content-Encoding header", http.StatusUnsupportedMediaType)
return
}
if enc != "snappy" {
http.Error(w, "unknown encoding, only snappy supported", http.StatusUnsupportedMediaType)
return
}
for _, ts := range req.Timeseries {
m := make(model.Metric, len(ts.Labels))
for _, l := range ts.Labels {
m[model.LabelName(l.Name)] = model.LabelValue(l.Value)
}
fmt.Println(m)
contentType := r.Header.Get("Content-Type")
if contentType == "" {
http.Error(w, "missing Content-Type header", http.StatusUnsupportedMediaType)
}
for _, s := range ts.Samples {
fmt.Printf("\tSample: %f %d\n", s.Value, s.Timestamp)
}
defer func() { _ = r.Body.Close() }()
for _, e := range ts.Exemplars {
m := make(model.Metric, len(e.Labels))
for _, l := range e.Labels {
m[model.LabelName(l.Name)] = model.LabelValue(l.Value)
}
fmt.Printf("\tExemplar: %+v %f %d\n", m, e.Value, e.Timestamp)
// Very simplistic content parsing, see
// storage/remote/write_handler.go#WriteHandler.ServeHTTP for production example.
switch contentType {
case "application/x-protobuf", "application/x-protobuf;proto=prometheus.WriteRequest":
req, err := remote.DecodeWriteRequest(r.Body)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
for _, hp := range ts.Histograms {
h := remote.HistogramProtoToHistogram(hp)
fmt.Printf("\tHistogram: %s\n", h.String())
printV1(req)
case "application/x-protobuf;proto=io.prometheus.write.v2.Request":
req, err := remote.DecodeWriteV2Request(r.Body)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
printV2(req)
default:
msg := fmt.Sprintf("Unknown remote write content type: %s", contentType)
fmt.Println(msg)
http.Error(w, msg, http.StatusBadRequest)
}
})
log.Fatal(http.ListenAndServe(":1234", nil))
}
func printV1(req *prompb.WriteRequest) {
b := labels.NewScratchBuilder(0)
for _, ts := range req.Timeseries {
fmt.Println(ts.ToLabels(&b, nil))
for _, s := range ts.Samples {
fmt.Printf("\tSample: %f %d\n", s.Value, s.Timestamp)
}
for _, ep := range ts.Exemplars {
e := ep.ToExemplar(&b, nil)
fmt.Printf("\tExemplar: %+v %f %d\n", e.Labels, e.Value, ep.Timestamp)
}
for _, hp := range ts.Histograms {
if hp.IsFloatHistogram() {
h := hp.ToFloatHistogram()
fmt.Printf("\tHistogram: %s\n", h.String())
continue
}
h := hp.ToIntHistogram()
fmt.Printf("\tHistogram: %s\n", h.String())
}
}
}
func printV2(req *writev2.Request) {
b := labels.NewScratchBuilder(0)
for _, ts := range req.Timeseries {
l := ts.ToLabels(&b, req.Symbols)
m := ts.ToMetadata(req.Symbols)
fmt.Println(l, m)
for _, s := range ts.Samples {
fmt.Printf("\tSample: %f %d\n", s.Value, s.Timestamp)
}
for _, ep := range ts.Exemplars {
e := ep.ToExemplar(&b, req.Symbols)
fmt.Printf("\tExemplar: %+v %f %d\n", e.Labels, e.Value, ep.Timestamp)
}
for _, hp := range ts.Histograms {
if hp.IsFloatHistogram() {
h := hp.ToFloatHistogram()
fmt.Printf("\tHistogram: %s\n", h.String())
continue
}
h := hp.ToIntHistogram()
fmt.Printf("\tHistogram: %s\n", h.String())
}
}
}

View file

@ -1,26 +1,26 @@
module github.com/prometheus/prometheus/documentation/examples/remote_storage
go 1.21
go 1.22.0
require (
github.com/alecthomas/kingpin/v2 v2.4.0
github.com/go-kit/log v0.2.1
github.com/gogo/protobuf v1.3.2
github.com/golang/snappy v0.0.4
github.com/influxdata/influxdb v1.11.5
github.com/prometheus/client_golang v1.19.1
github.com/prometheus/common v0.55.0
github.com/prometheus/prometheus v0.52.1
github.com/influxdata/influxdb v1.11.6
github.com/prometheus/client_golang v1.20.2
github.com/prometheus/common v0.57.0
github.com/prometheus/prometheus v0.53.1
github.com/stretchr/testify v1.9.0
)
require (
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.11.1 // indirect
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.5.2 // indirect
github.com/Azure/azure-sdk-for-go/sdk/internal v1.5.2 // indirect
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.6.0 // indirect
github.com/Azure/azure-sdk-for-go/sdk/internal v1.8.0 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 // indirect
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 // indirect
github.com/aws/aws-sdk-go v1.51.25 // indirect
github.com/aws/aws-sdk-go v1.55.5 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
@ -31,13 +31,11 @@ require (
github.com/go-logr/stdr v1.2.2 // indirect
github.com/golang-jwt/jwt/v5 v5.2.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd // indirect
github.com/hashicorp/go-version v1.6.0 // indirect
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.8 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
@ -49,23 +47,22 @@ require (
github.com/prometheus/common/sigv4 v0.1.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/xhit/go-str2duration/v2 v2.1.0 // indirect
go.opentelemetry.io/collector/featuregate v1.5.0 // indirect
go.opentelemetry.io/collector/pdata v1.5.0 // indirect
go.opentelemetry.io/collector/semconv v0.98.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.50.0 // indirect
go.opentelemetry.io/otel v1.25.0 // indirect
go.opentelemetry.io/otel/metric v1.25.0 // indirect
go.opentelemetry.io/otel/trace v1.25.0 // indirect
go.opentelemetry.io/collector/pdata v1.8.0 // indirect
go.opentelemetry.io/collector/semconv v0.101.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.52.0 // indirect
go.opentelemetry.io/otel v1.27.0 // indirect
go.opentelemetry.io/otel/metric v1.27.0 // indirect
go.opentelemetry.io/otel/trace v1.27.0 // indirect
go.uber.org/atomic v1.11.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/crypto v0.24.0 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/crypto v0.25.0 // indirect
golang.org/x/net v0.27.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/sys v0.22.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/time v0.5.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240415180920-8c6c420018be // indirect
google.golang.org/grpc v1.63.2 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240528184218-531527333157 // indirect
google.golang.org/grpc v1.65.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
@ -82,4 +79,10 @@ exclude (
cloud.google.com/go v0.34.0
cloud.google.com/go v0.65.0
cloud.google.com/go v0.82.0
// Fixing ambiguous import: found package google.golang.org/genproto/googleapis/api/annotations in multiple modules.
google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1
)
// TODO(bwplotka): Move to main branch commit or perhaps released version.
replace github.com/prometheus/prometheus => github.com/prometheus/prometheus v0.53.1-0.20240704074759-c137febfcf8c

Some files were not shown because too many files have changed in this diff Show more