Merge remote-tracking branch 'origin/main' into njpm/metadata-remote-write-wiring-2-update

Signed-off-by: Nicolás Pazos <npazosmendez@gmail.com>
Nicolás Pazos 2023-12-21 15:49:56 -03:00
commit 89a9484f6c
447 changed files with 36524 additions and 14640 deletions

.github/CODEOWNERS

@ -1,7 +1,10 @@
/web/ui @juliusv
/web/ui/module @juliusv @nexucis
/storage/remote @csmarchbanks @cstyan @bwplotka @tomwilkie
/storage/remote/otlptranslator @gouthamve @jesusvazquez
/discovery/kubernetes @brancz
/tsdb @codesome
/tsdb @jesusvazquez
/promql @roidelapluie
/cmd/promtool @dgl
/documentation/prometheus-mixin @metalmatze


@ -1,14 +1,16 @@
<!--
Please give your PR a title in the form "area: short description". For example "tsdb: reduce disk usage by 95%"
If your PR is to fix an issue, put "Fixes #issue-number" in the description.
Don't forget!
- Please sign CNCF's Developer Certificate of Origin and sign-off your commits by adding the -s / --sign-off flag to `git commit`. See https://github.com/apps/dco for more information.
- Please sign CNCF's Developer Certificate of Origin and sign-off your commits by adding the -s / --signoff flag to `git commit`. See https://github.com/apps/dco for more information.
- If the PR adds or changes a behaviour or fixes a bug of an exported API it would need a unit/e2e test.
- Where possible use only exported APIs for tests to simplify the review and make it as close as possible to an actual library usage.
- No tests are needed for internal implementation changes.
- Performance improvements would need a benchmark test to prove it.
- All exposed objects should have a comment.


@ -4,6 +4,13 @@ updates:
directory: "/"
schedule:
interval: "monthly"
groups:
k8s.io:
patterns:
- "k8s.io/*"
go.opentelemetry.io:
patterns:
- "go.opentelemetry.io/*"
- package-ecosystem: "gomod"
directory: "/documentation/examples/remote_storage"
schedule:
@ -16,6 +23,10 @@ updates:
directory: "/"
schedule:
interval: "monthly"
- package-ecosystem: "github-actions"
directory: "/scripts"
schedule:
interval: "monthly"
- package-ecosystem: "docker"
directory: "/"
schedule:


@ -4,19 +4,22 @@ on:
paths:
- ".github/workflows/buf-lint.yml"
- "**.proto"
permissions:
contents: read
jobs:
buf:
name: lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: bufbuild/buf-setup-action@v1.16.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: bufbuild/buf-setup-action@382440cdb8ec7bc25a68d7b4711163d95f7cc3aa # v1.28.1
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: bufbuild/buf-lint-action@v1
- uses: bufbuild/buf-lint-action@bd48f53224baaaf0fc55de9a913e7680ca6dbea4 # v1.0.3
with:
input: 'prompb'
- uses: bufbuild/buf-breaking-action@v1
- uses: bufbuild/buf-breaking-action@f47418c81c00bfd65394628385593542f64db477 # v1.1.2
with:
input: 'prompb'
against: 'https://github.com/prometheus/prometheus.git#branch=main,ref=HEAD,subdir=prompb'


@ -3,23 +3,27 @@ on:
push:
branches:
- main
permissions:
contents: read
jobs:
buf:
name: lint and publish
runs-on: ubuntu-latest
if: github.repository_owner == 'prometheus'
steps:
- uses: actions/checkout@v3
- uses: bufbuild/buf-setup-action@v1.16.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: bufbuild/buf-setup-action@382440cdb8ec7bc25a68d7b4711163d95f7cc3aa # v1.28.1
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: bufbuild/buf-lint-action@v1
- uses: bufbuild/buf-lint-action@bd48f53224baaaf0fc55de9a913e7680ca6dbea4 # v1.0.3
with:
input: 'prompb'
- uses: bufbuild/buf-breaking-action@v1
- uses: bufbuild/buf-breaking-action@f47418c81c00bfd65394628385593542f64db477 # v1.1.2
with:
input: 'prompb'
against: 'https://github.com/prometheus/prometheus.git#branch=main,ref=HEAD~1,subdir=prompb'
- uses: bufbuild/buf-push-action@v1
- uses: bufbuild/buf-push-action@342fc4cdcf29115a01cf12a2c6dd6aac68dc51e1 # v1.1.1
with:
input: 'prompb'
buf_token: ${{ secrets.BUF_TOKEN }}


@ -11,10 +11,10 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
container:
image: quay.io/prometheus/golang-builder:1.20-base
image: quay.io/prometheus/golang-builder:1.21-base
steps:
- uses: actions/checkout@v3
- uses: prometheus/promci@v0.1.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: ./.github/promci/actions/setup_environment
- run: make GO_ONLY=1 SKIP_GOLANGCI_LINT=1
- run: go test ./tsdb/ -test.tsdb-isolation=false
@ -32,11 +32,11 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
container:
image: quay.io/prometheus/golang-builder:1.20-base
image: quay.io/prometheus/golang-builder:1.21-base
steps:
- uses: actions/checkout@v3
- uses: prometheus/promci@v0.1.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: ./.github/promci/actions/setup_environment
with:
enable_go: false
@ -52,10 +52,10 @@ jobs:
name: Go tests on Windows
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-go@v4
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0
with:
go-version: '>=1.20 <1.21'
go-version: 1.21.x
- run: |
$TestTargets = go list ./... | Where-Object { $_ -NotMatch "(github.com/prometheus/prometheus/discovery.*|github.com/prometheus/prometheus/config|github.com/prometheus/prometheus/web)"}
go test $TestTargets -vet=off -v
@ -66,9 +66,9 @@ jobs:
runs-on: ubuntu-latest
# The go version in this image should be N-1 wrt test_go.
container:
image: quay.io/prometheus/golang-builder:1.19-base
image: quay.io/prometheus/golang-builder:1.20-base
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: make build
- run: go test ./tsdb/...
- run: go test ./tsdb/ -test.tsdb-isolation=false
@ -79,9 +79,9 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
container:
image: quay.io/prometheus/golang-builder:1.19-base
image: quay.io/prometheus/golang-builder:1.20-base
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: go install ./cmd/promtool/.
- run: go install github.com/google/go-jsonnet/cmd/jsonnet@latest
- run: go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
@ -104,8 +104,8 @@ jobs:
matrix:
thread: [ 0, 1, 2 ]
steps:
- uses: actions/checkout@v3
- uses: prometheus/promci@v0.1.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: ./.github/promci/actions/build
with:
promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386"
@ -127,8 +127,8 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
steps:
- uses: actions/checkout@v3
- uses: prometheus/promci@v0.1.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: ./.github/promci/actions/build
with:
parallelism: 12
@ -138,19 +138,21 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Install Go
uses: actions/setup-go@v4
uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0
with:
go-version: 1.20.x
cache: false
go-version: 1.21.x
- name: Install snmp_exporter/generator dependencies
run: sudo apt-get update && sudo apt-get -y install libsnmp-dev
if: github.repository == 'prometheus/snmp_exporter'
- name: Lint
uses: golangci/golangci-lint-action@v3.4.0
uses: golangci/golangci-lint-action@3a919529898de77ec3da873e3063ca4b10e7f5cc # v3.7.0
with:
args: --verbose
version: v1.51.2
# Make sure to sync this with Makefile.common and scripts/golangci-lint.yml.
version: v1.55.2
fuzzing:
uses: ./.github/workflows/fuzzing.yml
if: github.event_name == 'pull_request'
@ -163,8 +165,8 @@ jobs:
needs: [test_ui, test_go, test_windows, golangci, codeql, build_all]
if: github.event_name == 'push' && github.event.ref == 'refs/heads/main'
steps:
- uses: actions/checkout@v3
- uses: prometheus/promci@v0.1.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: ./.github/promci/actions/publish_main
with:
docker_hub_login: ${{ secrets.docker_hub_login }}
@ -172,13 +174,13 @@ jobs:
quay_io_login: ${{ secrets.quay_io_login }}
quay_io_password: ${{ secrets.quay_io_password }}
publish_release:
name: Publish release arfefacts
name: Publish release artefacts
runs-on: ubuntu-latest
needs: [test_ui, test_go, test_windows, golangci, codeql, build_all]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
steps:
- uses: actions/checkout@v3
- uses: prometheus/promci@v0.1.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: ./.github/promci/actions/publish_release
with:
docker_hub_login: ${{ secrets.docker_hub_login }}
@ -192,14 +194,14 @@ jobs:
needs: [test_ui, codeql]
steps:
- name: Checkout
uses: actions/checkout@v3
- uses: prometheus/promci@v0.1.0
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- name: Install nodejs
uses: actions/setup-node@v3
uses: actions/setup-node@5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d # v3.8.1
with:
node-version-file: "web/ui/.nvmrc"
registry-url: "https://registry.npmjs.org"
- uses: actions/cache@v3.3.1
- uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8 # v3.3.1
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}


@ -6,6 +6,10 @@ on:
schedule:
- cron: "26 14 * * 1"
permissions:
contents: read
security-events: write
jobs:
analyze:
name: Analyze
@ -20,18 +24,18 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v3
- uses: actions/setup-go@v4
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0
with:
go-version: '>=1.20 <1.21'
go-version: 1.21.x
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
uses: github/codeql-action/init@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8
with:
languages: ${{ matrix.language }}
- name: Autobuild
uses: github/codeql-action/autobuild@v2
uses: github/codeql-action/autobuild@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
uses: github/codeql-action/analyze@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8


@ -2,6 +2,9 @@ on:
repository_dispatch:
types: [funcbench_start]
name: Funcbench Workflow
permissions:
contents: read
jobs:
run_funcbench:
name: Running funcbench


@ -1,6 +1,9 @@
name: CIFuzz
on:
workflow_call:
permissions:
contents: read
jobs:
Fuzzing:
runs-on: ubuntu-latest
@ -18,7 +21,7 @@ jobs:
fuzz-seconds: 600
dry-run: false
- name: Upload Crash
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
if: failure() && steps.build.outcome == 'success'
with:
name: artifacts


@ -14,8 +14,9 @@ concurrency:
jobs:
action:
runs-on: ubuntu-latest
if: github.repository_owner == 'prometheus'
steps:
- uses: dessant/lock-threads@v4
- uses: dessant/lock-threads@1bf7ec25051fe7c00bdd17e6a7cf3d7bfb7dc771 # v5.0.1
with:
process-only: 'issues'
issue-inactive-days: '180'


@ -3,13 +3,17 @@ name: Sync repo files
on:
schedule:
- cron: '44 17 * * *'
permissions:
contents: read
jobs:
repo_sync:
runs-on: ubuntu-latest
if: github.repository_owner == 'prometheus'
container:
image: quay.io/prometheus/golang-builder
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: ./scripts/sync_repo_files.sh
env:
GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }}

.github/workflows/scorecards.yml (new file)

@ -0,0 +1,50 @@
# Copyright 2022 Google LLC
name: Scorecards supply-chain security
on:
pull_request:
push:
branches: [ "main" ]
# Declare default permissions as read only.
permissions: read-all
jobs:
analysis:
name: Scorecards analysis
runs-on: ubuntu-latest
permissions:
# Needed to upload the results to code-scanning dashboard.
security-events: write
# Used to receive a badge.
id-token: write
steps:
- name: "Checkout code"
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
with:
persist-credentials: false
- name: "Run analysis"
uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # tag=v2.3.1
with:
results_file: results.sarif
results_format: sarif
# Publish the results for public repositories to enable scorecard badges. For more details, see
# https://github.com/ossf/scorecard-action#publishing-results.
publish_results: ${{ github.event_name != 'pull_request' }}
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # tag=v3.1.3
with:
name: SARIF file
path: results.sarif
retention-days: 5
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@407ffafae6a767df3e0230c3df91b6443ae8df75 # tag=v2.22.8
with:
sarif_file: results.sarif


@ -3,6 +3,10 @@ run:
skip-files:
# Skip autogenerated files.
- ^.*\.(pb|y)\.go$
skip-dirs:
# Copied it from a different source
- storage/remote/otlptranslator/prometheusremotewrite
- storage/remote/otlptranslator/prometheus
output:
sort-results: true
@ -10,11 +14,16 @@ output:
linters:
enable:
- depguard
- errorlint
- gocritic
- godot
- gofumpt
- goimports
- revive
- misspell
- nolintlint
- predeclared
- revive
- testifylint
- unconvert
- unused
@ -27,17 +36,30 @@ issues:
- path: _test.go
linters:
- errcheck
- path: "tsdb/head_wal.go"
linters:
- errorlint
- linters:
- godot
source: "^// ==="
linters-settings:
depguard:
list-type: blacklist
include-go-root: true
packages-with-error-message:
- sync/atomic: "Use go.uber.org/atomic instead of sync/atomic"
- github.com/stretchr/testify/assert: "Use github.com/stretchr/testify/require instead of github.com/stretchr/testify/assert"
- github.com/go-kit/kit/log: "Use github.com/go-kit/log instead of github.com/go-kit/kit/log"
- io/ioutil: "Use corresponding 'os' or 'io' functions instead."
- regexp: "Use github.com/grafana/regexp instead of regexp"
rules:
main:
deny:
- pkg: "sync/atomic"
desc: "Use go.uber.org/atomic instead of sync/atomic"
- pkg: "github.com/stretchr/testify/assert"
desc: "Use github.com/stretchr/testify/require instead of github.com/stretchr/testify/assert"
- pkg: "github.com/go-kit/kit/log"
desc: "Use github.com/go-kit/log instead of github.com/go-kit/kit/log"
- pkg: "io/ioutil"
desc: "Use corresponding 'os' or 'io' functions instead."
- pkg: "regexp"
desc: "Use github.com/grafana/regexp instead of regexp"
- pkg: "github.com/pkg/errors"
desc: "Use 'errors' or 'fmt' instead of github.com/pkg/errors"
errcheck:
exclude-functions:
# Don't flag lines such as "io.Copy(io.Discard, resp.Body)".
@ -55,3 +77,56 @@ linters-settings:
local-prefixes: github.com/prometheus/prometheus
gofumpt:
extra-rules: true
revive:
# By default, revive will enable only the linting rules that are named in the configuration file.
# So, it's needed to explicitly set in configuration all required rules.
# The following configuration enables all the rules from the defaults.toml
# https://github.com/mgechev/revive/blob/master/defaults.toml
rules:
# https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md
- name: blank-imports
- name: context-as-argument
arguments:
# allow functions with test or bench signatures
- allowTypesBefore: "*testing.T,testing.TB"
- name: context-keys-type
- name: dot-imports
# A lot of false positives: incorrectly identifies channel draining as "empty code block".
# See https://github.com/mgechev/revive/issues/386
- name: empty-block
disabled: true
- name: error-naming
- name: error-return
- name: error-strings
- name: errorf
- name: exported
- name: increment-decrement
- name: indent-error-flow
- name: package-comments
- name: range
- name: receiver-naming
- name: redefines-builtin-id
- name: superfluous-else
- name: time-naming
- name: unexported-return
- name: unreachable-code
- name: unused-parameter
disabled: true
- name: var-declaration
- name: var-naming
testifylint:
disable:
- float-compare
- go-require
enable:
- bool-compare
- compares
- empty
- error-is-as
- error-nil
- expected-actual
- len
- require-error
- suite-dont-use-pkg
- suite-extra-assert-call


@ -1,7 +1,7 @@
go:
# Whenever the Go version is updated here,
# .circle/config.yml should also be updated.
version: 1.20
version: 1.21
repository:
path: github.com/prometheus/prometheus
build:
@ -14,9 +14,10 @@ build:
all:
- netgo
- builtinassets
- stringlabels
windows:
- builtinassets
flags: -a
- stringlabels
ldflags: |
-X github.com/prometheus/common/version.Version={{.Version}}
-X github.com/prometheus/common/version.Revision={{.Revision}}


@ -20,5 +20,4 @@ rules:
config/testdata/section_key_dup.bad.yml
line-length: disable
truthy:
ignore: |
.github/workflows/*.yml
check-keys: false


@ -1,5 +1,139 @@
# Changelog
## unreleased
* [ENHANCEMENT] TSDB: Make the wlog watcher read segments synchronously when not tailing. #13224
* [BUGFIX] Agent: Participate in notify calls. #13223
## 2.48.0 / 2023-11-16
* [CHANGE] Remote-write: respect Retry-After header on 5xx errors. #12677
* [FEATURE] Alerting: Add AWS SigV4 authentication support for Alertmanager endpoints. #12774
* [FEATURE] Promtool: Add support for histograms in the TSDB dump command. #12775
* [FEATURE] PromQL: Add warnings (and annotations) to PromQL query results. #12152 #12982 #12988 #13012
* [FEATURE] Remote-write: Add Azure AD OAuth authentication support for remote write requests. #12572
* [ENHANCEMENT] Remote-write: Add a header to count retried remote write requests. #12729
* [ENHANCEMENT] TSDB: Improve query performance by re-using iterator when moving between series. #12757
* [ENHANCEMENT] UI: Move /targets page discovered labels to expandable section #12824
* [ENHANCEMENT] TSDB: Optimize WBL loading by not sending empty buffers over channel. #12808
* [ENHANCEMENT] TSDB: Replay WBL mmap markers concurrently. #12801
* [ENHANCEMENT] Promtool: Add support for specifying series matchers in the TSDB analyze command. #12842
* [ENHANCEMENT] PromQL: Prevent Prometheus from overallocating memory on subquery with large amount of steps. #12734
* [ENHANCEMENT] PromQL: Add warning when monotonicity is forced in the input to histogram_quantile. #12931
* [ENHANCEMENT] Scraping: Optimize sample appending by reducing garbage. #12939
* [ENHANCEMENT] Storage: Reduce memory allocations in queries that merge series sets. #12938
* [ENHANCEMENT] UI: Show group interval in rules display. #12943
* [ENHANCEMENT] Scraping: Save memory when scraping by delaying creation of buffer. #12953
* [ENHANCEMENT] Agent: Allow ingestion of out-of-order samples. #12897
* [ENHANCEMENT] Promtool: Improve support for native histograms in TSDB analyze command. #12869
* [ENHANCEMENT] Scraping: Add configuration option for tracking staleness of scraped timestamps. #13060
* [BUGFIX] SD: Ensure that discovery managers are properly canceled. #10569
* [BUGFIX] TSDB: Fix PostingsForMatchers race with creating new series. #12558
* [BUGFIX] TSDB: Fix handling of explicit counter reset header in histograms. #12772
* [BUGFIX] SD: Validate HTTP client configuration in HTTP, EC2, Azure, Uyuni, PuppetDB, and Lightsail SDs. #12762 #12811 #12812 #12815 #12814 #12816
* [BUGFIX] TSDB: Fix counter reset edgecases causing native histogram panics. #12838
* [BUGFIX] TSDB: Fix duplicate sample detection at chunk size limit. #12874
* [BUGFIX] Promtool: Fix errors not being reported in check rules command. #12715
* [BUGFIX] TSDB: Avoid panics reported in logs when head initialization takes a long time. #12876
* [BUGFIX] TSDB: Ensure that WBL is repaired when possible. #12406
* [BUGFIX] Storage: Fix crash caused by incorrect mixed samples handling. #13055
* [BUGFIX] TSDB: Fix compactor failures by adding min time to histogram chunks. #13062
## 2.47.1 / 2023-10-04
* [BUGFIX] Fix duplicate sample detection at chunk size limit #12874
## 2.47.0 / 2023-09-06
This release adds an experimental OpenTelemetry (OTLP) ingestion feature,
and also a new setting `keep_dropped_targets` to limit the number of dropped
targets held in memory. This defaults to 0, meaning 'no limit', so we encourage
users running large Prometheus setups to try setting a limit such as 100.
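As a rough illustration of the semantics described above (this is not the Prometheus implementation; the `droppedTarget` type and `capDropped` helper are made-up names for the sketch), a limit of 0 is simply treated as "keep everything":

```go
package main

import "fmt"

// droppedTarget stands in for a target discarded by relabeling; the real
// Prometheus type carries much more information.
type droppedTarget struct{ address string }

// capDropped applies a keep_dropped_targets-style limit: 0 means "no limit",
// any other value caps how many dropped targets are kept in memory.
func capDropped(dropped []droppedTarget, limit uint) []droppedTarget {
	if limit == 0 || uint(len(dropped)) <= limit {
		return dropped
	}
	return dropped[:limit]
}

func main() {
	targets := []droppedTarget{{"a:9090"}, {"b:9090"}, {"c:9090"}}
	fmt.Println(len(capDropped(targets, 0))) // 3: the default of 0 keeps everything
	fmt.Println(len(capDropped(targets, 2))) // 2: capped at the configured limit
}
```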
* [FEATURE] Web: Add OpenTelemetry (OTLP) Ingestion endpoint. #12571 #12643
* [FEATURE] Scraping: Optionally limit detail on dropped targets, to save memory. #12647
* [ENHANCEMENT] TSDB: Write head chunks to disk in the background to reduce blocking. #11818
* [ENHANCEMENT] PromQL: Speed up aggregate and function queries. #12682
* [ENHANCEMENT] PromQL: More efficient evaluation of query with `timestamp()`. #12579
* [ENHANCEMENT] API: Faster streaming of Labels to JSON. #12598
* [ENHANCEMENT] Agent: Memory pooling optimisation. #12651
* [ENHANCEMENT] TSDB: Prevent storage space leaks due to terminated snapshots on shutdown. #12664
* [ENHANCEMENT] Histograms: Refactoring and optimisations. #12352 #12584 #12596 #12711 #12054
* [ENHANCEMENT] Histograms: Add `histogram_stdvar` and `histogram_stddev` functions. #12614
* [ENHANCEMENT] Remote-write: add http.resend_count tracing attribute. #12676
* [ENHANCEMENT] TSDB: Support native histograms in snapshot on shutdown. #12722
* [BUGFIX] TSDB/Agent: ensure that new series get written to WAL on rollback. #12592
* [BUGFIX] Scraping: fix infinite loop on exemplar in protobuf format. #12737
## 2.46.0 / 2023-07-25
* [FEATURE] Promtool: Add PromQL format and label matcher set/delete commands to promtool. #11411
* [FEATURE] Promtool: Add push metrics command. #12299
* [ENHANCEMENT] Promtool: Read from stdin if no filenames are provided in check rules. #12225
* [ENHANCEMENT] Hetzner SD: Support larger IDs that will be used by Hetzner in September. #12569
* [ENHANCEMENT] Kubernetes SD: Add more labels for endpointslice and endpoints role. #10914
* [ENHANCEMENT] Kubernetes SD: Do not add pods to target group if the PodIP status is not set. #11642
* [ENHANCEMENT] OpenStack SD: Include instance image ID in labels. #12502
* [ENHANCEMENT] Remote Write receiver: Validate the metric names and labels. #11688
* [ENHANCEMENT] Web: Initialize `prometheus_http_requests_total` metrics with `code` label set to `200`. #12472
* [ENHANCEMENT] TSDB: Add Zstandard compression option for wlog. #11666
* [ENHANCEMENT] TSDB: Support native histograms in snapshot on shutdown. #12258
* [ENHANCEMENT] Labels: Avoid compiling regexes that are literal. #12434
* [BUGFIX] Histograms: Fix parsing of float histograms without zero bucket. #12577
* [BUGFIX] Histograms: Fix scraping native and classic histograms missing some histograms. #12554
* [BUGFIX] Histograms: Enable ingestion of multiple exemplars per sample. #12557
* [BUGFIX] File SD: Fix path handling in File-SD watcher to allow directory monitoring on Windows. #12488
* [BUGFIX] Linode SD: Cast `InstanceSpec` values to `int64` to avoid overflows on 386 architecture. #12568
* [BUGFIX] PromQL Engine: Include query parsing in active-query tracking. #12418
* [BUGFIX] TSDB: Handle TOC parsing failures. #10623
## 2.45.0 / 2023-06-23
This release is a LTS (Long-Term Support) release of Prometheus and will
receive security, documentation and bugfix patches for at least 12 months.
Please read more about our LTS release cycle at
<https://prometheus.io/docs/introduction/release-cycle/>.
* [FEATURE] API: New limit parameter to limit the number of items returned by `/api/v1/status/tsdb` endpoint. #12336
* [FEATURE] Config: Add limits to global config. #12126
* [FEATURE] Consul SD: Added support for `path_prefix`. #12372
* [FEATURE] Native histograms: Add option to scrape both classic and native histograms. #12350
* [FEATURE] Native histograms: Added support for two more arithmetic operators `avg_over_time` and `sum_over_time`. #12262
* [FEATURE] Promtool: When providing the block id, only one block will be loaded and analyzed. #12031
* [FEATURE] Remote-write: New Azure ad configuration to support remote writing directly to Azure Monitor workspace. #11944
* [FEATURE] TSDB: Samples per chunk are now configurable with flag `storage.tsdb.samples-per-chunk`. By default set to its former value 120. #12055
* [ENHANCEMENT] Native histograms: bucket size can now be limited to avoid scrape failures. #12254
* [ENHANCEMENT] TSDB: Dropped series are now deleted from the WAL sooner. #12297
* [BUGFIX] Native histograms: ChunkSeries iterator now checks if a new sample can be appended to the open chunk. #12185
* [BUGFIX] Native histograms: Fix Histogram Appender `Appendable()` segfault. #12357
* [BUGFIX] Native histograms: Fix setting reset header to gauge histograms in seriesToChunkEncoder. #12329
* [BUGFIX] TSDB: Tombstone intervals are not modified after Get() call. #12245
* [BUGFIX] TSDB: Use path/filepath to set the WAL directory. #12349
## 2.44.0 / 2023-05-13
This version is built with Go tag `stringlabels`, to use the smaller data
structure for Labels that was optional in the previous release. For more
details about this code change see #10991.
* [CHANGE] Remote-write: Raise default samples per send to 2,000. #12203
* [FEATURE] Remote-read: Handle native histograms. #12085, #12192
* [FEATURE] Promtool: Health and readiness check of prometheus server in CLI. #12096
* [FEATURE] PromQL: Add `query_samples_total` metric, the total number of samples loaded by all queries. #12251
* [ENHANCEMENT] Storage: Optimise buffer used to iterate through samples. #12326
* [ENHANCEMENT] Scrape: Reduce memory allocations on target labels. #12084
* [ENHANCEMENT] PromQL: Use faster heap method for `topk()` / `bottomk()`. #12190
* [ENHANCEMENT] Rules API: Allow filtering by rule name. #12270
* [ENHANCEMENT] Native Histograms: Various fixes and improvements. #11687, #12264, #12272
* [ENHANCEMENT] UI: Search of scraping pools is now case-insensitive. #12207
* [ENHANCEMENT] TSDB: Add an affirmative log message for successful WAL repair. #12135
* [BUGFIX] TSDB: Block compaction failed when shutting down. #12179
* [BUGFIX] TSDB: Out-of-order chunks could be ignored if the write-behind log was deleted. #12127
## 2.43.1 / 2023-05-03
* [BUGFIX] Labels: `Set()` after `Del()` would be ignored, which broke some relabeling rules. #12322
## 2.43.0 / 2023-03-21
We are working on some performance improvements in Prometheus, which are only
@ -13,9 +147,9 @@ the gains on their production architecture. We are providing release artefacts
improvements for testing. #10991
* [FEATURE] Promtool: Add HTTP client configuration to query commands. #11487
* [FEATURE] Scrape: Add `include_scrape_configs` to include scrape configs from different files. #12019
* [FEATURE] Scrape: Add `scrape_config_files` to include scrape configs from different files. #12019
* [FEATURE] HTTP client: Add `no_proxy` to exclude URLs from proxied requests. #12098
* [FEATURE] HTTP client: Add `proxy_from_enviroment` to read proxies from env variables. #12098
* [FEATURE] HTTP client: Add `proxy_from_environment` to read proxies from env variables. #12098
* [ENHANCEMENT] API: Add support for setting lookback delta per query via the API. #12088
* [ENHANCEMENT] API: Change HTTP status code from 503/422 to 499 if a request is canceled. #11897
* [ENHANCEMENT] Scrape: Allow exemplars for all metric types. #11984


@ -85,7 +85,7 @@ The PromQL parser grammar is located in `promql/parser/generated_parser.y` and i
The parser is built using [goyacc](https://pkg.go.dev/golang.org/x/tools/cmd/goyacc).
If you need to debug the parser, it is possible to add some verbose output. After generating the parser, you
can modify the the `./promql/parser/generated_parser.y.go` manually.
can modify the `./promql/parser/generated_parser.y.go` manually.
```golang
// As of writing this was somewhere around line 600.
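// The diff hunk is cut off here. As a hedged sketch (the exact names and
// layout in generated_parser.y.go are assumptions, not verbatim from the
// file), goyacc-generated parsers expose debug knobs roughly like this,
// which can be raised temporarily to get verbose parser traces and errors:
var (
	yyDebug        = 3    // 0 disables debug output; higher values are noisier.
	yyErrorVerbose = true // include the expected-token list in parse errors.
)
```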


@ -7,7 +7,7 @@ Julien Pivotto (<roidelapluie@prometheus.io> / @roidelapluie) and Levi Harrison
* `discovery`
* `k8s`: Frederic Branczyk (<fbranczyk@gmail.com> / @brancz)
* `documentation`
* `prometheus-mixin`: Björn Rabenstein (<beorn@grafana.com> / @beorn7)
* `prometheus-mixin`: Matthias Loibl (<mail@matthiasloibl.com> / @metalmatze)
* `storage`
* `remote`: Chris Marchbanks (<csmarchbanks@gmail.com> / @csmarchbanks), Callum Styan (<callumstyan@gmail.com> / @cstyan), Bartłomiej Płotka (<bwplotka@gmail.com> / @bwplotka), Tom Wilkie (<tom.wilkie@gmail.com> / @tomwilkie)
* `tsdb`: Ganesh Vernekar (<ganesh@grafana.com> / @codesome), Bartłomiej Płotka (<bwplotka@gmail.com> / @bwplotka), Jesús Vázquez (<jesus.vazquez@grafana.com> / @jesusvazquez)


@ -82,7 +82,7 @@ assets-tarball: assets
.PHONY: parser
parser:
@echo ">> running goyacc to generate the .go file."
ifeq (, $(shell which goyacc))
ifeq (, $(shell command -v goyacc > /dev/null))
@echo "goyacc not installed so skipping"
@echo "To install: go install golang.org/x/tools/cmd/goyacc@v0.6.0"
else
@ -93,9 +93,9 @@ endif
# If we only want to test go code we have to change the test target
# which is called by all.
ifeq ($(GO_ONLY),1)
test: common-test
test: common-test check-go-mod-version
else
test: common-test ui-build-module ui-test ui-lint
test: common-test ui-build-module ui-test ui-lint check-go-mod-version
endif
.PHONY: npm_licenses
@ -138,3 +138,17 @@ bench_tsdb: $(PROMU)
cli-documentation:
$(GO) run ./cmd/prometheus/ --write-documentation > docs/command-line/prometheus.md
$(GO) run ./cmd/promtool/ write-documentation > docs/command-line/promtool.md
.PHONY: check-go-mod-version
check-go-mod-version:
@echo ">> checking go.mod version matching"
@./scripts/check-go-mod-version.sh
.PHONY: update-all-go-deps
update-all-go-deps:
@$(MAKE) update-go-deps
@echo ">> updating Go dependencies in ./documentation/examples/remote_storage/"
@cd ./documentation/examples/remote_storage/ && for m in $$($(GO) list -mod=readonly -m -f '{{ if and (not .Indirect) (not .Main)}}{{.Path}}{{end}}' all); do \
$(GO) get -d $$m; \
done
@cd ./documentation/examples/remote_storage/ && $(GO) mod tidy


@ -49,19 +49,19 @@ endif
GOTEST := $(GO) test
GOTEST_DIR :=
ifneq ($(CIRCLE_JOB),)
ifneq ($(shell which gotestsum),)
ifneq ($(shell command -v gotestsum > /dev/null),)
GOTEST_DIR := test-results
GOTEST := gotestsum --junitfile $(GOTEST_DIR)/unit-tests.xml --
endif
endif
PROMU_VERSION ?= 0.14.0
PROMU_VERSION ?= 0.15.0
PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz
SKIP_GOLANGCI_LINT :=
GOLANGCI_LINT :=
GOLANGCI_LINT_OPTS ?=
GOLANGCI_LINT_VERSION ?= v1.51.2
GOLANGCI_LINT_VERSION ?= v1.55.2
# golangci-lint only supports linux, darwin and windows platforms on i386/amd64.
# windows isn't included here because of the path separator being different.
ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))
@ -178,7 +178,7 @@ endif
.PHONY: common-yamllint
common-yamllint:
@echo ">> running yamllint on all YAML files in the repository"
ifeq (, $(shell which yamllint))
ifeq (, $(shell command -v yamllint > /dev/null))
@echo "yamllint not installed so skipping"
else
yamllint .


@ -14,6 +14,7 @@ examples and guides.</p>
[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/486/badge)](https://bestpractices.coreinfrastructure.org/projects/486)
[![Gitpod ready-to-code](https://img.shields.io/badge/Gitpod-ready--to--code-blue?logo=gitpod)](https://gitpod.io/#https://github.com/prometheus/prometheus)
[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/prometheus.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:prometheus)
[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/prometheus/prometheus/badge)](https://api.securityscorecards.dev/projects/github.com/prometheus/prometheus)
</div>
@ -34,7 +35,7 @@ The features that distinguish Prometheus from other metrics and monitoring syste
## Architecture overview
![Architecture overview](https://cdn.jsdelivr.net/gh/prometheus/prometheus@c34257d069c630685da35bcef084632ffd5d6209/documentation/images/architecture.svg)
![Architecture overview](documentation/images/architecture.svg)
## Install


@ -49,7 +49,12 @@ Release cadence of first pre-releases being cut is 6 weeks.
| v2.42 | 2023-01-25 | Kemal Akkoyun (GitHub: @kakkoyun) |
| v2.43 | 2023-03-08 | Julien Pivotto (GitHub: @roidelapluie) |
| v2.44 | 2023-04-19 | Bryan Boreham (GitHub: @bboreham) |
| v2.45 | 2023-05-31 | **searching for volunteer** |
| v2.45 LTS | 2023-05-31 | Jesus Vazquez (Github: @jesusvazquez) |
| v2.46 | 2023-07-12 | Julien Pivotto (GitHub: @roidelapluie) |
| v2.47 | 2023-08-23 | Bryan Boreham (GitHub: @bboreham) |
| v2.48 | 2023-10-04 | Levi Harrison (GitHub: @LeviHarrison) |
| v2.49 | 2023-12-05 | Bartek Plotka (GitHub: @bwplotka) |
| v2.50 | 2024-01-16 | **searching for volunteer** |
If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.
@ -104,14 +109,16 @@ This is also a good time to consider any experimental features and feature
flags for promotion to stable or for deprecation or ultimately removal. Do any
of these in pull requests, one per feature.
> NOTE: As a validation step, check that all security alerts are closed after this step: https://github.com/prometheus/prometheus/security/dependabot. Sometimes it is OK to leave an alert open
> if it is not critical, the fix is not yet released (or does not relate to
> upgrading), or we are unaffected.
#### Manually updating Go dependencies
This is usually only needed for `+incompatible` and `v0.0.0` non-semver updates.
```bash
make update-go-deps
git add go.mod go.sum
git commit -m "Update dependencies"
make update-all-go-deps
```
#### Manually updating React dependencies


@ -1 +1 @@
2.43.0
2.48.0


@ -12,7 +12,6 @@
// limitations under the License.
// The main package for the Prometheus server executable.
// nolint:revive // Many unsued function arguments in this file by design.
package main
import (
@ -64,6 +63,7 @@ import (
"github.com/prometheus/prometheus/notifier"
_ "github.com/prometheus/prometheus/plugins" // Register plugins.
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/scrape"
"github.com/prometheus/prometheus/storage"
@ -71,6 +71,7 @@ import (
"github.com/prometheus/prometheus/tracing"
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/tsdb/agent"
"github.com/prometheus/prometheus/tsdb/wlog"
"github.com/prometheus/prometheus/util/documentcli"
"github.com/prometheus/prometheus/util/logging"
prom_runtime "github.com/prometheus/prometheus/util/runtime"
@ -169,6 +170,9 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
case "remote-write-receiver":
c.web.EnableRemoteWriteReceiver = true
level.Warn(logger).Log("msg", "Remote write receiver enabled via feature flag remote-write-receiver. This is DEPRECATED. Use --web.enable-remote-write-receiver.")
case "otlp-write-receiver":
c.web.EnableOTLPWriteReceiver = true
level.Info(logger).Log("msg", "Experimental OTLP write receiver enabled")
case "expand-external-labels":
c.enableExpandExternalLabels = true
level.Info(logger).Log("msg", "Experimental expand-external-labels enabled")
@ -199,10 +203,21 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
case "no-default-scrape-port":
c.scrape.NoDefaultPort = true
level.Info(logger).Log("msg", "No default port will be appended to scrape targets' addresses.")
case "promql-experimental-functions":
parser.EnableExperimentalFunctions = true
level.Info(logger).Log("msg", "Experimental PromQL functions enabled.")
case "native-histograms":
c.tsdb.EnableNativeHistograms = true
c.scrape.EnableProtobufNegotiation = true
level.Info(logger).Log("msg", "Experimental native histogram support enabled.")
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "created-timestamp-zero-ingestion":
c.scrape.EnableCreatedTimestampZeroIngestion = true
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
level.Info(logger).Log("msg", "Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "":
continue
case "promql-at-modifier", "promql-negative-offset":
@ -213,11 +228,6 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
}
}
if c.tsdb.EnableNativeHistograms && c.tsdb.EnableMemorySnapshotOnShutdown {
c.tsdb.EnableMemorySnapshotOnShutdown = false
level.Warn(logger).Log("msg", "memory-snapshot-on-shutdown has been disabled automatically because memory-snapshot-on-shutdown and native-histograms cannot be enabled at the same time.")
}
return nil
}
@ -337,9 +347,15 @@ func main() {
serverOnlyFlag(a, "storage.tsdb.wal-compression", "Compress the tsdb WAL.").
Hidden().Default("true").BoolVar(&cfg.tsdb.WALCompression)
serverOnlyFlag(a, "storage.tsdb.wal-compression-type", "Compression algorithm for the tsdb WAL.").
Hidden().Default(string(wlog.CompressionSnappy)).EnumVar(&cfg.tsdb.WALCompressionType, string(wlog.CompressionSnappy), string(wlog.CompressionZstd))
serverOnlyFlag(a, "storage.tsdb.head-chunks-write-queue-size", "Size of the queue through which head chunks are written to the disk to be m-mapped, 0 disables the queue completely. Experimental.").
Default("0").IntVar(&cfg.tsdb.HeadChunksWriteQueueSize)
serverOnlyFlag(a, "storage.tsdb.samples-per-chunk", "Target number of samples per chunk.").
Default("120").Hidden().IntVar(&cfg.tsdb.SamplesPerChunk)
agentOnlyFlag(a, "storage.agent.path", "Base path for metrics storage.").
Default("data-agent/").StringVar(&cfg.agentStoragePath)
@ -350,6 +366,9 @@ func main() {
agentOnlyFlag(a, "storage.agent.wal-compression", "Compress the agent WAL.").
Default("true").BoolVar(&cfg.agent.WALCompression)
agentOnlyFlag(a, "storage.agent.wal-compression-type", "Compression algorithm for the agent WAL.").
Hidden().Default(string(wlog.CompressionSnappy)).EnumVar(&cfg.agent.WALCompressionType, string(wlog.CompressionSnappy), string(wlog.CompressionZstd))
agentOnlyFlag(a, "storage.agent.wal-truncate-frequency",
"The frequency at which to truncate the WAL and remove old data.").
Hidden().PlaceHolder("<duration>").SetValue(&cfg.agent.TruncateFrequency)
@ -413,7 +432,7 @@ func main() {
a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-at-modifier, promql-negative-offset, promql-per-step-stats, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, metadata-storage. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-at-modifier, promql-negative-offset, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, metadata-storage. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
Default("").StringsVar(&cfg.featureList)
promlogflag.AddFlags(a, &cfg.promlogConfig)
@ -429,7 +448,7 @@ func main() {
_, err := a.Parse(os.Args[1:])
if err != nil {
fmt.Fprintln(os.Stderr, fmt.Errorf("Error parsing commandline arguments: %w", err))
fmt.Fprintln(os.Stderr, fmt.Errorf("Error parsing command line arguments: %w", err))
a.Usage(os.Args[1:])
os.Exit(2)
}
@ -610,18 +629,66 @@ func main() {
discoveryManagerNotify discoveryManager
)
// Kubernetes client metrics are used by Kubernetes SD.
// They are registered here in the main function, because SD mechanisms
// can only register metrics specific to a SD instance.
// Kubernetes client metrics are the same for the whole process -
// they are not specific to an SD instance.
err = discovery.RegisterK8sClientMetricsWithPrometheus(prometheus.DefaultRegisterer)
if err != nil {
level.Error(logger).Log("msg", "failed to register Kubernetes client metrics", "err", err)
os.Exit(1)
}
if cfg.enableNewSDManager {
discovery.RegisterMetrics()
discoveryManagerScrape = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), discovery.Name("scrape"))
discoveryManagerNotify = discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), discovery.Name("notify"))
{
discMgr := discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, discovery.Name("scrape"))
if discMgr == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager scrape")
os.Exit(1)
}
discoveryManagerScrape = discMgr
}
{
discMgr := discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, discovery.Name("notify"))
if discMgr == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager notify")
os.Exit(1)
}
discoveryManagerNotify = discMgr
}
} else {
legacymanager.RegisterMetrics()
discoveryManagerScrape = legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), legacymanager.Name("scrape"))
discoveryManagerNotify = legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), legacymanager.Name("notify"))
{
discMgr := legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, legacymanager.Name("scrape"))
if discMgr == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager scrape")
os.Exit(1)
}
discoveryManagerScrape = discMgr
}
{
discMgr := legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, legacymanager.Name("notify"))
if discMgr == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager notify")
os.Exit(1)
}
discoveryManagerNotify = discMgr
}
}
scrapeManager, err := scrape.NewManager(
&cfg.scrape,
log.With(logger, "component", "scrape manager"),
fanoutStorage,
prometheus.DefaultRegisterer,
)
if err != nil {
level.Error(logger).Log("msg", "failed to create a scrape manager", "err", err)
os.Exit(1)
}
var (
scrapeManager = scrape.NewManager(&cfg.scrape, log.With(logger, "component", "scrape manager"), fanoutStorage)
tracingManager = tracing.NewManager(logger)
queryEngine *promql.Engine
@ -1059,6 +1126,7 @@ func main() {
startTimeMargin := int64(2 * time.Duration(cfg.tsdb.MinBlockDuration).Seconds() * 1000)
localStorage.Set(db, startTimeMargin)
db.SetWriteNotified(remoteStorage)
close(dbOpen)
<-cancel
return nil
@ -1112,6 +1180,7 @@ func main() {
)
localStorage.Set(db, 0)
db.SetWriteNotified(remoteStorage)
close(dbOpen)
<-cancel
return nil
@ -1272,7 +1341,7 @@ func startsOrEndsWithQuote(s string) bool {
strings.HasSuffix(s, "\"") || strings.HasSuffix(s, "'")
}
// compileCORSRegexString compiles given string and adds anchors
// compileCORSRegexString compiles given string and adds anchors.
func compileCORSRegexString(s string) (*regexp.Regexp, error) {
r, err := relabel.NewRegexp(s)
if err != nil {
@ -1378,17 +1447,17 @@ func (s *readyStorage) StartTime() (int64, error) {
}
// Querier implements the Storage interface.
func (s *readyStorage) Querier(ctx context.Context, mint, maxt int64) (storage.Querier, error) {
func (s *readyStorage) Querier(mint, maxt int64) (storage.Querier, error) {
if x := s.get(); x != nil {
return x.Querier(ctx, mint, maxt)
return x.Querier(mint, maxt)
}
return nil, tsdb.ErrNotReady
}
// ChunkQuerier implements the Storage interface.
func (s *readyStorage) ChunkQuerier(ctx context.Context, mint, maxt int64) (storage.ChunkQuerier, error) {
func (s *readyStorage) ChunkQuerier(mint, maxt int64) (storage.ChunkQuerier, error) {
if x := s.get(); x != nil {
return x.ChunkQuerier(ctx, mint, maxt)
return x.ChunkQuerier(mint, maxt)
}
return nil, tsdb.ErrNotReady
}
@ -1433,6 +1502,10 @@ func (n notReadyAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels,
return 0, tsdb.ErrNotReady
}
func (n notReadyAppender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) {
return 0, tsdb.ErrNotReady
}
func (n notReadyAppender) Commit() error { return tsdb.ErrNotReady }
func (n notReadyAppender) Rollback() error { return tsdb.ErrNotReady }
@ -1461,11 +1534,11 @@ func (s *readyStorage) CleanTombstones() error {
}
// Delete implements the api_v1.TSDBAdminStats and api_v2.TSDBAdmin interfaces.
func (s *readyStorage) Delete(mint, maxt int64, ms ...*labels.Matcher) error {
func (s *readyStorage) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Matcher) error {
if x := s.get(); x != nil {
switch db := x.(type) {
case *tsdb.DB:
return db.Delete(mint, maxt, ms...)
return db.Delete(ctx, mint, maxt, ms...)
case *agent.DB:
return agent.ErrUnsupported
default:
@ -1491,11 +1564,11 @@ func (s *readyStorage) Snapshot(dir string, withHead bool) error {
}
// Stats implements the api_v1.TSDBAdminStats interface.
func (s *readyStorage) Stats(statsByLabelName string) (*tsdb.Stats, error) {
func (s *readyStorage) Stats(statsByLabelName string, limit int) (*tsdb.Stats, error) {
if x := s.get(); x != nil {
switch db := x.(type) {
case *tsdb.DB:
return db.Head().Stats(statsByLabelName), nil
return db.Head().Stats(statsByLabelName, limit), nil
case *agent.DB:
return nil, agent.ErrUnsupported
default:
@ -1551,7 +1624,9 @@ type tsdbOptions struct {
MaxBytes units.Base2Bytes
NoLockfile bool
WALCompression bool
WALCompressionType string
HeadChunksWriteQueueSize int
SamplesPerChunk int
StripeSize int
MinBlockDuration model.Duration
MaxBlockDuration model.Duration
@ -1569,9 +1644,9 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
RetentionDuration: int64(time.Duration(opts.RetentionDuration) / time.Millisecond),
MaxBytes: int64(opts.MaxBytes),
NoLockfile: opts.NoLockfile,
AllowOverlappingCompaction: true,
WALCompression: opts.WALCompression,
WALCompression: wlog.ParseCompressionType(opts.WALCompression, opts.WALCompressionType),
HeadChunksWriteQueueSize: opts.HeadChunksWriteQueueSize,
SamplesPerChunk: opts.SamplesPerChunk,
StripeSize: opts.StripeSize,
MinBlockDuration: int64(time.Duration(opts.MinBlockDuration) / time.Millisecond),
MaxBlockDuration: int64(time.Duration(opts.MaxBlockDuration) / time.Millisecond),
@ -1588,6 +1663,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
type agentOptions struct {
WALSegmentSize units.Base2Bytes
WALCompression bool
WALCompressionType string
StripeSize int
TruncateFrequency model.Duration
MinWALTime, MaxWALTime model.Duration
@ -1597,7 +1673,7 @@ type agentOptions struct {
func (opts agentOptions) ToAgentOptions() agent.Options {
return agent.Options{
WALSegmentSize: int(opts.WALSegmentSize),
WALCompression: opts.WALCompression,
WALCompression: wlog.ParseCompressionType(opts.WALCompression, opts.WALCompressionType),
StripeSize: opts.StripeSize,
TruncateFrequency: time.Duration(opts.TruncateFrequency),
MinWALTime: durationToInt64Millis(time.Duration(opts.MinWALTime)),


@ -346,7 +346,7 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames
continue
}
require.Equal(t, 1, len(g.GetMetric()))
require.Len(t, g.GetMetric(), 1)
if _, ok := res[m]; ok {
t.Error("expected only one metric family for", m)
t.FailNow()
@ -498,10 +498,9 @@ func TestDocumentation(t *testing.T) {
cmd.Stdout = &stdout
if err := cmd.Run(); err != nil {
if exitError, ok := err.(*exec.ExitError); ok {
if exitError.ExitCode() != 0 {
fmt.Println("Command failed with non-zero exit code")
}
var exitError *exec.ExitError
if errors.As(err, &exitError) && exitError.ExitCode() != 0 {
fmt.Println("Command failed with non-zero exit code")
}
}


@ -284,7 +284,7 @@ func (p *queryLogTest) run(t *testing.T) {
if !p.enabledAtStart {
p.query(t)
require.Equal(t, 0, len(readQueryLog(t, queryLogFile.Name())))
require.Empty(t, readQueryLog(t, queryLogFile.Name()))
p.setQueryLog(t, queryLogFile.Name())
p.reloadConfig(t)
}
@ -309,7 +309,7 @@ func (p *queryLogTest) run(t *testing.T) {
p.query(t)
ql = readQueryLog(t, queryLogFile.Name())
require.Equal(t, qc, len(ql))
require.Len(t, ql, qc)
qc = len(ql)
p.setQueryLog(t, queryLogFile.Name())
@ -320,7 +320,7 @@ func (p *queryLogTest) run(t *testing.T) {
ql = readQueryLog(t, queryLogFile.Name())
if p.exactQueryCount() {
require.Equal(t, qc, len(ql))
require.Len(t, ql, qc)
} else {
require.Greater(t, len(ql), qc, "no queries logged")
}
@ -340,7 +340,7 @@ func (p *queryLogTest) run(t *testing.T) {
require.NoError(t, os.Rename(queryLogFile.Name(), newFile.Name()))
ql = readQueryLog(t, newFile.Name())
if p.exactQueryCount() {
require.Equal(t, qc, len(ql))
require.Len(t, ql, qc)
}
p.validateLastQuery(t, ql)
qc = len(ql)
@ -351,7 +351,7 @@ func (p *queryLogTest) run(t *testing.T) {
ql = readQueryLog(t, newFile.Name())
if p.exactQueryCount() {
require.Equal(t, qc, len(ql))
require.Len(t, ql, qc)
} else {
require.Greater(t, len(ql), qc, "no queries logged")
}


@ -44,8 +44,8 @@ func sortSamples(samples []backfillSample) {
})
}
func queryAllSeries(t testing.TB, q storage.Querier, expectedMinTime, expectedMaxTime int64) []backfillSample { // nolint:revive
ss := q.Select(false, nil, labels.MustNewMatcher(labels.MatchRegexp, "", ".*"))
func queryAllSeries(t testing.TB, q storage.Querier, expectedMinTime, expectedMaxTime int64) []backfillSample {
ss := q.Select(context.Background(), false, nil, labels.MustNewMatcher(labels.MatchRegexp, "", ".*"))
samples := []backfillSample{}
for ss.Next() {
series := ss.At()
@ -61,13 +61,13 @@ func queryAllSeries(t testing.TB, q storage.Querier, expectedMinTime, expectedMa
func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime, expectedBlockDuration int64, expectedSamples []backfillSample, expectedNumBlocks int) {
blocks := db.Blocks()
require.Equal(t, expectedNumBlocks, len(blocks), "did not create correct number of blocks")
require.Len(t, blocks, expectedNumBlocks, "did not create correct number of blocks")
for i, block := range blocks {
require.Equal(t, block.MinTime()/expectedBlockDuration, (block.MaxTime()-1)/expectedBlockDuration, "block %d contains data outside of one aligned block duration", i)
}
q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64)
q, err := db.Querier(math.MinInt64, math.MaxInt64)
require.NoError(t, err)
defer func() {
require.NoError(t, q.Close())


@ -36,6 +36,7 @@ import (
"github.com/google/pprof/profile"
"github.com/prometheus/client_golang/api"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/client_golang/prometheus/testutil/promlint"
config_util "github.com/prometheus/common/config"
@ -58,6 +59,7 @@ import (
"github.com/prometheus/prometheus/notifier"
_ "github.com/prometheus/prometheus/plugins" // Register plugins.
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/scrape"
"github.com/prometheus/prometheus/util/documentcli"
)
@ -81,15 +83,20 @@ func main() {
var (
httpRoundTripper = api.DefaultRoundTripper
serverURL *url.URL
remoteWriteURL *url.URL
httpConfigFilePath string
)
ctx := context.Background()
app := kingpin.New(filepath.Base(os.Args[0]), "Tooling for the Prometheus monitoring system.").UsageWriter(os.Stdout)
app.Version(version.Print("promtool"))
app.HelpFlag.Short('h')
checkCmd := app.Command("check", "Check the resources for validity.")
experimental := app.Flag("experimental", "Enable experimental commands.").Bool()
sdCheckCmd := checkCmd.Command("service-discovery", "Perform service discovery for the given job name and report the results, including relabeling.")
sdConfigFile := sdCheckCmd.Arg("config-file", "The prometheus config file.").Required().ExistingFile()
sdJobName := sdCheckCmd.Arg("job", "The job to run service discovery for.").Required().String()
@ -126,8 +133,8 @@ func main() {
checkRulesCmd := checkCmd.Command("rules", "Check if the rule files are valid or not.")
ruleFiles := checkRulesCmd.Arg(
"rule-files",
"The rule files to check.",
).Required().ExistingFiles()
"The rule files to check, default is read from standard input.",
).ExistingFiles()
checkRulesLint := checkRulesCmd.Flag(
"lint",
"Linting checks to apply. Available options are: "+strings.Join(lintOptions, ", ")+". Use --lint=none to disable linting",
@ -178,8 +185,21 @@ func main() {
queryLabelsEnd := queryLabelsCmd.Flag("end", "End time (RFC3339 or Unix timestamp).").String()
queryLabelsMatch := queryLabelsCmd.Flag("match", "Series selector. Can be specified multiple times.").Strings()
pushCmd := app.Command("push", "Push to a Prometheus server.")
pushCmd.Flag("http.config.file", "HTTP client configuration file for promtool to connect to Prometheus.").PlaceHolder("<filename>").ExistingFileVar(&httpConfigFilePath)
pushMetricsCmd := pushCmd.Command("metrics", "Push metrics to a prometheus remote write (for testing purpose only).")
pushMetricsCmd.Arg("remote-write-url", "Prometheus remote write url to push metrics.").Required().URLVar(&remoteWriteURL)
metricFiles := pushMetricsCmd.Arg(
"metric-files",
"The metric files to push, default is read from standard input.",
).ExistingFiles()
pushMetricsLabels := pushMetricsCmd.Flag("label", "Label to attach to metrics. Can be specified multiple times.").Default("job=promtool").StringMap()
pushMetricsTimeout := pushMetricsCmd.Flag("timeout", "The time to wait for pushing metrics.").Default("30s").Duration()
pushMetricsHeaders := pushMetricsCmd.Flag("header", "Prometheus remote write header.").StringMap()
testCmd := app.Command("test", "Unit testing.")
testRulesCmd := testCmd.Command("rules", "Unit tests for rules.")
testRulesRun := testRulesCmd.Flag("run", "If set, will only run test groups whose names match the regular expression. Can be specified multiple times.").Strings()
testRulesFiles := testRulesCmd.Arg(
"test-rule-file",
"The unit test file.",
@ -200,6 +220,7 @@ func main() {
analyzeBlockID := tsdbAnalyzeCmd.Arg("block id", "Block to analyze (default is the last block).").String()
analyzeLimit := tsdbAnalyzeCmd.Flag("limit", "How many items to show in each list.").Default("20").Int()
analyzeRunExtended := tsdbAnalyzeCmd.Flag("extended", "Run extended analysis.").Bool()
analyzeMatchers := tsdbAnalyzeCmd.Flag("match", "Series selector to analyze. Only 1 set of matchers is supported now.").String()
tsdbListCmd := tsdbCmd.Command("list", "List tsdb blocks.")
listHumanReadable := tsdbListCmd.Flag("human-readable", "Print human readable values.").Short('r').Bool()
@ -232,6 +253,22 @@ func main() {
"A list of one or more files containing recording rules to be backfilled. All recording rules listed in the files will be backfilled. Alerting rules are not evaluated.",
).Required().ExistingFiles()
promQLCmd := app.Command("promql", "PromQL formatting and editing. Requires the --experimental flag.")
promQLFormatCmd := promQLCmd.Command("format", "Format PromQL query to pretty printed form.")
promQLFormatQuery := promQLFormatCmd.Arg("query", "PromQL query.").Required().String()
promQLLabelsCmd := promQLCmd.Command("label-matchers", "Edit label matchers contained within an existing PromQL query.")
promQLLabelsSetCmd := promQLLabelsCmd.Command("set", "Set a label matcher in the query.")
promQLLabelsSetType := promQLLabelsSetCmd.Flag("type", "Type of the label matcher to set.").Short('t').Default("=").Enum("=", "!=", "=~", "!~")
promQLLabelsSetQuery := promQLLabelsSetCmd.Arg("query", "PromQL query.").Required().String()
promQLLabelsSetName := promQLLabelsSetCmd.Arg("name", "Name of the label matcher to set.").Required().String()
promQLLabelsSetValue := promQLLabelsSetCmd.Arg("value", "Value of the label matcher to set.").Required().String()
promQLLabelsDeleteCmd := promQLLabelsCmd.Command("delete", "Delete a label from the query.")
promQLLabelsDeleteQuery := promQLLabelsDeleteCmd.Arg("query", "PromQL query.").Required().String()
promQLLabelsDeleteName := promQLLabelsDeleteCmd.Arg("name", "Name of the label to delete.").Required().String()
featureList := app.Flag("enable-feature", "Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details.").Default("").Strings()
documentationCmd := app.Command("write-documentation", "Generate command line documentation. Internal use.").Hidden()
@ -281,7 +318,7 @@ func main() {
switch parsedCmd {
case sdCheckCmd.FullCommand():
os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort))
os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort, prometheus.DefaultRegisterer))
case checkConfigCmd.FullCommand():
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...))
@ -301,6 +338,9 @@ func main() {
case checkMetricsCmd.FullCommand():
os.Exit(CheckMetrics(*checkMetricsExtended))
case pushMetricsCmd.FullCommand():
os.Exit(PushMetrics(remoteWriteURL, httpRoundTripper, *pushMetricsHeaders, *pushMetricsTimeout, *pushMetricsLabels, *metricFiles...))
case queryInstantCmd.FullCommand():
os.Exit(QueryInstant(serverURL, httpRoundTripper, *queryInstantExpr, *queryInstantTime, p))
@ -328,6 +368,7 @@ func main() {
EnableAtModifier: true,
EnableNegativeOffset: true,
},
*testRulesRun,
*testRulesFiles...),
)
@ -335,26 +376,45 @@ func main() {
os.Exit(checkErr(benchmarkWrite(*benchWriteOutPath, *benchSamplesFile, *benchWriteNumMetrics, *benchWriteNumScrapes)))
case tsdbAnalyzeCmd.FullCommand():
os.Exit(checkErr(analyzeBlock(*analyzePath, *analyzeBlockID, *analyzeLimit, *analyzeRunExtended)))
os.Exit(checkErr(analyzeBlock(ctx, *analyzePath, *analyzeBlockID, *analyzeLimit, *analyzeRunExtended, *analyzeMatchers)))
case tsdbListCmd.FullCommand():
os.Exit(checkErr(listBlocks(*listPath, *listHumanReadable)))
case tsdbDumpCmd.FullCommand():
os.Exit(checkErr(dumpSamples(*dumpPath, *dumpMinTime, *dumpMaxTime, *dumpMatch)))
os.Exit(checkErr(dumpSamples(ctx, *dumpPath, *dumpMinTime, *dumpMaxTime, *dumpMatch)))
// TODO(aSquare14): Work on adding support for custom block size.
case openMetricsImportCmd.FullCommand():
os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration))
case importRulesCmd.FullCommand():
os.Exit(checkErr(importRules(serverURL, httpRoundTripper, *importRulesStart, *importRulesEnd, *importRulesOutputDir, *importRulesEvalInterval, *maxBlockDuration, *importRulesFiles...)))
case documentationCmd.FullCommand():
os.Exit(checkErr(documentcli.GenerateMarkdown(app.Model(), os.Stdout)))
case promQLFormatCmd.FullCommand():
checkExperimental(*experimental)
os.Exit(checkErr(formatPromQL(*promQLFormatQuery)))
case promQLLabelsSetCmd.FullCommand():
checkExperimental(*experimental)
os.Exit(checkErr(labelsSetPromQL(*promQLLabelsSetQuery, *promQLLabelsSetType, *promQLLabelsSetName, *promQLLabelsSetValue)))
case promQLLabelsDeleteCmd.FullCommand():
checkExperimental(*experimental)
os.Exit(checkErr(labelsDeletePromQL(*promQLLabelsDeleteQuery, *promQLLabelsDeleteName)))
}
}
// nolint:revive
var lintError = fmt.Errorf("lint error")
func checkExperimental(f bool) {
if !f {
fmt.Fprintln(os.Stderr, "This command is experimental and requires the --experimental flag to be set.")
os.Exit(1)
}
}
var errLint = fmt.Errorf("lint error")
type lintConfig struct {
all bool
@ -441,20 +501,12 @@ func CheckConfig(agentMode, checkSyntaxOnly bool, lintSettings lintConfig, files
}
fmt.Println()
for _, rf := range ruleFiles {
if n, errs := checkRules(rf, lintSettings); len(errs) > 0 {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, err := range errs {
fmt.Fprintln(os.Stderr, " ", err)
}
failed = true
for _, err := range errs {
hasErrors = hasErrors || !errors.Is(err, lintError)
}
} else {
fmt.Printf(" SUCCESS: %d rules found\n", n)
}
fmt.Println()
rulesFailed, rulesHasErrors := checkRules(ruleFiles, lintSettings)
if rulesFailed {
failed = rulesFailed
}
if rulesHasErrors {
hasErrors = rulesHasErrors
}
}
if failed && hasErrors {
@ -682,39 +734,96 @@ func checkSDFile(filename string) ([]*targetgroup.Group, error) {
func CheckRules(ls lintConfig, files ...string) int {
failed := false
hasErrors := false
for _, f := range files {
if n, errs := checkRules(f, ls); errs != nil {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
}
failed = true
for _, err := range errs {
hasErrors = hasErrors || !errors.Is(err, lintError)
}
} else {
fmt.Printf(" SUCCESS: %d rules found\n", n)
}
fmt.Println()
if len(files) == 0 {
failed, hasErrors = checkRulesFromStdin(ls)
} else {
failed, hasErrors = checkRules(files, ls)
}
if failed && hasErrors {
return failureExitCode
}
if failed && ls.fatal {
return lintErrExitCode
}
return successExitCode
}
func checkRules(filename string, lintSettings lintConfig) (int, []error) {
fmt.Println("Checking", filename)
rgs, errs := rulefmt.ParseFile(filename)
if errs != nil {
return successExitCode, errs
// checkRulesFromStdin validates rules read from standard input.
func checkRulesFromStdin(ls lintConfig) (bool, bool) {
failed := false
hasErrors := false
fmt.Println("Checking standard input")
data, err := io.ReadAll(os.Stdin)
if err != nil {
fmt.Fprintln(os.Stderr, " FAILED:", err)
return true, true
}
rgs, errs := rulefmt.Parse(data)
if errs != nil {
failed = true
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
hasErrors = hasErrors || !errors.Is(e, errLint)
}
if hasErrors {
return failed, hasErrors
}
}
if n, errs := checkRuleGroups(rgs, ls); errs != nil {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
}
failed = true
for _, err := range errs {
hasErrors = hasErrors || !errors.Is(err, errLint)
}
} else {
fmt.Printf(" SUCCESS: %d rules found\n", n)
}
fmt.Println()
return failed, hasErrors
}
// checkRules validates rule files.
func checkRules(files []string, ls lintConfig) (bool, bool) {
failed := false
hasErrors := false
for _, f := range files {
fmt.Println("Checking", f)
rgs, errs := rulefmt.ParseFile(f)
if errs != nil {
failed = true
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
hasErrors = hasErrors || !errors.Is(e, errLint)
}
if hasErrors {
continue
}
}
if n, errs := checkRuleGroups(rgs, ls); errs != nil {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
}
failed = true
for _, err := range errs {
hasErrors = hasErrors || !errors.Is(err, errLint)
}
} else {
fmt.Printf(" SUCCESS: %d rules found\n", n)
}
fmt.Println()
}
return failed, hasErrors
}
func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings lintConfig) (int, []error) {
numRules := 0
for _, rg := range rgs.Groups {
numRules += len(rg.Rules)
@ -731,7 +840,7 @@ func checkRules(filename string, lintSettings lintConfig) (int, []error) {
})
}
errMessage += "Might cause inconsistency while recording expressions"
return 0, []error{fmt.Errorf("%w %s", lintError, errMessage)}
return 0, []error{fmt.Errorf("%w %s", errLint, errMessage)}
}
}
@ -1332,3 +1441,79 @@ func checkTargetGroupsForScrapeConfig(targetGroups []*targetgroup.Group, scfg *c
return nil
}
func formatPromQL(query string) error {
expr, err := parser.ParseExpr(query)
if err != nil {
return err
}
fmt.Println(expr.Pretty(0))
return nil
}
func labelsSetPromQL(query, labelMatchType, name, value string) error {
expr, err := parser.ParseExpr(query)
if err != nil {
return err
}
var matchType labels.MatchType
switch labelMatchType {
case parser.ItemType(parser.EQL).String():
matchType = labels.MatchEqual
case parser.ItemType(parser.NEQ).String():
matchType = labels.MatchNotEqual
case parser.ItemType(parser.EQL_REGEX).String():
matchType = labels.MatchRegexp
case parser.ItemType(parser.NEQ_REGEX).String():
matchType = labels.MatchNotRegexp
default:
return fmt.Errorf("invalid label match type: %s", labelMatchType)
}
parser.Inspect(expr, func(node parser.Node, path []parser.Node) error {
if n, ok := node.(*parser.VectorSelector); ok {
var found bool
for i, l := range n.LabelMatchers {
if l.Name == name {
n.LabelMatchers[i].Type = matchType
n.LabelMatchers[i].Value = value
found = true
}
}
if !found {
n.LabelMatchers = append(n.LabelMatchers, &labels.Matcher{
Type: matchType,
Name: name,
Value: value,
})
}
}
return nil
})
fmt.Println(expr.Pretty(0))
return nil
}
func labelsDeletePromQL(query, name string) error {
expr, err := parser.ParseExpr(query)
if err != nil {
return err
}
parser.Inspect(expr, func(node parser.Node, path []parser.Node) error {
if n, ok := node.(*parser.VectorSelector); ok {
for i, l := range n.LabelMatchers {
if l.Name == name {
n.LabelMatchers = append(n.LabelMatchers[:i], n.LabelMatchers[i+1:]...)
}
}
}
return nil
})
fmt.Println(expr.Pretty(0))
return nil
}
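Illustration only, not part of this change: a minimal standalone sketch of the matcher rewrite that labelsSetPromQL performs, simplified to always append, using the same parser and labels APIs as above; the query and label values are made up.

package main

import (
	"fmt"

	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/promql/parser"
)

func main() {
	expr, err := parser.ParseExpr(`sum(rate(http_requests_total[5m]))`)
	if err != nil {
		panic(err)
	}
	// Add job="api" to every vector selector, roughly what
	// `promtool promql label-matchers set <query> job api` would do.
	parser.Inspect(expr, func(node parser.Node, _ []parser.Node) error {
		if n, ok := node.(*parser.VectorSelector); ok {
			n.LabelMatchers = append(n.LabelMatchers, &labels.Matcher{
				Type:  labels.MatchEqual,
				Name:  "job",
				Value: "api",
			})
		}
		return nil
	})
	fmt.Println(expr.Pretty(0))
}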

View file

@ -56,7 +56,7 @@ func TestQueryRange(t *testing.T) {
defer s.Close()
urlObject, err := url.Parse(s.URL)
require.Equal(t, nil, err)
require.NoError(t, err)
p := &promqlPrinter{}
exitCode := QueryRange(urlObject, http.DefaultTransport, map[string]string{}, "up", "0", "300", 0, p)
@ -79,7 +79,7 @@ func TestQueryInstant(t *testing.T) {
defer s.Close()
urlObject, err := url.Parse(s.URL)
require.Equal(t, nil, err)
require.NoError(t, err)
p := &promqlPrinter{}
exitCode := QueryInstant(urlObject, http.DefaultTransport, "up", "300", p)
@ -450,10 +450,9 @@ func TestDocumentation(t *testing.T) {
cmd.Stdout = &stdout
if err := cmd.Run(); err != nil {
if exitError, ok := err.(*exec.ExitError); ok {
if exitError.ExitCode() != 0 {
fmt.Println("Command failed with non-zero exit code")
}
var exitError *exec.ExitError
if errors.As(err, &exitError) && exitError.ExitCode() != 0 {
fmt.Println("Command failed with non-zero exit code")
}
}
@ -464,3 +463,88 @@ func TestDocumentation(t *testing.T) {
require.Equal(t, string(expectedContent), generatedContent, "Generated content does not match documentation. Hint: run `make cli-documentation`.")
}
func TestCheckRules(t *testing.T) {
t.Run("rules-good", func(t *testing.T) {
data, err := os.ReadFile("./testdata/rules.yml")
require.NoError(t, err)
r, w, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
_, err = w.Write(data)
if err != nil {
t.Error(err)
}
w.Close()
// Restore stdin right after the test.
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false))
require.Equal(t, successExitCode, exitCode, "")
})
t.Run("rules-bad", func(t *testing.T) {
data, err := os.ReadFile("./testdata/rules-bad.yml")
require.NoError(t, err)
r, w, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
_, err = w.Write(data)
if err != nil {
t.Error(err)
}
w.Close()
// Restore stdin right after the test.
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false))
require.Equal(t, failureExitCode, exitCode, "")
})
t.Run("rules-lint-fatal", func(t *testing.T) {
data, err := os.ReadFile("./testdata/prometheus-rules.lint.yml")
require.NoError(t, err)
r, w, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
_, err = w.Write(data)
if err != nil {
t.Error(err)
}
w.Close()
// Restore stdin right after the test.
defer func(v *os.File) { os.Stdin = v }(os.Stdin)
os.Stdin = r
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true))
require.Equal(t, lintErrExitCode, exitCode, "")
})
}
func TestCheckRulesWithRuleFiles(t *testing.T) {
t.Run("rules-good", func(t *testing.T) {
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules.yml")
require.Equal(t, successExitCode, exitCode, "")
})
t.Run("rules-bad", func(t *testing.T) {
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, false), "./testdata/rules-bad.yml")
require.Equal(t, failureExitCode, exitCode, "")
})
t.Run("rules-lint-fatal", func(t *testing.T) {
exitCode := CheckRules(newLintConfig(lintOptionDuplicateRules, true), "./testdata/prometheus-rules.lint.yml")
require.Equal(t, lintErrExitCode, exitCode, "")
})
}

138
cmd/promtool/metrics.go Normal file
View file

@ -0,0 +1,138 @@
// Copyright 2023 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"bytes"
"context"
"fmt"
"io"
"net/http"
"net/url"
"os"
"time"
"github.com/golang/snappy"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/storage/remote"
"github.com/prometheus/prometheus/util/fmtutil"
)
// PushMetrics pushes metrics read from files or standard input to a Prometheus remote write endpoint (for testing purposes only).
func PushMetrics(url *url.URL, roundTripper http.RoundTripper, headers map[string]string, timeout time.Duration, labels map[string]string, files ...string) int {
addressURL, err := url.Parse(url.String())
if err != nil {
fmt.Fprintln(os.Stderr, err)
return failureExitCode
}
// Build the remote write client.
writeClient, err := remote.NewWriteClient("remote-write", &remote.ClientConfig{
URL: &config_util.URL{URL: addressURL},
Timeout: model.Duration(timeout),
})
if err != nil {
fmt.Fprintln(os.Stderr, err)
return failureExitCode
}
// Wrap the transport so the custom headers are attached to every request.
client, ok := writeClient.(*remote.Client)
if !ok {
fmt.Fprintln(os.Stderr, fmt.Errorf("unexpected type %T", writeClient))
return failureExitCode
}
client.Client.Transport = &setHeadersTransport{
RoundTripper: roundTripper,
headers: headers,
}
var data []byte
var failed bool
if len(files) == 0 {
data, err = io.ReadAll(os.Stdin)
if err != nil {
fmt.Fprintln(os.Stderr, " FAILED:", err)
return failureExitCode
}
fmt.Printf("Parsing standard input\n")
if parseAndPushMetrics(client, data, labels) {
fmt.Printf(" SUCCESS: metrics pushed to remote write.\n")
return successExitCode
}
return failureExitCode
}
for _, file := range files {
data, err = os.ReadFile(file)
if err != nil {
fmt.Fprintln(os.Stderr, " FAILED:", err)
failed = true
continue
}
fmt.Printf("Parsing metrics file %s\n", file)
if parseAndPushMetrics(client, data, labels) {
fmt.Printf(" SUCCESS: metrics file %s pushed to remote write.\n", file)
continue
}
failed = true
}
if failed {
return failureExitCode
}
return successExitCode
}
func parseAndPushMetrics(client *remote.Client, data []byte, labels map[string]string) bool {
metricsData, err := fmtutil.MetricTextToWriteRequest(bytes.NewReader(data), labels)
if err != nil {
fmt.Fprintln(os.Stderr, " FAILED:", err)
return false
}
raw, err := metricsData.Marshal()
if err != nil {
fmt.Fprintln(os.Stderr, " FAILED:", err)
return false
}
// Compress the request body with snappy, as required by the remote write protocol.
compressed := snappy.Encode(nil, raw)
err = client.Store(context.Background(), compressed, 0)
if err != nil {
fmt.Fprintln(os.Stderr, " FAILED:", err)
return false
}
return true
}
type setHeadersTransport struct {
http.RoundTripper
headers map[string]string
}
func (s *setHeadersTransport) RoundTrip(req *http.Request) (*http.Response, error) {
for key, value := range s.headers {
req.Header.Set(key, value)
}
return s.RoundTripper.RoundTrip(req)
}
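Illustration only, not part of this change: a minimal sketch of driving PushMetrics programmatically, assuming it sits in the same package; the receiver URL and extra header are placeholders.

func examplePush() int {
	// Hypothetical remote write receiver; any endpoint speaking the
	// remote write protocol works here.
	u, err := url.Parse("http://localhost:9090/api/v1/write")
	if err != nil {
		return failureExitCode
	}
	// With no files given, metrics in text exposition format are read from
	// standard input, labelled job=promtool, snappy-compressed and pushed.
	return PushMetrics(u, http.DefaultTransport,
		map[string]string{"X-Scope-OrgID": "demo"}, // extra headers, made up for the example
		30*time.Second,
		map[string]string{"job": "promtool"})
}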

View file

@ -35,7 +35,7 @@ type mockQueryRangeAPI struct {
samples model.Matrix
}
func (mockAPI mockQueryRangeAPI) QueryRange(_ context.Context, query string, r v1.Range, opts ...v1.Option) (model.Value, v1.Warnings, error) { // nolint:revive
func (mockAPI mockQueryRangeAPI) QueryRange(_ context.Context, query string, r v1.Range, opts ...v1.Option) (model.Value, v1.Warnings, error) {
return mockAPI.samples, v1.Warnings{}, nil
}
@ -91,13 +91,13 @@ func TestBackfillRuleIntegration(t *testing.T) {
for _, err := range errs {
require.NoError(t, err)
}
require.Equal(t, 3, len(ruleImporter.groups))
require.Len(t, ruleImporter.groups, 3)
group1 := ruleImporter.groups[path1+";group0"]
require.NotNil(t, group1)
const defaultInterval = 60
require.Equal(t, defaultInterval*time.Second, group1.Interval())
gRules := group1.Rules()
require.Equal(t, 1, len(gRules))
require.Len(t, gRules, 1)
require.Equal(t, "rule1", gRules[0].Name())
require.Equal(t, "ruleExpr", gRules[0].Query().String())
require.Equal(t, 1, gRules[0].Labels().Len())
@ -106,7 +106,7 @@ func TestBackfillRuleIntegration(t *testing.T) {
require.NotNil(t, group2)
require.Equal(t, defaultInterval*time.Second, group2.Interval())
g2Rules := group2.Rules()
require.Equal(t, 2, len(g2Rules))
require.Len(t, g2Rules, 2)
require.Equal(t, "grp2_rule1", g2Rules[0].Name())
require.Equal(t, "grp2_rule1_expr", g2Rules[0].Query().String())
require.Equal(t, 0, g2Rules[0].Labels().Len())
@ -122,12 +122,12 @@ func TestBackfillRuleIntegration(t *testing.T) {
require.NoError(t, err)
blocks := db.Blocks()
require.Equal(t, (i+1)*tt.expectedBlockCount, len(blocks))
require.Len(t, blocks, (i+1)*tt.expectedBlockCount)
q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64)
q, err := db.Querier(math.MinInt64, math.MaxInt64)
require.NoError(t, err)
selectedSeries := q.Select(false, nil, labels.MustNewMatcher(labels.MatchRegexp, "", ".*"))
selectedSeries := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "", ".*"))
var seriesCount, samplesCount int
for selectedSeries.Next() {
seriesCount++
@ -248,11 +248,11 @@ func TestBackfillLabels(t *testing.T) {
db, err := tsdb.Open(tmpDir, nil, nil, opts, nil)
require.NoError(t, err)
q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64)
q, err := db.Querier(math.MinInt64, math.MaxInt64)
require.NoError(t, err)
t.Run("correct-labels", func(t *testing.T) {
selectedSeries := q.Select(false, nil, labels.MustNewMatcher(labels.MatchRegexp, "", ".*"))
selectedSeries := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "", ".*"))
for selectedSeries.Next() {
series := selectedSeries.At()
expectedLabels := labels.FromStrings("__name__", "rulename", "name1", "value-from-rule")

View file

@ -22,6 +22,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
@ -37,7 +38,7 @@ type sdCheckResult struct {
}
// CheckSD performs service discovery for the given job name and reports the results.
func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool) int {
func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool, registerer prometheus.Registerer) int {
logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))
cfg, err := config.LoadFile(sdConfigFiles, false, false, logger)
@ -77,7 +78,7 @@ func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefault
defer cancel()
for _, cfg := range scrapeConfig.ServiceDiscoveryConfigs {
d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{Logger: logger})
d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{Logger: logger, Registerer: registerer})
if err != nil {
fmt.Fprintln(os.Stderr, "Could not create new discoverer", err)
return failureExitCode

View file

@ -35,7 +35,7 @@ func TestSDCheckResult(t *testing.T) {
}}
reg, err := relabel.NewRegexp("(.*)")
require.Nil(t, err)
require.NoError(t, err)
scrapeConfig := &config.ScrapeConfig{
ScrapeInterval: model.Duration(1 * time.Minute),

View file

@ -0,0 +1,15 @@
tests:
- input_series:
- series: test
values: 0 1
promql_expr_test:
- expr: test
eval_time: 59s
exp_samples:
- value: 0
labels: test
- expr: test
eval_time: 1m
exp_samples:
- value: 1
labels: test

28
cmd/promtool/testdata/rules-bad.yml vendored Normal file
View file

@ -0,0 +1,28 @@
# This is the rules file.
groups:
- name: alerts
rules:
- alert: InstanceDown
expr: up == 0
for: 5m
labels:
severity: page
annotations:
summary: "Instance {{ $label.foo }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
- alert: AlwaysFiring
expr: 1
- name: rules
rules:
- record: job:test:count_over_time1m
expr: sum without(instance) (count_over_time(test[1m]))
# A recording rule that doesn't depend on input series.
- record: fixed_data
expr: 1
# Subquery with default resolution test.
- record: suquery_interval_test
expr: count_over_time(up[5m:])

30
cmd/promtool/testdata/rules_run.yml vendored Normal file
View file

@ -0,0 +1,30 @@
rule_files:
- rules.yml
evaluation_interval: 1m
# Minimal test cases for exercising the --run test-group filter.
tests:
- name: correct test
input_series:
- series: test
values: 1
promql_expr_test:
- expr: test
eval_time: 0
exp_samples:
- value: 1
labels: test
- name: wrong test
input_series:
- series: test
values: 0
promql_expr_test:
- expr: test
eval_time: 0
exp_samples:
- value: 1
labels: test

View file

@ -10,6 +10,21 @@ tests:
- series: test_full
values: "0 0"
- series: test_repeat
values: "1x2"
- series: test_increase
values: "1+1x2"
- series: test_histogram
values: "{{schema:1 sum:-0.3 count:32.1 z_bucket:7.1 z_bucket_w:0.05 buckets:[5.1 10 7] offset:-3 n_buckets:[4.1 5] n_offset:-5}}"
- series: test_histogram_repeat
values: "{{sum:3 count:2 buckets:[2]}}x2"
- series: test_histogram_increase
values: "{{sum:3 count:2 buckets:[2]}}+{{sum:1.3 count:1 buckets:[1]}}x2"
- series: test_stale
values: "0 stale"
@ -31,6 +46,37 @@ tests:
exp_samples:
- value: 60
# Repeat & increase
- expr: test_repeat
eval_time: 2m
exp_samples:
- value: 1
labels: "test_repeat"
- expr: test_increase
eval_time: 2m
exp_samples:
- value: 3
labels: "test_increase"
# Histograms
- expr: test_histogram
eval_time: 1m
exp_samples:
- labels: "test_histogram"
histogram: "{{schema:1 sum:-0.3 count:32.1 z_bucket:7.1 z_bucket_w:0.05 buckets:[5.1 10 7] offset:-3 n_buckets:[4.1 5] n_offset:-5}}"
- expr: test_histogram_repeat
eval_time: 2m
exp_samples:
- labels: "test_histogram_repeat"
histogram: "{{count:2 sum:3 buckets:[2]}}"
- expr: test_histogram_increase
eval_time: 2m
exp_samples:
- labels: "test_histogram_increase"
histogram: "{{count:4 sum:5.6 buckets:[4]}}"
# Ensure a value is stale as soon as it is marked as such.
- expr: test_stale
eval_time: 59s

View file

@ -16,33 +16,32 @@ package main
import (
"bufio"
"context"
"errors"
"fmt"
"io"
"math"
"os"
"path/filepath"
"runtime"
"runtime/pprof"
"sort"
"strconv"
"strings"
"sync"
"text/tabwriter"
"time"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/tsdb/index"
"github.com/alecthomas/units"
"github.com/go-kit/log"
"golang.org/x/exp/slices"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/tsdb/chunkenc"
"github.com/prometheus/prometheus/tsdb/chunks"
tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
"github.com/prometheus/prometheus/tsdb/fileutil"
"github.com/prometheus/prometheus/tsdb/index"
)
const timeDelta = 30000
@ -398,29 +397,33 @@ func openBlock(path, blockID string) (*tsdb.DBReadOnly, tsdb.BlockReader, error)
if err != nil {
return nil, nil, err
}
blocks, err := db.Blocks()
if blockID == "" {
blockID, err = db.LastBlockID()
if err != nil {
return nil, nil, err
}
}
b, err := db.Block(blockID)
if err != nil {
return nil, nil, err
}
var block tsdb.BlockReader
switch {
case blockID != "":
for _, b := range blocks {
if b.Meta().ULID.String() == blockID {
block = b
break
}
}
case len(blocks) > 0:
block = blocks[len(blocks)-1]
}
if block == nil {
return nil, nil, fmt.Errorf("block %s not found", blockID)
}
return db, block, nil
return db, b, nil
}
func analyzeBlock(path, blockID string, limit int, runExtended bool) error {
func analyzeBlock(ctx context.Context, path, blockID string, limit int, runExtended bool, matchers string) error {
var (
selectors []*labels.Matcher
err error
)
if len(matchers) > 0 {
selectors, err = parser.ParseMetricSelector(matchers)
if err != nil {
return err
}
}
db, block, err := openBlock(path, blockID)
if err != nil {
return err
@ -433,14 +436,17 @@ func analyzeBlock(path, blockID string, limit int, runExtended bool) error {
fmt.Printf("Block ID: %s\n", meta.ULID)
// Presume 1ms resolution that Prometheus uses.
fmt.Printf("Duration: %s\n", (time.Duration(meta.MaxTime-meta.MinTime) * 1e6).String())
fmt.Printf("Series: %d\n", meta.Stats.NumSeries)
fmt.Printf("Total Series: %d\n", meta.Stats.NumSeries)
if len(matchers) > 0 {
fmt.Printf("Matcher: %s\n", matchers)
}
ir, err := block.Index()
if err != nil {
return err
}
defer ir.Close()
allLabelNames, err := ir.LabelNames()
allLabelNames, err := ir.LabelNames(ctx, selectors...)
if err != nil {
return err
}
@ -453,7 +459,16 @@ func analyzeBlock(path, blockID string, limit int, runExtended bool) error {
postingInfos := []postingInfo{}
printInfo := func(postingInfos []postingInfo) {
sort.Slice(postingInfos, func(i, j int) bool { return postingInfos[i].metric > postingInfos[j].metric })
slices.SortFunc(postingInfos, func(a, b postingInfo) int {
switch {
case b.metric < a.metric:
return -1
case b.metric > a.metric:
return 1
default:
return 0
}
})
for i, pc := range postingInfos {
if i >= limit {
@ -467,10 +482,30 @@ func analyzeBlock(path, blockID string, limit int, runExtended bool) error {
labelpairsUncovered := map[string]uint64{}
labelpairsCount := map[string]uint64{}
entries := 0
p, err := ir.Postings("", "") // The special all key.
if err != nil {
return err
var (
p index.Postings
refs []storage.SeriesRef
)
if len(matchers) > 0 {
p, err = tsdb.PostingsForMatchers(ctx, ir, selectors...)
if err != nil {
return err
}
// Expand the postings into refs first and cache them in memory,
// so they do not have to be expanded again below.
refs, err = index.ExpandPostings(p)
if err != nil {
return err
}
fmt.Printf("Matched series: %d\n", len(refs))
p = index.NewListPostings(refs)
} else {
p, err = ir.Postings(ctx, "", "") // The special all key.
if err != nil {
return err
}
}
chks := []chunks.Meta{}
builder := labels.ScratchBuilder{}
for p.Next() {
@ -519,7 +554,7 @@ func analyzeBlock(path, blockID string, limit int, runExtended bool) error {
postingInfos = postingInfos[:0]
for _, n := range allLabelNames {
values, err := ir.SortedLabelValues(n)
values, err := ir.SortedLabelValues(ctx, n, selectors...)
if err != nil {
return err
}
@ -535,7 +570,7 @@ func analyzeBlock(path, blockID string, limit int, runExtended bool) error {
postingInfos = postingInfos[:0]
for _, n := range allLabelNames {
lv, err := ir.SortedLabelValues(n)
lv, err := ir.SortedLabelValues(ctx, n, selectors...)
if err != nil {
return err
}
@ -545,15 +580,16 @@ func analyzeBlock(path, blockID string, limit int, runExtended bool) error {
printInfo(postingInfos)
postingInfos = postingInfos[:0]
lv, err := ir.SortedLabelValues("__name__")
lv, err := ir.SortedLabelValues(ctx, "__name__", selectors...)
if err != nil {
return err
}
for _, n := range lv {
postings, err := ir.Postings("__name__", n)
postings, err := ir.Postings(ctx, "__name__", n)
if err != nil {
return err
}
postings = index.Intersect(postings, index.NewListPostings(refs))
count := 0
for postings.Next() {
count++
@ -567,17 +603,24 @@ func analyzeBlock(path, blockID string, limit int, runExtended bool) error {
printInfo(postingInfos)
if runExtended {
return analyzeCompaction(block, ir)
return analyzeCompaction(ctx, block, ir, selectors)
}
return nil
}
func analyzeCompaction(block tsdb.BlockReader, indexr tsdb.IndexReader) (err error) {
postingsr, err := indexr.Postings(index.AllPostingsKey())
func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.IndexReader, matchers []*labels.Matcher) (err error) {
var postingsr index.Postings
if len(matchers) > 0 {
postingsr, err = tsdb.PostingsForMatchers(ctx, indexr, matchers...)
} else {
n, v := index.AllPostingsKey()
postingsr, err = indexr.Postings(ctx, n, v)
}
if err != nil {
return err
}
chunkr, err := block.Chunks()
if err != nil {
return err
@ -586,10 +629,12 @@ func analyzeCompaction(block tsdb.BlockReader, indexr tsdb.IndexReader) (err err
err = tsdb_errors.NewMulti(err, chunkr.Close()).Err()
}()
const maxSamplesPerChunk = 120
nBuckets := 10
histogram := make([]int, nBuckets)
totalChunks := 0
floatChunkSamplesCount := make([]int, 0)
floatChunkSize := make([]int, 0)
histogramChunkSamplesCount := make([]int, 0)
histogramChunkSize := make([]int, 0)
histogramChunkBucketsCount := make([]int, 0)
var builder labels.ScratchBuilder
for postingsr.Next() {
var chks []chunks.Meta
@ -599,34 +644,69 @@ func analyzeCompaction(block tsdb.BlockReader, indexr tsdb.IndexReader) (err err
for _, chk := range chks {
// Load the actual data of the chunk.
chk, err := chunkr.Chunk(chk)
chk, iterable, err := chunkr.ChunkOrIterable(chk)
if err != nil {
return err
}
chunkSize := math.Min(float64(chk.NumSamples()), maxSamplesPerChunk)
// Calculate the bucket for the chunk and increment it in the histogram.
bucket := int(math.Ceil(float64(nBuckets)*chunkSize/maxSamplesPerChunk)) - 1
histogram[bucket]++
// Chunks within blocks should not need to be re-written, so an
// iterable is not expected to be returned from the chunk reader.
if iterable != nil {
return errors.New("ChunkOrIterable should not return an iterable when reading a block")
}
switch chk.Encoding() {
case chunkenc.EncXOR:
floatChunkSamplesCount = append(floatChunkSamplesCount, chk.NumSamples())
floatChunkSize = append(floatChunkSize, len(chk.Bytes()))
case chunkenc.EncFloatHistogram:
histogramChunkSamplesCount = append(histogramChunkSamplesCount, chk.NumSamples())
histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
fhchk, ok := chk.(*chunkenc.FloatHistogramChunk)
if !ok {
return fmt.Errorf("chunk is not FloatHistogramChunk")
}
it := fhchk.Iterator(nil)
bucketCount := 0
for it.Next() == chunkenc.ValFloatHistogram {
_, f := it.AtFloatHistogram()
bucketCount += len(f.PositiveBuckets)
bucketCount += len(f.NegativeBuckets)
}
histogramChunkBucketsCount = append(histogramChunkBucketsCount, bucketCount)
case chunkenc.EncHistogram:
histogramChunkSamplesCount = append(histogramChunkSamplesCount, chk.NumSamples())
histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
hchk, ok := chk.(*chunkenc.HistogramChunk)
if !ok {
return fmt.Errorf("chunk is not HistogramChunk")
}
it := hchk.Iterator(nil)
bucketCount := 0
for it.Next() == chunkenc.ValHistogram {
_, f := it.AtHistogram()
bucketCount += len(f.PositiveBuckets)
bucketCount += len(f.NegativeBuckets)
}
histogramChunkBucketsCount = append(histogramChunkBucketsCount, bucketCount)
}
totalChunks++
}
}
fmt.Printf("\nCompaction analysis:\n")
fmt.Println("Fullness: Amount of samples in chunks (100% is 120 samples)")
// Normalize absolute counts to percentages and print them out.
for bucket, count := range histogram {
percentage := 100.0 * count / totalChunks
fmt.Printf("%7d%%: ", (bucket+1)*10)
for j := 0; j < percentage; j++ {
fmt.Printf("#")
}
fmt.Println()
}
fmt.Println()
displayHistogram("samples per float chunk", floatChunkSamplesCount, totalChunks)
displayHistogram("bytes per float chunk", floatChunkSize, totalChunks)
displayHistogram("samples per histogram chunk", histogramChunkSamplesCount, totalChunks)
displayHistogram("bytes per histogram chunk", histogramChunkSize, totalChunks)
displayHistogram("buckets per histogram chunk", histogramChunkBucketsCount, totalChunks)
return nil
}
func dumpSamples(path string, mint, maxt int64, match string) (err error) {
func dumpSamples(ctx context.Context, path string, mint, maxt int64, match string) (err error) {
db, err := tsdb.OpenDBReadOnly(path, nil)
if err != nil {
return err
@ -634,7 +714,7 @@ func dumpSamples(path string, mint, maxt int64, match string) (err error) {
defer func() {
err = tsdb_errors.NewMulti(err, db.Close()).Err()
}()
q, err := db.Querier(context.TODO(), mint, maxt)
q, err := db.Querier(mint, maxt)
if err != nil {
return err
}
@ -644,7 +724,7 @@ func dumpSamples(path string, mint, maxt int64, match string) (err error) {
if err != nil {
return err
}
ss := q.Select(false, nil, matchers...)
ss := q.Select(ctx, false, nil, matchers...)
for ss.Next() {
series := ss.At()
@ -654,13 +734,21 @@ func dumpSamples(path string, mint, maxt int64, match string) (err error) {
ts, val := it.At()
fmt.Printf("%s %g %d\n", lbs, val, ts)
}
for it.Next() == chunkenc.ValFloatHistogram {
ts, fh := it.AtFloatHistogram()
fmt.Printf("%s %s %d\n", lbs, fh.String(), ts)
}
for it.Next() == chunkenc.ValHistogram {
ts, h := it.AtHistogram()
fmt.Printf("%s %s %d\n", lbs, h.String(), ts)
}
if it.Err() != nil {
return ss.Err()
}
}
if ws := ss.Warnings(); len(ws) > 0 {
return tsdb_errors.NewMulti(ws...).Err()
return tsdb_errors.NewMulti(ws.AsErrors()...).Err()
}
if ss.Err() != nil {
@ -690,3 +778,42 @@ func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxB
return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration))
}
func displayHistogram(dataType string, datas []int, total int) {
slices.Sort(datas)
start, end, step := generateBucket(datas[0], datas[len(datas)-1])
sum := 0
buckets := make([]int, (end-start)/step+1)
maxCount := 0
for _, c := range datas {
sum += c
buckets[(c-start)/step]++
if buckets[(c-start)/step] > maxCount {
maxCount = buckets[(c-start)/step]
}
}
avg := sum / len(datas)
fmt.Printf("%s (min/avg/max): %d/%d/%d\n", dataType, datas[0], avg, datas[len(datas)-1])
maxLeftLen := strconv.Itoa(len(fmt.Sprintf("%d", end)))
maxRightLen := strconv.Itoa(len(fmt.Sprintf("%d", end+step)))
maxCountLen := strconv.Itoa(len(fmt.Sprintf("%d", maxCount)))
for bucket, count := range buckets {
percentage := 100.0 * count / total
fmt.Printf("[%"+maxLeftLen+"d, %"+maxRightLen+"d]: %"+maxCountLen+"d %s\n", bucket*step+start+1, (bucket+1)*step+start, count, strings.Repeat("#", percentage))
}
fmt.Println()
}
func generateBucket(min, max int) (start, end, step int) {
s := (max - min) / 10
step = 10
for step < s && step <= 10000 {
step *= 10
}
start = min - min%step
end = max - max%step + step
return
}
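Illustration only, not part of this change: a standalone sketch of how a --match selector string becomes the matchers that are later handed to PostingsForMatchers; the selector itself is a made-up example.

package main

import (
	"fmt"

	"github.com/prometheus/prometheus/promql/parser"
)

func main() {
	selectors, err := parser.ParseMetricSelector(`{__name__=~"http_.*",job="api"}`)
	if err != nil {
		panic(err)
	}
	for _, m := range selectors {
		fmt.Printf("%s %s %q\n", m.Name, m.Type, m.Value)
	}
}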

43
cmd/promtool/tsdb_test.go Normal file
View file

@ -0,0 +1,43 @@
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestGenerateBucket(t *testing.T) {
tcs := []struct {
min, max int
start, end, step int
}{
{
min: 101,
max: 141,
start: 100,
end: 150,
step: 10,
},
}
for _, tc := range tcs {
start, end, step := generateBucket(tc.min, tc.max)
require.Equal(t, tc.start, start)
require.Equal(t, tc.end, end)
require.Equal(t, tc.step, step)
}
}
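Sketch of one more case that could be added (not part of the change), assuming the same package; the expected values follow generateBucket's integer arithmetic: (483-47)/10 = 43, so the step grows once from 10 to 100, and min/max snap to the surrounding multiples of 100.

func TestGenerateBucketWideRange(t *testing.T) {
	start, end, step := generateBucket(47, 483)
	require.Equal(t, 0, start)
	require.Equal(t, 500, end)
	require.Equal(t, 100, step)
}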

View file

@ -26,9 +26,11 @@ import (
"time"
"github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/prometheus/common/model"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/promql/parser"
@ -38,11 +40,16 @@ import (
// RulesUnitTest does unit testing of rules based on the unit testing files provided.
// More info about the file format can be found in the docs.
func RulesUnitTest(queryOpts promql.LazyLoaderOpts, files ...string) int {
func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, files ...string) int {
failed := false
var run *regexp.Regexp
if runStrings != nil {
run = regexp.MustCompile(strings.Join(runStrings, "|"))
}
for _, f := range files {
if errs := ruleUnitTest(f, queryOpts); errs != nil {
if errs := ruleUnitTest(f, queryOpts, run); errs != nil {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
@ -60,7 +67,7 @@ func RulesUnitTest(queryOpts promql.LazyLoaderOpts, files ...string) int {
return successExitCode
}
func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts) []error {
func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts, run *regexp.Regexp) []error {
fmt.Println("Unit Testing: ", filename)
b, err := os.ReadFile(filename)
@ -95,6 +102,13 @@ func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts) []error {
// Testing.
var errs []error
for _, t := range unitTestInp.Tests {
if !matchesRun(t.TestGroupName, run) {
continue
}
if t.Interval == 0 {
t.Interval = unitTestInp.EvaluationInterval
}
ers := t.test(evalInterval, groupOrderMap, queryOpts, unitTestInp.RuleFiles...)
if ers != nil {
errs = append(errs, ers...)
@ -107,6 +121,14 @@ func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts) []error {
return nil
}
func matchesRun(name string, run *regexp.Regexp) bool {
if run == nil {
return true
}
return run.MatchString(name)
}
// unitTestFile holds the contents of a single unit test file.
type unitTestFile struct {
RuleFiles []string `yaml:"rule_files"`
@ -240,7 +262,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
g.Eval(suite.Context(), ts)
for _, r := range g.Rules() {
if r.LastError() != nil {
evalErrs = append(evalErrs, fmt.Errorf(" rule: %s, time: %s, err: %v",
evalErrs = append(evalErrs, fmt.Errorf(" rule: %s, time: %s, err: %w",
r.Name(), ts.Sub(time.Unix(0, 0).UTC()), r.LastError()))
}
}
@ -346,14 +368,29 @@ Outer:
var gotSamples []parsedSample
for _, s := range got {
gotSamples = append(gotSamples, parsedSample{
Labels: s.Metric.Copy(),
Value: s.F,
Labels: s.Metric.Copy(),
Value: s.F,
Histogram: promql.HistogramTestExpression(s.H),
})
}
var expSamples []parsedSample
for _, s := range testCase.ExpSamples {
lb, err := parser.ParseMetric(s.Labels)
var hist *histogram.FloatHistogram
if err == nil && s.Histogram != "" {
_, values, parseErr := parser.ParseSeriesDesc("{} " + s.Histogram)
switch {
case parseErr != nil:
err = parseErr
case len(values) != 1:
err = fmt.Errorf("expected 1 value, got %d", len(values))
case values[0].Histogram == nil:
err = fmt.Errorf("expected histogram, got %v", values[0])
default:
hist = values[0].Histogram
}
}
if err != nil {
err = fmt.Errorf("labels %q: %w", s.Labels, err)
errs = append(errs, fmt.Errorf(" expr: %q, time: %s, err: %w", testCase.Expr,
@ -361,8 +398,9 @@ Outer:
continue Outer
}
expSamples = append(expSamples, parsedSample{
Labels: lb,
Value: s.Value,
Labels: lb,
Value: s.Value,
Histogram: promql.HistogramTestExpression(hist),
})
}
@ -530,14 +568,16 @@ type promqlTestCase struct {
}
type sample struct {
Labels string `yaml:"labels"`
Value float64 `yaml:"value"`
Labels string `yaml:"labels"`
Value float64 `yaml:"value"`
Histogram string `yaml:"histogram"` // A non-empty string means Value is ignored.
}
// parsedSample is a sample with parsed Labels.
type parsedSample struct {
Labels labels.Labels
Value float64
Labels labels.Labels
Value float64
Histogram string // TestExpression() of histogram.FloatHistogram
}
func parsedSamplesString(pss []parsedSample) string {
@ -552,5 +592,8 @@ func parsedSamplesString(pss []parsedSample) string {
}
func (ps *parsedSample) String() string {
if ps.Histogram != "" {
return ps.Labels.String() + " " + ps.Histogram
}
return ps.Labels.String() + " " + strconv.FormatFloat(ps.Value, 'E', -1, 64)
}
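Illustration only, not part of this change: the --run filter just joins the given names into a single alternation and matches test group names against it. A standalone sketch using the standard regexp package (the code above uses github.com/grafana/regexp, an API-compatible replacement):

package main

import (
	"fmt"
	"regexp"
	"strings"
)

func main() {
	// Equivalent of passing --run correct --run wrong on the command line.
	run := regexp.MustCompile(strings.Join([]string{"correct", "wrong"}, "|"))
	for _, name := range []string{"correct test", "another test"} {
		fmt.Printf("%q selected: %v\n", name, run.MatchString(name))
	}
}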

View file

@ -112,10 +112,73 @@ func TestRulesUnitTest(t *testing.T) {
},
want: 0,
},
{
name: "No test group interval",
args: args{
files: []string{"./testdata/no-test-group-interval.yml"},
},
queryOpts: promql.LazyLoaderOpts{
EnableNegativeOffset: true,
},
want: 0,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := RulesUnitTest(tt.queryOpts, tt.args.files...); got != tt.want {
if got := RulesUnitTest(tt.queryOpts, nil, tt.args.files...); got != tt.want {
t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want)
}
})
}
}
func TestRulesUnitTestRun(t *testing.T) {
type args struct {
run []string
files []string
}
tests := []struct {
name string
args args
queryOpts promql.LazyLoaderOpts
want int
}{
{
name: "Test all without run arg",
args: args{
run: nil,
files: []string{"./testdata/rules_run.yml"},
},
want: 1,
},
{
name: "Test all with run arg",
args: args{
run: []string{"correct", "wrong"},
files: []string{"./testdata/rules_run.yml"},
},
want: 1,
},
{
name: "Test correct",
args: args{
run: []string{"correct"},
files: []string{"./testdata/rules_run.yml"},
},
want: 0,
},
{
name: "Test wrong",
args: args{
run: []string{"wrong"},
files: []string{"./testdata/rules_run.yml"},
},
want: 1,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := RulesUnitTest(tt.queryOpts, tt.args.run, tt.args.files...); got != tt.want {
t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want)
}
})

View file

@ -19,6 +19,7 @@ import (
"net/url"
"os"
"path/filepath"
"sort"
"strings"
"time"
@ -34,6 +35,7 @@ import (
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/relabel"
"github.com/prometheus/prometheus/storage/remote/azuread"
)
var (
@ -142,17 +144,21 @@ var (
ScrapeInterval: model.Duration(1 * time.Minute),
ScrapeTimeout: model.Duration(10 * time.Second),
EvaluationInterval: model.Duration(1 * time.Minute),
// When the native histogram feature flag is enabled, the ScrapeProtocols default
// changes to DefaultNativeHistogramScrapeProtocols.
ScrapeProtocols: DefaultScrapeProtocols,
}
// DefaultScrapeConfig is the default scrape configuration.
DefaultScrapeConfig = ScrapeConfig{
// ScrapeTimeout and ScrapeInterval default to the
// configured globals.
MetricsPath: "/metrics",
Scheme: "http",
HonorLabels: false,
HonorTimestamps: true,
HTTPClientConfig: config.DefaultHTTPClientConfig,
// ScrapeTimeout, ScrapeInterval and ScrapeProtocols default to the configured globals.
ScrapeClassicHistograms: false,
MetricsPath: "/metrics",
Scheme: "http",
HonorLabels: false,
HonorTimestamps: true,
HTTPClientConfig: config.DefaultHTTPClientConfig,
EnableCompression: true,
}
// DefaultAlertmanagerConfig is the default alertmanager configuration.
@ -258,7 +264,7 @@ func (c Config) String() string {
return string(b)
}
// ScrapeConfigs returns the scrape configurations.
// GetScrapeConfigs returns the scrape configurations.
func (c *Config) GetScrapeConfigs() ([]*ScrapeConfig, error) {
scfgs := make([]*ScrapeConfig, len(c.ScrapeConfigs))
@ -266,7 +272,7 @@ func (c *Config) GetScrapeConfigs() ([]*ScrapeConfig, error) {
for i, scfg := range c.ScrapeConfigs {
// We do these checks for library users that would not call Validate in
// Unmarshal.
if err := scfg.Validate(c.GlobalConfig.ScrapeInterval, c.GlobalConfig.ScrapeTimeout); err != nil {
if err := scfg.Validate(c.GlobalConfig); err != nil {
return nil, err
}
@ -293,7 +299,7 @@ func (c *Config) GetScrapeConfigs() ([]*ScrapeConfig, error) {
return nil, fileErr(filename, err)
}
for _, scfg := range cfg.ScrapeConfigs {
if err := scfg.Validate(c.GlobalConfig.ScrapeInterval, c.GlobalConfig.ScrapeTimeout); err != nil {
if err := scfg.Validate(c.GlobalConfig); err != nil {
return nil, fileErr(filename, err)
}
@ -342,7 +348,7 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
// Do global overrides and validate unique names.
jobNames := map[string]struct{}{}
for _, scfg := range c.ScrapeConfigs {
if err := scfg.Validate(c.GlobalConfig.ScrapeInterval, c.GlobalConfig.ScrapeTimeout); err != nil {
if err := scfg.Validate(c.GlobalConfig); err != nil {
return err
}
@ -383,12 +389,107 @@ type GlobalConfig struct {
ScrapeInterval model.Duration `yaml:"scrape_interval,omitempty"`
// The default timeout when scraping targets.
ScrapeTimeout model.Duration `yaml:"scrape_timeout,omitempty"`
// The protocols to negotiate during a scrape. It tells clients what
// protocols are accepted by Prometheus and with what preference (most preferred first).
// Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1,
// OpenMetricsText1.0.0, PrometheusText0.0.4.
ScrapeProtocols []ScrapeProtocol `yaml:"scrape_protocols,omitempty"`
// How frequently to evaluate rules by default.
EvaluationInterval model.Duration `yaml:"evaluation_interval,omitempty"`
// File to which PromQL queries are logged.
QueryLogFile string `yaml:"query_log_file,omitempty"`
// The labels to add to any timeseries that this Prometheus instance scrapes.
ExternalLabels labels.Labels `yaml:"external_labels,omitempty"`
// An uncompressed response body larger than this many bytes will cause the
// scrape to fail. 0 means no limit.
BodySizeLimit units.Base2Bytes `yaml:"body_size_limit,omitempty"`
// More than this many samples post metric-relabeling will cause the scrape to
// fail. 0 means no limit.
SampleLimit uint `yaml:"sample_limit,omitempty"`
// More than this many targets after the target relabeling will cause the
// scrapes to fail. 0 means no limit.
TargetLimit uint `yaml:"target_limit,omitempty"`
// More than this many labels post metric-relabeling will cause the scrape to
// fail. 0 means no limit.
LabelLimit uint `yaml:"label_limit,omitempty"`
// More than this label name length post metric-relabeling will cause the
// scrape to fail. 0 means no limit.
LabelNameLengthLimit uint `yaml:"label_name_length_limit,omitempty"`
// More than this label value length post metric-relabeling will cause the
// scrape to fail. 0 means no limit.
LabelValueLengthLimit uint `yaml:"label_value_length_limit,omitempty"`
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
}
// ScrapeProtocol represents supported protocol for scraping metrics.
type ScrapeProtocol string
// Validate returns an error if the given scrape protocol is not supported.
func (s ScrapeProtocol) Validate() error {
if _, ok := ScrapeProtocolsHeaders[s]; !ok {
return fmt.Errorf("unknown scrape protocol %v, supported: %v",
s, func() (ret []string) {
for k := range ScrapeProtocolsHeaders {
ret = append(ret, string(k))
}
sort.Strings(ret)
return ret
}())
}
return nil
}
var (
PrometheusProto ScrapeProtocol = "PrometheusProto"
PrometheusText0_0_4 ScrapeProtocol = "PrometheusText0.0.4"
OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1"
OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0"
ScrapeProtocolsHeaders = map[ScrapeProtocol]string{
PrometheusProto: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited",
PrometheusText0_0_4: "text/plain;version=0.0.4",
OpenMetricsText0_0_1: "application/openmetrics-text;version=0.0.1",
OpenMetricsText1_0_0: "application/openmetrics-text;version=1.0.0",
}
// DefaultScrapeProtocols is the set of scrape protocols that will be proposed
// to the scrape target, ordered by priority.
DefaultScrapeProtocols = []ScrapeProtocol{
OpenMetricsText1_0_0,
OpenMetricsText0_0_1,
PrometheusText0_0_4,
}
// DefaultProtoFirstScrapeProtocols is like DefaultScrapeProtocols, but it
// favors the protobuf Prometheus exposition format.
// It is used by default for certain feature flags like
// "native-histograms" and "created-timestamp-zero-ingestion".
DefaultProtoFirstScrapeProtocols = []ScrapeProtocol{
PrometheusProto,
OpenMetricsText1_0_0,
OpenMetricsText0_0_1,
PrometheusText0_0_4,
}
)
// validateAcceptScrapeProtocols returns an error if the configured scrape protocols are invalid or duplicated.
func validateAcceptScrapeProtocols(sps []ScrapeProtocol) error {
if len(sps) == 0 {
return errors.New("scrape_protocols cannot be empty")
}
dups := map[string]struct{}{}
for _, sp := range sps {
if _, ok := dups[strings.ToLower(string(sp))]; ok {
return fmt.Errorf("duplicated protocol in scrape_protocols, got %v", sps)
}
if err := sp.Validate(); err != nil {
return fmt.Errorf("scrape_protocols: %w", err)
}
dups[strings.ToLower(string(sp))] = struct{}{}
}
return nil
}
// SetDirectory joins any relative file paths with dir.
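Illustration only, not part of this change: the exported tables above can be inspected directly; a minimal sketch that prints the default negotiation order and the content-type header value each protocol maps to (the scrape loop derives its Accept header from these entries):

package main

import (
	"fmt"

	"github.com/prometheus/prometheus/config"
)

func main() {
	for _, sp := range config.DefaultScrapeProtocols {
		if err := sp.Validate(); err != nil {
			panic(err)
		}
		fmt.Printf("%-22s -> %s\n", sp, config.ScrapeProtocolsHeaders[sp])
	}
}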
@ -436,6 +537,14 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
if gc.EvaluationInterval == 0 {
gc.EvaluationInterval = DefaultGlobalConfig.EvaluationInterval
}
if gc.ScrapeProtocols == nil {
gc.ScrapeProtocols = DefaultGlobalConfig.ScrapeProtocols
}
if err := validateAcceptScrapeProtocols(gc.ScrapeProtocols); err != nil {
return fmt.Errorf("%w for global config", err)
}
*c = *gc
return nil
}
@ -446,7 +555,8 @@ func (c *GlobalConfig) isZero() bool {
c.ScrapeInterval == 0 &&
c.ScrapeTimeout == 0 &&
c.EvaluationInterval == 0 &&
c.QueryLogFile == ""
c.QueryLogFile == "" &&
c.ScrapeProtocols == nil
}
type ScrapeConfigs struct {
@ -461,34 +571,51 @@ type ScrapeConfig struct {
HonorLabels bool `yaml:"honor_labels,omitempty"`
// Indicator whether the scraped timestamps should be respected.
HonorTimestamps bool `yaml:"honor_timestamps"`
// Indicator whether to track the staleness of the scraped timestamps.
TrackTimestampsStaleness bool `yaml:"track_timestamps_staleness"`
// A set of query parameters with which the target is scraped.
Params url.Values `yaml:"params,omitempty"`
// How frequently to scrape the targets of this scrape config.
ScrapeInterval model.Duration `yaml:"scrape_interval,omitempty"`
// The timeout for scraping targets of this config.
ScrapeTimeout model.Duration `yaml:"scrape_timeout,omitempty"`
// The protocols to negotiate during a scrape. It tells clients what
// protocols are accepted by Prometheus and with what preference (most preferred first).
// Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1,
// OpenMetricsText1.0.0, PrometheusText0.0.4.
ScrapeProtocols []ScrapeProtocol `yaml:"scrape_protocols,omitempty"`
// Whether to scrape a classic histogram that is also exposed as a native histogram.
ScrapeClassicHistograms bool `yaml:"scrape_classic_histograms,omitempty"`
// The HTTP resource path on which to fetch metrics from targets.
MetricsPath string `yaml:"metrics_path,omitempty"`
// The URL scheme with which to fetch metrics from targets.
Scheme string `yaml:"scheme,omitempty"`
// Indicator whether to request a compressed response from the target.
EnableCompression bool `yaml:"enable_compression"`
// An uncompressed response body larger than this many bytes will cause the
// scrape to fail. 0 means no limit.
BodySizeLimit units.Base2Bytes `yaml:"body_size_limit,omitempty"`
// More than this many samples post metric-relabeling will cause the scrape to
// fail.
// fail. 0 means no limit.
SampleLimit uint `yaml:"sample_limit,omitempty"`
// More than this many targets after the target relabeling will cause the
// scrapes to fail.
// scrapes to fail. 0 means no limit.
TargetLimit uint `yaml:"target_limit,omitempty"`
// More than this many labels post metric-relabeling will cause the scrape to
// fail.
// fail. 0 means no limit.
LabelLimit uint `yaml:"label_limit,omitempty"`
// More than this label name length post metric-relabeling will cause the
// scrape to fail.
// scrape to fail. 0 means no limit.
LabelNameLengthLimit uint `yaml:"label_name_length_limit,omitempty"`
// More than this label value length post metric-relabeling will cause the
// scrape to fail.
// scrape to fail. 0 means no limit.
LabelValueLengthLimit uint `yaml:"label_value_length_limit,omitempty"`
// More than this many buckets in a native histogram will cause the scrape to
// fail.
NativeHistogramBucketLimit uint `yaml:"native_histogram_bucket_limit,omitempty"`
// Keep no more than this many dropped targets per job.
// 0 means no limit.
KeepDroppedTargets uint `yaml:"keep_dropped_targets,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types.
@ -546,25 +673,55 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return nil
}
func (c *ScrapeConfig) Validate(defaultInterval, defaultTimeout model.Duration) error {
// Validate validates the scrape config and also fills in relevant default values from the global config where needed.
func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
if c == nil {
return errors.New("empty or null scrape config section")
}
// First set the correct scrape interval, then check that the timeout
// (inferred or explicit) is not greater than that.
if c.ScrapeInterval == 0 {
c.ScrapeInterval = defaultInterval
c.ScrapeInterval = globalConfig.ScrapeInterval
}
if c.ScrapeTimeout > c.ScrapeInterval {
return fmt.Errorf("scrape timeout greater than scrape interval for scrape config with job name %q", c.JobName)
}
if c.ScrapeTimeout == 0 {
if defaultTimeout > c.ScrapeInterval {
if globalConfig.ScrapeTimeout > c.ScrapeInterval {
c.ScrapeTimeout = c.ScrapeInterval
} else {
c.ScrapeTimeout = defaultTimeout
c.ScrapeTimeout = globalConfig.ScrapeTimeout
}
}
if c.BodySizeLimit == 0 {
c.BodySizeLimit = globalConfig.BodySizeLimit
}
if c.SampleLimit == 0 {
c.SampleLimit = globalConfig.SampleLimit
}
if c.TargetLimit == 0 {
c.TargetLimit = globalConfig.TargetLimit
}
if c.LabelLimit == 0 {
c.LabelLimit = globalConfig.LabelLimit
}
if c.LabelNameLengthLimit == 0 {
c.LabelNameLengthLimit = globalConfig.LabelNameLengthLimit
}
if c.LabelValueLengthLimit == 0 {
c.LabelValueLengthLimit = globalConfig.LabelValueLengthLimit
}
if c.KeepDroppedTargets == 0 {
c.KeepDroppedTargets = globalConfig.KeepDroppedTargets
}
if c.ScrapeProtocols == nil {
c.ScrapeProtocols = globalConfig.ScrapeProtocols
}
if err := validateAcceptScrapeProtocols(c.ScrapeProtocols); err != nil {
return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName)
}
return nil
}
@ -766,6 +923,7 @@ type AlertmanagerConfig struct {
ServiceDiscoveryConfigs discovery.Configs `yaml:"-"`
HTTPClientConfig config.HTTPClientConfig `yaml:",inline"`
SigV4Config *sigv4.SigV4Config `yaml:"sigv4,omitempty"`
// The URL scheme to use when talking to Alertmanagers.
Scheme string `yaml:"scheme,omitempty"`
@ -801,6 +959,13 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er
return err
}
httpClientConfigAuthEnabled := c.HTTPClientConfig.BasicAuth != nil ||
c.HTTPClientConfig.Authorization != nil || c.HTTPClientConfig.OAuth2 != nil
if httpClientConfigAuthEnabled && c.SigV4Config != nil {
return fmt.Errorf("at most one of basic_auth, authorization, oauth2, & sigv4 must be configured")
}
// Check for users putting URLs in target groups.
if len(c.RelabelConfigs) == 0 {
if err := checkStaticTargets(c.ServiceDiscoveryConfigs); err != nil {
@ -865,6 +1030,7 @@ type RemoteWriteConfig struct {
QueueConfig QueueConfig `yaml:"queue_config,omitempty"`
MetadataConfig MetadataConfig `yaml:"metadata_config,omitempty"`
SigV4Config *sigv4.SigV4Config `yaml:"sigv4,omitempty"`
AzureADConfig *azuread.AzureADConfig `yaml:"azuread,omitempty"`
}
// SetDirectory joins any relative file paths with dir.
@ -901,8 +1067,12 @@ func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
httpClientConfigAuthEnabled := c.HTTPClientConfig.BasicAuth != nil ||
c.HTTPClientConfig.Authorization != nil || c.HTTPClientConfig.OAuth2 != nil
if httpClientConfigAuthEnabled && c.SigV4Config != nil {
return fmt.Errorf("at most one of basic_auth, authorization, oauth2, & sigv4 must be configured")
if httpClientConfigAuthEnabled && (c.SigV4Config != nil || c.AzureADConfig != nil) {
return fmt.Errorf("at most one of basic_auth, authorization, oauth2, sigv4, & azuread must be configured")
}
if c.SigV4Config != nil && c.AzureADConfig != nil {
return fmt.Errorf("at most one of basic_auth, authorization, oauth2, sigv4, & azuread must be configured")
}
return nil
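In words, the two checks above make the endpoint-level auth settings mutually exclusive: at most one of the HTTP client auth block, sigv4, and azuread may be set. A hypothetical helper expressing the same rule (name assumed, not from the commit, and assumed to live in the config package alongside RemoteWriteConfig):

```go
// Hypothetical helper mirroring the two checks above.
func remoteWriteAuthMethods(c *RemoteWriteConfig) int {
	n := 0
	if c.HTTPClientConfig.BasicAuth != nil ||
		c.HTTPClientConfig.Authorization != nil ||
		c.HTTPClientConfig.OAuth2 != nil {
		n++
	}
	if c.SigV4Config != nil {
		n++
	}
	if c.AzureADConfig != nil {
		n++
	}
	return n // valid configurations satisfy n <= 1
}
```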
@ -923,7 +1093,7 @@ func validateHeadersForTracing(headers map[string]string) error {
func validateHeaders(headers map[string]string) error {
for header := range headers {
if strings.ToLower(header) == "authorization" {
return errors.New("authorization header must be changed via the basic_auth, authorization, oauth2, or sigv4 parameter")
return errors.New("authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, or azuread parameter")
}
if _, ok := reservedHeaders[strings.ToLower(header)]; ok {
return fmt.Errorf("%s is a reserved header. It must not be changed", header)

View file

@ -68,6 +68,15 @@ func mustParseURL(u string) *config.URL {
return &config.URL{URL: parsed}
}
const (
globBodySizeLimit = 15 * units.MiB
globSampleLimit = 1500
globTargetLimit = 30
globLabelLimit = 30
globLabelNameLengthLimit = 200
globLabelValueLengthLimit = 200
)
var expectedConf = &Config{
GlobalConfig: GlobalConfig{
ScrapeInterval: model.Duration(15 * time.Second),
@ -76,6 +85,14 @@ var expectedConf = &Config{
QueryLogFile: "",
ExternalLabels: labels.FromStrings("foo", "bar", "monitor", "codelab"),
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
},
RuleFiles: []string{
@ -165,10 +182,18 @@ var expectedConf = &Config{
{
JobName: "prometheus",
HonorLabels: true,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorLabels: true,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -261,11 +286,17 @@ var expectedConf = &Config{
{
JobName: "service-x",
HonorTimestamps: true,
ScrapeInterval: model.Duration(50 * time.Second),
ScrapeTimeout: model.Duration(5 * time.Second),
BodySizeLimit: 10 * units.MiB,
SampleLimit: 1000,
HonorTimestamps: true,
ScrapeInterval: model.Duration(50 * time.Second),
ScrapeTimeout: model.Duration(5 * time.Second),
EnableCompression: true,
BodySizeLimit: 10 * units.MiB,
SampleLimit: 1000,
TargetLimit: 35,
LabelLimit: 35,
LabelNameLengthLimit: 210,
LabelValueLengthLimit: 210,
ScrapeProtocols: []ScrapeProtocol{PrometheusText0_0_4},
HTTPClientConfig: config.HTTPClientConfig{
BasicAuth: &config.BasicAuth{
@ -352,9 +383,17 @@ var expectedConf = &Config{
{
JobName: "service-y",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -363,6 +402,7 @@ var expectedConf = &Config{
ServiceDiscoveryConfigs: discovery.Configs{
&consul.SDConfig{
Server: "localhost:1234",
PathPrefix: "/consul",
Token: "mysecret",
Services: []string{"nginx", "cache", "mysql"},
ServiceTags: []string{"canary", "v1"},
@ -398,9 +438,17 @@ var expectedConf = &Config{
{
JobName: "service-z",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: model.Duration(10 * time.Second),
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: model.Duration(10 * time.Second),
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: "/metrics",
Scheme: "http",
@ -423,9 +471,17 @@ var expectedConf = &Config{
{
JobName: "service-kubernetes",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -454,9 +510,17 @@ var expectedConf = &Config{
{
JobName: "service-kubernetes-namespaces",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -485,9 +549,17 @@ var expectedConf = &Config{
{
JobName: "service-kuma",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -496,6 +568,7 @@ var expectedConf = &Config{
ServiceDiscoveryConfigs: discovery.Configs{
&xds.KumaSDConfig{
Server: "http://kuma-control-plane.kuma-system.svc:5676",
ClientID: "main-prometheus",
HTTPClientConfig: config.DefaultHTTPClientConfig,
RefreshInterval: model.Duration(15 * time.Second),
FetchTimeout: model.Duration(2 * time.Minute),
@ -505,9 +578,17 @@ var expectedConf = &Config{
{
JobName: "service-marathon",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -534,9 +615,17 @@ var expectedConf = &Config{
{
JobName: "service-nomad",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -560,9 +649,17 @@ var expectedConf = &Config{
{
JobName: "service-ec2",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -593,9 +690,17 @@ var expectedConf = &Config{
{
JobName: "service-lightsail",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -616,9 +721,17 @@ var expectedConf = &Config{
{
JobName: "service-azure",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -642,9 +755,17 @@ var expectedConf = &Config{
{
JobName: "service-nerve",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -661,9 +782,17 @@ var expectedConf = &Config{
{
JobName: "0123service-xxx",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -683,9 +812,17 @@ var expectedConf = &Config{
{
JobName: "badfederation",
HonorTimestamps: false,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: false,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: "/federate",
Scheme: DefaultScrapeConfig.Scheme,
@ -705,9 +842,17 @@ var expectedConf = &Config{
{
JobName: "測試",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -727,9 +872,17 @@ var expectedConf = &Config{
{
JobName: "httpsd",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -746,9 +899,17 @@ var expectedConf = &Config{
{
JobName: "service-triton",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -773,9 +934,17 @@ var expectedConf = &Config{
{
JobName: "digitalocean-droplets",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -799,9 +968,17 @@ var expectedConf = &Config{
{
JobName: "docker",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -821,9 +998,17 @@ var expectedConf = &Config{
{
JobName: "dockerswarm",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -843,9 +1028,17 @@ var expectedConf = &Config{
{
JobName: "service-openstack",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -869,9 +1062,17 @@ var expectedConf = &Config{
{
JobName: "service-puppetdb",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -897,10 +1098,18 @@ var expectedConf = &Config{
},
},
{
JobName: "hetzner",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
JobName: "hetzner",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -946,9 +1155,17 @@ var expectedConf = &Config{
{
JobName: "service-eureka",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -965,9 +1182,18 @@ var expectedConf = &Config{
{
JobName: "ovhcloud",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -994,9 +1220,18 @@ var expectedConf = &Config{
{
JobName: "scaleway",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1029,9 +1264,17 @@ var expectedConf = &Config{
{
JobName: "linode-instances",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1056,9 +1299,18 @@ var expectedConf = &Config{
{
JobName: "uyuni",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1075,10 +1327,18 @@ var expectedConf = &Config{
},
},
{
JobName: "ionos",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
JobName: "ionos",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1100,9 +1360,17 @@ var expectedConf = &Config{
{
JobName: "vultr",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1190,8 +1458,8 @@ func TestRemoteWriteRetryOnRateLimit(t *testing.T) {
got := &Config{}
require.NoError(t, yaml.UnmarshalStrict(out, got))
require.Equal(t, true, got.RemoteWriteConfigs[0].QueueConfig.RetryOnRateLimit)
require.Equal(t, false, got.RemoteWriteConfigs[1].QueueConfig.RetryOnRateLimit)
require.True(t, got.RemoteWriteConfigs[0].QueueConfig.RetryOnRateLimit)
require.False(t, got.RemoteWriteConfigs[1].QueueConfig.RetryOnRateLimit)
}
func TestLoadConfig(t *testing.T) {
@ -1208,9 +1476,9 @@ func TestLoadConfig(t *testing.T) {
func TestScrapeIntervalLarger(t *testing.T) {
c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, false, log.NewNopLogger())
require.NoError(t, err)
require.Equal(t, 1, len(c.ScrapeConfigs))
require.Len(t, c.ScrapeConfigs, 1)
for _, sc := range c.ScrapeConfigs {
require.Equal(t, true, sc.ScrapeInterval >= sc.ScrapeTimeout)
require.GreaterOrEqual(t, sc.ScrapeInterval, sc.ScrapeTimeout)
}
}
@ -1226,7 +1494,7 @@ func TestElideSecrets(t *testing.T) {
yamlConfig := string(config)
matches := secretRe.FindAllStringIndex(yamlConfig, -1)
require.Equal(t, 22, len(matches), "wrong number of secret matches found")
require.Len(t, matches, 22, "wrong number of secret matches found")
require.NotContains(t, yamlConfig, "mysecret",
"yaml marshal reveals authentication credentials.")
}
@ -1523,7 +1791,7 @@ var expectedErrors = []struct {
},
{
filename: "remote_write_authorization_header.bad.yml",
errMsg: `authorization header must be changed via the basic_auth, authorization, oauth2, or sigv4 parameter`,
errMsg: `authorization header must be changed via the basic_auth, authorization, oauth2, sigv4, or azuread parameter`,
},
{
filename: "remote_write_url_missing.bad.yml",
@ -1541,6 +1809,14 @@ var expectedErrors = []struct {
filename: "ec2_filters_empty_values.bad.yml",
errMsg: `EC2 SD configuration filter values cannot be empty`,
},
{
filename: "ec2_token_file.bad.yml",
errMsg: `at most one of bearer_token & bearer_token_file must be configured`,
},
{
filename: "lightsail_token_file.bad.yml",
errMsg: `at most one of bearer_token & bearer_token_file must be configured`,
},
{
filename: "section_key_dup.bad.yml",
errMsg: "field scrape_configs already set in type config.plain",
@ -1565,6 +1841,10 @@ var expectedErrors = []struct {
filename: "azure_authentication_method.bad.yml",
errMsg: "unknown authentication_type \"invalid\". Supported types are \"OAuth\" or \"ManagedIdentity\"",
},
{
filename: "azure_bearertoken_basicauth.bad.yml",
errMsg: "at most one of basic_auth, oauth2, bearer_token & bearer_token_file must be configured",
},
{
filename: "empty_scrape_config.bad.yml",
errMsg: "empty or null scrape config section",
@ -1617,6 +1897,10 @@ var expectedErrors = []struct {
filename: "puppetdb_no_scheme.bad.yml",
errMsg: "URL scheme must be 'http' or 'https'",
},
{
filename: "puppetdb_token_file.bad.yml",
errMsg: "at most one of bearer_token & bearer_token_file must be configured",
},
{
filename: "hetzner_role.bad.yml",
errMsg: "unknown role",
@ -1653,6 +1937,10 @@ var expectedErrors = []struct {
filename: "http_url_no_host.bad.yml",
errMsg: "host is missing in URL",
},
{
filename: "http_token_file.bad.yml",
errMsg: "at most one of bearer_token & bearer_token_file must be configured",
},
{
filename: "http_url_bad_scheme.bad.yml",
errMsg: "URL scheme must be 'http' or 'https'",
@ -1681,6 +1969,10 @@ var expectedErrors = []struct {
filename: "uyuni_no_server.bad.yml",
errMsg: "Uyuni SD configuration requires server host",
},
{
filename: "uyuni_token_file.bad.yml",
errMsg: "at most one of bearer_token & bearer_token_file must be configured",
},
{
filename: "ionos_datacenter.bad.yml",
errMsg: "datacenter id can't be empty",
@ -1697,6 +1989,14 @@ var expectedErrors = []struct {
filename: "scrape_config_files_glob.bad.yml",
errMsg: `parsing YAML file testdata/scrape_config_files_glob.bad.yml: invalid scrape config file path "scrape_configs/*/*"`,
},
{
filename: "scrape_config_files_scrape_protocols.bad.yml",
errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4] for scrape config with job name "node"`,
},
{
filename: "scrape_config_files_scrape_protocols2.bad.yml",
errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols2.bad.yml: duplicated protocol in scrape_protocols, got [OpenMetricsText1.0.0 PrometheusProto OpenMetricsText1.0.0] for scrape config with job name "node"`,
},
}
func TestBadConfigs(t *testing.T) {
@ -1764,7 +2064,7 @@ func TestAgentMode(t *testing.T) {
c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, false, log.NewNopLogger())
require.NoError(t, err)
require.Len(t, c.RemoteWriteConfigs, 0)
require.Empty(t, c.RemoteWriteConfigs)
c, err = LoadFile("testdata/agent_mode.good.yml", true, false, log.NewNopLogger())
require.NoError(t, err)
@ -1786,13 +2086,16 @@ func TestEmptyGlobalBlock(t *testing.T) {
func TestGetScrapeConfigs(t *testing.T) {
sc := func(jobName string, scrapeInterval, scrapeTimeout model.Duration) *ScrapeConfig {
return &ScrapeConfig{
JobName: jobName,
HonorTimestamps: true,
ScrapeInterval: scrapeInterval,
ScrapeTimeout: scrapeTimeout,
MetricsPath: "/metrics",
Scheme: "http",
HTTPClientConfig: config.DefaultHTTPClientConfig,
JobName: jobName,
HonorTimestamps: true,
ScrapeInterval: scrapeInterval,
ScrapeTimeout: scrapeTimeout,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: "/metrics",
Scheme: "http",
EnableCompression: true,
HTTPClientConfig: config.DefaultHTTPClientConfig,
ServiceDiscoveryConfigs: discovery.Configs{
discovery.StaticConfig{
{
@ -1843,10 +2146,13 @@ func TestGetScrapeConfigs(t *testing.T) {
HonorTimestamps: true,
ScrapeInterval: model.Duration(60 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
EnableCompression: true,
HTTPClientConfig: config.HTTPClientConfig{
TLSConfig: config.TLSConfig{
CertFile: filepath.FromSlash("testdata/scrape_configs/valid_cert_file"),
@ -1873,6 +2179,8 @@ func TestGetScrapeConfigs(t *testing.T) {
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
HTTPClientConfig: config.HTTPClientConfig{
TLSConfig: config.TLSConfig{
CertFile: filepath.FromSlash("testdata/valid_cert_file"),
@ -1885,6 +2193,8 @@ func TestGetScrapeConfigs(t *testing.T) {
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
EnableCompression: true,
ServiceDiscoveryConfigs: discovery.Configs{
&vultr.SDConfig{
HTTPClientConfig: config.HTTPClientConfig{
@ -1937,3 +2247,16 @@ func kubernetesSDHostURL() config.URL {
tURL, _ := url.Parse("https://localhost:1234")
return config.URL{URL: tURL}
}
func TestScrapeConfigDisableCompression(t *testing.T) {
want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, false, log.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
require.NoError(t, err)
got := &Config{}
require.NoError(t, yaml.UnmarshalStrict(out, got))
require.False(t, got.ScrapeConfigs[0].EnableCompression)
}
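The scrape-loop half of enable_compression lives outside this change set; a hedged sketch of how a scraper could honour the flag follows. The request wiring and the ctx, targetURL, and enableCompression variables are illustrative placeholders (assuming net/http), not taken from the commit.

```go
// Illustrative fragment only.
req, err := http.NewRequestWithContext(ctx, http.MethodGet, targetURL, nil)
if err != nil {
	return err
}
if enableCompression {
	// Ask the target for a gzip-encoded payload (the long-standing default).
	req.Header.Set("Accept-Encoding", "gzip")
} else {
	// enable_compression: false requests the plain body, trading bandwidth
	// for less CPU on the scraped target.
	req.Header.Set("Accept-Encoding", "identity")
}
```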

View file

@ -0,0 +1,11 @@
scrape_configs:
- job_name: prometheus
azure_sd_configs:
- subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11
tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2
client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C
client_secret: mysecret
bearer_token: 1234
basic_auth:
username: user
password: password

View file

@ -2,6 +2,12 @@
global:
scrape_interval: 15s
evaluation_interval: 30s
body_size_limit: 15MB
sample_limit: 1500
target_limit: 30
label_limit: 30
label_name_length_limit: 200
label_value_length_limit: 200
# scrape_timeout is set to the global default (10s).
external_labels:
@ -108,9 +114,14 @@ scrape_configs:
scrape_interval: 50s
scrape_timeout: 5s
scrape_protocols: ["PrometheusText0.0.4"]
body_size_limit: 10MB
sample_limit: 1000
target_limit: 35
label_limit: 35
label_name_length_limit: 210
label_value_length_limit: 210
metrics_path: /my_path
scheme: https
@ -151,6 +162,7 @@ scrape_configs:
consul_sd_configs:
- server: "localhost:1234"
token: mysecret
path_prefix: /consul
services: ["nginx", "cache", "mysql"]
tags: ["canary", "v1"]
node_meta:
@ -209,6 +221,7 @@ scrape_configs:
kuma_sd_configs:
- server: http://kuma-control-plane.kuma-system.svc:5676
client_id: main-prometheus
- job_name: service-marathon
marathon_sd_configs:

View file

@ -0,0 +1,6 @@
scrape_configs:
- job_name: foo
ec2_sd_configs:
- region: us-east-1
bearer_token: foo
bearer_token_file: foo

View file

@ -0,0 +1,6 @@
scrape_configs:
- job_name: foo
http_sd_configs:
- url: http://foo
bearer_token: foo
bearer_token_file: foo

View file

@ -0,0 +1,6 @@
scrape_configs:
- job_name: foo
lightsail_sd_configs:
- region: us-east-1
bearer_token: foo
bearer_token_file: foo

View file

@ -0,0 +1,7 @@
scrape_configs:
- job_name: puppetdb
puppetdb_sd_configs:
- url: http://puppet
query: 'resources { type = "Package" and title = "httpd" }'
bearer_token: foo
bearer_token_file: foo

View file

@ -108,6 +108,7 @@ scrape_configs:
kuma_sd_configs:
- server: http://kuma-control-plane.kuma-system.svc:5676
client_id: main-prometheus
marathon_sd_configs:
- servers:

View file

@ -0,0 +1,5 @@
scrape_configs:
- job_name: prometheus
static_configs:
- targets: ['localhost:8080']
enable_compression: false

View file

@ -0,0 +1,5 @@
scrape_configs:
- job_name: node
scrape_protocols: ["prometheusproto"]
static_configs:
- targets: ['localhost:8080']

View file

@ -0,0 +1,5 @@
scrape_configs:
- job_name: node
scrape_protocols: ["OpenMetricsText1.0.0", "PrometheusProto", "OpenMetricsText1.0.0"]
static_configs:
- targets: ['localhost:8080']

View file

@ -0,0 +1,8 @@
scrape_configs:
- job_name: uyuni
uyuni_sd_configs:
- server: "server"
username: "username"
password: "password"
bearer_token: foo
bearer_token_file: foo

View file

@ -234,6 +234,11 @@ type Config interface {
type DiscovererOptions struct {
Logger log.Logger
// A registerer for the Discoverer's metrics.
Registerer prometheus.Registerer
HTTPClientOptions []config.HTTPClientOption
}
```

View file

@ -30,6 +30,7 @@ import (
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -101,7 +102,7 @@ func (*EC2SDConfig) Name() string { return "ec2" }
// NewDiscoverer returns a Discoverer for the EC2 Config.
func (c *EC2SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewEC2Discovery(c, opts.Logger), nil
return NewEC2Discovery(c, opts.Logger, opts.Registerer), nil
}
// UnmarshalYAML implements the yaml.Unmarshaler interface for the EC2 Config.
@ -129,7 +130,7 @@ func (c *EC2SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return errors.New("EC2 SD configuration filter values cannot be empty")
}
}
return nil
return c.HTTPClientConfig.Validate()
}
// EC2Discovery periodically performs EC2-SD requests. It implements
@ -147,7 +148,7 @@ type EC2Discovery struct {
}
// NewEC2Discovery returns a new EC2Discovery which periodically refreshes its targets.
func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger) *EC2Discovery {
func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, reg prometheus.Registerer) *EC2Discovery {
if logger == nil {
logger = log.NewNopLogger()
}
@ -156,10 +157,13 @@ func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger) *EC2Discovery {
cfg: conf,
}
d.Discovery = refresh.NewDiscovery(
logger,
"ec2",
time.Duration(d.cfg.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "ec2",
Interval: time.Duration(d.cfg.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d
}
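Editor's sketch of the new constructor shape: each AWS discoverer now takes a prometheus.Registerer and hands it to refresh.Options, so its refresh metrics land on a per-instance registry. The config values below are assumptions for illustration; a real EC2SDConfig comes from YAML with defaults applied.

```go
package main

import (
	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery/aws"
)

func main() {
	// Illustrative values only.
	cfg := &aws.EC2SDConfig{Region: "us-east-1"}

	// A dedicated registry per discoverer instance; refresh duration and
	// failure counters are registered against it via refresh.Options.
	reg := prometheus.NewRegistry()

	d := aws.NewEC2Discovery(cfg, log.NewNopLogger(), reg)
	_ = d // d.Run(ctx, ch) would drive the periodic refresh.
}
```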

View file

@ -29,6 +29,7 @@ import (
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/lightsail"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -84,7 +85,7 @@ func (*LightsailSDConfig) Name() string { return "lightsail" }
// NewDiscoverer returns a Discoverer for the Lightsail Config.
func (c *LightsailSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewLightsailDiscovery(c, opts.Logger), nil
return NewLightsailDiscovery(c, opts.Logger, opts.Registerer), nil
}
// UnmarshalYAML implements the yaml.Unmarshaler interface for the Lightsail Config.
@ -109,7 +110,7 @@ func (c *LightsailSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
}
c.Region = region
}
return nil
return c.HTTPClientConfig.Validate()
}
// LightsailDiscovery periodically performs Lightsail-SD requests. It implements
@ -121,7 +122,7 @@ type LightsailDiscovery struct {
}
// NewLightsailDiscovery returns a new LightsailDiscovery which periodically refreshes its targets.
func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger) *LightsailDiscovery {
func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger, reg prometheus.Registerer) *LightsailDiscovery {
if logger == nil {
logger = log.NewNopLogger()
}
@ -129,10 +130,13 @@ func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger) *Lightsai
cfg: conf,
}
d.Discovery = refresh.NewDiscovery(
logger,
"lightsail",
time.Duration(d.cfg.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "lightsail",
Interval: time.Duration(d.cfg.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d
}

View file

@ -17,21 +17,28 @@ import (
"context"
"errors"
"fmt"
"math/rand"
"net"
"net/http"
"strings"
"sync"
"time"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-10-01/network"
"github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/adal"
"github.com/Azure/go-autorest/autorest/azure"
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4"
cache "github.com/Code-Hex/go-generics-cache"
"github.com/Code-Hex/go-generics-cache/policy/lru"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@ -68,21 +75,34 @@ var (
DefaultSDConfig = SDConfig{
Port: 80,
RefreshInterval: model.Duration(5 * time.Minute),
Environment: azure.PublicCloud.Name,
Environment: "AzurePublicCloud",
AuthenticationMethod: authMethodOAuth,
HTTPClientConfig: config_util.DefaultHTTPClientConfig,
}
failuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_azure_failures_total",
Help: "Number of Azure service discovery refresh failures.",
})
)
var environments = map[string]cloud.Configuration{
"AZURECHINACLOUD": cloud.AzureChina,
"AZURECLOUD": cloud.AzurePublic,
"AZUREGERMANCLOUD": cloud.AzurePublic,
"AZUREPUBLICCLOUD": cloud.AzurePublic,
"AZUREUSGOVERNMENT": cloud.AzureGovernment,
"AZUREUSGOVERNMENTCLOUD": cloud.AzureGovernment,
}
// CloudConfigurationFromName returns the cloud configuration matching the specified common name.
func CloudConfigurationFromName(name string) (cloud.Configuration, error) {
name = strings.ToUpper(name)
env, ok := environments[name]
if !ok {
return env, fmt.Errorf("there is no cloud configuration matching the name %q", name)
}
return env, nil
}
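A small fragment (assumed to run inside this package) showing how the lookup above behaves; the environment string is only an example.

```go
// Names are matched case-insensitively against the table above.
cloudCfg, err := CloudConfigurationFromName("AzureChinaCloud")
if err != nil {
	return err // unknown names such as "AzureFooCloud" are rejected here
}
// cloudCfg now selects the cloud.AzureChina endpoints and is threaded into
// the SDK through policy.ClientOptions{Cloud: cloudCfg, ...} in createAzureClient.
```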
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(failuresCount)
}
// SDConfig is the configuration for Azure based service discovery.
@ -105,7 +125,7 @@ func (*SDConfig) Name() string { return "azure" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger), nil
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
func validateAuthParam(param, name string) error {
@ -123,7 +143,6 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
if err != nil {
return err
}
if err = validateAuthParam(c.SubscriptionID, "subscription_id"); err != nil {
return err
}
@ -144,114 +163,143 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return fmt.Errorf("unknown authentication_type %q. Supported types are %q or %q", c.AuthenticationMethod, authMethodOAuth, authMethodManagedIdentity)
}
return nil
return c.HTTPClientConfig.Validate()
}
type Discovery struct {
*refresh.Discovery
logger log.Logger
cfg *SDConfig
port int
logger log.Logger
cfg *SDConfig
port int
cache *cache.Cache[string, *armnetwork.Interface]
failuresCount prometheus.Counter
cacheHitCount prometheus.Counter
}
// NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets.
func NewDiscovery(cfg *SDConfig, logger log.Logger) *Discovery {
func NewDiscovery(cfg *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
l := cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5000)))
d := &Discovery{
cfg: cfg,
port: cfg.Port,
logger: logger,
cache: l,
failuresCount: prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_azure_failures_total",
Help: "Number of Azure service discovery refresh failures.",
}),
cacheHitCount: prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_azure_cache_hit_total",
Help: "Number of cache hit during refresh.",
}),
}
d.Discovery = refresh.NewDiscovery(
logger,
"azure",
time.Duration(cfg.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "azure",
Interval: time.Duration(cfg.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
Metrics: []prometheus.Collector{d.failuresCount, d.cacheHitCount},
},
)
return d
return d, nil
}
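The getFromCache and addToCache helpers used later in refresh are not part of this hunk. A plausible reading, given the LRU created above and the go-generics-cache Get/Set API, is sketched below; the real helpers may also attach a jittered TTL (note the math/rand import), which this sketch omits.

```go
// Assumed sketch only; verify against the helpers actually added elsewhere.
func (d *Discovery) getFromCache(nicID string) (*armnetwork.Interface, bool) {
	return d.cache.Get(nicID)
}

func (d *Discovery) addToCache(nicID string, netInt *armnetwork.Interface) {
	// Caching a NIC saves one network API call per target on later
	// refreshes; the LRU bounds memory at 5000 entries.
	d.cache.Set(nicID, netInt)
}
```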
// azureClient represents multiple Azure Resource Manager providers.
type azureClient struct {
nic network.InterfacesClient
vm compute.VirtualMachinesClient
vmss compute.VirtualMachineScaleSetsClient
vmssvm compute.VirtualMachineScaleSetVMsClient
nic *armnetwork.InterfacesClient
vm *armcompute.VirtualMachinesClient
vmss *armcompute.VirtualMachineScaleSetsClient
vmssvm *armcompute.VirtualMachineScaleSetVMsClient
logger log.Logger
}
// createAzureClient is a helper function for creating an Azure compute client to ARM.
func createAzureClient(cfg SDConfig) (azureClient, error) {
env, err := azure.EnvironmentFromName(cfg.Environment)
cloudConfiguration, err := CloudConfigurationFromName(cfg.Environment)
if err != nil {
return azureClient{}, err
}
activeDirectoryEndpoint := env.ActiveDirectoryEndpoint
resourceManagerEndpoint := env.ResourceManagerEndpoint
var c azureClient
var spt *adal.ServicePrincipalToken
telemetry := policy.TelemetryOptions{
ApplicationID: userAgent,
}
switch cfg.AuthenticationMethod {
case authMethodManagedIdentity:
spt, err = adal.NewServicePrincipalTokenFromManagedIdentity(resourceManagerEndpoint, &adal.ManagedIdentityOptions{ClientID: cfg.ClientID})
if err != nil {
return azureClient{}, err
}
case authMethodOAuth:
oauthConfig, err := adal.NewOAuthConfig(activeDirectoryEndpoint, cfg.TenantID)
if err != nil {
return azureClient{}, err
}
spt, err = adal.NewServicePrincipalToken(*oauthConfig, cfg.ClientID, string(cfg.ClientSecret), resourceManagerEndpoint)
if err != nil {
return azureClient{}, err
}
credential, err := newCredential(cfg, policy.ClientOptions{
Cloud: cloudConfiguration,
Telemetry: telemetry,
})
if err != nil {
return azureClient{}, err
}
client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "azure_sd")
if err != nil {
return azureClient{}, err
}
sender := autorest.DecorateSender(client)
preparer := autorest.WithUserAgent(userAgent)
options := &arm.ClientOptions{
ClientOptions: policy.ClientOptions{
Transport: client,
Cloud: cloudConfiguration,
Telemetry: telemetry,
},
}
bearerAuthorizer := autorest.NewBearerAuthorizer(spt)
c.vm, err = armcompute.NewVirtualMachinesClient(cfg.SubscriptionID, credential, options)
if err != nil {
return azureClient{}, err
}
c.vm = compute.NewVirtualMachinesClientWithBaseURI(resourceManagerEndpoint, cfg.SubscriptionID)
c.vm.Authorizer = bearerAuthorizer
c.vm.Sender = sender
c.vm.RequestInspector = preparer
c.nic, err = armnetwork.NewInterfacesClient(cfg.SubscriptionID, credential, options)
if err != nil {
return azureClient{}, err
}
c.nic = network.NewInterfacesClientWithBaseURI(resourceManagerEndpoint, cfg.SubscriptionID)
c.nic.Authorizer = bearerAuthorizer
c.nic.Sender = sender
c.nic.RequestInspector = preparer
c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(cfg.SubscriptionID, credential, options)
if err != nil {
return azureClient{}, err
}
c.vmss = compute.NewVirtualMachineScaleSetsClientWithBaseURI(resourceManagerEndpoint, cfg.SubscriptionID)
c.vmss.Authorizer = bearerAuthorizer
c.vmss.Sender = sender
c.vmss.RequestInspector = preparer
c.vmssvm = compute.NewVirtualMachineScaleSetVMsClientWithBaseURI(resourceManagerEndpoint, cfg.SubscriptionID)
c.vmssvm.Authorizer = bearerAuthorizer
c.vmssvm.Sender = sender
c.vmssvm.RequestInspector = preparer
c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(cfg.SubscriptionID, credential, options)
if err != nil {
return azureClient{}, err
}
return c, nil
}
// azureResource represents a resource identifier in Azure.
type azureResource struct {
Name string
ResourceGroup string
func newCredential(cfg SDConfig, policyClientOptions policy.ClientOptions) (azcore.TokenCredential, error) {
var credential azcore.TokenCredential
switch cfg.AuthenticationMethod {
case authMethodManagedIdentity:
options := &azidentity.ManagedIdentityCredentialOptions{ClientOptions: policyClientOptions, ID: azidentity.ClientID(cfg.ClientID)}
managedIdentityCredential, err := azidentity.NewManagedIdentityCredential(options)
if err != nil {
return nil, err
}
credential = azcore.TokenCredential(managedIdentityCredential)
case authMethodOAuth:
options := &azidentity.ClientSecretCredentialOptions{ClientOptions: policyClientOptions}
secretCredential, err := azidentity.NewClientSecretCredential(cfg.TenantID, cfg.ClientID, string(cfg.ClientSecret), options)
if err != nil {
return nil, err
}
credential = azcore.TokenCredential(secretCredential)
}
return credential, nil
}
// virtualMachine represents an Azure virtual machine (which can also be created by a VMSS)
// virtualMachine represents an Azure virtual machine (which can also be created by a VMSS).
type virtualMachine struct {
ID string
Name string
@ -260,28 +308,24 @@ type virtualMachine struct {
Location string
OsType string
ScaleSet string
InstanceID string
Tags map[string]*string
NetworkInterfaces []string
Size string
}
// Create a new azureResource object from an ID string.
func newAzureResourceFromID(id string, logger log.Logger) (azureResource, error) {
// Resource IDs have the following format.
// /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME
// or if embedded resource then
// /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME/TYPE/NAME
s := strings.Split(id, "/")
if len(s) != 9 && len(s) != 11 {
err := fmt.Errorf("invalid ID '%s'. Refusing to create azureResource", id)
level.Error(logger).Log("err", err)
return azureResource{}, err
func newAzureResourceFromID(id string, logger log.Logger) (*arm.ResourceID, error) {
if logger == nil {
logger = log.NewNopLogger()
}
return azureResource{
Name: strings.ToLower(s[8]),
ResourceGroup: strings.ToLower(s[4]),
}, nil
resourceID, err := arm.ParseResourceID(id)
if err != nil {
err := fmt.Errorf("invalid ID '%s': %w", id, err)
level.Error(logger).Log("err", err)
return &arm.ResourceID{}, err
}
return resourceID, nil
}
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
@ -289,13 +333,14 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
client, err := createAzureClient(*d.cfg)
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("could not create Azure client: %w", err)
}
client.logger = d.logger
machines, err := client.getVMs(ctx, d.cfg.ResourceGroup)
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("could not get virtual machines: %w", err)
}
@ -304,14 +349,14 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
// Load the vms managed by scale sets.
scaleSets, err := client.getScaleSets(ctx, d.cfg.ResourceGroup)
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("could not get virtual machine scale sets: %w", err)
}
for _, scaleSet := range scaleSets {
scaleSetVms, err := client.getScaleSetVMs(ctx, scaleSet)
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("could not get virtual machine scale set vms: %w", err)
}
machines = append(machines, scaleSetVms...)
@ -344,7 +389,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
azureLabelMachineComputerName: model.LabelValue(vm.ComputerName),
azureLabelMachineOSType: model.LabelValue(vm.OsType),
azureLabelMachineLocation: model.LabelValue(vm.Location),
azureLabelMachineResourceGroup: model.LabelValue(r.ResourceGroup),
azureLabelMachineResourceGroup: model.LabelValue(r.ResourceGroupName),
azureLabelMachineSize: model.LabelValue(vm.Size),
}
@ -359,18 +404,29 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
// Get the IP address information via separate call to the network provider.
for _, nicID := range vm.NetworkInterfaces {
networkInterface, err := client.getNetworkInterfaceByID(ctx, nicID)
if err != nil {
if errors.Is(err, errorNotFound) {
level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err)
var networkInterface *armnetwork.Interface
if v, ok := d.getFromCache(nicID); ok {
networkInterface = v
d.cacheHitCount.Add(1)
} else {
if vm.ScaleSet == "" {
networkInterface, err = client.getVMNetworkInterfaceByID(ctx, nicID)
} else {
ch <- target{labelSet: nil, err: err}
networkInterface, err = client.getVMScaleSetVMNetworkInterfaceByID(ctx, nicID, vm.ScaleSet, vm.InstanceID)
}
// Get out of this routine because we cannot continue without a network interface.
return
if err != nil {
if errors.Is(err, errorNotFound) {
level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err)
} else {
ch <- target{labelSet: nil, err: err}
}
// Get out of this routine because we cannot continue without a network interface.
return
}
d.addToCache(nicID, networkInterface)
}
if networkInterface.InterfacePropertiesFormat == nil {
if networkInterface.Properties == nil {
continue
}
@ -378,21 +434,21 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
// This information is available via another API call however the Go SDK does not
// yet support this. On deallocated machines, this value happens to be nil so it
// is a cheap and easy way to determine if a machine is allocated or not.
if networkInterface.Primary == nil {
if networkInterface.Properties.Primary == nil {
level.Debug(d.logger).Log("msg", "Skipping deallocated virtual machine", "machine", vm.Name)
return
}
if *networkInterface.Primary {
for _, ip := range *networkInterface.IPConfigurations {
if *networkInterface.Properties.Primary {
for _, ip := range networkInterface.Properties.IPConfigurations {
// IPAddress is a field defined in PublicIPAddressPropertiesFormat,
// therefore we need to validate that both are not nil.
if ip.PublicIPAddress != nil && ip.PublicIPAddress.PublicIPAddressPropertiesFormat != nil && ip.PublicIPAddress.IPAddress != nil {
labels[azureLabelMachinePublicIP] = model.LabelValue(*ip.PublicIPAddress.IPAddress)
if ip.Properties != nil && ip.Properties.PublicIPAddress != nil && ip.Properties.PublicIPAddress.Properties != nil && ip.Properties.PublicIPAddress.Properties.IPAddress != nil {
labels[azureLabelMachinePublicIP] = model.LabelValue(*ip.Properties.PublicIPAddress.Properties.IPAddress)
}
if ip.PrivateIPAddress != nil {
labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.PrivateIPAddress)
address := net.JoinHostPort(*ip.PrivateIPAddress, fmt.Sprintf("%d", d.port))
if ip.Properties != nil && ip.Properties.PrivateIPAddress != nil {
labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.Properties.PrivateIPAddress)
address := net.JoinHostPort(*ip.Properties.PrivateIPAddress, fmt.Sprintf("%d", d.port))
labels[model.AddressLabel] = model.LabelValue(address)
ch <- target{labelSet: labels, err: nil}
return
@ -414,7 +470,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
var tg targetgroup.Group
for tgt := range ch {
if tgt.err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("unable to complete Azure service discovery: %w", tgt.err)
}
if tgt.labelSet != nil {
@ -427,93 +483,84 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
func (client *azureClient) getVMs(ctx context.Context, resourceGroup string) ([]virtualMachine, error) {
var vms []virtualMachine
var result compute.VirtualMachineListResultPage
var err error
if len(resourceGroup) == 0 {
result, err = client.vm.ListAll(ctx)
pager := client.vm.NewListAllPager(nil)
for pager.More() {
nextResult, err := pager.NextPage(ctx)
if err != nil {
return nil, fmt.Errorf("could not list virtual machines: %w", err)
}
for _, vm := range nextResult.Value {
vms = append(vms, mapFromVM(*vm))
}
}
} else {
result, err = client.vm.List(ctx, resourceGroup)
}
if err != nil {
return nil, fmt.Errorf("could not list virtual machines: %w", err)
}
for result.NotDone() {
for _, vm := range result.Values() {
vms = append(vms, mapFromVM(vm))
}
err = result.NextWithContext(ctx)
if err != nil {
return nil, fmt.Errorf("could not list virtual machines: %w", err)
pager := client.vm.NewListPager(resourceGroup, nil)
for pager.More() {
nextResult, err := pager.NextPage(ctx)
if err != nil {
return nil, fmt.Errorf("could not list virtual machines: %w", err)
}
for _, vm := range nextResult.Value {
vms = append(vms, mapFromVM(*vm))
}
}
}
return vms, nil
}
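The loop above is the general shape of the migration from the old-track SDK iterators to the new runtime pagers; shown side by side for reference (fragment, identifiers as in getVMs above).

```go
// Old track (services/compute): a mutable result page.
//	for result.NotDone() {
//		for _, vm := range result.Values() {
//			vms = append(vms, mapFromVM(vm))
//		}
//		if err := result.NextWithContext(ctx); err != nil { /* ... */ }
//	}
//
// New track (sdk/resourcemanager): an explicit pager, one page per call.
pager := client.vm.NewListAllPager(nil)
for pager.More() {
	page, err := pager.NextPage(ctx)
	if err != nil {
		return nil, fmt.Errorf("could not list virtual machines: %w", err)
	}
	for _, vm := range page.Value { // []*armcompute.VirtualMachine
		vms = append(vms, mapFromVM(*vm))
	}
}
```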
type VmssListResultPage interface {
NextWithContext(ctx context.Context) (err error)
NotDone() bool
Values() []compute.VirtualMachineScaleSet
}
func (client *azureClient) getScaleSets(ctx context.Context, resourceGroup string) ([]compute.VirtualMachineScaleSet, error) {
var scaleSets []compute.VirtualMachineScaleSet
var result VmssListResultPage
var err error
func (client *azureClient) getScaleSets(ctx context.Context, resourceGroup string) ([]armcompute.VirtualMachineScaleSet, error) {
var scaleSets []armcompute.VirtualMachineScaleSet
if len(resourceGroup) == 0 {
var rtn compute.VirtualMachineScaleSetListWithLinkResultPage
rtn, err = client.vmss.ListAll(ctx)
if err != nil {
return nil, fmt.Errorf("could not list virtual machine scale sets: %w", err)
pager := client.vmss.NewListAllPager(nil)
for pager.More() {
nextResult, err := pager.NextPage(ctx)
if err != nil {
return nil, fmt.Errorf("could not list virtual machine scale sets: %w", err)
}
for _, vmss := range nextResult.Value {
scaleSets = append(scaleSets, *vmss)
}
}
result = &rtn
} else {
var rtn compute.VirtualMachineScaleSetListResultPage
rtn, err = client.vmss.List(ctx, resourceGroup)
if err != nil {
return nil, fmt.Errorf("could not list virtual machine scale sets: %w", err)
}
result = &rtn
}
for result.NotDone() {
scaleSets = append(scaleSets, result.Values()...)
err = result.NextWithContext(ctx)
if err != nil {
return nil, fmt.Errorf("could not list virtual machine scale sets: %w", err)
pager := client.vmss.NewListPager(resourceGroup, nil)
for pager.More() {
nextResult, err := pager.NextPage(ctx)
if err != nil {
return nil, fmt.Errorf("could not list virtual machine scale sets: %w", err)
}
for _, vmss := range nextResult.Value {
scaleSets = append(scaleSets, *vmss)
}
}
}
return scaleSets, nil
}
func (client *azureClient) getScaleSetVMs(ctx context.Context, scaleSet compute.VirtualMachineScaleSet) ([]virtualMachine, error) {
func (client *azureClient) getScaleSetVMs(ctx context.Context, scaleSet armcompute.VirtualMachineScaleSet) ([]virtualMachine, error) {
var vms []virtualMachine
// TODO do we really need to fetch the resourcegroup this way?
r, err := newAzureResourceFromID(*scaleSet.ID, nil)
r, err := newAzureResourceFromID(*scaleSet.ID, client.logger)
if err != nil {
return nil, fmt.Errorf("could not parse scale set ID: %w", err)
}
result, err := client.vmssvm.List(ctx, r.ResourceGroup, *(scaleSet.Name), "", "", "")
if err != nil {
return nil, fmt.Errorf("could not list virtual machine scale set vms: %w", err)
}
for result.NotDone() {
for _, vm := range result.Values() {
vms = append(vms, mapFromVMScaleSetVM(vm, *scaleSet.Name))
}
err = result.NextWithContext(ctx)
pager := client.vmssvm.NewListPager(r.ResourceGroupName, *(scaleSet.Name), nil)
for pager.More() {
nextResult, err := pager.NextPage(ctx)
if err != nil {
return nil, fmt.Errorf("could not list virtual machine scale set vms: %w", err)
}
for _, vmssvm := range nextResult.Value {
vms = append(vms, mapFromVMScaleSetVM(*vmssvm, *scaleSet.Name))
}
}
return vms, nil
}
func mapFromVM(vm compute.VirtualMachine) virtualMachine {
osType := string(vm.StorageProfile.OsDisk.OsType)
func mapFromVM(vm armcompute.VirtualMachine) virtualMachine {
osType := string(*vm.Properties.StorageProfile.OSDisk.OSType)
tags := map[string]*string{}
networkInterfaces := []string{}
var computerName string
@ -523,18 +570,17 @@ func mapFromVM(vm compute.VirtualMachine) virtualMachine {
tags = vm.Tags
}
if vm.NetworkProfile != nil {
for _, vmNIC := range *(vm.NetworkProfile.NetworkInterfaces) {
networkInterfaces = append(networkInterfaces, *vmNIC.ID)
if vm.Properties != nil {
if vm.Properties.NetworkProfile != nil {
for _, vmNIC := range vm.Properties.NetworkProfile.NetworkInterfaces {
networkInterfaces = append(networkInterfaces, *vmNIC.ID)
}
}
}
if vm.VirtualMachineProperties != nil {
if vm.VirtualMachineProperties.OsProfile != nil && vm.VirtualMachineProperties.OsProfile.ComputerName != nil {
computerName = *(vm.VirtualMachineProperties.OsProfile.ComputerName)
if vm.Properties.OSProfile != nil && vm.Properties.OSProfile.ComputerName != nil {
computerName = *(vm.Properties.OSProfile.ComputerName)
}
if vm.VirtualMachineProperties.HardwareProfile != nil {
size = string(vm.VirtualMachineProperties.HardwareProfile.VMSize)
if vm.Properties.HardwareProfile != nil {
size = string(*vm.Properties.HardwareProfile.VMSize)
}
}
@ -552,8 +598,8 @@ func mapFromVM(vm compute.VirtualMachine) virtualMachine {
}
}
func mapFromVMScaleSetVM(vm compute.VirtualMachineScaleSetVM, scaleSetName string) virtualMachine {
osType := string(vm.StorageProfile.OsDisk.OsType)
func mapFromVMScaleSetVM(vm armcompute.VirtualMachineScaleSetVM, scaleSetName string) virtualMachine {
osType := string(*vm.Properties.StorageProfile.OSDisk.OSType)
tags := map[string]*string{}
networkInterfaces := []string{}
var computerName string
@ -563,18 +609,17 @@ func mapFromVMScaleSetVM(vm compute.VirtualMachineScaleSetVM, scaleSetName strin
tags = vm.Tags
}
if vm.NetworkProfile != nil {
for _, vmNIC := range *(vm.NetworkProfile.NetworkInterfaces) {
networkInterfaces = append(networkInterfaces, *vmNIC.ID)
if vm.Properties != nil {
if vm.Properties.NetworkProfile != nil {
for _, vmNIC := range vm.Properties.NetworkProfile.NetworkInterfaces {
networkInterfaces = append(networkInterfaces, *vmNIC.ID)
}
}
}
if vm.VirtualMachineScaleSetVMProperties != nil {
if vm.VirtualMachineScaleSetVMProperties.OsProfile != nil && vm.VirtualMachineScaleSetVMProperties.OsProfile.ComputerName != nil {
computerName = *(vm.VirtualMachineScaleSetVMProperties.OsProfile.ComputerName)
if vm.Properties.OSProfile != nil && vm.Properties.OSProfile.ComputerName != nil {
computerName = *(vm.Properties.OSProfile.ComputerName)
}
if vm.VirtualMachineScaleSetVMProperties.HardwareProfile != nil {
size = string(vm.VirtualMachineScaleSetVMProperties.HardwareProfile.VMSize)
if vm.Properties.HardwareProfile != nil {
size = string(*vm.Properties.HardwareProfile.VMSize)
}
}
@ -586,6 +631,7 @@ func mapFromVMScaleSetVM(vm compute.VirtualMachineScaleSetVM, scaleSetName strin
Location: *(vm.Location),
OsType: osType,
ScaleSet: scaleSetName,
InstanceID: *(vm.InstanceID),
Tags: tags,
NetworkInterfaces: networkInterfaces,
Size: size,
@ -594,38 +640,58 @@ func mapFromVMScaleSetVM(vm compute.VirtualMachineScaleSetVM, scaleSetName strin
var errorNotFound = errors.New("network interface does not exist")
// getNetworkInterfaceByID gets the network interface.
// getVMNetworkInterfaceByID gets the network interface.
// If a 404 is returned from the Azure API, `errorNotFound` is returned.
// On all other errors, an autorest.DetailedError is returned.
func (client *azureClient) getNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*network.Interface, error) {
result := network.Interface{}
queryParameters := map[string]interface{}{
"api-version": "2018-10-01",
func (client *azureClient) getVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) {
r, err := newAzureResourceFromID(networkInterfaceID, client.logger)
if err != nil {
return nil, fmt.Errorf("could not parse network interface ID: %w", err)
}
preparer := autorest.CreatePreparer(
autorest.AsGet(),
autorest.WithBaseURL(client.nic.BaseURI),
autorest.WithPath(networkInterfaceID),
autorest.WithQueryParameters(queryParameters),
autorest.WithUserAgent(userAgent))
req, err := preparer.Prepare((&http.Request{}).WithContext(ctx))
resp, err := client.nic.Get(ctx, r.ResourceGroupName, r.Name, &armnetwork.InterfacesClientGetOptions{Expand: to.Ptr("IPConfigurations/PublicIPAddress")})
if err != nil {
return nil, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", nil, "Failure preparing request")
}
resp, err := client.nic.GetSender(req)
if err != nil {
return nil, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", resp, "Failure sending request")
}
result, err = client.nic.GetResponder(resp)
if err != nil {
if resp.StatusCode == http.StatusNotFound {
var responseError *azcore.ResponseError
if errors.As(err, &responseError) && responseError.StatusCode == http.StatusNotFound {
return nil, errorNotFound
}
return nil, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", resp, "Failure responding to request")
return nil, fmt.Errorf("failed to retrieve Interface %v with error: %w", networkInterfaceID, err)
}
return &result, nil
return &resp.Interface, nil
}
// getVMScaleSetVMNetworkInterfaceByID gets the network interface.
// If a 404 is returned from the Azure API, `errorNotFound` is returned.
func (client *azureClient) getVMScaleSetVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID, scaleSetName, instanceID string) (*armnetwork.Interface, error) {
r, err := newAzureResourceFromID(networkInterfaceID, client.logger)
if err != nil {
return nil, fmt.Errorf("could not parse network interface ID: %w", err)
}
resp, err := client.nic.GetVirtualMachineScaleSetNetworkInterface(ctx, r.ResourceGroupName, scaleSetName, instanceID, r.Name, &armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceOptions{Expand: to.Ptr("IPConfigurations/PublicIPAddress")})
if err != nil {
var responseError *azcore.ResponseError
if errors.As(err, &responseError) && responseError.StatusCode == http.StatusNotFound {
return nil, errorNotFound
}
return nil, fmt.Errorf("failed to retrieve Interface %v with error: %w", networkInterfaceID, err)
}
return &resp.Interface, nil
}
// addToCache will add the network interface information for the specified nicID.
func (d *Discovery) addToCache(nicID string, netInt *armnetwork.Interface) {
random := rand.Int63n(int64(time.Duration(d.cfg.RefreshInterval * 3).Seconds()))
rs := time.Duration(random) * time.Second
exptime := time.Duration(d.cfg.RefreshInterval*10) + rs
d.cache.Set(nicID, netInt, cache.WithExpiration(exptime))
level.Debug(d.logger).Log("msg", "Adding nic", "nic", nicID, "time", exptime.Seconds())
}
// getFromCache will get the network Interface for the specified nicID.
// If the cache is disabled nothing will happen.
func (d *Discovery) getFromCache(nicID string) (*armnetwork.Interface, bool) {
net, found := d.cache.Get(nicID)
return net, found
}
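
The expiration computed in addToCache is deliberately jittered so that interfaces cached in the same refresh cycle do not all expire together. A minimal standalone sketch of that calculation, assuming a hypothetical 5m refresh interval (the real value comes from the SD config):

package main

import (
	"fmt"
	"math/rand"
	"time"
)

func main() {
	// Hypothetical refresh interval; the discoverer takes it from d.cfg.RefreshInterval.
	refreshInterval := 5 * time.Minute

	// Random jitter of up to 3x the refresh interval, in whole seconds.
	jitterSeconds := rand.Int63n(int64((3 * refreshInterval).Seconds()))
	jitter := time.Duration(jitterSeconds) * time.Second

	// Entries live for roughly ten refresh cycles plus the jitter, so cached
	// network interfaces added together do not all expire in the same cycle.
	expiration := 10*refreshInterval + jitter
	fmt.Println("cache entry expiration:", expiration)
}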

View file

@ -16,7 +16,8 @@ package azure
import (
"testing"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
)
@ -29,34 +30,36 @@ func TestMapFromVMWithEmptyTags(t *testing.T) {
id := "test"
name := "name"
size := "size"
vmSize := armcompute.VirtualMachineSizeTypes(size)
osType := armcompute.OperatingSystemTypesLinux
vmType := "type"
location := "westeurope"
computerName := "computer_name"
networkProfile := compute.NetworkProfile{
NetworkInterfaces: &[]compute.NetworkInterfaceReference{},
networkProfile := armcompute.NetworkProfile{
NetworkInterfaces: []*armcompute.NetworkInterfaceReference{},
}
properties := &compute.VirtualMachineProperties{
OsProfile: &compute.OSProfile{
properties := &armcompute.VirtualMachineProperties{
OSProfile: &armcompute.OSProfile{
ComputerName: &computerName,
},
StorageProfile: &compute.StorageProfile{
OsDisk: &compute.OSDisk{
OsType: "Linux",
StorageProfile: &armcompute.StorageProfile{
OSDisk: &armcompute.OSDisk{
OSType: &osType,
},
},
NetworkProfile: &networkProfile,
HardwareProfile: &compute.HardwareProfile{
VMSize: compute.VirtualMachineSizeTypes(size),
HardwareProfile: &armcompute.HardwareProfile{
VMSize: &vmSize,
},
}
testVM := compute.VirtualMachine{
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: nil,
VirtualMachineProperties: properties,
testVM := armcompute.VirtualMachine{
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: nil,
Properties: properties,
}
expectedVM := virtualMachine{
@ -80,37 +83,39 @@ func TestMapFromVMWithTags(t *testing.T) {
id := "test"
name := "name"
size := "size"
vmSize := armcompute.VirtualMachineSizeTypes(size)
osType := armcompute.OperatingSystemTypesLinux
vmType := "type"
location := "westeurope"
computerName := "computer_name"
tags := map[string]*string{
"prometheus": new(string),
}
networkProfile := compute.NetworkProfile{
NetworkInterfaces: &[]compute.NetworkInterfaceReference{},
networkProfile := armcompute.NetworkProfile{
NetworkInterfaces: []*armcompute.NetworkInterfaceReference{},
}
properties := &compute.VirtualMachineProperties{
OsProfile: &compute.OSProfile{
properties := &armcompute.VirtualMachineProperties{
OSProfile: &armcompute.OSProfile{
ComputerName: &computerName,
},
StorageProfile: &compute.StorageProfile{
OsDisk: &compute.OSDisk{
OsType: "Linux",
StorageProfile: &armcompute.StorageProfile{
OSDisk: &armcompute.OSDisk{
OSType: &osType,
},
},
NetworkProfile: &networkProfile,
HardwareProfile: &compute.HardwareProfile{
VMSize: compute.VirtualMachineSizeTypes(size),
HardwareProfile: &armcompute.HardwareProfile{
VMSize: &vmSize,
},
}
testVM := compute.VirtualMachine{
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: tags,
VirtualMachineProperties: properties,
testVM := armcompute.VirtualMachine{
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: tags,
Properties: properties,
}
expectedVM := virtualMachine{
@ -134,34 +139,38 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) {
id := "test"
name := "name"
size := "size"
vmSize := armcompute.VirtualMachineSizeTypes(size)
osType := armcompute.OperatingSystemTypesLinux
vmType := "type"
instanceID := "123"
location := "westeurope"
computerName := "computer_name"
networkProfile := compute.NetworkProfile{
NetworkInterfaces: &[]compute.NetworkInterfaceReference{},
networkProfile := armcompute.NetworkProfile{
NetworkInterfaces: []*armcompute.NetworkInterfaceReference{},
}
properties := &compute.VirtualMachineScaleSetVMProperties{
OsProfile: &compute.OSProfile{
properties := &armcompute.VirtualMachineScaleSetVMProperties{
OSProfile: &armcompute.OSProfile{
ComputerName: &computerName,
},
StorageProfile: &compute.StorageProfile{
OsDisk: &compute.OSDisk{
OsType: "Linux",
StorageProfile: &armcompute.StorageProfile{
OSDisk: &armcompute.OSDisk{
OSType: &osType,
},
},
NetworkProfile: &networkProfile,
HardwareProfile: &compute.HardwareProfile{
VMSize: compute.VirtualMachineSizeTypes(size),
HardwareProfile: &armcompute.HardwareProfile{
VMSize: &vmSize,
},
}
testVM := compute.VirtualMachineScaleSetVM{
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: nil,
VirtualMachineScaleSetVMProperties: properties,
testVM := armcompute.VirtualMachineScaleSetVM{
ID: &id,
Name: &name,
Type: &vmType,
InstanceID: &instanceID,
Location: &location,
Tags: nil,
Properties: properties,
}
scaleSet := "testSet"
@ -175,6 +184,7 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) {
Tags: map[string]*string{},
NetworkInterfaces: []string{},
ScaleSet: scaleSet,
InstanceID: instanceID,
Size: size,
}
@ -187,37 +197,41 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
id := "test"
name := "name"
size := "size"
vmSize := armcompute.VirtualMachineSizeTypes(size)
osType := armcompute.OperatingSystemTypesLinux
vmType := "type"
instanceID := "123"
location := "westeurope"
computerName := "computer_name"
tags := map[string]*string{
"prometheus": new(string),
}
networkProfile := compute.NetworkProfile{
NetworkInterfaces: &[]compute.NetworkInterfaceReference{},
networkProfile := armcompute.NetworkProfile{
NetworkInterfaces: []*armcompute.NetworkInterfaceReference{},
}
properties := &compute.VirtualMachineScaleSetVMProperties{
OsProfile: &compute.OSProfile{
properties := &armcompute.VirtualMachineScaleSetVMProperties{
OSProfile: &armcompute.OSProfile{
ComputerName: &computerName,
},
StorageProfile: &compute.StorageProfile{
OsDisk: &compute.OSDisk{
OsType: "Linux",
StorageProfile: &armcompute.StorageProfile{
OSDisk: &armcompute.OSDisk{
OSType: &osType,
},
},
NetworkProfile: &networkProfile,
HardwareProfile: &compute.HardwareProfile{
VMSize: compute.VirtualMachineSizeTypes(size),
HardwareProfile: &armcompute.HardwareProfile{
VMSize: &vmSize,
},
}
testVM := compute.VirtualMachineScaleSetVM{
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: tags,
VirtualMachineScaleSetVMProperties: properties,
testVM := armcompute.VirtualMachineScaleSetVM{
ID: &id,
Name: &name,
Type: &vmType,
InstanceID: &instanceID,
Location: &location,
Tags: tags,
Properties: properties,
}
scaleSet := "testSet"
@ -231,6 +245,7 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
Tags: tags,
NetworkInterfaces: []string{},
ScaleSet: scaleSet,
InstanceID: instanceID,
Size: size,
}
@ -242,18 +257,26 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
func TestNewAzureResourceFromID(t *testing.T) {
for _, tc := range []struct {
id string
expected azureResource
expected *arm.ResourceID
}{
{
id: "/a/b/c/group/d/e/f/name",
expected: azureResource{"name", "group"},
id: "/subscriptions/SUBSCRIPTION_ID/resourceGroups/group/providers/PROVIDER/TYPE/name",
expected: &arm.ResourceID{
Name: "name",
ResourceGroupName: "group",
},
},
{
id: "/a/b/c/group/d/e/f/name/g/h",
expected: azureResource{"name", "group"},
id: "/subscriptions/SUBSCRIPTION_ID/resourceGroups/group/providers/PROVIDER/TYPE/name/TYPE/h",
expected: &arm.ResourceID{
Name: "h",
ResourceGroupName: "group",
},
},
} {
actual, _ := newAzureResourceFromID(tc.id, nil)
require.Equal(t, tc.expected, actual)
actual, err := newAzureResourceFromID(tc.id, nil)
require.NoError(t, err)
require.Equal(t, tc.expected.Name, actual.Name)
require.Equal(t, tc.expected.ResourceGroupName, actual.ResourceGroupName)
}
}

View file

@ -50,7 +50,7 @@ const (
tagsLabel = model.MetaLabelPrefix + "consul_tags"
// serviceLabel is the name of the label containing the service name.
serviceLabel = model.MetaLabelPrefix + "consul_service"
// healthLabel is the name of the label containing the health of the service instance
// healthLabel is the name of the label containing the health of the service instance.
healthLabel = model.MetaLabelPrefix + "consul_health"
// serviceAddressLabel is the name of the label containing the (optional) service address.
serviceAddressLabel = model.MetaLabelPrefix + "consul_service_address"
@ -71,46 +71,24 @@ const (
namespace = "prometheus"
)
var (
rpcFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_consul_rpc_failures_total",
Help: "The number of Consul RPC call failures.",
})
rpcDuration = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Name: "sd_consul_rpc_duration_seconds",
Help: "The duration of a Consul RPC call in seconds.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
},
[]string{"endpoint", "call"},
)
// Initialize metric vectors.
servicesRPCDuration = rpcDuration.WithLabelValues("catalog", "services")
serviceRPCDuration = rpcDuration.WithLabelValues("catalog", "service")
// DefaultSDConfig is the default Consul SD configuration.
DefaultSDConfig = SDConfig{
TagSeparator: ",",
Scheme: "http",
Server: "localhost:8500",
AllowStale: true,
RefreshInterval: model.Duration(30 * time.Second),
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
)
// DefaultSDConfig is the default Consul SD configuration.
var DefaultSDConfig = SDConfig{
TagSeparator: ",",
Scheme: "http",
Server: "localhost:8500",
AllowStale: true,
RefreshInterval: model.Duration(30 * time.Second),
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(rpcFailuresCount, rpcDuration)
}
// SDConfig is the configuration for Consul service discovery.
type SDConfig struct {
Server string `yaml:"server,omitempty"`
PathPrefix string `yaml:"path_prefix,omitempty"`
Token config.Secret `yaml:"token,omitempty"`
Datacenter string `yaml:"datacenter,omitempty"`
Namespace string `yaml:"namespace,omitempty"`
@ -146,7 +124,7 @@ func (*SDConfig) Name() string { return "consul" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -183,22 +161,27 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
// Discovery retrieves target information from a Consul server
// and updates them via watches.
type Discovery struct {
client *consul.Client
clientDatacenter string
clientNamespace string
clientPartition string
tagSeparator string
watchedServices []string // Set of services which will be discovered.
watchedTags []string // Tags used to filter instances of a service.
watchedNodeMeta map[string]string
allowStale bool
refreshInterval time.Duration
finalizer func()
logger log.Logger
client *consul.Client
clientDatacenter string
clientNamespace string
clientPartition string
tagSeparator string
watchedServices []string // Set of services which will be discovered.
watchedTags []string // Tags used to filter instances of a service.
watchedNodeMeta map[string]string
allowStale bool
refreshInterval time.Duration
finalizer func()
logger log.Logger
rpcFailuresCount prometheus.Counter
rpcDuration *prometheus.SummaryVec
servicesRPCDuration prometheus.Observer
serviceRPCDuration prometheus.Observer
metricRegisterer discovery.MetricRegisterer
}
// NewDiscovery returns a new Discovery for the given config.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@ -211,6 +194,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
clientConf := &consul.Config{
Address: conf.Server,
PathPrefix: conf.PathPrefix,
Scheme: conf.Scheme,
Datacenter: conf.Datacenter,
Namespace: conf.Namespace,
@ -235,7 +219,35 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
clientPartition: conf.Partition,
finalizer: wrapper.CloseIdleConnections,
logger: logger,
rpcFailuresCount: prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_consul_rpc_failures_total",
Help: "The number of Consul RPC call failures.",
}),
rpcDuration: prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Name: "sd_consul_rpc_duration_seconds",
Help: "The duration of a Consul RPC call in seconds.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
},
[]string{"endpoint", "call"},
),
}
cd.metricRegisterer = discovery.NewMetricRegisterer(
reg,
[]prometheus.Collector{
cd.rpcFailuresCount,
cd.rpcDuration,
},
)
// Initialize metric vectors.
cd.servicesRPCDuration = cd.rpcDuration.WithLabelValues("catalog", "services")
cd.serviceRPCDuration = cd.rpcDuration.WithLabelValues("catalog", "service")
return cd, nil
}
@ -291,7 +303,7 @@ func (d *Discovery) getDatacenter() error {
info, err := d.client.Agent().Self()
if err != nil {
level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err)
rpcFailuresCount.Inc()
d.rpcFailuresCount.Inc()
return err
}
@ -332,6 +344,13 @@ func (d *Discovery) initialize(ctx context.Context) {
// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
err := d.metricRegisterer.RegisterMetrics()
if err != nil {
level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
return
}
defer d.metricRegisterer.UnregisterMetrics()
if d.finalizer != nil {
defer d.finalizer()
}
@ -380,7 +399,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
t0 := time.Now()
srvs, meta, err := catalog.Services(opts.WithContext(ctx))
elapsed := time.Since(t0)
servicesRPCDuration.Observe(elapsed.Seconds())
d.servicesRPCDuration.Observe(elapsed.Seconds())
// Check the context first in order to exit early.
select {
@ -391,7 +410,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
if err != nil {
level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err)
rpcFailuresCount.Inc()
d.rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return
}
@ -447,13 +466,15 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
// consulService contains data belonging to the same service.
type consulService struct {
name string
tags []string
labels model.LabelSet
discovery *Discovery
client *consul.Client
tagSeparator string
logger log.Logger
name string
tags []string
labels model.LabelSet
discovery *Discovery
client *consul.Client
tagSeparator string
logger log.Logger
rpcFailuresCount prometheus.Counter
serviceRPCDuration prometheus.Observer
}
// Start watching a service.
@ -467,8 +488,10 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G
serviceLabel: model.LabelValue(name),
datacenterLabel: model.LabelValue(d.clientDatacenter),
},
tagSeparator: d.tagSeparator,
logger: d.logger,
tagSeparator: d.tagSeparator,
logger: d.logger,
rpcFailuresCount: d.rpcFailuresCount,
serviceRPCDuration: d.serviceRPCDuration,
}
go func() {
@ -506,7 +529,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr
t0 := time.Now()
serviceNodes, meta, err := health.ServiceMultipleTags(srv.name, srv.tags, false, opts.WithContext(ctx))
elapsed := time.Since(t0)
serviceRPCDuration.Observe(elapsed.Seconds())
srv.serviceRPCDuration.Observe(elapsed.Seconds())
// Check the context first in order to exit early.
select {
@ -518,7 +541,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr
if err != nil {
level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err)
rpcFailuresCount.Inc()
srv.rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return
}

View file

@ -22,6 +22,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@ -39,7 +40,7 @@ func TestConfiguredService(t *testing.T) {
conf := &SDConfig{
Services: []string{"configuredServiceName"},
}
consulDiscovery, err := NewDiscovery(conf, nil)
consulDiscovery, err := NewDiscovery(conf, nil, prometheus.NewRegistry())
if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
@ -56,7 +57,7 @@ func TestConfiguredServiceWithTag(t *testing.T) {
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http"},
}
consulDiscovery, err := NewDiscovery(conf, nil)
consulDiscovery, err := NewDiscovery(conf, nil, prometheus.NewRegistry())
if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
@ -151,7 +152,7 @@ func TestConfiguredServiceWithTags(t *testing.T) {
}
for _, tc := range cases {
consulDiscovery, err := NewDiscovery(tc.conf, nil)
consulDiscovery, err := NewDiscovery(tc.conf, nil, prometheus.NewRegistry())
if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
@ -165,7 +166,7 @@ func TestConfiguredServiceWithTags(t *testing.T) {
func TestNonConfiguredService(t *testing.T) {
conf := &SDConfig{}
consulDiscovery, err := NewDiscovery(conf, nil)
consulDiscovery, err := NewDiscovery(conf, nil, prometheus.NewRegistry())
if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
@ -262,19 +263,19 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) {
func newDiscovery(t *testing.T, config *SDConfig) *Discovery {
logger := log.NewNopLogger()
d, err := NewDiscovery(config, logger)
d, err := NewDiscovery(config, logger, prometheus.NewRegistry())
require.NoError(t, err)
return d
}
func checkOneTarget(t *testing.T, tg []*targetgroup.Group) {
require.Equal(t, 1, len(tg))
require.Len(t, tg, 1)
target := tg[0]
require.Equal(t, "test-dc", string(target.Labels["__meta_consul_dc"]))
require.Equal(t, target.Source, string(target.Labels["__meta_consul_service"]))
if target.Source == "test" {
// test service should have one node.
require.Greater(t, len(target.Targets), 0, "Test service should have one node")
require.NotEmpty(t, target.Targets, "Test service should have one node")
}
}
@ -313,7 +314,7 @@ func TestNoTargets(t *testing.T) {
}()
targets := (<-ch)[0].Targets
require.Equal(t, 0, len(targets))
require.Empty(t, targets)
cancel()
<-ch
}
@ -484,7 +485,7 @@ oauth2:
return
}
require.Equal(t, config, test.expected)
require.Equal(t, test.expected, config)
})
}
}

View file

@ -24,6 +24,7 @@ import (
"github.com/digitalocean/godo"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@ -75,7 +76,7 @@ func (*SDConfig) Name() string { return "digitalocean" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -103,7 +104,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
d := &Discovery{
port: conf.Port,
}
@ -125,10 +126,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
}
d.Discovery = refresh.NewDiscovery(
logger,
"digitalocean",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "digitalocean",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d, nil
}

View file

@ -20,6 +20,7 @@ import (
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
)
@ -46,7 +47,7 @@ func TestDigitalOceanSDRefresh(t *testing.T) {
cfg := DefaultSDConfig
cfg.HTTPClientConfig.BearerToken = tokenID
d, err := NewDiscovery(&cfg, log.NewNopLogger())
d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
endpoint, err := url.Parse(sdmock.Mock.Endpoint())
require.NoError(t, err)
@ -56,12 +57,12 @@ func TestDigitalOceanSDRefresh(t *testing.T) {
tgs, err := d.refresh(ctx)
require.NoError(t, err)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Equal(t, 4, len(tg.Targets))
require.Len(t, tg.Targets, 4)
for i, lbls := range []model.LabelSet{
{

View file

@ -21,7 +21,7 @@ import (
"testing"
)
// SDMock is the interface for the DigitalOcean mock
// SDMock is the interface for the DigitalOcean mock.
type SDMock struct {
t *testing.T
Server *httptest.Server
@ -35,18 +35,18 @@ func NewSDMock(t *testing.T) *SDMock {
}
}
// Endpoint returns the URI to the mock server
// Endpoint returns the URI to the mock server.
func (m *SDMock) Endpoint() string {
return m.Server.URL + "/"
}
// Setup creates the mock server
// Setup creates the mock server.
func (m *SDMock) Setup() {
m.Mux = http.NewServeMux()
m.Server = httptest.NewServer(m.Mux)
}
// ShutdownServer creates the mock server
// ShutdownServer creates the mock server.
func (m *SDMock) ShutdownServer() {
m.Server.Close()
}

View file

@ -18,6 +18,7 @@ import (
"reflect"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/prometheus/discovery/targetgroup"
@ -42,6 +43,15 @@ type Discoverer interface {
type DiscovererOptions struct {
Logger log.Logger
// A registerer for the Discoverer's metrics.
// Some Discoverers may ignore this registerer and use the global one instead.
// For now this will work, because the Prometheus `main` function uses the global registry.
// However, in the future the Prometheus `main` function will be updated to not use the global registry.
// Hence, if a discoverer wants its metrics to be visible via the Prometheus executable's
// `/metrics` endpoint, it should use this explicit registerer.
// TODO(ptodev): Update this comment once the Prometheus `main` function does not use the global registry.
Registerer prometheus.Registerer
// Extra HTTP client options to expose to Discoverers. This field may be
// ignored; Discoverer implementations must opt-in to reading it.
HTTPClientOptions []config.HTTPClientOption
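
A minimal sketch of how a discoverer might honour the Registerer option above, registering an instance-owned metric instead of relying on the global registry (the discoverer type and metric name below are hypothetical):

package example

import (
	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus"
)

// exampleDiscovery is a hypothetical discoverer that owns its metric instances.
type exampleDiscovery struct {
	logger        log.Logger
	refreshErrors prometheus.Counter
}

func newExampleDiscovery(logger log.Logger, reg prometheus.Registerer) (*exampleDiscovery, error) {
	d := &exampleDiscovery{
		logger: logger,
		refreshErrors: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "example_sd_refresh_failures_total",
			Help: "Number of example SD refresh failures.",
		}),
	}
	// Register with the per-discoverer registerer from DiscovererOptions
	// rather than calling prometheus.MustRegister on the global registry.
	if err := reg.Register(d.refreshErrors); err != nil {
		return nil, err
	}
	return d, nil
}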

View file

@ -42,35 +42,21 @@ const (
dnsSrvRecordPortLabel = dnsSrvRecordPrefix + "port"
dnsMxRecordPrefix = model.MetaLabelPrefix + "dns_mx_record_"
dnsMxRecordTargetLabel = dnsMxRecordPrefix + "target"
dnsNsRecordPrefix = model.MetaLabelPrefix + "dns_ns_record_"
dnsNsRecordTargetLabel = dnsNsRecordPrefix + "target"
// Constants for instrumentation.
namespace = "prometheus"
)
var (
dnsSDLookupsCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_dns_lookups_total",
Help: "The number of DNS-SD lookups.",
})
dnsSDLookupFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_dns_lookup_failures_total",
Help: "The number of DNS-SD lookup failures.",
})
// DefaultSDConfig is the default DNS SD configuration.
DefaultSDConfig = SDConfig{
RefreshInterval: model.Duration(30 * time.Second),
Type: "SRV",
}
)
// DefaultSDConfig is the default DNS SD configuration.
var DefaultSDConfig = SDConfig{
RefreshInterval: model.Duration(30 * time.Second),
Type: "SRV",
}
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(dnsSDLookupFailuresCount, dnsSDLookupsCount)
}
// SDConfig is the configuration for DNS based service discovery.
@ -86,7 +72,7 @@ func (*SDConfig) Name() string { return "dns" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(*c, opts.Logger), nil
return NewDiscovery(*c, opts.Logger, opts.Registerer)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -102,7 +88,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
switch strings.ToUpper(c.Type) {
case "SRV":
case "A", "AAAA", "MX":
case "A", "AAAA", "MX", "NS":
if c.Port == 0 {
return errors.New("a port is required in DNS-SD configs for all record types except SRV")
}
@ -116,16 +102,18 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
// the Discoverer interface.
type Discovery struct {
*refresh.Discovery
names []string
port int
qtype uint16
logger log.Logger
names []string
port int
qtype uint16
logger log.Logger
dnsSDLookupsCount prometheus.Counter
dnsSDLookupFailuresCount prometheus.Counter
lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error)
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery {
func NewDiscovery(conf SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@ -140,6 +128,8 @@ func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery {
qtype = dns.TypeSRV
case "MX":
qtype = dns.TypeMX
case "NS":
qtype = dns.TypeNS
}
d := &Discovery{
names: conf.Names,
@ -147,14 +137,32 @@ func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery {
port: conf.Port,
logger: logger,
lookupFn: lookupWithSearchPath,
dnsSDLookupsCount: prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_dns_lookups_total",
Help: "The number of DNS-SD lookups.",
}),
dnsSDLookupFailuresCount: prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_dns_lookup_failures_total",
Help: "The number of DNS-SD lookup failures.",
}),
}
d.Discovery = refresh.NewDiscovery(
logger,
"dns",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "dns",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: prometheus.NewRegistry(),
Metrics: []prometheus.Collector{d.dnsSDLookupsCount, d.dnsSDLookupFailuresCount},
},
)
return d
return d, nil
}
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
@ -187,9 +195,9 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targetgroup.Group) error {
response, err := d.lookupFn(name, d.qtype, d.logger)
dnsSDLookupsCount.Inc()
d.dnsSDLookupsCount.Inc()
if err != nil {
dnsSDLookupFailuresCount.Inc()
d.dnsSDLookupFailuresCount.Inc()
return err
}
@ -199,7 +207,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ
}
for _, record := range response.Answer {
var target, dnsSrvRecordTarget, dnsSrvRecordPort, dnsMxRecordTarget model.LabelValue
var target, dnsSrvRecordTarget, dnsSrvRecordPort, dnsMxRecordTarget, dnsNsRecordTarget model.LabelValue
switch addr := record.(type) {
case *dns.SRV:
@ -217,6 +225,13 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ
addr.Mx = strings.TrimRight(addr.Mx, ".")
target = hostPort(addr.Mx, d.port)
case *dns.NS:
dnsNsRecordTarget = model.LabelValue(addr.Ns)
// Remove the final dot from rooted DNS names to make them look more usual.
addr.Ns = strings.TrimRight(addr.Ns, ".")
target = hostPort(addr.Ns, d.port)
case *dns.A:
target = hostPort(addr.A.String(), d.port)
case *dns.AAAA:
@ -234,6 +249,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ
dnsSrvRecordTargetLabel: dnsSrvRecordTarget,
dnsSrvRecordPortLabel: dnsSrvRecordPort,
dnsMxRecordTargetLabel: dnsMxRecordTarget,
dnsNsRecordTargetLabel: dnsNsRecordTarget,
})
}
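
For reference, a small standalone sketch of the NS lookup the new case above handles, using the miekg/dns client directly (the zone name and resolver address are placeholders; the discoverer itself resolves servers from resolv.conf and search paths):

package main

import (
	"fmt"

	"github.com/miekg/dns"
)

func main() {
	msg := new(dns.Msg)
	msg.SetQuestion(dns.Fqdn("example.com"), dns.TypeNS)

	client := new(dns.Client)
	resp, _, err := client.Exchange(msg, "127.0.0.1:53") // placeholder resolver
	if err != nil {
		fmt.Println("lookup failed:", err)
		return
	}
	for _, rr := range resp.Answer {
		if ns, ok := rr.(*dns.NS); ok {
			// The discovery code exposes this value as __meta_dns_ns_record_target
			// and trims the trailing dot before building __address__.
			fmt.Println("ns target:", ns.Ns)
		}
	}
}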

View file

@ -22,6 +22,7 @@ import (
"github.com/go-kit/log"
"github.com/miekg/dns"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
@ -81,6 +82,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "",
"__meta_dns_srv_record_port": "",
"__meta_dns_mx_record_target": "",
"__meta_dns_ns_record_target": "",
},
},
},
@ -112,6 +114,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "",
"__meta_dns_srv_record_port": "",
"__meta_dns_mx_record_target": "",
"__meta_dns_ns_record_target": "",
},
},
},
@ -143,6 +146,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "db1.example.com.",
"__meta_dns_srv_record_port": "3306",
"__meta_dns_mx_record_target": "",
"__meta_dns_ns_record_target": "",
},
{
"__address__": "db2.example.com:3306",
@ -150,6 +154,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "db2.example.com.",
"__meta_dns_srv_record_port": "3306",
"__meta_dns_mx_record_target": "",
"__meta_dns_ns_record_target": "",
},
},
},
@ -180,6 +185,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "db1.example.com.",
"__meta_dns_srv_record_port": "3306",
"__meta_dns_mx_record_target": "",
"__meta_dns_ns_record_target": "",
},
},
},
@ -227,6 +233,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "",
"__meta_dns_srv_record_port": "",
"__meta_dns_mx_record_target": "smtp1.example.com.",
"__meta_dns_ns_record_target": "",
},
{
"__address__": "smtp2.example.com:25",
@ -234,6 +241,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "",
"__meta_dns_srv_record_port": "",
"__meta_dns_mx_record_target": "smtp2.example.com.",
"__meta_dns_ns_record_target": "",
},
},
},
@ -245,7 +253,8 @@ func TestDNS(t *testing.T) {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
sd := NewDiscovery(tc.config, nil)
sd, err := NewDiscovery(tc.config, nil, prometheus.NewRegistry())
require.NoError(t, err)
sd.lookupFn = tc.lookup
tgs, err := sd.refresh(context.Background())

View file

@ -184,17 +184,17 @@ func TestFetchApps(t *testing.T) {
apps, err := fetchApps(context.TODO(), ts.URL, &http.Client{})
require.NoError(t, err)
require.Equal(t, len(apps.Applications), 2)
require.Equal(t, apps.Applications[0].Name, "CONFIG-SERVICE")
require.Equal(t, apps.Applications[1].Name, "META-SERVICE")
require.Len(t, apps.Applications, 2)
require.Equal(t, "CONFIG-SERVICE", apps.Applications[0].Name)
require.Equal(t, "META-SERVICE", apps.Applications[1].Name)
require.Equal(t, len(apps.Applications[1].Instances), 2)
require.Equal(t, apps.Applications[1].Instances[0].InstanceID, "meta-service002.test.com:meta-service:8080")
require.Equal(t, apps.Applications[1].Instances[0].Metadata.Items[0].XMLName.Local, "project")
require.Equal(t, apps.Applications[1].Instances[0].Metadata.Items[0].Content, "meta-service")
require.Equal(t, apps.Applications[1].Instances[0].Metadata.Items[1].XMLName.Local, "management.port")
require.Equal(t, apps.Applications[1].Instances[0].Metadata.Items[1].Content, "8090")
require.Equal(t, apps.Applications[1].Instances[1].InstanceID, "meta-service001.test.com:meta-service:8080")
require.Len(t, apps.Applications[1].Instances, 2)
require.Equal(t, "meta-service002.test.com:meta-service:8080", apps.Applications[1].Instances[0].InstanceID)
require.Equal(t, "project", apps.Applications[1].Instances[0].Metadata.Items[0].XMLName.Local)
require.Equal(t, "meta-service", apps.Applications[1].Instances[0].Metadata.Items[0].Content)
require.Equal(t, "management.port", apps.Applications[1].Instances[0].Metadata.Items[1].XMLName.Local)
require.Equal(t, "8090", apps.Applications[1].Instances[0].Metadata.Items[1].Content)
require.Equal(t, "meta-service001.test.com:meta-service:8080", apps.Applications[1].Instances[1].InstanceID)
}
func Test500ErrorHttpResponse(t *testing.T) {

View file

@ -23,6 +23,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -80,7 +81,7 @@ func (*SDConfig) Name() string { return "eureka" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -117,7 +118,7 @@ type Discovery struct {
}
// NewDiscovery creates a new Eureka discovery for the given role.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "eureka_sd")
if err != nil {
return nil, err
@ -128,10 +129,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
server: conf.Server,
}
d.Discovery = refresh.NewDiscovery(
logger,
"eureka",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "eureka",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d, nil
}

View file

@ -20,6 +20,7 @@ import (
"net/http/httptest"
"testing"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@ -35,7 +36,7 @@ func testUpdateServices(respHandler http.HandlerFunc) ([]*targetgroup.Group, err
Server: ts.URL,
}
md, err := NewDiscovery(&conf, nil)
md, err := NewDiscovery(&conf, nil, prometheus.NewRegistry())
if err != nil {
return nil, err
}
@ -55,7 +56,7 @@ func TestEurekaSDHandleError(t *testing.T) {
tgs, err := testUpdateServices(respHandler)
require.EqualError(t, err, errTesting)
require.Equal(t, len(tgs), 0)
require.Empty(t, tgs)
}
func TestEurekaSDEmptyList(t *testing.T) {
@ -72,7 +73,7 @@ func TestEurekaSDEmptyList(t *testing.T) {
)
tgs, err := testUpdateServices(respHandler)
require.NoError(t, err)
require.Equal(t, len(tgs), 1)
require.Len(t, tgs, 1)
}
func TestEurekaSDSendGroup(t *testing.T) {
@ -232,11 +233,11 @@ func TestEurekaSDSendGroup(t *testing.T) {
tgs, err := testUpdateServices(respHandler)
require.NoError(t, err)
require.Equal(t, len(tgs), 1)
require.Len(t, tgs, 1)
tg := tgs[0]
require.Equal(t, tg.Source, "eureka")
require.Equal(t, len(tg.Targets), 4)
require.Equal(t, "eureka", tg.Source)
require.Len(t, tg.Targets, 4)
tgt := tg.Targets[0]
require.Equal(t, tgt[model.AddressLabel], model.LabelValue("config-service001.test.com:8080"))

View file

@ -39,24 +39,6 @@ import (
)
var (
fileSDReadErrorsCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_file_read_errors_total",
Help: "The number of File-SD read errors.",
})
fileSDScanDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Name: "prometheus_sd_file_scan_duration_seconds",
Help: "The duration of the File-SD scan in seconds.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
})
fileSDTimeStamp = NewTimestampCollector()
fileWatcherErrorsCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_file_watcher_errors_total",
Help: "The number of File-SD errors caused by filesystem watch failures.",
})
patFileSDName = regexp.MustCompile(`^[^*]*(\*[^/]*)?\.(json|yml|yaml|JSON|YML|YAML)$`)
// DefaultSDConfig is the default file SD configuration.
@ -67,7 +49,6 @@ var (
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(fileSDReadErrorsCount, fileSDScanDuration, fileSDTimeStamp, fileWatcherErrorsCount)
}
// SDConfig is the configuration for file based discovery.
@ -81,7 +62,7 @@ func (*SDConfig) Name() string { return "file" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger), nil
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -187,10 +168,17 @@ type Discovery struct {
// This is used to detect deleted target groups.
lastRefresh map[string]int
logger log.Logger
fileSDReadErrorsCount prometheus.Counter
fileSDScanDuration prometheus.Summary
fileWatcherErrorsCount prometheus.Counter
fileSDTimeStamp *TimestampCollector
metricRegisterer discovery.MetricRegisterer
}
// NewDiscovery returns a new file discovery for the given paths.
func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@ -200,9 +188,35 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery {
interval: time.Duration(conf.RefreshInterval),
timestamps: make(map[string]float64),
logger: logger,
fileSDReadErrorsCount: prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_file_read_errors_total",
Help: "The number of File-SD read errors.",
}),
fileSDScanDuration: prometheus.NewSummary(
prometheus.SummaryOpts{
Name: "prometheus_sd_file_scan_duration_seconds",
Help: "The duration of the File-SD scan in seconds.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
}),
fileWatcherErrorsCount: prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_file_watcher_errors_total",
Help: "The number of File-SD errors caused by filesystem watch failures.",
}),
fileSDTimeStamp: NewTimestampCollector(),
}
fileSDTimeStamp.addDiscoverer(disc)
return disc
disc.fileSDTimeStamp.addDiscoverer(disc)
disc.metricRegisterer = discovery.NewMetricRegisterer(reg, []prometheus.Collector{
disc.fileSDReadErrorsCount,
disc.fileSDScanDuration,
disc.fileWatcherErrorsCount,
disc.fileSDTimeStamp,
})
return disc, nil
}
// listFiles returns a list of all files that match the configured patterns.
@ -226,8 +240,8 @@ func (d *Discovery) watchFiles() {
panic("no watcher configured")
}
for _, p := range d.paths {
if idx := strings.LastIndex(p, "/"); idx > -1 {
p = p[:idx]
if dir, _ := filepath.Split(p); dir != "" {
p = dir
} else {
p = "./"
}
@ -239,10 +253,17 @@ func (d *Discovery) watchFiles() {
// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
err := d.metricRegisterer.RegisterMetrics()
if err != nil {
level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
return
}
defer d.metricRegisterer.UnregisterMetrics()
watcher, err := fsnotify.NewWatcher()
if err != nil {
level.Error(d.logger).Log("msg", "Error adding file watcher", "err", err)
fileWatcherErrorsCount.Inc()
d.fileWatcherErrorsCount.Inc()
return
}
d.watcher = watcher
@ -306,7 +327,7 @@ func (d *Discovery) stop() {
done := make(chan struct{})
defer close(done)
fileSDTimeStamp.removeDiscoverer(d)
d.fileSDTimeStamp.removeDiscoverer(d)
// Closing the watcher will deadlock unless all events and errors are drained.
go func() {
@ -332,13 +353,13 @@ func (d *Discovery) stop() {
func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group) {
t0 := time.Now()
defer func() {
fileSDScanDuration.Observe(time.Since(t0).Seconds())
d.fileSDScanDuration.Observe(time.Since(t0).Seconds())
}()
ref := map[string]int{}
for _, p := range d.listFiles() {
tgroups, err := d.readFile(p)
if err != nil {
fileSDReadErrorsCount.Inc()
d.fileSDReadErrorsCount.Inc()
level.Error(d.logger).Log("msg", "Error reading file", "path", p, "err", err)
// Prevent deletion down below.

View file

@ -24,6 +24,7 @@ import (
"testing"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
@ -143,7 +144,7 @@ func (t *testRunner) run(files ...string) {
ctx, cancel := context.WithCancel(context.Background())
t.cancelSD = cancel
go func() {
NewDiscovery(
d, err := NewDiscovery(
&SDConfig{
Files: files,
// Setting a high refresh interval to make sure that the tests only
@ -151,7 +152,11 @@ func (t *testRunner) run(files ...string) {
RefreshInterval: model.Duration(1 * time.Hour),
},
nil,
).Run(ctx, t.ch)
prometheus.NewRegistry(),
)
require.NoError(t, err)
d.Run(ctx, t.ch)
}()
}

View file

@ -23,6 +23,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"golang.org/x/oauth2/google"
"google.golang.org/api/compute/v1"
@ -86,7 +87,7 @@ func (*SDConfig) Name() string { return "gce" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(*c, opts.Logger)
return NewDiscovery(*c, opts.Logger, opts.Registerer)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -121,7 +122,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
d := &Discovery{
project: conf.Project,
zone: conf.Zone,
@ -141,10 +142,13 @@ func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
d.isvc = compute.NewInstancesService(d.svc)
d.Discovery = refresh.NewDiscovery(
logger,
"gce",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "gce",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d, nil
}

View file

@ -22,7 +22,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/hetznercloud/hcloud-go/hcloud"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@ -91,7 +91,7 @@ func (d *hcloudDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er
targets := make([]model.LabelSet, len(servers))
for i, server := range servers {
labels := model.LabelSet{
hetznerLabelRole: model.LabelValue(hetznerRoleHcloud),
hetznerLabelRole: model.LabelValue(HetznerRoleHcloud),
hetznerLabelServerID: model.LabelValue(fmt.Sprintf("%d", server.ID)),
hetznerLabelServerName: model.LabelValue(server.Name),
hetznerLabelDatacenter: model.LabelValue(server.Datacenter.Name),

View file

@ -48,12 +48,12 @@ func TestHCloudSDRefresh(t *testing.T) {
targetGroups, err := d.refresh(context.Background())
require.NoError(t, err)
require.Equal(t, 1, len(targetGroups))
require.Len(t, targetGroups, 1)
targetGroup := targetGroups[0]
require.NotNil(t, targetGroup, "targetGroup should not be nil")
require.NotNil(t, targetGroup.Targets, "targetGroup.targets should not be nil")
require.Equal(t, 3, len(targetGroup.Targets))
require.Len(t, targetGroup.Targets, 3)
for i, labelSet := range []model.LabelSet{
{

View file

@ -20,7 +20,8 @@ import (
"time"
"github.com/go-kit/log"
"github.com/hetznercloud/hcloud-go/hcloud"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -57,7 +58,7 @@ type SDConfig struct {
RefreshInterval model.Duration `yaml:"refresh_interval"`
Port int `yaml:"port"`
Role role `yaml:"role"`
Role Role `yaml:"role"`
hcloudEndpoint string // For tests only.
robotEndpoint string // For tests only.
}
@ -67,33 +68,33 @@ func (*SDConfig) Name() string { return "hetzner" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
type refresher interface {
refresh(context.Context) ([]*targetgroup.Group, error)
}
// role is the role of the target within the Hetzner Ecosystem.
type role string
// Role is the Role of the target within the Hetzner Ecosystem.
type Role string
// The valid options for role.
const (
// Hetzner Robot Role (Dedicated Server)
// https://robot.hetzner.com
hetznerRoleRobot role = "robot"
HetznerRoleRobot Role = "robot"
// Hetzner Cloud Role
// https://console.hetzner.cloud
hetznerRoleHcloud role = "hcloud"
HetznerRoleHcloud Role = "hcloud"
)
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (c *role) UnmarshalYAML(unmarshal func(interface{}) error) error {
func (c *Role) UnmarshalYAML(unmarshal func(interface{}) error) error {
if err := unmarshal((*string)(c)); err != nil {
return err
}
switch *c {
case hetznerRoleRobot, hetznerRoleHcloud:
case HetznerRoleRobot, HetznerRoleHcloud:
return nil
default:
return fmt.Errorf("unknown role %q", *c)
@ -127,28 +128,31 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) {
r, err := newRefresher(conf, logger)
if err != nil {
return nil, err
}
return refresh.NewDiscovery(
logger,
"hetzner",
time.Duration(conf.RefreshInterval),
r.refresh,
refresh.Options{
Logger: logger,
Mech: "hetzner",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: r.refresh,
Registry: reg,
},
), nil
}
func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) {
switch conf.Role {
case hetznerRoleHcloud:
case HetznerRoleHcloud:
if conf.hcloudEndpoint == "" {
conf.hcloudEndpoint = hcloud.Endpoint
}
return newHcloudDiscovery(conf, l)
case hetznerRoleRobot:
case HetznerRoleRobot:
if conf.robotEndpoint == "" {
conf.robotEndpoint = "https://robot-ws.your-server.de"
}

View file

@ -20,7 +20,7 @@ import (
"testing"
)
// SDMock is the interface for the Hetzner Cloud mock
// SDMock is the interface for the Hetzner Cloud mock.
type SDMock struct {
t *testing.T
Server *httptest.Server
@ -34,19 +34,19 @@ func NewSDMock(t *testing.T) *SDMock {
}
}
// Endpoint returns the URI to the mock server
// Endpoint returns the URI to the mock server.
func (m *SDMock) Endpoint() string {
return m.Server.URL + "/"
}
// Setup creates the mock server
// Setup creates the mock server.
func (m *SDMock) Setup() {
m.Mux = http.NewServeMux()
m.Server = httptest.NewServer(m.Mux)
m.t.Cleanup(m.Server.Close)
}
// ShutdownServer creates the mock server
// ShutdownServer shuts down the mock server.
func (m *SDMock) ShutdownServer() {
m.Server.Close()
}

View file

@ -105,7 +105,7 @@ func (d *robotDiscovery) refresh(context.Context) ([]*targetgroup.Group, error)
targets := make([]model.LabelSet, len(servers))
for i, server := range servers {
labels := model.LabelSet{
hetznerLabelRole: model.LabelValue(hetznerRoleRobot),
hetznerLabelRole: model.LabelValue(HetznerRoleRobot),
hetznerLabelServerID: model.LabelValue(strconv.Itoa(server.Server.ServerNumber)),
hetznerLabelServerName: model.LabelValue(server.Server.ServerName),
hetznerLabelDatacenter: model.LabelValue(strings.ToLower(server.Server.Dc)),

View file

@ -47,12 +47,12 @@ func TestRobotSDRefresh(t *testing.T) {
targetGroups, err := d.refresh(context.Background())
require.NoError(t, err)
require.Equal(t, 1, len(targetGroups))
require.Len(t, targetGroups, 1)
targetGroup := targetGroups[0]
require.NotNil(t, targetGroup, "targetGroup should not be nil")
require.NotNil(t, targetGroup.Targets, "targetGroup.targets should not be nil")
require.Equal(t, 2, len(targetGroup.Targets))
require.Len(t, targetGroup.Targets, 2)
for i, labelSet := range []model.LabelSet{
{
@ -98,5 +98,5 @@ func TestRobotSDRefreshHandleError(t *testing.T) {
require.Error(t, err)
require.Equal(t, "non 2xx status '401' response during hetzner service discovery with role robot", err.Error())
require.Equal(t, 0, len(targetGroups))
require.Empty(t, targetGroups)
}

View file

@ -45,17 +45,10 @@ var (
}
userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`)
failuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_http_failures_total",
Help: "Number of HTTP service discovery refresh failures.",
})
)
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(failuresCount)
}
// SDConfig is the configuration for HTTP based discovery.
@ -70,7 +63,7 @@ func (*SDConfig) Name() string { return "http" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger, opts.HTTPClientOptions)
return NewDiscovery(c, opts.Logger, opts.HTTPClientOptions, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -99,7 +92,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
if parsedURL.Host == "" {
return fmt.Errorf("host is missing in URL")
}
return nil
return c.HTTPClientConfig.Validate()
}
const httpSDURLLabel = model.MetaLabelPrefix + "url"
@ -112,10 +105,11 @@ type Discovery struct {
client *http.Client
refreshInterval time.Duration
tgLastLength int
failuresCount prometheus.Counter
}
// NewDiscovery returns a new HTTP discovery for the given config.
func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPClientOption) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPClientOption, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@ -130,13 +124,22 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPCli
url: conf.URL,
client: client,
refreshInterval: time.Duration(conf.RefreshInterval), // Stored to be sent as headers.
failuresCount: prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_http_failures_total",
Help: "Number of HTTP service discovery refresh failures.",
}),
}
d.Discovery = refresh.NewDiscovery(
logger,
"http",
time.Duration(conf.RefreshInterval),
d.Refresh,
refresh.Options{
Logger: logger,
Mech: "http",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.Refresh,
Registry: reg,
Metrics: []prometheus.Collector{d.failuresCount},
},
)
return d, nil
}
@ -152,7 +155,7 @@ func (d *Discovery) Refresh(ctx context.Context) ([]*targetgroup.Group, error) {
resp, err := d.client.Do(req.WithContext(ctx))
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, err
}
defer func() {
@ -161,31 +164,31 @@ func (d *Discovery) Refresh(ctx context.Context) ([]*targetgroup.Group, error) {
}()
if resp.StatusCode != http.StatusOK {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("server returned HTTP status %s", resp.Status)
}
if !matchContentType.MatchString(strings.TrimSpace(resp.Header.Get("Content-Type"))) {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("unsupported content type %q", resp.Header.Get("Content-Type"))
}
b, err := io.ReadAll(resp.Body)
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, err
}
var targetGroups []*targetgroup.Group
if err := json.Unmarshal(b, &targetGroups); err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, err
}
for i, tg := range targetGroups {
if tg == nil {
failuresCount.Inc()
d.failuresCount.Inc()
err = errors.New("nil target group item found")
return nil, err
}
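
The HTTP SD hunks above retire the package-global failuresCount counter (and its prometheus.MustRegister in init) in favour of a counter owned by each Discovery and handed to the refresher through Options.Metrics. A hedged sketch of that ownership pattern; exampleDiscovery and the metric name are illustrative, not copied from this commit:

type exampleDiscovery struct {
	*refresh.Discovery
	failuresCount prometheus.Counter
}

func newExampleDiscovery(logger log.Logger, interval time.Duration, reg prometheus.Registerer) *exampleDiscovery {
	d := &exampleDiscovery{
		failuresCount: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "example_sd_refresh_failures_total",
			Help: "Number of example service discovery refresh failures.",
		}),
	}
	d.Discovery = refresh.NewDiscovery(refresh.Options{
		Logger:   logger,
		Mech:     "example",
		Interval: interval,
		RefreshF: d.refresh,
		Registry: reg,
		// The refresher registers these collectors on reg, so no init()/MustRegister is
		// needed and two Discovery instances no longer share one global counter.
		Metrics: []prometheus.Collector{d.failuresCount},
	})
	return d
}

func (d *exampleDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
	// Error paths bump the instance-scoped counter, mirroring d.failuresCount.Inc() above.
	if err := ctx.Err(); err != nil {
		d.failuresCount.Inc()
		return nil, err
	}
	return []*targetgroup.Group{}, nil
}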

View file

@ -41,7 +41,7 @@ func TestHTTPValidRefresh(t *testing.T) {
RefreshInterval: model.Duration(30 * time.Second),
}
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil)
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
@ -62,8 +62,8 @@ func TestHTTPValidRefresh(t *testing.T) {
Source: urlSource(ts.URL+"/http_sd.good.json", 0),
},
}
require.Equal(t, tgs, expectedTargets)
require.Equal(t, 0.0, getFailureCount())
require.Equal(t, expectedTargets, tgs)
require.Equal(t, 0.0, getFailureCount(d.failuresCount))
}
func TestHTTPInvalidCode(t *testing.T) {
@ -79,13 +79,13 @@ func TestHTTPInvalidCode(t *testing.T) {
RefreshInterval: model.Duration(30 * time.Second),
}
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil)
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
_, err = d.Refresh(ctx)
require.EqualError(t, err, "server returned HTTP status 400 Bad Request")
require.Equal(t, 1.0, getFailureCount())
require.Equal(t, 1.0, getFailureCount(d.failuresCount))
}
func TestHTTPInvalidFormat(t *testing.T) {
@ -101,18 +101,16 @@ func TestHTTPInvalidFormat(t *testing.T) {
RefreshInterval: model.Duration(30 * time.Second),
}
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil)
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
_, err = d.Refresh(ctx)
require.EqualError(t, err, `unsupported content type "text/plain; charset=utf-8"`)
require.Equal(t, 1.0, getFailureCount())
require.Equal(t, 1.0, getFailureCount(d.failuresCount))
}
var lastFailureCount float64
func getFailureCount() float64 {
func getFailureCount(failuresCount prometheus.Counter) float64 {
failureChan := make(chan prometheus.Metric)
go func() {
@ -129,10 +127,7 @@ func getFailureCount() float64 {
metric.Write(&counter)
}
// account for failures in prior tests
count := *counter.Counter.Value - lastFailureCount
lastFailureCount = *counter.Counter.Value
return count
return *counter.Counter.Value
}
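
getFailureCount above reads the counter by collecting it over a channel, which is the most general approach. Because a prometheus.Counter is itself a prometheus.Metric, a test can also read the value directly through Write; a hedged sketch using only client_golang and client_model APIs:

import (
	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
)

// counterValue returns the current value of a single counter without a collection goroutine.
func counterValue(c prometheus.Counter) float64 {
	var m dto.Metric
	if err := c.Write(&m); err != nil {
		return 0
	}
	return m.GetCounter().GetValue()
}

client_golang's prometheus/testutil package wraps the same idea as ToFloat64, so require.Equal(t, 1.0, testutil.ToFloat64(d.failuresCount)) would also work here.
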
func TestContentTypeRegex(t *testing.T) {
@ -417,7 +412,7 @@ func TestSourceDisappeared(t *testing.T) {
URL: ts.URL,
RefreshInterval: model.Duration(1 * time.Second),
}
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil)
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry())
require.NoError(t, err)
for _, test := range cases {
ctx := context.Background()

View file

@ -14,15 +14,17 @@
package ionos
import (
"errors"
"time"
"github.com/go-kit/log"
"github.com/pkg/errors"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/client_golang/prometheus"
)
const (
@ -41,7 +43,7 @@ func init() {
type Discovery struct{}
// NewDiscovery returns a new refresh.Discovery for IONOS Cloud.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) {
if conf.ionosEndpoint == "" {
conf.ionosEndpoint = "https://api.ionos.com"
}
@ -52,10 +54,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error)
}
return refresh.NewDiscovery(
logger,
"ionos",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "ionos",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
), nil
}
@ -86,7 +91,7 @@ func (c SDConfig) Name() string {
// NewDiscoverer returns a new discovery.Discoverer for IONOS Cloud.
func (c SDConfig) NewDiscoverer(options discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(&c, options.Logger)
return NewDiscovery(&c, options.Logger, options.Registerer)
}
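
The IONOS hunks show the same plumbing end to end: discovery.DiscovererOptions now carries the Registerer, NewDiscoverer forwards it, and NewDiscovery passes it into refresh.Options. A hedged sketch of what a standalone caller such as a test must now supply; newTestDiscovery is an illustrative name and the empty SDConfig omits required credentials:

func newTestDiscovery() (*refresh.Discovery, error) {
	// A throwaway registry is enough outside the main discovery manager, the same way the
	// updated HTTP SD tests above pass prometheus.NewRegistry().
	reg := prometheus.NewRegistry()
	return NewDiscovery(&SDConfig{}, log.NewNopLogger(), reg)
}
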
// UnmarshalYAML implements the yaml.Unmarshaler interface.

View file

@ -48,12 +48,12 @@ func TestIONOSServerRefresh(t *testing.T) {
tgs, err := d.refresh(ctx)
require.NoError(t, err)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Equal(t, 2, len(tg.Targets))
require.Len(t, tg.Targets, 2)
for i, lbls := range []model.LabelSet{
{

View file

@ -11,7 +11,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// nolint:revive // Many legitimately empty blocks in this file.
package kubernetes
import (
@ -23,19 +22,13 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
)
var (
epAddCount = eventCount.WithLabelValues("endpoints", "add")
epUpdateCount = eventCount.WithLabelValues("endpoints", "update")
epDeleteCount = eventCount.WithLabelValues("endpoints", "delete")
)
// Endpoints discovers new endpoint targets.
@ -56,10 +49,19 @@ type Endpoints struct {
}
// NewEndpoints returns a new endpoints discovery.
func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer) *Endpoints {
func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints {
if l == nil {
l = log.NewNopLogger()
}
epAddCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleAdd)
epUpdateCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleUpdate)
epDeleteCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleDelete)
svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd)
svcUpdateCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleUpdate)
svcDeleteCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleDelete)
e := &Endpoints{
logger: l,
endpointsInf: eps,
@ -70,7 +72,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca
podStore: pod.GetStore(),
nodeInf: node,
withNodeMetadata: node != nil,
queue: workqueue.NewNamed("endpoints"),
queue: workqueue.NewNamed(RoleEndpoint.String()),
}
_, err := e.endpointsInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
@ -248,9 +250,6 @@ func endpointsSourceFromNamespaceAndName(namespace, name string) string {
}
const (
endpointsLabelPrefix = metaLabelPrefix + "endpoints_label_"
endpointsLabelPresentPrefix = metaLabelPrefix + "endpoints_labelpresent_"
endpointsNameLabel = metaLabelPrefix + "endpoints_name"
endpointNodeName = metaLabelPrefix + "endpoint_node_name"
endpointHostname = metaLabelPrefix + "endpoint_hostname"
endpointReadyLabel = metaLabelPrefix + "endpoint_ready"
@ -265,16 +264,11 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
Source: endpointsSource(eps),
}
tg.Labels = model.LabelSet{
namespaceLabel: lv(eps.Namespace),
endpointsNameLabel: lv(eps.Name),
namespaceLabel: lv(eps.Namespace),
}
e.addServiceLabels(eps.Namespace, eps.Name, tg)
// Add endpoints labels metadata.
for k, v := range eps.Labels {
ln := strutil.SanitizeLabelName(k)
tg.Labels[model.LabelName(endpointsLabelPrefix+ln)] = lv(v)
tg.Labels[model.LabelName(endpointsLabelPresentPrefix+ln)] = presentValue
}
addObjectMetaLabels(tg.Labels, eps.ObjectMeta, RoleEndpoint)
type podEntry struct {
pod *apiv1.Pod
@ -305,7 +299,11 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
}
if e.withNodeMetadata {
target = addNodeLabels(target, e.nodeInf, e.logger, addr.NodeName)
if addr.NodeName != nil {
target = addNodeLabels(target, e.nodeInf, e.logger, addr.NodeName)
} else if addr.TargetRef != nil && addr.TargetRef.Kind == "Node" {
target = addNodeLabels(target, e.nodeInf, e.logger, &addr.TargetRef.Name)
}
}
pod := e.resolvePodRef(addr.TargetRef)
@ -385,18 +383,21 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
continue
}
a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10))
ports := strconv.FormatUint(uint64(cport.ContainerPort), 10)
// PodIP can be empty when a pod is starting or has been evicted.
if len(pe.pod.Status.PodIP) != 0 {
a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10))
ports := strconv.FormatUint(uint64(cport.ContainerPort), 10)
target := model.LabelSet{
model.AddressLabel: lv(a),
podContainerNameLabel: lv(c.Name),
podContainerImageLabel: lv(c.Image),
podContainerPortNameLabel: lv(cport.Name),
podContainerPortNumberLabel: lv(ports),
podContainerPortProtocolLabel: lv(string(cport.Protocol)),
target := model.LabelSet{
model.AddressLabel: lv(a),
podContainerNameLabel: lv(c.Name),
podContainerImageLabel: lv(c.Image),
podContainerPortNameLabel: lv(cport.Name),
podContainerPortNumberLabel: lv(ports),
podContainerPortProtocolLabel: lv(string(cport.Protocol)),
}
tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod)))
}
tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod)))
}
}
}
@ -458,13 +459,7 @@ func addNodeLabels(tg model.LabelSet, nodeInf cache.SharedInformer, logger log.L
node := obj.(*apiv1.Node)
// Allocate one target label for the node name,
// and two target labels for each node label.
nodeLabelset := make(model.LabelSet, 1+2*len(node.GetLabels()))
nodeLabelset[nodeNameLabel] = lv(*nodeName)
for k, v := range node.GetLabels() {
ln := strutil.SanitizeLabelName(k)
nodeLabelset[model.LabelName(nodeLabelPrefix+ln)] = lv(v)
nodeLabelset[model.LabelName(nodeLabelPresentPrefix+ln)] = presentValue
}
nodeLabelset := make(model.LabelSet)
addObjectMetaLabels(nodeLabelset, node.ObjectMeta, RoleNode)
return tg.Merge(nodeLabelset)
}
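
NewEndpoints (like the other role constructors touched in this commit) now receives a shared *prometheus.CounterVec and derives its per-event counters with WithLabelValues(role, event), instead of relying on package-level vars registered at import time. A hedged sketch of the caller side; the function name, metric name, and label names are assumptions inferred from the call sites above, not copied from kubernetes.go:

func wireEndpoints(reg prometheus.Registerer, logger log.Logger,
	epsInf cache.SharedIndexInformer, svcInf, podInf, nodeInf cache.SharedInformer) *Endpoints {
	eventCount := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "example_sd_kubernetes_events_total", // assumed name for illustration
			Help: "Number of Kubernetes events handled, by role and event type.",
		},
		[]string{"role", "event"}, // assumed label names; order matches WithLabelValues(role, event) above
	)
	reg.MustRegister(eventCount)
	return NewEndpoints(logger, epsInf, svcInf, podInf, nodeInf, eventCount)
}

In the same file, the hand-rolled endpoints-label and node-label loops are replaced by addObjectMetaLabels, which is why the test expectations later in this diff gain annotation and annotationpresent entries alongside the existing name and label metadata.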

View file

@ -32,6 +32,9 @@ func makeEndpoints() *v1.Endpoints {
ObjectMeta: metav1.ObjectMeta{
Name: "testendpoints",
Namespace: "default",
Annotations: map[string]string{
"test.annotation": "test",
},
},
Subsets: []v1.EndpointSubset{
{
@ -69,6 +72,24 @@ func makeEndpoints() *v1.Endpoints {
},
},
},
{
Addresses: []v1.EndpointAddress{
{
IP: "6.7.8.9",
TargetRef: &v1.ObjectReference{
Kind: "Node",
Name: "barbaz",
},
},
},
Ports: []v1.EndpointPort{
{
Name: "testport",
Port: 9002,
Protocol: v1.ProtocolTCP,
},
},
},
},
}
}
@ -106,10 +127,20 @@ func TestEndpointsDiscoveryBeforeRun(t *testing.T) {
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "false",
},
{
"__address__": "6.7.8.9:9002",
"__meta_kubernetes_endpoint_address_target_kind": "Node",
"__meta_kubernetes_endpoint_address_target_name": "barbaz",
"__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_endpoints_annotation_test_annotation": "test",
"__meta_kubernetes_endpoints_annotationpresent_test_annotation": "true",
},
Source: "endpoints/default/testendpoints",
},
@ -398,13 +429,23 @@ func TestEndpointsDiscoveryWithService(t *testing.T) {
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "false",
},
{
"__address__": "6.7.8.9:9002",
"__meta_kubernetes_endpoint_address_target_kind": "Node",
"__meta_kubernetes_endpoint_address_target_name": "barbaz",
"__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_endpoints_annotation_test_annotation": "test",
"__meta_kubernetes_endpoints_annotationpresent_test_annotation": "true",
},
Source: "endpoints/default/testendpoints",
},
@ -466,15 +507,25 @@ func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "false",
},
{
"__address__": "6.7.8.9:9002",
"__meta_kubernetes_endpoint_address_target_kind": "Node",
"__meta_kubernetes_endpoint_address_target_name": "barbaz",
"__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app_name": "svc",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_service_label_component": "testing",
"__meta_kubernetes_service_labelpresent_component": "true",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app_name": "svc",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_service_label_component": "testing",
"__meta_kubernetes_service_labelpresent_component": "true",
"__meta_kubernetes_endpoints_annotation_test_annotation": "test",
"__meta_kubernetes_endpoints_annotationpresent_test_annotation": "true",
},
Source: "endpoints/default/testendpoints",
},
@ -484,8 +535,10 @@ func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
func TestEndpointsDiscoveryWithNodeMetadata(t *testing.T) {
metadataConfig := AttachMetadataConfig{Node: true}
nodeLabels := map[string]string{"az": "us-east1"}
node := makeNode("foobar", "", "", nodeLabels, nil)
nodeLabels1 := map[string]string{"az": "us-east1"}
nodeLabels2 := map[string]string{"az": "us-west2"}
node1 := makeNode("foobar", "", "", nodeLabels1, nil)
node2 := makeNode("barbaz", "", "", nodeLabels2, nil)
svc := &v1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "testendpoints",
@ -495,7 +548,7 @@ func TestEndpointsDiscoveryWithNodeMetadata(t *testing.T) {
},
},
}
n, _ := makeDiscoveryWithMetadata(RoleEndpoint, NamespaceDiscovery{}, metadataConfig, makeEndpoints(), svc, node)
n, _ := makeDiscoveryWithMetadata(RoleEndpoint, NamespaceDiscovery{}, metadataConfig, makeEndpoints(), svc, node1, node2)
k8sDiscoveryTest{
discovery: n,
@ -526,13 +579,26 @@ func TestEndpointsDiscoveryWithNodeMetadata(t *testing.T) {
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "false",
},
{
"__address__": "6.7.8.9:9002",
"__meta_kubernetes_endpoint_address_target_kind": "Node",
"__meta_kubernetes_endpoint_address_target_name": "barbaz",
"__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",
"__meta_kubernetes_node_label_az": "us-west2",
"__meta_kubernetes_node_labelpresent_az": "true",
"__meta_kubernetes_node_name": "barbaz",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_endpoints_annotation_test_annotation": "test",
"__meta_kubernetes_endpoints_annotationpresent_test_annotation": "true",
},
Source: "endpoints/default/testendpoints",
},
@ -541,8 +607,10 @@ func TestEndpointsDiscoveryWithNodeMetadata(t *testing.T) {
}
func TestEndpointsDiscoveryWithUpdatedNodeMetadata(t *testing.T) {
nodeLabels := map[string]string{"az": "us-east1"}
nodes := makeNode("foobar", "", "", nodeLabels, nil)
nodeLabels1 := map[string]string{"az": "us-east1"}
nodeLabels2 := map[string]string{"az": "us-west2"}
node1 := makeNode("foobar", "", "", nodeLabels1, nil)
node2 := makeNode("barbaz", "", "", nodeLabels2, nil)
metadataConfig := AttachMetadataConfig{Node: true}
svc := &v1.Service{
ObjectMeta: metav1.ObjectMeta{
@ -553,13 +621,13 @@ func TestEndpointsDiscoveryWithUpdatedNodeMetadata(t *testing.T) {
},
},
}
n, c := makeDiscoveryWithMetadata(RoleEndpoint, NamespaceDiscovery{}, metadataConfig, makeEndpoints(), nodes, svc)
n, c := makeDiscoveryWithMetadata(RoleEndpoint, NamespaceDiscovery{}, metadataConfig, makeEndpoints(), node1, node2, svc)
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
nodes.Labels["az"] = "eu-central1"
c.CoreV1().Nodes().Update(context.Background(), nodes, metav1.UpdateOptions{})
node1.Labels["az"] = "eu-central1"
c.CoreV1().Nodes().Update(context.Background(), node1, metav1.UpdateOptions{})
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{
@ -572,7 +640,7 @@ func TestEndpointsDiscoveryWithUpdatedNodeMetadata(t *testing.T) {
"__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",
"__meta_kubernetes_node_label_az": "eu-central1",
"__meta_kubernetes_node_label_az": "us-east1",
"__meta_kubernetes_node_labelpresent_az": "true",
"__meta_kubernetes_node_name": "foobar",
},
@ -588,13 +656,26 @@ func TestEndpointsDiscoveryWithUpdatedNodeMetadata(t *testing.T) {
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "false",
},
{
"__address__": "6.7.8.9:9002",
"__meta_kubernetes_endpoint_address_target_kind": "Node",
"__meta_kubernetes_endpoint_address_target_name": "barbaz",
"__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",
"__meta_kubernetes_node_label_az": "us-west2",
"__meta_kubernetes_node_labelpresent_az": "true",
"__meta_kubernetes_node_name": "barbaz",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_endpoints_annotation_test_annotation": "test",
"__meta_kubernetes_endpoints_annotationpresent_test_annotation": "true",
},
Source: "endpoints/default/testendpoints",
},
@ -699,13 +780,23 @@ func TestEndpointsDiscoveryNamespaces(t *testing.T) {
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "false",
},
{
"__address__": "6.7.8.9:9002",
"__meta_kubernetes_endpoint_address_target_kind": "Node",
"__meta_kubernetes_endpoint_address_target_name": "barbaz",
"__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_namespace": "ns1",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app": "app1",
"__meta_kubernetes_service_labelpresent_app": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_namespace": "ns1",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_endpoints_annotation_test_annotation": "test",
"__meta_kubernetes_endpoints_annotationpresent_test_annotation": "true",
"__meta_kubernetes_service_label_app": "app1",
"__meta_kubernetes_service_labelpresent_app": "true",
"__meta_kubernetes_service_name": "testendpoints",
},
Source: "endpoints/ns1/testendpoints",
},
@ -815,13 +906,66 @@ func TestEndpointsDiscoveryOwnNamespace(t *testing.T) {
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "false",
},
{
"__address__": "6.7.8.9:9002",
"__meta_kubernetes_endpoint_address_target_kind": "Node",
"__meta_kubernetes_endpoint_address_target_name": "barbaz",
"__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_namespace": "own-ns",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_namespace": "own-ns",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_endpoints_annotation_test_annotation": "test",
"__meta_kubernetes_endpoints_annotationpresent_test_annotation": "true",
},
Source: "endpoints/own-ns/testendpoints",
},
},
}.Run(t)
}
func TestEndpointsDiscoveryEmptyPodStatus(t *testing.T) {
ep := makeEndpoints()
ep.Namespace = "ns"
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "testpod",
Namespace: "ns",
UID: types.UID("deadbeef"),
},
Spec: v1.PodSpec{
NodeName: "testnode",
Containers: []v1.Container{
{
Name: "p1",
Image: "p1:latest",
Ports: []v1.ContainerPort{
{
Name: "mainport",
ContainerPort: 9000,
Protocol: v1.ProtocolTCP,
},
},
},
},
},
Status: v1.PodStatus{},
}
objs := []runtime.Object{
ep,
pod,
}
n, _ := makeDiscovery(RoleEndpoint, NamespaceDiscovery{IncludeOwnNamespace: true}, objs...)
k8sDiscoveryTest{
discovery: n,
expectedMaxItems: 0,
expectedRes: map[string]*targetgroup.Group{},
}.Run(t)
}

View file

@ -15,12 +15,14 @@ package kubernetes
import (
"context"
"errors"
"fmt"
"net"
"strconv"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
v1 "k8s.io/api/discovery/v1"
@ -32,12 +34,6 @@ import (
"github.com/prometheus/prometheus/util/strutil"
)
var (
epslAddCount = eventCount.WithLabelValues("endpointslice", "add")
epslUpdateCount = eventCount.WithLabelValues("endpointslice", "update")
epslDeleteCount = eventCount.WithLabelValues("endpointslice", "delete")
)
// EndpointSlice discovers new endpoint targets.
type EndpointSlice struct {
logger log.Logger
@ -56,10 +52,19 @@ type EndpointSlice struct {
}
// NewEndpointSlice returns a new endpointslice discovery.
func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer) *EndpointSlice {
func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice {
if l == nil {
l = log.NewNopLogger()
}
epslAddCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleAdd)
epslUpdateCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleUpdate)
epslDeleteCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleDelete)
svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd)
svcUpdateCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleUpdate)
svcDeleteCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleDelete)
e := &EndpointSlice{
logger: l,
endpointSliceInf: eps,
@ -70,7 +75,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod
podStore: pod.GetStore(),
nodeInf: node,
withNodeMetadata: node != nil,
queue: workqueue.NewNamed("endpointSlice"),
queue: workqueue.NewNamed(RoleEndpointSlice.String()),
}
_, err := e.endpointSliceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
@ -183,14 +188,14 @@ func (e *EndpointSlice) Run(ctx context.Context, ch chan<- []*targetgroup.Group)
cacheSyncs = append(cacheSyncs, e.nodeInf.HasSynced)
}
if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) {
if ctx.Err() != context.Canceled {
if !errors.Is(ctx.Err(), context.Canceled) {
level.Error(e.logger).Log("msg", "endpointslice informer unable to sync cache")
}
return
}
go func() {
for e.process(ctx, ch) { // nolint:revive
for e.process(ctx, ch) {
}
}()
@ -252,7 +257,6 @@ func endpointSliceSourceFromNamespaceAndName(namespace, name string) string {
}
const (
endpointSliceNameLabel = metaLabelPrefix + "endpointslice_name"
endpointSliceAddressTypeLabel = metaLabelPrefix + "endpointslice_address_type"
endpointSlicePortNameLabel = metaLabelPrefix + "endpointslice_port_name"
endpointSlicePortProtocolLabel = metaLabelPrefix + "endpointslice_port_protocol"
@ -274,9 +278,11 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou
}
tg.Labels = model.LabelSet{
namespaceLabel: lv(eps.namespace()),
endpointSliceNameLabel: lv(eps.name()),
endpointSliceAddressTypeLabel: lv(eps.addressType()),
}
addObjectMetaLabels(tg.Labels, eps.getObjectMeta(), RoleEndpointSlice)
e.addServiceLabels(eps, tg)
type podEntry struct {
@ -339,7 +345,11 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou
}
if e.withNodeMetadata {
target = addNodeLabels(target, e.nodeInf, e.logger, ep.nodename())
if ep.targetRef() != nil && ep.targetRef().Kind == "Node" {
target = addNodeLabels(target, e.nodeInf, e.logger, &ep.targetRef().Name)
} else {
target = addNodeLabels(target, e.nodeInf, e.logger, ep.nodename())
}
}
pod := e.resolvePodRef(ep.targetRef())
@ -412,18 +422,21 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou
continue
}
a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10))
ports := strconv.FormatUint(uint64(cport.ContainerPort), 10)
// PodIP can be empty when a pod is starting or has been evicted.
if len(pe.pod.Status.PodIP) != 0 {
a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10))
ports := strconv.FormatUint(uint64(cport.ContainerPort), 10)
target := model.LabelSet{
model.AddressLabel: lv(a),
podContainerNameLabel: lv(c.Name),
podContainerImageLabel: lv(c.Image),
podContainerPortNameLabel: lv(cport.Name),
podContainerPortNumberLabel: lv(ports),
podContainerPortProtocolLabel: lv(string(cport.Protocol)),
target := model.LabelSet{
model.AddressLabel: lv(a),
podContainerNameLabel: lv(c.Name),
podContainerImageLabel: lv(c.Image),
podContainerPortNameLabel: lv(cport.Name),
podContainerPortNumberLabel: lv(ports),
podContainerPortProtocolLabel: lv(string(cport.Protocol)),
}
tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod)))
}
tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod)))
}
}
}
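
Two behavioural changes in this file are easy to miss in the hunks above: node metadata is now taken from a TargetRef of kind "Node" before falling back to the endpoint's nodename, and container-port targets are only synthesised once the pod actually has a PodIP. A hedged restatement of the resolution order as a standalone helper (this function does not exist in the commit; apiv1 is the file's k8s.io/api/core/v1 alias):

func nodeNameForEndpoint(targetRef *apiv1.ObjectReference, nodeName *string) *string {
	// Prefer an explicit Node target reference, as buildEndpointSlice does above.
	if targetRef != nil && targetRef.Kind == "Node" {
		return &targetRef.Name
	}
	// Otherwise fall back to the endpoint's own nodename, which may be nil.
	return nodeName
}

The PodIP guard is the simpler of the two: wrapping target construction in len(pe.pod.Status.PodIP) != 0 keeps pods that are still starting, or that have been evicted, from emitting targets whose __address__ would otherwise have an empty host.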

View file

@ -17,11 +17,13 @@ import (
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/discovery/v1"
"k8s.io/api/discovery/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// endpointSliceAdaptor is an adaptor for the different EndpointSlice versions
// endpointSliceAdaptor is an adaptor for the different EndpointSlice versions.
type endpointSliceAdaptor interface {
get() interface{}
getObjectMeta() metav1.ObjectMeta
name() string
namespace() string
addressType() string
@ -53,7 +55,7 @@ type endpointSliceEndpointConditionsAdaptor interface {
terminating() *bool
}
// Adaptor for k8s.io/api/discovery/v1
// Adaptor for k8s.io/api/discovery/v1.
type endpointSliceAdaptorV1 struct {
endpointSlice *v1.EndpointSlice
}
@ -66,6 +68,10 @@ func (e *endpointSliceAdaptorV1) get() interface{} {
return e.endpointSlice
}
func (e *endpointSliceAdaptorV1) getObjectMeta() metav1.ObjectMeta {
return e.endpointSlice.ObjectMeta
}
func (e *endpointSliceAdaptorV1) name() string {
return e.endpointSlice.ObjectMeta.Name
}
@ -102,7 +108,7 @@ func (e *endpointSliceAdaptorV1) labelServiceName() string {
return v1.LabelServiceName
}
// Adaptor for k8s.io/api/discovery/v1beta1
// Adaptor for k8s.io/api/discovery/v1beta1.
type endpointSliceAdaptorV1Beta1 struct {
endpointSlice *v1beta1.EndpointSlice
}
@ -115,6 +121,10 @@ func (e *endpointSliceAdaptorV1Beta1) get() interface{} {
return e.endpointSlice
}
func (e *endpointSliceAdaptorV1Beta1) getObjectMeta() metav1.ObjectMeta {
return e.endpointSlice.ObjectMeta
}
func (e *endpointSliceAdaptorV1Beta1) name() string {
return e.endpointSlice.Name
}
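
Exposing getObjectMeta() on the adaptor is what lets buildEndpointSlice feed the shared addObjectMetaLabels helper regardless of API version. A hedged illustration that both concrete adaptors satisfy the extended interface; the zero-value EndpointSlice objects carry no real metadata and the function itself is not part of this commit:

func bothVersionsExposeMeta() (metav1.ObjectMeta, metav1.ObjectMeta) {
	var (
		a endpointSliceAdaptor = &endpointSliceAdaptorV1{endpointSlice: &v1.EndpointSlice{}}
		b endpointSliceAdaptor = &endpointSliceAdaptorV1Beta1{endpointSlice: &v1beta1.EndpointSlice{}}
	)
	// Both hand the same metav1.ObjectMeta shape to the metadata-labelling helper.
	return a.getObjectMeta(), b.getObjectMeta()
}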

View file

@ -29,7 +29,7 @@ func Test_EndpointSliceAdaptor_v1(t *testing.T) {
require.Equal(t, endpointSlice.ObjectMeta.Namespace, adaptor.namespace())
require.Equal(t, endpointSlice.AddressType, v1.AddressType(adaptor.addressType()))
require.Equal(t, endpointSlice.Labels, adaptor.labels())
require.Equal(t, endpointSlice.Labels[v1.LabelServiceName], "testendpoints")
require.Equal(t, "testendpoints", endpointSlice.Labels[v1.LabelServiceName])
for i, endpointAdaptor := range adaptor.endpoints() {
require.Equal(t, endpointSlice.Endpoints[i].Addresses, endpointAdaptor.addresses())
@ -57,7 +57,7 @@ func Test_EndpointSliceAdaptor_v1beta1(t *testing.T) {
require.Equal(t, endpointSlice.ObjectMeta.Namespace, adaptor.namespace())
require.Equal(t, endpointSlice.AddressType, v1beta1.AddressType(adaptor.addressType()))
require.Equal(t, endpointSlice.Labels, adaptor.labels())
require.Equal(t, endpointSlice.Labels[v1beta1.LabelServiceName], "testendpoints")
require.Equal(t, "testendpoints", endpointSlice.Labels[v1beta1.LabelServiceName])
for i, endpointAdaptor := range adaptor.endpoints() {
require.Equal(t, endpointSlice.Endpoints[i].Addresses, endpointAdaptor.addresses())

View file

@ -52,6 +52,9 @@ func makeEndpointSliceV1() *v1.EndpointSlice {
Labels: map[string]string{
v1.LabelServiceName: "testendpoints",
},
Annotations: map[string]string{
"test.annotation": "test",
},
},
AddressType: v1.AddressTypeIPv4,
Ports: []v1.EndpointPort{
@ -90,6 +93,17 @@ func makeEndpointSliceV1() *v1.EndpointSlice {
Serving: boolptr(true),
Terminating: boolptr(true),
},
}, {
Addresses: []string{"4.5.6.7"},
Conditions: v1.EndpointConditions{
Ready: boolptr(true),
Serving: boolptr(true),
Terminating: boolptr(false),
},
TargetRef: &corev1.ObjectReference{
Kind: "Node",
Name: "barbaz",
},
},
},
}
@ -103,6 +117,9 @@ func makeEndpointSliceV1beta1() *v1beta1.EndpointSlice {
Labels: map[string]string{
v1beta1.LabelServiceName: "testendpoints",
},
Annotations: map[string]string{
"test.annotation": "test",
},
},
AddressType: v1beta1.AddressTypeIPv4,
Ports: []v1beta1.EndpointPort{
@ -130,6 +147,17 @@ func makeEndpointSliceV1beta1() *v1beta1.EndpointSlice {
Serving: boolptr(true),
Terminating: boolptr(true),
},
}, {
Addresses: []string{"4.5.6.7"},
Conditions: v1beta1.EndpointConditions{
Ready: boolptr(true),
Serving: boolptr(true),
Terminating: boolptr(false),
},
TargetRef: &corev1.ObjectReference{
Kind: "Node",
Name: "barbaz",
},
},
},
}
@ -183,11 +211,27 @@ func TestEndpointSliceDiscoveryBeforeRun(t *testing.T) {
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
},
Source: "endpointslice/default/testendpoints",
},
@ -233,11 +277,26 @@ func TestEndpointSliceDiscoveryBeforeRunV1beta1(t *testing.T) {
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
},
Source: "endpointslice/default/testendpoints",
},
@ -419,11 +478,27 @@ func TestEndpointSliceDiscoveryDelete(t *testing.T) {
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
},
Labels: map[model.LabelName]model.LabelValue{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
"__meta_kubernetes_namespace": "default",
},
},
},
@ -503,11 +578,27 @@ func TestEndpointSliceDiscoveryUpdate(t *testing.T) {
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
"__meta_kubernetes_namespace": "default",
},
},
},
@ -576,11 +667,27 @@ func TestEndpointSliceDiscoveryEmptyEndpoints(t *testing.T) {
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
"__meta_kubernetes_namespace": "default",
},
Source: "endpointslice/default/testendpoints",
},
@ -644,14 +751,30 @@ func TestEndpointSliceDiscoveryWithService(t *testing.T) {
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
},
Source: "endpointslice/default/testendpoints",
},
@ -728,16 +851,32 @@ func TestEndpointSliceDiscoveryWithServiceUpdate(t *testing.T) {
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_service_label_app_name": "svc",
"__meta_kubernetes_service_label_component": "testing",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_labelpresent_component": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_service_label_app_name": "svc",
"__meta_kubernetes_service_label_component": "testing",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_labelpresent_component": "true",
"__meta_kubernetes_service_name": "testendpoints",
},
Source: "endpointslice/default/testendpoints",
},
@ -747,7 +886,8 @@ func TestEndpointSliceDiscoveryWithServiceUpdate(t *testing.T) {
func TestEndpointsSlicesDiscoveryWithNodeMetadata(t *testing.T) {
metadataConfig := AttachMetadataConfig{Node: true}
nodeLabels := map[string]string{"az": "us-east1"}
nodeLabels1 := map[string]string{"az": "us-east1"}
nodeLabels2 := map[string]string{"az": "us-west2"}
svc := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "testendpoints",
@ -757,7 +897,7 @@ func TestEndpointsSlicesDiscoveryWithNodeMetadata(t *testing.T) {
},
},
}
objs := []runtime.Object{makeEndpointSliceV1(), makeNode("foobar", "", "", nodeLabels, nil), svc}
objs := []runtime.Object{makeEndpointSliceV1(), makeNode("foobar", "", "", nodeLabels1, nil), makeNode("barbaz", "", "", nodeLabels2, nil), svc}
n, _ := makeDiscoveryWithMetadata(RoleEndpointSlice, NamespaceDiscovery{}, metadataConfig, objs...)
k8sDiscoveryTest{
@ -804,14 +944,33 @@ func TestEndpointsSlicesDiscoveryWithNodeMetadata(t *testing.T) {
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
"__meta_kubernetes_node_label_az": "us-west2",
"__meta_kubernetes_node_labelpresent_az": "true",
"__meta_kubernetes_node_name": "barbaz",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
},
Source: "endpointslice/default/testendpoints",
},
@ -821,7 +980,8 @@ func TestEndpointsSlicesDiscoveryWithNodeMetadata(t *testing.T) {
func TestEndpointsSlicesDiscoveryWithUpdatedNodeMetadata(t *testing.T) {
metadataConfig := AttachMetadataConfig{Node: true}
nodeLabels := map[string]string{"az": "us-east1"}
nodeLabels1 := map[string]string{"az": "us-east1"}
nodeLabels2 := map[string]string{"az": "us-west2"}
svc := &corev1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "testendpoints",
@ -831,16 +991,17 @@ func TestEndpointsSlicesDiscoveryWithUpdatedNodeMetadata(t *testing.T) {
},
},
}
node := makeNode("foobar", "", "", nodeLabels, nil)
objs := []runtime.Object{makeEndpointSliceV1(), node, svc}
node1 := makeNode("foobar", "", "", nodeLabels1, nil)
node2 := makeNode("barbaz", "", "", nodeLabels2, nil)
objs := []runtime.Object{makeEndpointSliceV1(), node1, node2, svc}
n, c := makeDiscoveryWithMetadata(RoleEndpointSlice, NamespaceDiscovery{}, metadataConfig, objs...)
k8sDiscoveryTest{
discovery: n,
expectedMaxItems: 2,
afterStart: func() {
node.Labels["az"] = "us-central1"
c.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
node1.Labels["az"] = "us-central1"
c.CoreV1().Nodes().Update(context.Background(), node1, metav1.UpdateOptions{})
},
expectedRes: map[string]*targetgroup.Group{
"endpointslice/default/testendpoints": {
@ -859,7 +1020,7 @@ func TestEndpointsSlicesDiscoveryWithUpdatedNodeMetadata(t *testing.T) {
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
"__meta_kubernetes_node_label_az": "us-central1",
"__meta_kubernetes_node_label_az": "us-east1",
"__meta_kubernetes_node_labelpresent_az": "true",
"__meta_kubernetes_node_name": "foobar",
},
@ -883,14 +1044,33 @@ func TestEndpointsSlicesDiscoveryWithUpdatedNodeMetadata(t *testing.T) {
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
"__meta_kubernetes_node_label_az": "us-west2",
"__meta_kubernetes_node_labelpresent_az": "true",
"__meta_kubernetes_node_name": "barbaz",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
},
Source: "endpointslice/default/testendpoints",
},
@ -1007,14 +1187,30 @@ func TestEndpointSliceDiscoveryNamespaces(t *testing.T) {
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_namespace": "ns1",
"__meta_kubernetes_service_label_app": "app1",
"__meta_kubernetes_service_labelpresent_app": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
"__meta_kubernetes_namespace": "ns1",
"__meta_kubernetes_service_label_app": "app1",
"__meta_kubernetes_service_labelpresent_app": "true",
"__meta_kubernetes_service_name": "testendpoints",
},
Source: "endpointslice/ns1/testendpoints",
},
@ -1139,14 +1335,73 @@ func TestEndpointSliceDiscoveryOwnNamespace(t *testing.T) {
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
},
{
"__address__": "4.5.6.7:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Node",
"__meta_kubernetes_endpointslice_address_target_name": "barbaz",
"__meta_kubernetes_endpointslice_endpoint_conditions_ready": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_serving": "true",
"__meta_kubernetes_endpointslice_endpoint_conditions_terminating": "false",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_app_protocol": "http",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_namespace": "own-ns",
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testendpoints",
"__meta_kubernetes_namespace": "own-ns",
"__meta_kubernetes_endpointslice_label_kubernetes_io_service_name": "testendpoints",
"__meta_kubernetes_endpointslice_labelpresent_kubernetes_io_service_name": "true",
"__meta_kubernetes_endpointslice_annotation_test_annotation": "test",
"__meta_kubernetes_endpointslice_annotationpresent_test_annotation": "true",
},
Source: "endpointslice/own-ns/testendpoints",
},
},
}.Run(t)
}
func TestEndpointSliceDiscoveryEmptyPodStatus(t *testing.T) {
ep := makeEndpointSliceV1()
ep.Namespace = "ns"
pod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "testpod",
Namespace: "ns",
UID: types.UID("deadbeef"),
},
Spec: corev1.PodSpec{
NodeName: "testnode",
Containers: []corev1.Container{
{
Name: "p1",
Image: "p1:latest",
Ports: []corev1.ContainerPort{
{
Name: "mainport",
ContainerPort: 9000,
Protocol: corev1.ProtocolTCP,
},
},
},
},
},
Status: corev1.PodStatus{},
}
objs := []runtime.Object{
ep,
pod,
}
n, _ := makeDiscovery(RoleEndpoint, NamespaceDiscovery{IncludeOwnNamespace: true}, objs...)
k8sDiscoveryTest{
discovery: n,
expectedMaxItems: 0,
expectedRes: map[string]*targetgroup.Group{},
}.Run(t)
}

View file

@ -21,6 +21,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
v1 "k8s.io/api/networking/v1"
"k8s.io/api/networking/v1beta1"
@ -28,13 +29,6 @@ import (
"k8s.io/client-go/util/workqueue"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
)
var (
ingressAddCount = eventCount.WithLabelValues("ingress", "add")
ingressUpdateCount = eventCount.WithLabelValues("ingress", "update")
ingressDeleteCount = eventCount.WithLabelValues("ingress", "delete")
)
// Ingress implements discovery of Kubernetes ingress.
@ -46,8 +40,18 @@ type Ingress struct {
}
// NewIngress returns a new ingress discovery.
func NewIngress(l log.Logger, inf cache.SharedInformer) *Ingress {
s := &Ingress{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("ingress")}
func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress {
ingressAddCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleAdd)
ingressUpdateCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleUpdate)
ingressDeleteCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleDelete)
s := &Ingress{
logger: l,
informer: inf,
store: inf.GetStore(),
queue: workqueue.NewNamed(RoleIngress.String()),
}
_, err := s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
ingressAddCount.Inc()
@ -89,7 +93,7 @@ func (i *Ingress) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
go func() {
for i.process(ctx, ch) { // nolint:revive
for i.process(ctx, ch) {
}
}()
@ -143,37 +147,22 @@ func ingressSourceFromNamespaceAndName(namespace, name string) string {
}
const (
ingressNameLabel = metaLabelPrefix + "ingress_name"
ingressLabelPrefix = metaLabelPrefix + "ingress_label_"
ingressLabelPresentPrefix = metaLabelPrefix + "ingress_labelpresent_"
ingressAnnotationPrefix = metaLabelPrefix + "ingress_annotation_"
ingressAnnotationPresentPrefix = metaLabelPrefix + "ingress_annotationpresent_"
ingressSchemeLabel = metaLabelPrefix + "ingress_scheme"
ingressHostLabel = metaLabelPrefix + "ingress_host"
ingressPathLabel = metaLabelPrefix + "ingress_path"
ingressClassNameLabel = metaLabelPrefix + "ingress_class_name"
ingressSchemeLabel = metaLabelPrefix + "ingress_scheme"
ingressHostLabel = metaLabelPrefix + "ingress_host"
ingressPathLabel = metaLabelPrefix + "ingress_path"
ingressClassNameLabel = metaLabelPrefix + "ingress_class_name"
)
func ingressLabels(ingress ingressAdaptor) model.LabelSet {
// Each label and annotation will create two key-value pairs in the map.
ls := make(model.LabelSet, 2*(len(ingress.labels())+len(ingress.annotations()))+2)
ls[ingressNameLabel] = lv(ingress.name())
ls := make(model.LabelSet)
ls[namespaceLabel] = lv(ingress.namespace())
if cls := ingress.ingressClassName(); cls != nil {
ls[ingressClassNameLabel] = lv(*cls)
}
for k, v := range ingress.labels() {
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(ingressLabelPrefix+ln)] = lv(v)
ls[model.LabelName(ingressLabelPresentPrefix+ln)] = presentValue
}
addObjectMetaLabels(ls, ingress.getObjectMeta(), RoleIngress)
for k, v := range ingress.annotations() {
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(ingressAnnotationPrefix+ln)] = lv(v)
ls[model.LabelName(ingressAnnotationPresentPrefix+ln)] = presentValue
}
return ls
}
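For context, a caller-side sketch of the new constructor wiring; the logger and informer names are assumed here, and the counter vec simply mirrors the one created per Discovery later in this diff, so this is an illustrative fragment rather than the definitive call site:

// Hedged sketch: one shared CounterVec feeds every role; NewIngress derives its
// add/update/delete children from it via WithLabelValues.
eventCount := prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: discovery.KubernetesMetricsNamespace,
		Name:      "events_total",
		Help:      "The number of Kubernetes events handled.",
	},
	[]string{"role", "event"},
)
ingress := NewIngress(log.With(logger, "role", "ingress"), informer, eventCount)
// Each handled add event then increments
// prometheus_sd_kubernetes_events_total{role="ingress",event="add"}.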

View file

@ -16,10 +16,12 @@ package kubernetes
import (
v1 "k8s.io/api/networking/v1"
"k8s.io/api/networking/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// ingressAdaptor is an adaptor for the different Ingress versions
// ingressAdaptor is an adaptor for the different Ingress versions.
type ingressAdaptor interface {
getObjectMeta() metav1.ObjectMeta
name() string
namespace() string
labels() map[string]string
@ -34,7 +36,7 @@ type ingressRuleAdaptor interface {
host() string
}
// Adaptor for networking.k8s.io/v1
// Adaptor for networking.k8s.io/v1.
type ingressAdaptorV1 struct {
ingress *v1.Ingress
}
@ -43,11 +45,12 @@ func newIngressAdaptorFromV1(ingress *v1.Ingress) ingressAdaptor {
return &ingressAdaptorV1{ingress: ingress}
}
func (i *ingressAdaptorV1) name() string { return i.ingress.Name }
func (i *ingressAdaptorV1) namespace() string { return i.ingress.Namespace }
func (i *ingressAdaptorV1) labels() map[string]string { return i.ingress.Labels }
func (i *ingressAdaptorV1) annotations() map[string]string { return i.ingress.Annotations }
func (i *ingressAdaptorV1) ingressClassName() *string { return i.ingress.Spec.IngressClassName }
func (i *ingressAdaptorV1) getObjectMeta() metav1.ObjectMeta { return i.ingress.ObjectMeta }
func (i *ingressAdaptorV1) name() string { return i.ingress.Name }
func (i *ingressAdaptorV1) namespace() string { return i.ingress.Namespace }
func (i *ingressAdaptorV1) labels() map[string]string { return i.ingress.Labels }
func (i *ingressAdaptorV1) annotations() map[string]string { return i.ingress.Annotations }
func (i *ingressAdaptorV1) ingressClassName() *string { return i.ingress.Spec.IngressClassName }
func (i *ingressAdaptorV1) tlsHosts() []string {
var hosts []string
@ -87,7 +90,7 @@ func (i *ingressRuleAdaptorV1) paths() []string {
func (i *ingressRuleAdaptorV1) host() string { return i.rule.Host }
// Adaptor for networking.k8s.io/v1beta1
// Adaptor for networking.k8s.io/v1beta1.
type ingressAdaptorV1Beta1 struct {
ingress *v1beta1.Ingress
}
@ -95,12 +98,12 @@ type ingressAdaptorV1Beta1 struct {
func newIngressAdaptorFromV1beta1(ingress *v1beta1.Ingress) ingressAdaptor {
return &ingressAdaptorV1Beta1{ingress: ingress}
}
func (i *ingressAdaptorV1Beta1) name() string { return i.ingress.Name }
func (i *ingressAdaptorV1Beta1) namespace() string { return i.ingress.Namespace }
func (i *ingressAdaptorV1Beta1) labels() map[string]string { return i.ingress.Labels }
func (i *ingressAdaptorV1Beta1) annotations() map[string]string { return i.ingress.Annotations }
func (i *ingressAdaptorV1Beta1) ingressClassName() *string { return i.ingress.Spec.IngressClassName }
func (i *ingressAdaptorV1Beta1) getObjectMeta() metav1.ObjectMeta { return i.ingress.ObjectMeta }
func (i *ingressAdaptorV1Beta1) name() string { return i.ingress.Name }
func (i *ingressAdaptorV1Beta1) namespace() string { return i.ingress.Namespace }
func (i *ingressAdaptorV1Beta1) labels() map[string]string { return i.ingress.Labels }
func (i *ingressAdaptorV1Beta1) annotations() map[string]string { return i.ingress.Annotations }
func (i *ingressAdaptorV1Beta1) ingressClassName() *string { return i.ingress.Spec.IngressClassName }
func (i *ingressAdaptorV1Beta1) tlsHosts() []string {
var hosts []string

View file

@ -23,6 +23,8 @@ import (
"sync"
"time"
"github.com/prometheus/prometheus/util/strutil"
disv1beta1 "k8s.io/api/discovery/v1beta1"
"github.com/go-kit/log"
@ -56,25 +58,15 @@ import (
const (
// metaLabelPrefix is the meta prefix used for all meta labels.
// in this discovery.
metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_"
namespaceLabel = metaLabelPrefix + "namespace"
metricsNamespace = "prometheus_sd_kubernetes"
presentValue = model.LabelValue("true")
metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_"
namespaceLabel = metaLabelPrefix + "namespace"
presentValue = model.LabelValue("true")
)
var (
// Http header
// Http header.
userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
// Custom events metric
eventCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricsNamespace,
Name: "events_total",
Help: "The number of Kubernetes events handled.",
},
[]string{"role", "event"},
)
// DefaultSDConfig is the default Kubernetes SD configuration
// DefaultSDConfig is the default Kubernetes SD configuration.
DefaultSDConfig = SDConfig{
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
@ -82,15 +74,6 @@ var (
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(eventCount)
// Initialize metric vectors.
for _, role := range []string{"endpointslice", "endpoints", "node", "pod", "service", "ingress"} {
for _, evt := range []string{"add", "delete", "update"} {
eventCount.WithLabelValues(role, evt)
}
}
(&clientGoRequestMetricAdapter{}).Register(prometheus.DefaultRegisterer)
(&clientGoWorkqueueMetricsProvider{}).Register(prometheus.DefaultRegisterer)
}
// Role is role of the service in Kubernetes.
@ -119,6 +102,16 @@ func (c *Role) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
}
func (c Role) String() string {
return string(c)
}
const (
MetricLabelRoleAdd = "add"
MetricLabelRoleDelete = "delete"
MetricLabelRoleUpdate = "update"
)
// SDConfig is the configuration for Kubernetes service discovery.
type SDConfig struct {
APIServer config.URL `yaml:"api_server,omitempty"`
@ -135,7 +128,7 @@ func (*SDConfig) Name() string { return "kubernetes" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return New(opts.Logger, c)
return New(opts.Logger, opts.Registerer, c)
}
// SetDirectory joins any relative file paths with dir.
@ -272,6 +265,8 @@ type Discovery struct {
selectors roleSelector
ownNamespace string
attachMetadata AttachMetadataConfig
eventCount *prometheus.CounterVec
metricRegisterer discovery.MetricRegisterer
}
func (d *Discovery) getNamespaces() []string {
@ -290,7 +285,7 @@ func (d *Discovery) getNamespaces() []string {
}
// New creates a new Kubernetes discovery for the given role.
func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
func New(l log.Logger, reg prometheus.Registerer, conf *SDConfig) (*Discovery, error) {
if l == nil {
l = log.NewNopLogger()
}
@ -344,7 +339,7 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
return nil, err
}
return &Discovery{
d := &Discovery{
client: c,
logger: l,
role: conf.Role,
@ -353,7 +348,37 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
selectors: mapSelector(conf.Selectors),
ownNamespace: ownNamespace,
attachMetadata: conf.AttachMetadata,
}, nil
eventCount: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: discovery.KubernetesMetricsNamespace,
Name: "events_total",
Help: "The number of Kubernetes events handled.",
},
[]string{"role", "event"},
),
}
d.metricRegisterer = discovery.NewMetricRegisterer(reg, []prometheus.Collector{d.eventCount})
// Initialize metric vectors.
for _, role := range []string{
RoleEndpointSlice.String(),
RoleEndpoint.String(),
RoleNode.String(),
RolePod.String(),
RoleService.String(),
RoleIngress.String(),
} {
for _, evt := range []string{
MetricLabelRoleAdd,
MetricLabelRoleDelete,
MetricLabelRoleUpdate,
} {
d.eventCount.WithLabelValues(role, evt)
}
}
return d, nil
}
func mapSelector(rawSelector []SelectorConfig) roleSelector {
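A minimal caller-side usage sketch of the updated New signature; the context, channel, and pod role below are assumed for illustration and are not part of this diff. The key behaviour is that the event counter is created in New but only registered against the supplied Registerer when Run starts, and unregistered again when Run returns:

reg := prometheus.NewRegistry()
d, err := New(log.NewNopLogger(), reg, &SDConfig{Role: RolePod})
if err != nil {
	return err // e.g. no usable Kubernetes client configuration
}
ch := make(chan []*targetgroup.Group)
go d.Run(ctx, ch) // RegisterMetrics runs at the top of Run; UnregisterMetrics is deferred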
@ -389,6 +414,14 @@ const resyncDisabled = 0
// Run implements the discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
d.Lock()
err := d.metricRegisterer.RegisterMetrics()
if err != nil {
level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
return
}
defer d.metricRegisterer.UnregisterMetrics()
namespaces := d.getNamespaces()
switch d.role {
@ -480,6 +513,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
cache.NewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
cache.NewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled),
nodeInf,
d.eventCount,
)
d.discoverers = append(d.discoverers, eps)
go eps.endpointSliceInf.Run(ctx.Done())
@ -539,6 +573,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
cache.NewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
cache.NewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled),
nodeInf,
d.eventCount,
)
d.discoverers = append(d.discoverers, eps)
go eps.endpointsInf.Run(ctx.Done())
@ -570,6 +605,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
log.With(d.logger, "role", "pod"),
d.newPodsByNodeInformer(plw),
nodeInformer,
d.eventCount,
)
d.discoverers = append(d.discoverers, pod)
go pod.podInf.Run(ctx.Done())
@ -592,6 +628,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
svc := NewService(
log.With(d.logger, "role", "service"),
cache.NewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
d.eventCount,
)
d.discoverers = append(d.discoverers, svc)
go svc.informer.Run(ctx.Done())
@ -649,13 +686,14 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
ingress := NewIngress(
log.With(d.logger, "role", "ingress"),
informer,
d.eventCount,
)
d.discoverers = append(d.discoverers, ingress)
go ingress.informer.Run(ctx.Done())
}
case RoleNode:
nodeInformer := d.newNodeInformer(ctx)
node := NewNode(log.With(d.logger, "role", "node"), nodeInformer)
node := NewNode(log.With(d.logger, "role", "node"), nodeInformer, d.eventCount)
d.discoverers = append(d.discoverers, node)
go node.informer.Run(ctx.Done())
default:
@ -761,15 +799,21 @@ func (d *Discovery) newEndpointsByNodeInformer(plw *cache.ListWatch) cache.Share
indexers[nodeIndex] = func(obj interface{}) ([]string, error) {
e, ok := obj.(*apiv1.Endpoints)
if !ok {
return nil, fmt.Errorf("object is not a pod")
return nil, fmt.Errorf("object is not endpoints")
}
var nodes []string
for _, target := range e.Subsets {
for _, addr := range target.Addresses {
if addr.NodeName == nil {
continue
if addr.TargetRef != nil {
switch addr.TargetRef.Kind {
case "Pod":
if addr.NodeName != nil {
nodes = append(nodes, *addr.NodeName)
}
case "Node":
nodes = append(nodes, addr.TargetRef.Name)
}
}
nodes = append(nodes, *addr.NodeName)
}
}
return nodes, nil
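A hedged illustration with an assumed Endpoints object (the sample values are not from this diff): an address whose TargetRef points at a Node is now indexed under that node's name, whereas the previous code skipped every address without a NodeName.

eps := &apiv1.Endpoints{
	Subsets: []apiv1.EndpointSubset{{
		Addresses: []apiv1.EndpointAddress{{
			IP:        "192.0.2.10",
			TargetRef: &apiv1.ObjectReference{Kind: "Node", Name: "worker-1"},
		}},
	}},
}
// For eps, the indexer above returns []string{"worker-1"}; a Pod-backed address
// still contributes *addr.NodeName as before.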
@ -789,17 +833,29 @@ func (d *Discovery) newEndpointSlicesByNodeInformer(plw *cache.ListWatch, object
switch e := obj.(type) {
case *disv1.EndpointSlice:
for _, target := range e.Endpoints {
if target.NodeName == nil {
continue
if target.TargetRef != nil {
switch target.TargetRef.Kind {
case "Pod":
if target.NodeName != nil {
nodes = append(nodes, *target.NodeName)
}
case "Node":
nodes = append(nodes, target.TargetRef.Name)
}
}
nodes = append(nodes, *target.NodeName)
}
case *disv1beta1.EndpointSlice:
for _, target := range e.Endpoints {
if target.NodeName == nil {
continue
if target.TargetRef != nil {
switch target.TargetRef.Kind {
case "Pod":
if target.NodeName != nil {
nodes = append(nodes, *target.NodeName)
}
case "Node":
nodes = append(nodes, target.TargetRef.Name)
}
}
nodes = append(nodes, *target.NodeName)
}
default:
return nil, fmt.Errorf("object is not an endpointslice")
@ -825,3 +881,19 @@ func checkDiscoveryV1Supported(client kubernetes.Interface) (bool, error) {
// https://kubernetes.io/docs/reference/using-api/deprecation-guide/#v1-25
return semVer.Major() >= 1 && semVer.Minor() >= 21, nil
}
func addObjectMetaLabels(labelSet model.LabelSet, objectMeta metav1.ObjectMeta, role Role) {
labelSet[model.LabelName(metaLabelPrefix+string(role)+"_name")] = lv(objectMeta.Name)
for k, v := range objectMeta.Labels {
ln := strutil.SanitizeLabelName(k)
labelSet[model.LabelName(metaLabelPrefix+string(role)+"_label_"+ln)] = lv(v)
labelSet[model.LabelName(metaLabelPrefix+string(role)+"_labelpresent_"+ln)] = presentValue
}
for k, v := range objectMeta.Annotations {
ln := strutil.SanitizeLabelName(k)
labelSet[model.LabelName(metaLabelPrefix+string(role)+"_annotation_"+ln)] = lv(v)
labelSet[model.LabelName(metaLabelPrefix+string(role)+"_annotationpresent_"+ln)] = presentValue
}
}
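To illustrate the shared helper, a hedged sketch with an assumed pod ObjectMeta (values invented for illustration, not taken from this diff) and the label set it yields:

ls := model.LabelSet{}
addObjectMetaLabels(ls, metav1.ObjectMeta{
	Name:        "web-1",
	Labels:      map[string]string{"app": "web"},
	Annotations: map[string]string{"team": "infra"},
}, RolePod)
// ls now holds:
//   __meta_kubernetes_pod_name                   = "web-1"
//   __meta_kubernetes_pod_label_app              = "web"
//   __meta_kubernetes_pod_labelpresent_app       = "true"
//   __meta_kubernetes_pod_annotation_team        = "infra"
//   __meta_kubernetes_pod_annotationpresent_team = "true"

This is the same label shape that the role-specific podLabels, nodeLabels, serviceLabels, and ingressLabels functions previously built by hand.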

View file

@ -29,6 +29,8 @@ import (
"k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/tools/cache"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/testutil"
@ -49,13 +51,25 @@ func makeDiscoveryWithVersion(role Role, nsDiscovery NamespaceDiscovery, k8sVer
fakeDiscovery, _ := clientset.Discovery().(*fakediscovery.FakeDiscovery)
fakeDiscovery.FakedServerVersion = &version.Info{GitVersion: k8sVer}
return &Discovery{
d := &Discovery{
client: clientset,
logger: log.NewNopLogger(),
role: role,
namespaceDiscovery: &nsDiscovery,
ownNamespace: "own-ns",
}, clientset
eventCount: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: discovery.KubernetesMetricsNamespace,
Name: "events_total",
Help: "The number of Kubernetes events handled.",
},
[]string{"role", "event"},
),
}
d.metricRegisterer = discovery.NewMetricRegisterer(prometheus.NewRegistry(), []prometheus.Collector{d.eventCount})
return d, clientset
}
// makeDiscoveryWithMetadata creates a kubernetes.Discovery instance with the specified metadata config.

View file

@ -22,6 +22,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
@ -35,12 +36,6 @@ const (
NodeLegacyHostIP = "LegacyHostIP"
)
var (
nodeAddCount = eventCount.WithLabelValues("node", "add")
nodeUpdateCount = eventCount.WithLabelValues("node", "update")
nodeDeleteCount = eventCount.WithLabelValues("node", "delete")
)
// Node discovers Kubernetes nodes.
type Node struct {
logger log.Logger
@ -50,11 +45,22 @@ type Node struct {
}
// NewNode returns a new node discovery.
func NewNode(l log.Logger, inf cache.SharedInformer) *Node {
func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node {
if l == nil {
l = log.NewNopLogger()
}
n := &Node{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("node")}
nodeAddCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleAdd)
nodeUpdateCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleUpdate)
nodeDeleteCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleDelete)
n := &Node{
logger: l,
informer: inf,
store: inf.GetStore(),
queue: workqueue.NewNamed(RoleNode.String()),
}
_, err := n.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
nodeAddCount.Inc()
@ -96,7 +102,7 @@ func (n *Node) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
go func() {
for n.process(ctx, ch) { // nolint:revive
for n.process(ctx, ch) {
}
}()
@ -152,33 +158,18 @@ func nodeSourceFromName(name string) string {
}
const (
nodeNameLabel = metaLabelPrefix + "node_name"
nodeProviderIDLabel = metaLabelPrefix + "node_provider_id"
nodeLabelPrefix = metaLabelPrefix + "node_label_"
nodeLabelPresentPrefix = metaLabelPrefix + "node_labelpresent_"
nodeAnnotationPrefix = metaLabelPrefix + "node_annotation_"
nodeAnnotationPresentPrefix = metaLabelPrefix + "node_annotationpresent_"
nodeAddressPrefix = metaLabelPrefix + "node_address_"
nodeProviderIDLabel = metaLabelPrefix + "node_provider_id"
nodeAddressPrefix = metaLabelPrefix + "node_address_"
)
func nodeLabels(n *apiv1.Node) model.LabelSet {
// Each label and annotation will create two key-value pairs in the map.
ls := make(model.LabelSet, 2*(len(n.Labels)+len(n.Annotations))+1)
ls := make(model.LabelSet)
ls[nodeNameLabel] = lv(n.Name)
ls[nodeProviderIDLabel] = lv(n.Spec.ProviderID)
for k, v := range n.Labels {
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(nodeLabelPrefix+ln)] = lv(v)
ls[model.LabelName(nodeLabelPresentPrefix+ln)] = presentValue
}
addObjectMetaLabels(ls, n.ObjectMeta, RoleNode)
for k, v := range n.Annotations {
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(nodeAnnotationPrefix+ln)] = lv(v)
ls[model.LabelName(nodeAnnotationPresentPrefix+ln)] = presentValue
}
return ls
}
@ -209,7 +200,7 @@ func (n *Node) buildNode(node *apiv1.Node) *targetgroup.Group {
return tg
}
// nodeAddresses returns the provided node's address, based on the priority:
// nodeAddress returns the provided node's address, based on the priority:
// 1. NodeInternalIP
// 2. NodeInternalDNS
// 3. NodeExternalIP
@ -217,7 +208,7 @@ func (n *Node) buildNode(node *apiv1.Node) *targetgroup.Group {
// 5. NodeLegacyHostIP
// 6. NodeHostName
//
// Derived from k8s.io/kubernetes/pkg/util/node/node.go
// Derived from k8s.io/kubernetes/pkg/util/node/node.go.
func nodeAddress(node *apiv1.Node) (string, map[apiv1.NodeAddressType][]string, error) {
m := map[apiv1.NodeAddressType][]string{}
for _, a := range node.Status.Addresses {

View file

@ -23,6 +23,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -30,17 +31,10 @@ import (
"k8s.io/client-go/util/workqueue"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
)
const nodeIndex = "node"
var (
podAddCount = eventCount.WithLabelValues("pod", "add")
podUpdateCount = eventCount.WithLabelValues("pod", "update")
podDeleteCount = eventCount.WithLabelValues("pod", "delete")
)
// Pod discovers new pod targets.
type Pod struct {
podInf cache.SharedIndexInformer
@ -52,18 +46,22 @@ type Pod struct {
}
// NewPod creates a new pod discovery.
func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer) *Pod {
func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod {
if l == nil {
l = log.NewNopLogger()
}
podAddCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleAdd)
podDeleteCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleDelete)
podUpdateCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleUpdate)
p := &Pod{
podInf: pods,
nodeInf: nodes,
withNodeMetadata: nodes != nil,
store: pods.GetStore(),
logger: l,
queue: workqueue.NewNamed("pod"),
queue: workqueue.NewNamed(RolePod.String()),
}
_, err := p.podInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
@ -132,7 +130,7 @@ func (p *Pod) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
go func() {
for p.process(ctx, ch) { // nolint:revive
for p.process(ctx, ch) {
}
}()
@ -180,7 +178,6 @@ func convertToPod(o interface{}) (*apiv1.Pod, error) {
}
const (
podNameLabel = metaLabelPrefix + "pod_name"
podIPLabel = metaLabelPrefix + "pod_ip"
podContainerNameLabel = metaLabelPrefix + "pod_container_name"
podContainerIDLabel = metaLabelPrefix + "pod_container_id"
@ -191,10 +188,6 @@ const (
podContainerIsInit = metaLabelPrefix + "pod_container_init"
podReadyLabel = metaLabelPrefix + "pod_ready"
podPhaseLabel = metaLabelPrefix + "pod_phase"
podLabelPrefix = metaLabelPrefix + "pod_label_"
podLabelPresentPrefix = metaLabelPrefix + "pod_labelpresent_"
podAnnotationPrefix = metaLabelPrefix + "pod_annotation_"
podAnnotationPresentPrefix = metaLabelPrefix + "pod_annotationpresent_"
podNodeNameLabel = metaLabelPrefix + "pod_node_name"
podHostIPLabel = metaLabelPrefix + "pod_host_ip"
podUID = metaLabelPrefix + "pod_uid"
@ -215,7 +208,6 @@ func GetControllerOf(controllee metav1.Object) *metav1.OwnerReference {
func podLabels(pod *apiv1.Pod) model.LabelSet {
ls := model.LabelSet{
podNameLabel: lv(pod.ObjectMeta.Name),
podIPLabel: lv(pod.Status.PodIP),
podReadyLabel: podReady(pod),
podPhaseLabel: lv(string(pod.Status.Phase)),
@ -224,6 +216,8 @@ func podLabels(pod *apiv1.Pod) model.LabelSet {
podUID: lv(string(pod.ObjectMeta.UID)),
}
addObjectMetaLabels(ls, pod.ObjectMeta, RolePod)
createdBy := GetControllerOf(pod)
if createdBy != nil {
if createdBy.Kind != "" {
@ -234,18 +228,6 @@ func podLabels(pod *apiv1.Pod) model.LabelSet {
}
}
for k, v := range pod.Labels {
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(podLabelPrefix+ln)] = lv(v)
ls[model.LabelName(podLabelPresentPrefix+ln)] = presentValue
}
for k, v := range pod.Annotations {
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(podAnnotationPrefix+ln)] = lv(v)
ls[model.LabelName(podAnnotationPresentPrefix+ln)] = presentValue
}
return ls
}

View file

@ -22,19 +22,13 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
)
var (
svcAddCount = eventCount.WithLabelValues("service", "add")
svcUpdateCount = eventCount.WithLabelValues("service", "update")
svcDeleteCount = eventCount.WithLabelValues("service", "delete")
)
// Service implements discovery of Kubernetes services.
@ -46,11 +40,22 @@ type Service struct {
}
// NewService returns a new service discovery.
func NewService(l log.Logger, inf cache.SharedInformer) *Service {
func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service {
if l == nil {
l = log.NewNopLogger()
}
s := &Service{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("service")}
svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd)
svcUpdateCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleUpdate)
svcDeleteCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleDelete)
s := &Service{
logger: l,
informer: inf,
store: inf.GetStore(),
queue: workqueue.NewNamed(RoleService.String()),
}
_, err := s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
svcAddCount.Inc()
@ -92,7 +97,7 @@ func (s *Service) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
go func() {
for s.process(ctx, ch) { // nolint:revive
for s.process(ctx, ch) {
}
}()
@ -147,38 +152,20 @@ func serviceSourceFromNamespaceAndName(namespace, name string) string {
}
const (
serviceNameLabel = metaLabelPrefix + "service_name"
serviceLabelPrefix = metaLabelPrefix + "service_label_"
serviceLabelPresentPrefix = metaLabelPrefix + "service_labelpresent_"
serviceAnnotationPrefix = metaLabelPrefix + "service_annotation_"
serviceAnnotationPresentPrefix = metaLabelPrefix + "service_annotationpresent_"
servicePortNameLabel = metaLabelPrefix + "service_port_name"
servicePortNumberLabel = metaLabelPrefix + "service_port_number"
servicePortProtocolLabel = metaLabelPrefix + "service_port_protocol"
serviceClusterIPLabel = metaLabelPrefix + "service_cluster_ip"
serviceLoadBalancerIP = metaLabelPrefix + "service_loadbalancer_ip"
serviceExternalNameLabel = metaLabelPrefix + "service_external_name"
serviceType = metaLabelPrefix + "service_type"
servicePortNameLabel = metaLabelPrefix + "service_port_name"
servicePortNumberLabel = metaLabelPrefix + "service_port_number"
servicePortProtocolLabel = metaLabelPrefix + "service_port_protocol"
serviceClusterIPLabel = metaLabelPrefix + "service_cluster_ip"
serviceLoadBalancerIP = metaLabelPrefix + "service_loadbalancer_ip"
serviceExternalNameLabel = metaLabelPrefix + "service_external_name"
serviceType = metaLabelPrefix + "service_type"
)
func serviceLabels(svc *apiv1.Service) model.LabelSet {
// Each label and annotation will create two key-value pairs in the map.
ls := make(model.LabelSet, 2*(len(svc.Labels)+len(svc.Annotations))+2)
ls[serviceNameLabel] = lv(svc.Name)
ls := make(model.LabelSet)
ls[namespaceLabel] = lv(svc.Namespace)
addObjectMetaLabels(ls, svc.ObjectMeta, RoleService)
for k, v := range svc.Labels {
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(serviceLabelPrefix+ln)] = lv(v)
ls[model.LabelName(serviceLabelPresentPrefix+ln)] = presentValue
}
for k, v := range svc.Annotations {
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(serviceAnnotationPrefix+ln)] = lv(v)
ls[model.LabelName(serviceAnnotationPresentPrefix+ln)] = presentValue
}
return ls
}

View file

@ -28,48 +28,6 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
)
var (
failedConfigs = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_sd_failed_configs",
Help: "Current number of service discovery configurations that failed to load.",
},
[]string{"name"},
)
discoveredTargets = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_sd_discovered_targets",
Help: "Current number of discovered targets.",
},
[]string{"name", "config"},
)
receivedUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_received_updates_total",
Help: "Total number of update events received from the SD providers.",
},
[]string{"name"},
)
delayedUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_updates_delayed_total",
Help: "Total number of update events that couldn't be sent immediately.",
},
[]string{"name"},
)
sentUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_updates_total",
Help: "Total number of update events sent to the SD consumers.",
},
[]string{"name"},
)
)
func RegisterMetrics() {
prometheus.MustRegister(failedConfigs, discoveredTargets, receivedUpdates, delayedUpdates, sentUpdates)
}
type poolKey struct {
setName string
provider string
@ -84,7 +42,7 @@ type provider struct {
}
// NewManager is the Discovery Manager constructor.
func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager)) *Manager {
func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, options ...func(*Manager)) *Manager {
if logger == nil {
logger = log.NewNopLogger()
}
@ -96,10 +54,21 @@ func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager
ctx: ctx,
updatert: 5 * time.Second,
triggerSend: make(chan struct{}, 1),
registerer: registerer,
}
for _, option := range options {
option(mgr)
}
// Register the metrics.
// We have to do this after setting all options, so that the name of the Manager is set.
if metrics, err := discovery.NewMetrics(registerer, mgr.name); err == nil {
mgr.metrics = metrics
} else {
level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err)
return nil
}
return mgr
}
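A caller-side usage sketch; the context and logger are assumed for illustration and not part of this diff. Because NewManager now returns nil when its metrics cannot be registered, callers are expected to check the result, as the updated tests further down do:

reg := prometheus.NewRegistry()
mgr := NewManager(ctx, log.NewNopLogger(), reg)
if mgr == nil {
	// metric registration failed; treat this as a fatal setup error
	return
}
go mgr.Run()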
@ -135,16 +104,19 @@ type Manager struct {
// The triggerSend channel signals to the manager that new updates have been received from providers.
triggerSend chan struct{}
// A registerer for all service discovery metrics.
registerer prometheus.Registerer
metrics *discovery.Metrics
}
// Run starts the background processing
// Run starts the background processing.
func (m *Manager) Run() error {
go m.sender()
for range m.ctx.Done() {
m.cancelDiscoverers()
return m.ctx.Err()
}
return nil
<-m.ctx.Done()
m.cancelDiscoverers()
return m.ctx.Err()
}
// SyncCh returns a read only channel used by all the clients to receive target updates.
@ -159,7 +131,7 @@ func (m *Manager) ApplyConfig(cfg map[string]discovery.Configs) error {
for pk := range m.targets {
if _, ok := cfg[pk.setName]; !ok {
discoveredTargets.DeleteLabelValues(m.name, pk.setName)
m.metrics.DiscoveredTargets.DeleteLabelValues(m.name, pk.setName)
}
}
m.cancelDiscoverers()
@ -170,9 +142,9 @@ func (m *Manager) ApplyConfig(cfg map[string]discovery.Configs) error {
failedCount := 0
for name, scfg := range cfg {
failedCount += m.registerProviders(scfg, name)
discoveredTargets.WithLabelValues(m.name, name).Set(0)
m.metrics.DiscoveredTargets.WithLabelValues(name).Set(0)
}
failedConfigs.WithLabelValues(m.name).Set(float64(failedCount))
m.metrics.FailedConfigs.Set(float64(failedCount))
for _, prov := range m.providers {
m.startProvider(m.ctx, prov)
@ -209,7 +181,7 @@ func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targ
case <-ctx.Done():
return
case tgs, ok := <-updates:
receivedUpdates.WithLabelValues(m.name).Inc()
m.metrics.ReceivedUpdates.Inc()
if !ok {
level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name)
return
@ -238,11 +210,11 @@ func (m *Manager) sender() {
case <-ticker.C: // Some discoverers send updates too often so we throttle these with the ticker.
select {
case <-m.triggerSend:
sentUpdates.WithLabelValues(m.name).Inc()
m.metrics.SentUpdates.Inc()
select {
case m.syncCh <- m.allGroups():
default:
delayedUpdates.WithLabelValues(m.name).Inc()
m.metrics.DelayedUpdates.Inc()
level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle")
select {
case m.triggerSend <- struct{}{}:
@ -290,7 +262,7 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
}
}
for setName, v := range n {
discoveredTargets.WithLabelValues(m.name, setName).Set(float64(v))
m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v))
}
return tSets
}
@ -311,7 +283,8 @@ func (m *Manager) registerProviders(cfgs discovery.Configs, setName string) int
}
typ := cfg.Name()
d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{
Logger: log.With(m.logger, "discovery", typ, "config", setName),
Logger: log.With(m.logger, "discovery", typ, "config", setName),
Registerer: m.registerer,
})
if err != nil {
level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName)

View file

@ -22,6 +22,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
client_testutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@ -664,7 +665,8 @@ func TestTargetUpdatesOrder(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
var totalUpdatesCount int
@ -746,7 +748,8 @@ func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Grou
func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -774,7 +777,8 @@ func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) {
func TestDiscovererConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -798,7 +802,8 @@ func TestDiscovererConfigs(t *testing.T) {
func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -837,7 +842,8 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
func TestIdenticalConfigurationsAreCoalesced(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, nil)
discoveryManager := NewManager(ctx, nil, prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -868,7 +874,8 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) {
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -893,7 +900,8 @@ func (e errorConfig) NewDiscoverer(discovery.DiscovererOptions) (discovery.Disco
func TestGaugeFailedConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -907,7 +915,7 @@ func TestGaugeFailedConfigs(t *testing.T) {
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
failedCount := client_testutil.ToFloat64(failedConfigs)
failedCount := client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
if failedCount != 3 {
t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount)
}
@ -918,7 +926,7 @@ func TestGaugeFailedConfigs(t *testing.T) {
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
failedCount = client_testutil.ToFloat64(failedConfigs)
failedCount = client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
if failedCount != 0 {
t.Fatalf("Expected to get no failed config, got: %v", failedCount)
}
@ -1049,7 +1057,8 @@ func TestCoordinationWithReceiver(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
mgr := NewManager(ctx, nil)
mgr := NewManager(ctx, nil, prometheus.NewRegistry())
require.NotNil(t, mgr)
mgr.updatert = updateDelay
go mgr.Run()

View file

@ -51,6 +51,7 @@ const (
linodeLabelStatus = linodeLabel + "status"
linodeLabelTags = linodeLabel + "tags"
linodeLabelGroup = linodeLabel + "group"
linodeLabelGPUs = linodeLabel + "gpus"
linodeLabelHypervisor = linodeLabel + "hypervisor"
linodeLabelBackups = linodeLabel + "backups"
linodeLabelSpecsDiskBytes = linodeLabel + "specs_disk_bytes"
@ -66,24 +67,15 @@ const (
)
// DefaultSDConfig is the default Linode SD configuration.
var (
DefaultSDConfig = SDConfig{
TagSeparator: ",",
Port: 80,
RefreshInterval: model.Duration(60 * time.Second),
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
failuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_linode_failures_total",
Help: "Number of Linode service discovery refresh failures.",
})
)
var DefaultSDConfig = SDConfig{
TagSeparator: ",",
Port: 80,
RefreshInterval: model.Duration(60 * time.Second),
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(failuresCount)
}
// SDConfig is the configuration for Linode based service discovery.
@ -100,7 +92,7 @@ func (*SDConfig) Name() string { return "linode" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -130,16 +122,22 @@ type Discovery struct {
pollCount int
lastResults []*targetgroup.Group
eventPollingEnabled bool
failuresCount prometheus.Counter
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
d := &Discovery{
port: conf.Port,
tagSeparator: conf.TagSeparator,
pollCount: 0,
lastRefreshTimestamp: time.Now().UTC(),
eventPollingEnabled: true,
failuresCount: prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_linode_failures_total",
Help: "Number of Linode service discovery refresh failures.",
}),
}
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "linode_sd")
@ -157,10 +155,14 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
d.client = &client
d.Discovery = refresh.NewDiscovery(
logger,
"linode",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "linode",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
Metrics: []prometheus.Collector{d.failuresCount},
},
)
return d, nil
}
@ -221,14 +223,14 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro
// Gather all linode instances.
instances, err := d.client.ListInstances(ctx, &linodego.ListOptions{PageSize: 500})
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, err
}
// Gather detailed IP address info for all IPs on all linode instances.
detailedIPs, err := d.client.ListIPAddresses(ctx, &linodego.ListOptions{PageSize: 500})
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, err
}
@ -302,12 +304,13 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro
linodeLabelType: model.LabelValue(instance.Type),
linodeLabelStatus: model.LabelValue(instance.Status),
linodeLabelGroup: model.LabelValue(instance.Group),
linodeLabelGPUs: model.LabelValue(fmt.Sprintf("%d", instance.Specs.GPUs)),
linodeLabelHypervisor: model.LabelValue(instance.Hypervisor),
linodeLabelBackups: model.LabelValue(backupsStatus),
linodeLabelSpecsDiskBytes: model.LabelValue(fmt.Sprintf("%d", instance.Specs.Disk<<20)),
linodeLabelSpecsMemoryBytes: model.LabelValue(fmt.Sprintf("%d", instance.Specs.Memory<<20)),
linodeLabelSpecsDiskBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Disk)<<20)),
linodeLabelSpecsMemoryBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Memory)<<20)),
linodeLabelSpecsVCPUs: model.LabelValue(fmt.Sprintf("%d", instance.Specs.VCPUs)),
linodeLabelSpecsTransferBytes: model.LabelValue(fmt.Sprintf("%d", instance.Specs.Transfer<<20)),
linodeLabelSpecsTransferBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Transfer)<<20)),
}
addr := net.JoinHostPort(publicIPv4, strconv.FormatUint(uint64(d.port), 10))
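As a side note on the int64 casts a few lines above: the Linode API presumably reports disk, memory, and transfer in megabytes as plain ints, so widening before the <<20 shift keeps the byte values correct on 32-bit builds as well. A hedged illustration with an invented value (not from this diff):

memMB := 16384              // e.g. a 16 GiB plan, reported in megabytes
bytes := int64(memMB) << 20 // 17179869184; safe regardless of the platform's int size
// memMB << 20 alone overflows a 32-bit int for plans of 2 GiB and larger.
fmt.Println(bytes)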

View file

@ -20,6 +20,7 @@ import (
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@ -52,7 +53,7 @@ func TestLinodeSDRefresh(t *testing.T) {
Credentials: tokenID,
Type: "Bearer",
}
d, err := NewDiscovery(&cfg, log.NewNopLogger())
d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
endpoint, err := url.Parse(sdmock.Mock.Endpoint())
require.NoError(t, err)
@ -61,12 +62,12 @@ func TestLinodeSDRefresh(t *testing.T) {
tgs, err := d.refresh(context.Background())
require.NoError(t, err)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Equal(t, 4, len(tg.Targets))
require.Len(t, tg.Targets, 4)
for i, lbls := range []model.LabelSet{
{
@ -85,6 +86,7 @@ func TestLinodeSDRefresh(t *testing.T) {
"__meta_linode_status": model.LabelValue("running"),
"__meta_linode_tags": model.LabelValue(",monitoring,"),
"__meta_linode_group": model.LabelValue(""),
"__meta_linode_gpus": model.LabelValue("0"),
"__meta_linode_hypervisor": model.LabelValue("kvm"),
"__meta_linode_backups": model.LabelValue("disabled"),
"__meta_linode_specs_disk_bytes": model.LabelValue("85899345920"),
@ -109,6 +111,7 @@ func TestLinodeSDRefresh(t *testing.T) {
"__meta_linode_status": model.LabelValue("running"),
"__meta_linode_tags": model.LabelValue(",monitoring,"),
"__meta_linode_group": model.LabelValue(""),
"__meta_linode_gpus": model.LabelValue("0"),
"__meta_linode_hypervisor": model.LabelValue("kvm"),
"__meta_linode_backups": model.LabelValue("disabled"),
"__meta_linode_specs_disk_bytes": model.LabelValue("85899345920"),
@ -132,6 +135,7 @@ func TestLinodeSDRefresh(t *testing.T) {
"__meta_linode_status": model.LabelValue("running"),
"__meta_linode_tags": model.LabelValue(",monitoring,"),
"__meta_linode_group": model.LabelValue(""),
"__meta_linode_gpus": model.LabelValue("0"),
"__meta_linode_hypervisor": model.LabelValue("kvm"),
"__meta_linode_backups": model.LabelValue("disabled"),
"__meta_linode_specs_disk_bytes": model.LabelValue("53687091200"),
@ -155,6 +159,7 @@ func TestLinodeSDRefresh(t *testing.T) {
"__meta_linode_status": model.LabelValue("running"),
"__meta_linode_tags": model.LabelValue(",monitoring,"),
"__meta_linode_group": model.LabelValue(""),
"__meta_linode_gpus": model.LabelValue("0"),
"__meta_linode_hypervisor": model.LabelValue("kvm"),
"__meta_linode_backups": model.LabelValue("disabled"),
"__meta_linode_specs_disk_bytes": model.LabelValue("26843545600"),

Some files were not shown because too many files have changed in this diff.