Merge pull request #15325 from prometheus/merge-main-into-3.0

Merge main into 3.0
Jan Fajerski 2024-11-04 14:09:12 +01:00 committed by GitHub
commit 60da9b88a0
394 changed files with 19279 additions and 8679 deletions

View file

@ -1,4 +1,4 @@
blank_issues_enabled: false
blank_issues_enabled: true
contact_links:
- name: Prometheus Community Support
url: https://prometheus.io/community/

View file

@ -12,8 +12,8 @@ jobs:
name: lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: bufbuild/buf-setup-action@54abbed4fe8d8d45173eca4798b0c39a53a7b658 # v1.39.0
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: bufbuild/buf-setup-action@62ee92603c244ad0da98bab36a834a999a5329e6 # v1.43.0
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1

View file

@ -12,8 +12,8 @@ jobs:
runs-on: ubuntu-latest
if: github.repository_owner == 'prometheus'
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: bufbuild/buf-setup-action@54abbed4fe8d8d45173eca4798b0c39a53a7b658 # v1.39.0
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: bufbuild/buf-setup-action@62ee92603c244ad0da98bab36a834a999a5329e6 # v1.43.0
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: bufbuild/buf-lint-action@06f9dd823d873146471cfaaf108a993fe00e5325 # v1.1.1

View file

@ -12,13 +12,9 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
image: quay.io/prometheus/golang-builder:1.23-base
env:
# Preliminary fix to make Go tests with race detector not use too much memory,
# see https://github.com/prometheus/prometheus/issues/14858.
GOMEMLIMIT: 10GiB
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/setup_environment
with:
enable_npm: true
@ -33,8 +29,8 @@ jobs:
container:
image: quay.io/prometheus/golang-builder:1.23-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/setup_environment
- run: go test --tags=dedupelabels ./...
- run: GOARCH=386 go test ./cmd/prometheus
@ -52,7 +48,7 @@ jobs:
# The go version in this image should be N-1 wrt test_go.
image: quay.io/prometheus/golang-builder:1.22-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- run: make build
# Don't run NPM build; don't run race-detector.
- run: make test GO_ONLY=1 test-flags=""
@ -66,8 +62,8 @@ jobs:
image: quay.io/prometheus/golang-builder:1.23-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/setup_environment
with:
enable_go: false
@ -83,7 +79,7 @@ jobs:
name: Go tests on Windows
runs-on: windows-latest
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
with:
go-version: 1.23.x
@ -100,7 +96,7 @@ jobs:
container:
image: quay.io/prometheus/golang-builder:1.23-base
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- run: go install ./cmd/promtool/.
- run: go install github.com/google/go-jsonnet/cmd/jsonnet@latest
- run: go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
@ -125,8 +121,8 @@ jobs:
matrix:
thread: [ 0, 1, 2 ]
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/build
with:
promu_opts: "-p linux/amd64 -p windows/amd64 -p linux/arm64 -p darwin/amd64 -p darwin/arm64 -p linux/386"
@ -150,8 +146,8 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/build
with:
parallelism: 12
@ -173,7 +169,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- name: Install Go
uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
with:
@ -186,7 +182,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- name: Install Go
uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
with:
@ -212,8 +208,8 @@ jobs:
needs: [test_ui, test_go, test_go_more, test_go_oldest, test_windows, golangci, codeql, build_all]
if: github.event_name == 'push' && github.event.ref == 'refs/heads/main'
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/publish_main
with:
docker_hub_login: ${{ secrets.docker_hub_login }}
@ -229,8 +225,8 @@ jobs:
||
(github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v3.'))
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- uses: ./.github/promci/actions/publish_release
with:
docker_hub_login: ${{ secrets.docker_hub_login }}
@ -244,10 +240,10 @@ jobs:
needs: [test_ui, codeql]
steps:
- name: Checkout
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: prometheus/promci@45166329da36d74895901808f1c8c97efafc7f84 # v0.3.0
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: prometheus/promci@468927c440349ab56c4a1aafd453b312841503c2 # v0.4.4
- name: Install nodejs
uses: actions/setup-node@1e60f620b9541d16bece96c5465dc8ee9832be0b # v4.0.3
uses: actions/setup-node@0a44ba7841725637a19e28fa30b79a866c81b0a6 # v4.0.4
with:
node-version-file: "web/ui/.nvmrc"
registry-url: "https://registry.npmjs.org"

View file

@ -24,15 +24,15 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- name: Initialize CodeQL
uses: github/codeql-action/init@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6
uses: github/codeql-action/init@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10
with:
languages: ${{ matrix.language }}
- name: Autobuild
uses: github/codeql-action/autobuild@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6
uses: github/codeql-action/autobuild@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@4dd16135b69a43b6c8efb853346f8437d92d3c93 # v3.26.6
uses: github/codeql-action/analyze@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # v3.26.10

View file

@ -18,7 +18,7 @@ jobs:
if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
steps:
- name: git checkout
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- name: Set docker hub repo name
run: echo "DOCKER_REPO_NAME=$(make docker-repo-name)" >> $GITHUB_ENV
- name: Push README to Dockerhub
@ -40,7 +40,7 @@ jobs:
if: github.repository_owner == 'prometheus' || github.repository_owner == 'prometheus-community' # Don't run this workflow on forks.
steps:
- name: git checkout
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- name: Set quay.io org name
run: echo "DOCKER_REPO=$(echo quay.io/${GITHUB_REPOSITORY_OWNER} | tr -d '-')" >> $GITHUB_ENV
- name: Set quay.io repo name

View file

@ -21,7 +21,7 @@ jobs:
fuzz-seconds: 600
dry-run: false
- name: Upload Crash
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4
uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
if: failure() && steps.build.outcome == 'success'
with:
name: artifacts

View file

@ -13,7 +13,7 @@ jobs:
container:
image: quay.io/prometheus/golang-builder
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- run: ./scripts/sync_repo_files.sh
env:
GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }}

View file

@ -21,7 +21,7 @@ jobs:
steps:
- name: "Checkout code"
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # tag=v4.1.6
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # tag=v4.2.0
with:
persist-credentials: false
@ -37,7 +37,7 @@ jobs:
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # tag=v4.3.4
uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # tag=v4.4.0
with:
name: SARIF file
path: results.sarif
@ -45,6 +45,6 @@ jobs:
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@4dd16135b69a43b6c8efb853346f8437d92d3c93 # tag=v3.26.6
uses: github/codeql-action/upload-sarif@e2b3eafc8d227b0241d48be5f425d47c2d750a13 # tag=v3.26.10
with:
sarif_file: results.sarif

View file

@ -23,6 +23,7 @@ linters:
- usestdlibvars
- whitespace
- loggercheck
- sloglint
issues:
max-issues-per-linter: 0
@ -100,8 +101,6 @@ linters-settings:
- (net/http.ResponseWriter).Write
# No need to check for errors on server's shutdown.
- (*net/http.Server).Shutdown
# Never check for logger errors.
- (github.com/go-kit/log.Logger).Log
# Never check for rollback errors as Rollback() is called when a previous error was detected.
- (github.com/prometheus/prometheus/storage.Appender).Rollback
goimports:
@ -153,14 +152,4 @@ linters-settings:
disable:
- float-compare
- go-require
enable:
- bool-compare
- compares
- empty
- error-is-as
- error-nil
- expected-actual
- len
- require-error
- suite-dont-use-pkg
- suite-extra-assert-call
enable-all: true

View file

@ -1,5 +1,48 @@
# Changelog
## unreleased
* [CHANGE] Scraping: Remove implicit fallback to the Prometheus text format in case of invalid/missing Content-Type and fail the scrape instead. Add ability to specify a `fallback_scrape_protocol` in the scrape config (see the sketch after this list). #15136
* [CHANGE] Remote-write: default enable_http2 to false. #15219
* [CHANGE] Scraping: normalize "le" and "quantile" label values upon ingestion. #15164
* [CHANGE] Scraping: config `scrape_classic_histograms` was renamed to `always_scrape_classic_histograms`. #15178
* [CHANGE] Config: remove expand-external-labels flag, expand external labels env vars by default. #14657
* [CHANGE] Disallow configuring Alertmanager with the v1 API. #13883
* [ENHANCEMENT] Scraping, rules: handle targets reappearing, or rules moving group, when out-of-order is enabled. #14710
* [ENHANCEMENT] Tools: add debug printouts to promtool rules unit testing #15196
* [ENHANCEMENT] Scraping: support Created-Timestamp feature on native histograms. #14694
* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to always ignore native histograms. #14941
* [BUGFIX] PromQL: Fix stddev+stdvar aggregations to treat Infinity consistently. #14941
* [BUGFIX] OTLP receiver: Preserve colons when generating metric names in suffix adding mode (this mode is always enabled, unless one uses Prometheus as a library). #15251
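As an illustration of the `fallback_scrape_protocol` and renamed `always_scrape_classic_histograms` entries above, a minimal scrape config sketch; the job name and target are illustrative, and the protocol value is one of the accepted scrape-protocol names:

```yaml
scrape_configs:
  - job_name: example                              # illustrative job name
    # Used when a target returns an invalid or missing Content-Type header;
    # without it the scrape now fails instead of silently assuming the text format.
    fallback_scrape_protocol: PrometheusText0.0.4
    # Renamed from scrape_classic_histograms in this release.
    always_scrape_classic_histograms: false
    static_configs:
      - targets: ["localhost:9090"]                # illustrative target
```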
## 3.0.0-beta.1 / 2024-10-09
* [CHANGE] regexp `.` now matches all characters (performance improvement). #14505
* [CHANGE] `holt_winters` is now called `double_exponential_smoothing` and moves behind the [experimental-promql-functions feature flag](https://prometheus.io/docs/prometheus/latest/feature_flags/#experimental-promql-functions). #14930
* [CHANGE] API: The OTLP receiver endpoint can now be enabled using `--web.enable-otlp-receiver` instead of `--enable-feature=otlp-write-receiver`. #14894
* [CHANGE] Prometheus will not add or remove port numbers from the target address. `no-default-scrape-port` feature flag removed. #14160
* [CHANGE] Logging: the format of log lines has changed a little, along with the adoption of Go's Structured Logging package. #14906
* [CHANGE] Don't create extra `_created` timeseries if feature-flag `created-timestamp-zero-ingestion` is enabled. #14738
* [CHANGE] Float literals and time durations being the same is now a stable feature. #15111
* [ENHANCEMENT] UI: Many fixes and improvements. #14898, #14899, #14907, #14908, #14912, #14913, #14914, #14931, #14940, #14945, #14946, #14972, #14981, #14982, #14994, #15096
* [ENHANCEMENT] UI: Web UI now displays notifications, e.g. when starting up and shutting down. #15082
* [ENHANCEMENT] PromQL: Introduce exponential interpolation for native histograms. #14677
* [ENHANCEMENT] TSDB: Add support for ingestion of out-of-order native histogram samples. #14850, #14546
* [ENHANCEMENT] Alerts: remove metrics for removed Alertmanagers. #13909
* [ENHANCEMENT] Kubernetes SD: Support sidecar containers in endpoint discovery. #14929
* [ENHANCEMENT] Consul SD: Support catalog filters. #11224
* [PERF] TSDB: Parallelize deletion of postings after head compaction. #14975
* [PERF] TSDB: Chunk encoding: shorten some write sequences. #14932
* [PERF] TSDB: Grow postings by doubling. #14721
* [PERF] Relabeling: Optimize adding a constant label pair. #12180
* [BUGFIX] Scraping: Unit was missing when using protobuf format. #15095
* [BUGFIX] PromQL: Only return "possible non-counter" annotation when `rate` returns points. #14910
* [BUGFIX] TSDB: Chunks could have one unnecessary zero byte at the end. #14854
* [BUGFIX] "superfluous response.WriteHeader call" messages in log. #14884
* [BUGFIX] PromQL: Unary negation of native histograms. #14821
* [BUGFIX] PromQL: Handle stale marker in native histogram series (e.g. if series goes away and comes back). #15025
* [BUGFIX] Autoreload: Reload invalid yaml files. #14947
## 3.0.0-beta.0 / 2024-09-05
Release 3.0.0-beta.0 includes new features such as a brand new UI and UTF-8 support enabled by default. As a new major version, several breaking changes are introduced. The breaking changes are mainly around the removal of deprecated feature flags and CLI arguments, and the full list can be found below. Most users should be able to try this release out of the box without any configuration changes.
@ -16,32 +59,43 @@ As is traditional with a beta release, we do **not** recommend users install 3.0
* [CHANGE] Agent mode has been promoted to stable. The feature flag `agent` has been removed. To run Prometheus in Agent mode, use the new `--agent` cmdline arg instead. #14747
* [CHANGE] Remove deprecated `remote-write-receiver`,`promql-at-modifier`, and `promql-negative-offset` feature flags. #13456, #14526
* [CHANGE] Remove deprecated `storage.tsdb.allow-overlapping-blocks`, `alertmanager.timeout`, and `storage.tsdb.retention` flags. #14640, #14643
* [FEATURE] Promtool: Allow additional labels to be added to blocks created from openmetrics. #14402
* [FEATURE] OTLP receiver: Add new option `otlp.promote_resource_attributes`, for any OTel resource attributes that should be promoted to metric labels. #14200
* [FEATURE] Automatic reloading of the Prometheus configuration file at a specified interval #14769
* [ENHANCEMENT] OTLP receiver: Warn when encountering exponential histograms with zero count and non-zero sum. #14706
* [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612
* [ENHANCEMENT] Scrape: Only parse created timestamp if `created-timestamp-zero-ingestion` feature flag is enabled. This is because a lot of memory is used when parsing the created timestamp in the OM text format. #14815
* [ENHANCEMENT] Scrape: Add support for logging scrape failures to a specified file. #14734
* [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379
* [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477
* [ENHANCEMENT] Move AM discovery page from "Monitoring status" to "Server status". #14875
* [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816
* [ENHANCEMENT] Add support for multiple listening addresses. #14665
* [ENHANCEMENT] Add the ability to set custom HTTP headers. #14817
* [BUGFIX] TSDB: Fix shard initialization after WAL repair. #14731
* [BUGFIX] UTF-8: Ensure correct validation when legacy mode turned on. #14736
* [BUGFIX] SD: Make discovery manager notify consumers of dropped targets for still defined jobs. #13147
* [BUGFIX] SD: Prevent the new service discovery manager from storing stale targets. #13622
* [BUGFIX] Remote Write 2.0: Ensure metadata records are sent from the WAL to remote write during WAL replay. #14766
* [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029
* [BUGFIX] Scrape: Reset exemplar position when scraping histograms in protobuf. #14810
* [BUGFIX] Native Histograms: Do not re-use spans between histograms. #14771
* [BUGFIX] Scrape: Only parse created timestamp if `created-timestamp-zero-ingestion` feature flag is enabled. This is because a lot of memory is used when parsing the created timestamp in the OM text format. #14815
* [BUGFIX] TSDB: Fix panic in query during truncation with OOO head. #14831
* [BUGFIX] TSDB: Fix panic in chunk querier. #14874
* [BUGFIX] promql.Engine.Close: No-op if nil. #14861
* [BUGFIX] tsdb/wlog.Watcher.readSegmentForGC: Only count unknown record types against record_decode_failures_total metric. #14042
* [FEATURE] Support config reload automatically - feature flag `auto-reload-config`. #14769
* [BUGFIX] Scrape: Do not override target parameter labels with config params. #11029
## 2.55.0 / 2024-10-22
* [FEATURE] PromQL: Add experimental `info` function. #14495
* [FEATURE] Support UTF-8 characters in label names - feature flag `utf8-names`. #14482, #14880, #14736, #14727
* [FEATURE] Scraping: Add the ability to set custom `http_headers` in config. #14817
* [FEATURE] Scraping: Support feature flag `created-timestamp-zero-ingestion` in OpenMetrics. #14356, #14815
* [FEATURE] Scraping: `scrape_failure_log_file` option to log failures to a file. #14734
* [FEATURE] OTLP receiver: Optional promotion of resource attributes to series labels (see the config sketch after this list). #14200
* [FEATURE] Remote-Write: Support Google Cloud Monitoring authorization. #14346
* [FEATURE] Promtool: `tsdb create-blocks` new option to add labels. #14403
* [FEATURE] Promtool: `promtool test` adds `--junit` flag to format results. #14506
* [FEATURE] TSDB: Add `delayed-compaction` feature flag, for people running many Prometheus to randomize timing. #12532
* [ENHANCEMENT] OTLP receiver: Warn on exponential histograms with zero count and non-zero sum. #14706
* [ENHANCEMENT] OTLP receiver: Interrupt translation on context cancellation/timeout. #14612
* [ENHANCEMENT] Remote Read client: Enable streaming remote read if the server supports it. #11379
* [ENHANCEMENT] Remote-Write: Don't reshard if we haven't successfully sent a sample since last update. #14450
* [ENHANCEMENT] PromQL: Delay deletion of `__name__` label to the end of the query evaluation. This is **experimental** and enabled under the feature-flag `promql-delayed-name-removal`. #14477
* [ENHANCEMENT] PromQL: Experimental `sort_by_label` and `sort_by_label_desc` sort by all labels when label is equal. #14655, #14985
* [ENHANCEMENT] PromQL: Clarify error message logged when Go runtime panic occurs during query evaluation. #14621
* [ENHANCEMENT] PromQL: Use Kahan summation for better accuracy in `avg` and `avg_over_time`. #14413
* [ENHANCEMENT] Tracing: Improve PromQL tracing, including showing the operation performed for aggregates, operators, and calls. #14816
* [ENHANCEMENT] API: Support multiple listening addresses. #14665
* [ENHANCEMENT] TSDB: Backward compatibility with upcoming index v3. #14934
* [PERF] TSDB: Query in-order and out-of-order series together. #14354, #14693, #14714, #14831, #14874, #14948, #15120
* [PERF] TSDB: Streamline reading of overlapping out-of-order head chunks. #14729
* [BUGFIX] PromQL: make sort_by_label stable. #14985
* [BUGFIX] SD: Fix dropping targets (with feature flag `new-service-discovery-manager`). #13147
* [BUGFIX] SD: Stop storing stale targets (with feature flag `new-service-discovery-manager`). #13622
* [BUGFIX] Scraping: exemplars could be dropped in protobuf scraping. #14810
* [BUGFIX] Remote-Write: fix metadata sending for experimental Remote-Write V2. #14766
* [BUGFIX] Remote-Write: Return 4xx not 5xx when timeseries has duplicate label. #14716
* [BUGFIX] Experimental Native Histograms: many fixes for incorrect results, panics, warnings. #14513, #14575, #14598, #14609, #14611, #14771, #14821
* [BUGFIX] TSDB: Only count unknown record types in `record_decode_failures_total` metric. #14042
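The 2.55.0 entries for resource-attribute promotion and scrape-failure logging map onto configuration fields; a minimal sketch assuming the documented field names, with illustrative attribute names, job, target, and log path:

```yaml
# prometheus.yml fragment
otlp:
  promote_resource_attributes:            # OTel resource attributes copied to series labels
    - service.name
    - service.namespace

scrape_configs:
  - job_name: example                     # illustrative job name
    scrape_failure_log_file: scrape_failures.log   # log failed scrapes for this job
    static_configs:
      - targets: ["localhost:9090"]       # illustrative target
```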
## 2.54.1 / 2024-08-27
@ -140,7 +194,7 @@ This release changes the default for GOGC, the Go runtime control for the trade-
* [ENHANCEMENT] TSDB: Pause regular block compactions if the head needs to be compacted (prioritize head as it increases memory consumption). #13754
* [ENHANCEMENT] Observability: Improved logging during signal handling termination. #13772
* [ENHANCEMENT] Observability: All log lines for drop series use "num_dropped" key consistently. #13823
* [ENHANCEMENT] Observability: Log chunk snapshot and mmaped chunk replay duration during WAL replay. #13838
* [ENHANCEMENT] Observability: Log chunk snapshot and mmapped chunk replay duration during WAL replay. #13838
* [ENHANCEMENT] Observability: Log if the block is being created from WBL during compaction. #13846
* [BUGFIX] PromQL: Fix inaccurate sample number statistic when querying histograms. #13667
* [BUGFIX] PromQL: Fix `histogram_stddev` and `histogram_stdvar` for cases where the histogram has negative buckets. #13852
@ -677,7 +731,7 @@ The binaries published with this release are built with Go1.17.8 to avoid [CVE-2
## 2.33.0 / 2022-01-29
* [CHANGE] PromQL: Promote negative offset and `@` modifer to stable features. #10121
* [CHANGE] PromQL: Promote negative offset and `@` modifier to stable features. #10121
* [CHANGE] Web: Promote remote-write-receiver to stable. #10119
* [FEATURE] Config: Add `stripPort` template function. #10002
* [FEATURE] Promtool: Add cardinality analysis to `check metrics`, enabled by flag `--extended`. #10045
@ -914,7 +968,7 @@ This vulnerability has been reported by Aaron Devaney from MDSec.
* [ENHANCEMENT] Templating: Enable parsing strings in `humanize` functions. #8682
* [BUGFIX] UI: Provide errors instead of blank page on TSDB Status Page. #8654 #8659
* [BUGFIX] TSDB: Do not panic when writing very large records to the WAL. #8790
* [BUGFIX] TSDB: Avoid panic when mmaped memory is referenced after the file is closed. #8723
* [BUGFIX] TSDB: Avoid panic when mmapped memory is referenced after the file is closed. #8723
* [BUGFIX] Scaleway Discovery: Fix nil pointer dereference. #8737
* [BUGFIX] Consul Discovery: Restart no longer required after config update with no targets. #8766
@ -1840,7 +1894,7 @@ information, read the announcement blog post and migration guide.
## 1.7.0 / 2017-06-06
* [CHANGE] Compress remote storage requests and responses with unframed/raw snappy.
* [CHANGE] Properly ellide secrets in config.
* [CHANGE] Properly elide secrets in config.
* [FEATURE] Add OpenStack service discovery.
* [FEATURE] Add ability to limit Kubernetes service discovery to certain namespaces.
* [FEATURE] Add metric for discovered number of Alertmanagers.

View file

@ -2,7 +2,6 @@
General maintainers:
* Bryan Boreham (bjboreham@gmail.com / @bboreham)
* Levi Harrison (levi@leviharrison.dev / @LeviHarrison)
* Ayoub Mrini (ayoubmrini424@gmail.com / @machine424)
* Julien Pivotto (roidelapluie@prometheus.io / @roidelapluie)
@ -17,9 +16,8 @@ Maintainers for specific parts of the codebase:
George Krajcsovits (<gyorgy.krajcsovits@grafana.com> / @krajorama)
* `storage`
* `remote`: Callum Styan (<callumstyan@gmail.com> / @cstyan), Bartłomiej Płotka (<bwplotka@gmail.com> / @bwplotka), Tom Wilkie (tom.wilkie@gmail.com / @tomwilkie), Nicolás Pazos ( <npazosmendez@gmail.com> / @npazosmendez), Alex Greenbank ( <alex.greenbank@grafana.com> / @alexgreenbank)
* `otlptranslator`: Arve Knudsen (<arve.knudsen@gmail.com> / @aknuds1), Jesús Vázquez (<jesus.vazquez@grafana.com> / @jesusvazquez)
* `otlptranslator`: Arthur Silva Sens (<arthursens2005@gmail.com> / @ArthurSens), Arve Knudsen (<arve.knudsen@gmail.com> / @aknuds1), Jesús Vázquez (<jesus.vazquez@grafana.com> / @jesusvazquez)
* `tsdb`: Ganesh Vernekar (<ganesh@grafana.com> / @codesome), Bartłomiej Płotka (<bwplotka@gmail.com> / @bwplotka), Jesús Vázquez (<jesus.vazquez@grafana.com> / @jesusvazquez)
* `agent`: Robert Fratto (<robert.fratto@grafana.com> / @rfratto)
* `web`
* `ui`: Julius Volz (<julius.volz@gmail.com> / @juliusv)
* `module`: Augustin Husson (<husson.augustin@gmail.com> @nexucis)

View file

@ -30,6 +30,11 @@ include Makefile.common
DOCKER_IMAGE_NAME ?= prometheus
# Only build UI if PREBUILT_ASSETS_STATIC_DIR is not set
ifdef PREBUILT_ASSETS_STATIC_DIR
SKIP_UI_BUILD = true
endif
.PHONY: update-npm-deps
update-npm-deps:
@echo ">> updating npm dependencies"
@ -75,8 +80,24 @@ ui-lint:
cd $(UI_PATH)/react-app && npm run lint
.PHONY: assets
ifndef SKIP_UI_BUILD
assets: ui-install ui-build
.PHONY: npm_licenses
npm_licenses: ui-install
@echo ">> bundling npm licenses"
rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses
ln -s . npm_licenses
find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=-
rm -f npm_licenses
else
assets:
@echo '>> skipping assets build, pre-built assets provided'
npm_licenses:
@echo '>> skipping assets npm licenses, pre-built assets provided'
endif
.PHONY: assets-compress
assets-compress: assets
@echo '>> compressing assets'
@ -125,14 +146,6 @@ else
test: check-generated-parser common-test ui-build-module ui-test ui-lint check-go-mod-version
endif
.PHONY: npm_licenses
npm_licenses: ui-install
@echo ">> bundling npm licenses"
rm -f $(REACT_APP_NPM_LICENSES_TARBALL) npm_licenses
ln -s . npm_licenses
find npm_licenses/$(UI_NODE_MODULES_PATH) -iname "license*" | tar cfj $(REACT_APP_NPM_LICENSES_TARBALL) --files-from=-
rm -f npm_licenses
.PHONY: tarball
tarball: npm_licenses common-tarball

View file

@ -275,3 +275,9 @@ $(1)_precheck:
exit 1; \
fi
endef
govulncheck: install-govulncheck
govulncheck ./...
install-govulncheck:
command -v govulncheck > /dev/null || go install golang.org/x/vuln/cmd/govulncheck@latest

View file

@ -115,7 +115,7 @@ The Makefile provides several targets:
Prometheus is bundled with many service discovery plugins.
When building Prometheus from source, you can edit the [plugins.yml](./plugins.yml)
file to disable some service discoveries. The file is a yaml-formated list of go
file to disable some service discoveries. The file is a yaml-formatted list of go
import paths that will be built into the Prometheus binary.
After you have changed the file, you
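For reference, plugins.yml is a plain YAML list of Go import paths, one per discovery plugin; a short sketch with an illustrative subset (see the file itself for the complete list):

```yaml
- github.com/prometheus/prometheus/discovery/consul
- github.com/prometheus/prometheus/discovery/dns
- github.com/prometheus/prometheus/discovery/kubernetes
```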

View file

@ -59,6 +59,7 @@ Release cadence of first pre-releases being cut is 6 weeks.
| v2.52 | 2024-04-22 | Arthur Silva Sens (GitHub: @ArthurSens) |
| v2.53 LTS | 2024-06-03 | George Krajcsovits (GitHub: @krajorama) |
| v2.54 | 2024-07-17 | Bryan Boreham (GitHub: @bboreham) |
| v2.55 | 2024-09-17 | Bryan Boreham (GitHub: @bboreham) |
If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.

View file

@ -1 +1 @@
3.0.0-beta.0
3.0.0-beta.1

View file

@ -18,11 +18,11 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"math"
"math/bits"
"net"
"net/http"
_ "net/http/pprof" // Comment this line to disable pprof endpoint.
"net/url"
"os"
"os/signal"
@ -38,8 +38,6 @@ import (
"github.com/KimMachineGun/automemlimit/memlimit"
"github.com/alecthomas/kingpin/v2"
"github.com/alecthomas/units"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/regexp"
"github.com/mwitkow/go-conntrack"
"github.com/oklog/run"
@ -47,8 +45,8 @@ import (
"github.com/prometheus/client_golang/prometheus/collectors"
versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promlog"
promlogflag "github.com/prometheus/common/promlog/flag"
"github.com/prometheus/common/promslog"
promslogflag "github.com/prometheus/common/promslog/flag"
"github.com/prometheus/common/version"
toolkit_web "github.com/prometheus/exporter-toolkit/web"
"go.uber.org/atomic"
@ -77,10 +75,50 @@ import (
"github.com/prometheus/prometheus/tsdb/wlog"
"github.com/prometheus/prometheus/util/documentcli"
"github.com/prometheus/prometheus/util/logging"
"github.com/prometheus/prometheus/util/notifications"
prom_runtime "github.com/prometheus/prometheus/util/runtime"
"github.com/prometheus/prometheus/web"
)
// klogv1OutputCallDepth is the stack depth where we can find the origin of this call.
const klogv1OutputCallDepth = 6
// klogv1DefaultPrefixLength is the length of the log prefix that we have to strip out.
const klogv1DefaultPrefixLength = 53
// klogv1Writer is used in SetOutputBySeverity call below to redirect any calls
// to klogv1 to end up in klogv2.
// This is a hack to support klogv1 without use of go-kit/log. It is inspired
// by klog's upstream klogv1/v2 coexistence example:
// https://github.com/kubernetes/klog/blob/main/examples/coexist_klog_v1_and_v2/coexist_klog_v1_and_v2.go
type klogv1Writer struct{}
// Write redirects klogv1 calls to klogv2.
// This is a hack to support klogv1 without use of go-kit/log. It is inspired
// by klog's upstream klogv1/v2 coexistence example:
// https://github.com/kubernetes/klog/blob/main/examples/coexist_klog_v1_and_v2/coexist_klog_v1_and_v2.go
func (kw klogv1Writer) Write(p []byte) (n int, err error) {
if len(p) < klogv1DefaultPrefixLength {
klogv2.InfoDepth(klogv1OutputCallDepth, string(p))
return len(p), nil
}
switch p[0] {
case 'I':
klogv2.InfoDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:]))
case 'W':
klogv2.WarningDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:]))
case 'E':
klogv2.ErrorDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:]))
case 'F':
klogv2.FatalDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:]))
default:
klogv2.InfoDepth(klogv1OutputCallDepth, string(p[klogv1DefaultPrefixLength:]))
}
return len(p), nil
}
var (
appName = "prometheus"
@ -135,24 +173,25 @@ func agentOnlyFlag(app *kingpin.Application, name, help string) *kingpin.FlagCla
type flagConfig struct {
configFile string
agentStoragePath string
serverStoragePath string
notifier notifier.Options
forGracePeriod model.Duration
outageTolerance model.Duration
resendDelay model.Duration
maxConcurrentEvals int64
web web.Options
scrape scrape.Options
tsdb tsdbOptions
agent agentOptions
lookbackDelta model.Duration
webTimeout model.Duration
queryTimeout model.Duration
queryConcurrency int
queryMaxSamples int
RemoteFlushDeadline model.Duration
nameEscapingScheme string
agentStoragePath string
serverStoragePath string
notifier notifier.Options
forGracePeriod model.Duration
outageTolerance model.Duration
resendDelay model.Duration
maxConcurrentEvals int64
web web.Options
scrape scrape.Options
tsdb tsdbOptions
agent agentOptions
lookbackDelta model.Duration
webTimeout model.Duration
queryTimeout model.Duration
queryConcurrency int
queryMaxSamples int
RemoteFlushDeadline model.Duration
nameEscapingScheme string
maxNotificationsSubscribers int
enableAutoReload bool
autoReloadInterval model.Duration
@ -161,94 +200,87 @@ type flagConfig struct {
memlimitRatio float64
// These options are extracted from featureList
// for ease of use.
enableExpandExternalLabels bool
enablePerStepStats bool
enableAutoGOMAXPROCS bool
enableAutoGOMEMLIMIT bool
enableConcurrentRuleEval bool
enablePerStepStats bool
enableAutoGOMAXPROCS bool
enableAutoGOMEMLIMIT bool
enableConcurrentRuleEval bool
prometheusURL string
corsRegexString string
promlogConfig promlog.Config
promqlEnableDelayedNameRemoval bool
promslogConfig promslog.Config
}
// setFeatureListOptions sets the corresponding options from the featureList.
func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
func (c *flagConfig) setFeatureListOptions(logger *slog.Logger) error {
for _, f := range c.featureList {
opts := strings.Split(f, ",")
for _, o := range opts {
switch o {
case "otlp-write-receiver":
c.web.EnableOTLPWriteReceiver = true
level.Info(logger).Log("msg", "Experimental OTLP write receiver enabled")
case "expand-external-labels":
c.enableExpandExternalLabels = true
level.Info(logger).Log("msg", "Experimental expand-external-labels enabled")
case "exemplar-storage":
c.tsdb.EnableExemplarStorage = true
level.Info(logger).Log("msg", "Experimental in-memory exemplar storage enabled")
logger.Info("Experimental in-memory exemplar storage enabled")
case "memory-snapshot-on-shutdown":
c.tsdb.EnableMemorySnapshotOnShutdown = true
level.Info(logger).Log("msg", "Experimental memory snapshot on shutdown enabled")
logger.Info("Experimental memory snapshot on shutdown enabled")
case "extra-scrape-metrics":
c.scrape.ExtraMetrics = true
level.Info(logger).Log("msg", "Experimental additional scrape metrics enabled")
logger.Info("Experimental additional scrape metrics enabled")
case "metadata-wal-records":
c.scrape.AppendMetadata = true
level.Info(logger).Log("msg", "Experimental metadata records in WAL enabled, required for remote write 2.0")
logger.Info("Experimental metadata records in WAL enabled, required for remote write 2.0")
case "promql-per-step-stats":
c.enablePerStepStats = true
level.Info(logger).Log("msg", "Experimental per-step statistics reporting")
logger.Info("Experimental per-step statistics reporting")
case "auto-gomaxprocs":
c.enableAutoGOMAXPROCS = true
level.Info(logger).Log("msg", "Automatically set GOMAXPROCS to match Linux container CPU quota")
logger.Info("Automatically set GOMAXPROCS to match Linux container CPU quota")
case "auto-reload-config":
c.enableAutoReload = true
if s := time.Duration(c.autoReloadInterval).Seconds(); s > 0 && s < 1 {
c.autoReloadInterval, _ = model.ParseDuration("1s")
}
level.Info(logger).Log("msg", fmt.Sprintf("Enabled automatic configuration file reloading. Checking for configuration changes every %s.", c.autoReloadInterval))
logger.Info("Enabled automatic configuration file reloading. Checking for configuration changes every", "interval", c.autoReloadInterval)
case "auto-gomemlimit":
c.enableAutoGOMEMLIMIT = true
level.Info(logger).Log("msg", "Automatically set GOMEMLIMIT to match Linux container or system memory limit")
logger.Info("Automatically set GOMEMLIMIT to match Linux container or system memory limit")
case "concurrent-rule-eval":
c.enableConcurrentRuleEval = true
level.Info(logger).Log("msg", "Experimental concurrent rule evaluation enabled.")
case "no-default-scrape-port":
c.scrape.NoDefaultPort = true
level.Info(logger).Log("msg", "No default port will be appended to scrape targets' addresses.")
logger.Info("Experimental concurrent rule evaluation enabled.")
case "promql-experimental-functions":
parser.EnableExperimentalFunctions = true
level.Info(logger).Log("msg", "Experimental PromQL functions enabled.")
logger.Info("Experimental PromQL functions enabled.")
case "native-histograms":
c.tsdb.EnableNativeHistograms = true
c.scrape.EnableNativeHistogramsIngestion = true
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
logger.Info("Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "ooo-native-histograms":
c.tsdb.EnableOOONativeHistograms = true
logger.Info("Experimental out-of-order native histogram ingestion enabled. This will only take effect if OutOfOrderTimeWindow is > 0 and if EnableNativeHistograms = true")
case "created-timestamp-zero-ingestion":
c.scrape.EnableCreatedTimestampZeroIngestion = true
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
level.Info(logger).Log("msg", "Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
logger.Info("Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "delayed-compaction":
c.tsdb.EnableDelayedCompaction = true
level.Info(logger).Log("msg", "Experimental delayed compaction is enabled.")
logger.Info("Experimental delayed compaction is enabled.")
case "promql-delayed-name-removal":
c.promqlEnableDelayedNameRemoval = true
level.Info(logger).Log("msg", "Experimental PromQL delayed name removal enabled.")
logger.Info("Experimental PromQL delayed name removal enabled.")
case "":
continue
case "old-ui":
c.web.UseOldUI = true
level.Info(logger).Log("msg", "Serving previous version of the Prometheus web UI.")
logger.Info("Serving previous version of the Prometheus web UI.")
default:
level.Warn(logger).Log("msg", "Unknown option for --enable-feature", "option", o)
logger.Warn("Unknown option for --enable-feature", "option", o)
}
}
}
@ -282,7 +314,7 @@ func main() {
Registerer: prometheus.DefaultRegisterer,
Gatherer: prometheus.DefaultGatherer,
},
promlogConfig: promlog.Config{},
promslogConfig: promslog.Config{},
}
a := kingpin.New(filepath.Base(os.Args[0]), "The Prometheus monitoring server").UsageWriter(os.Stdout)
@ -315,6 +347,9 @@ func main() {
a.Flag("web.max-connections", "Maximum number of simultaneous connections across all listeners.").
Default("512").IntVar(&cfg.web.MaxConnections)
a.Flag("web.max-notifications-subscribers", "Limits the maximum number of subscribers that can concurrently receive live notifications. If the limit is reached, new subscription requests will be denied until existing connections close.").
Default("16").IntVar(&cfg.maxNotificationsSubscribers)
a.Flag("web.external-url",
"The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically.").
PlaceHolder("<URL>").StringVar(&cfg.prometheusURL)
@ -341,6 +376,9 @@ func main() {
a.Flag("web.remote-write-receiver.accepted-protobuf-messages", fmt.Sprintf("List of the remote write protobuf messages to accept when receiving the remote writes. Supported values: %v", supportedRemoteWriteProtoMsgs.String())).
Default(supportedRemoteWriteProtoMsgs.Strings()...).SetValue(rwProtoMsgFlagValue(&cfg.web.AcceptRemoteWriteProtoMsgs))
a.Flag("web.enable-otlp-receiver", "Enable API endpoint accepting OTLP write requests.").
Default("false").BoolVar(&cfg.web.EnableOTLPWriteReceiver)
a.Flag("web.console.templates", "Path to the console template directory, available at /consoles.").
Default("consoles").StringVar(&cfg.web.ConsoleTemplatesPath)
@ -380,6 +418,9 @@ func main() {
serverOnlyFlag(a, "storage.tsdb.no-lockfile", "Do not create lockfile in data directory.").
Default("false").BoolVar(&cfg.tsdb.NoLockfile)
serverOnlyFlag(a, "storage.tsdb.allow-overlapping-compaction", "Allow compaction of overlapping blocks. If set to false, TSDB stops vertical compaction and leaves overlapping blocks there. The use case is to let another component handle the compaction of overlapping blocks.").
Default("true").Hidden().BoolVar(&cfg.tsdb.EnableOverlappingCompaction)
serverOnlyFlag(a, "storage.tsdb.wal-compression", "Compress the tsdb WAL.").
Hidden().Default("true").BoolVar(&cfg.tsdb.WALCompression)
@ -392,6 +433,9 @@ func main() {
serverOnlyFlag(a, "storage.tsdb.samples-per-chunk", "Target number of samples per chunk.").
Default("120").Hidden().IntVar(&cfg.tsdb.SamplesPerChunk)
serverOnlyFlag(a, "storage.tsdb.delayed-compaction.max-percent", "Sets the upper limit for the random compaction delay, specified as a percentage of the head chunk range. 100 means the compaction can be delayed by up to the entire head chunk range. Only effective when the delayed-compaction feature flag is enabled.").
Default("10").Hidden().IntVar(&cfg.tsdb.CompactionDelayMaxPercent)
agentOnlyFlag(a, "storage.agent.path", "Base path for metrics storage.").
Default("data-agent/").StringVar(&cfg.agentStoragePath)
@ -471,12 +515,12 @@ func main() {
a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, auto-gomaxprocs, native-histograms, created-timestamp-zero-ingestion, concurrent-rule-eval, delayed-compaction, old-ui. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
Default("").StringsVar(&cfg.featureList)
a.Flag("agent", "Run Prometheus in 'Agent mode'.").BoolVar(&agentMode)
promlogflag.AddFlags(a, &cfg.promlogConfig)
promslogflag.AddFlags(a, &cfg.promslogConfig)
a.Flag("write-documentation", "Generate command line documentation. Internal use.").Hidden().Action(func(ctx *kingpin.ParseContext) error {
if err := documentcli.GenerateMarkdown(a.Model(), os.Stdout); err != nil {
@ -494,7 +538,13 @@ func main() {
os.Exit(2)
}
logger := promlog.New(&cfg.promlogConfig)
logger := promslog.New(&cfg.promslogConfig)
slog.SetDefault(logger)
notifs := notifications.NewNotifications(cfg.maxNotificationsSubscribers, prometheus.DefaultRegisterer)
cfg.web.NotificationsSub = notifs.Sub
cfg.web.NotificationsGetter = notifs.Get
notifs.AddNotification(notifications.StartingUp)
if err := cfg.setFeatureListOptions(logger); err != nil {
fmt.Fprintln(os.Stderr, fmt.Errorf("Error parsing feature list: %w", err))
@ -544,12 +594,12 @@ func main() {
// Throw error for invalid config before starting other components.
var cfgFile *config.Config
if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, false, log.NewNopLogger()); err != nil {
if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, promslog.NewNopLogger()); err != nil {
absPath, pathErr := filepath.Abs(cfg.configFile)
if pathErr != nil {
absPath = cfg.configFile
}
level.Error(logger).Log("msg", fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "file", absPath, "err", err)
logger.Error(fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "file", absPath, "err", err)
os.Exit(2)
}
if _, err := cfgFile.GetScrapeConfigs(); err != nil {
@ -557,7 +607,7 @@ func main() {
if pathErr != nil {
absPath = cfg.configFile
}
level.Error(logger).Log("msg", fmt.Sprintf("Error loading scrape config files from config (--config.file=%q)", cfg.configFile), "file", absPath, "err", err)
logger.Error(fmt.Sprintf("Error loading scrape config files from config (--config.file=%q)", cfg.configFile), "file", absPath, "err", err)
os.Exit(2)
}
if cfg.tsdb.EnableExemplarStorage {
@ -590,7 +640,7 @@ func main() {
if !agentMode {
if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 {
cfg.tsdb.RetentionDuration = defaultRetentionDuration
level.Info(logger).Log("msg", "No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration)
logger.Info("No time or size retention was set so using the default time retention", "duration", defaultRetentionDuration)
}
// Check for overflows. This limits our max retention to 100y.
@ -600,7 +650,7 @@ func main() {
panic(err)
}
cfg.tsdb.RetentionDuration = y
level.Warn(logger).Log("msg", "Time retention value is too high. Limiting to: "+y.String())
logger.Warn("Time retention value is too high. Limiting to: " + y.String())
}
// Max block size settings.
@ -616,16 +666,19 @@ func main() {
cfg.tsdb.MaxBlockDuration = maxBlockDuration
}
// Delayed compaction checks
if cfg.tsdb.EnableDelayedCompaction && (cfg.tsdb.CompactionDelayMaxPercent > 100 || cfg.tsdb.CompactionDelayMaxPercent <= 0) {
logger.Warn("The --storage.tsdb.delayed-compaction.max-percent should have a value between 1 and 100. Using default", "default", tsdb.DefaultCompactionDelayMaxPercent)
cfg.tsdb.CompactionDelayMaxPercent = tsdb.DefaultCompactionDelayMaxPercent
}
}
noStepSubqueryInterval := &safePromQLNoStepSubqueryInterval{}
noStepSubqueryInterval.Set(config.DefaultGlobalConfig.EvaluationInterval)
// Above level 6, the k8s client would log bearer tokens in clear-text.
klog.ClampLevel(6)
klog.SetLogger(log.With(logger, "component", "k8s_client_runtime"))
klogv2.ClampLevel(6)
klogv2.SetLogger(log.With(logger, "component", "k8s_client_runtime"))
klogv2.SetSlogLogger(logger.With("component", "k8s_client_runtime"))
klog.SetOutputBySeverity("INFO", klogv1Writer{})
modeAppName := "Prometheus Server"
mode := "server"
@ -634,20 +687,22 @@ func main() {
mode = "agent"
}
level.Info(logger).Log("msg", "Starting "+modeAppName, "mode", mode, "version", version.Info())
logger.Info("Starting "+modeAppName, "mode", mode, "version", version.Info())
if bits.UintSize < 64 {
level.Warn(logger).Log("msg", "This Prometheus binary has not been compiled for a 64-bit architecture. Due to virtual memory constraints of 32-bit systems, it is highly recommended to switch to a 64-bit binary of Prometheus.", "GOARCH", runtime.GOARCH)
logger.Warn("This Prometheus binary has not been compiled for a 64-bit architecture. Due to virtual memory constraints of 32-bit systems, it is highly recommended to switch to a 64-bit binary of Prometheus.", "GOARCH", runtime.GOARCH)
}
level.Info(logger).Log("build_context", version.BuildContext())
level.Info(logger).Log("host_details", prom_runtime.Uname())
level.Info(logger).Log("fd_limits", prom_runtime.FdLimits())
level.Info(logger).Log("vm_limits", prom_runtime.VMLimits())
logger.Info("operational information",
"build_context", version.BuildContext(),
"host_details", prom_runtime.Uname(),
"fd_limits", prom_runtime.FdLimits(),
"vm_limits", prom_runtime.VMLimits(),
)
var (
localStorage = &readyStorage{stats: tsdb.NewDBStats()}
scraper = &readyScrapeManager{}
remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper, cfg.scrape.AppendMetadata)
remoteStorage = remote.NewStorage(logger.With("component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, localStoragePath, time.Duration(cfg.RemoteFlushDeadline), scraper, cfg.scrape.AppendMetadata)
fanoutStorage = storage.NewFanout(logger, localStorage, remoteStorage)
)
@ -655,7 +710,7 @@ func main() {
ctxWeb, cancelWeb = context.WithCancel(context.Background())
ctxRule = context.Background()
notifierManager = notifier.NewManager(&cfg.notifier, log.With(logger, "component", "notifier"))
notifierManager = notifier.NewManager(&cfg.notifier, logger.With("component", "notifier"))
ctxScrape, cancelScrape = context.WithCancel(context.Background())
ctxNotify, cancelNotify = context.WithCancel(context.Background())
@ -670,37 +725,37 @@ func main() {
// they are not specific to an SD instance.
err = discovery.RegisterK8sClientMetricsWithPrometheus(prometheus.DefaultRegisterer)
if err != nil {
level.Error(logger).Log("msg", "failed to register Kubernetes client metrics", "err", err)
logger.Error("failed to register Kubernetes client metrics", "err", err)
os.Exit(1)
}
sdMetrics, err := discovery.CreateAndRegisterSDMetrics(prometheus.DefaultRegisterer)
if err != nil {
level.Error(logger).Log("msg", "failed to register service discovery metrics", "err", err)
logger.Error("failed to register service discovery metrics", "err", err)
os.Exit(1)
}
discoveryManagerScrape = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape"))
discoveryManagerScrape = discovery.NewManager(ctxScrape, logger.With("component", "discovery manager scrape"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("scrape"))
if discoveryManagerScrape == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager scrape")
logger.Error("failed to create a discovery manager scrape")
os.Exit(1)
}
discoveryManagerNotify = discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify"))
discoveryManagerNotify = discovery.NewManager(ctxNotify, logger.With("component", "discovery manager notify"), prometheus.DefaultRegisterer, sdMetrics, discovery.Name("notify"))
if discoveryManagerNotify == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager notify")
logger.Error("failed to create a discovery manager notify")
os.Exit(1)
}
scrapeManager, err := scrape.NewManager(
&cfg.scrape,
log.With(logger, "component", "scrape manager"),
func(s string) (log.Logger, error) { return logging.NewJSONFileLogger(s) },
logger.With("component", "scrape manager"),
logging.NewJSONFileLogger,
fanoutStorage,
prometheus.DefaultRegisterer,
)
if err != nil {
level.Error(logger).Log("msg", "failed to create a scrape manager", "err", err)
logger.Error("failed to create a scrape manager", "err", err)
os.Exit(1)
}
@ -713,10 +768,10 @@ func main() {
if cfg.enableAutoGOMAXPROCS {
l := func(format string, a ...interface{}) {
level.Info(logger).Log("component", "automaxprocs", "msg", fmt.Sprintf(strings.TrimPrefix(format, "maxprocs: "), a...))
logger.Info(fmt.Sprintf(strings.TrimPrefix(format, "maxprocs: "), a...), "component", "automaxprocs")
}
if _, err := maxprocs.Set(maxprocs.Logger(l)); err != nil {
level.Warn(logger).Log("component", "automaxprocs", "msg", "Failed to set GOMAXPROCS automatically", "err", err)
logger.Warn("Failed to set GOMAXPROCS automatically", "component", "automaxprocs", "err", err)
}
}
@ -730,17 +785,17 @@ func main() {
),
),
); err != nil {
level.Warn(logger).Log("component", "automemlimit", "msg", "Failed to set GOMEMLIMIT automatically", "err", err)
logger.Warn("automemlimit", "msg", "Failed to set GOMEMLIMIT automatically", "err", err)
}
}
if !agentMode {
opts := promql.EngineOpts{
Logger: log.With(logger, "component", "query engine"),
Logger: logger.With("component", "query engine"),
Reg: prometheus.DefaultRegisterer,
MaxSamples: cfg.queryMaxSamples,
Timeout: time.Duration(cfg.queryTimeout),
ActiveQueryTracker: promql.NewActiveQueryTracker(localStoragePath, cfg.queryConcurrency, log.With(logger, "component", "activeQueryTracker")),
ActiveQueryTracker: promql.NewActiveQueryTracker(localStoragePath, cfg.queryConcurrency, logger.With("component", "activeQueryTracker")),
LookbackDelta: time.Duration(cfg.lookbackDelta),
NoStepSubqueryIntervalFn: noStepSubqueryInterval.Get,
// EnableAtModifier and EnableNegativeOffset have to be
@ -761,7 +816,7 @@ func main() {
Context: ctxRule,
ExternalURL: cfg.web.ExternalURL,
Registerer: prometheus.DefaultRegisterer,
Logger: log.With(logger, "component", "rule manager"),
Logger: logger.With("component", "rule manager"),
OutageTolerance: time.Duration(cfg.outageTolerance),
ForGracePeriod: time.Duration(cfg.forGracePeriod),
ResendDelay: time.Duration(cfg.resendDelay),
@ -812,7 +867,7 @@ func main() {
}
// Depends on cfg.web.ScrapeManager so needs to be after cfg.web.ScrapeManager = scrapeManager.
webHandler := web.New(log.With(logger, "component", "web"), &cfg.web)
webHandler := web.New(logger.With("component", "web"), &cfg.web)
// Monitor outgoing connections on default transport with conntrack.
http.DefaultTransport.(*http.Transport).DialContext = conntrack.NewDialContextFunc(
@ -939,18 +994,18 @@ func main() {
listeners, err := webHandler.Listeners()
if err != nil {
level.Error(logger).Log("msg", "Unable to start web listeners", "err", err)
logger.Error("Unable to start web listener", "err", err)
if err := queryEngine.Close(); err != nil {
level.Warn(logger).Log("msg", "Closing query engine failed", "err", err)
logger.Warn("Closing query engine failed", "err", err)
}
os.Exit(1)
}
err = toolkit_web.Validate(*webConfig)
if err != nil {
level.Error(logger).Log("msg", "Unable to validate web configuration file", "err", err)
logger.Error("Unable to validate web configuration file", "err", err)
if err := queryEngine.Close(); err != nil {
level.Warn(logger).Log("msg", "Closing query engine failed", "err", err)
logger.Warn("Closing query engine failed", "err", err)
}
os.Exit(1)
}
@ -966,21 +1021,22 @@ func main() {
// Don't forget to release the reloadReady channel so that waiting blocks can exit normally.
select {
case sig := <-term:
level.Warn(logger).Log("msg", "Received an OS signal, exiting gracefully...", "signal", sig.String())
logger.Warn("Received an OS signal, exiting gracefully...", "signal", sig.String())
reloadReady.Close()
case <-webHandler.Quit():
level.Warn(logger).Log("msg", "Received termination request via web service, exiting gracefully...")
logger.Warn("Received termination request via web service, exiting gracefully...")
case <-cancel:
reloadReady.Close()
}
if err := queryEngine.Close(); err != nil {
level.Warn(logger).Log("msg", "Closing query engine failed", "err", err)
logger.Warn("Closing query engine failed", "err", err)
}
return nil
},
func(err error) {
close(cancel)
webHandler.SetReady(false)
webHandler.SetReady(web.Stopping)
notifs.AddNotification(notifications.ShuttingDown)
},
)
}
@ -989,11 +1045,11 @@ func main() {
g.Add(
func() error {
err := discoveryManagerScrape.Run()
level.Info(logger).Log("msg", "Scrape discovery manager stopped")
logger.Info("Scrape discovery manager stopped")
return err
},
func(err error) {
level.Info(logger).Log("msg", "Stopping scrape discovery manager...")
logger.Info("Stopping scrape discovery manager...")
cancelScrape()
},
)
@ -1003,11 +1059,11 @@ func main() {
g.Add(
func() error {
err := discoveryManagerNotify.Run()
level.Info(logger).Log("msg", "Notify discovery manager stopped")
logger.Info("Notify discovery manager stopped")
return err
},
func(err error) {
level.Info(logger).Log("msg", "Stopping notify discovery manager...")
logger.Info("Stopping notify discovery manager...")
cancelNotify()
},
)
@ -1036,7 +1092,7 @@ func main() {
<-reloadReady.C
err := scrapeManager.Run(discoveryManagerScrape.SyncCh())
level.Info(logger).Log("msg", "Scrape manager stopped")
logger.Info("Scrape manager stopped")
return err
},
func(err error) {
@ -1044,7 +1100,7 @@ func main() {
// so that it doesn't try to write samples to a closed storage.
// We should also wait for rule manager to be fully stopped to ensure
// we don't trigger any false positive alerts for rules using absent().
level.Info(logger).Log("msg", "Stopping scrape manager...")
logger.Info("Stopping scrape manager...")
scrapeManager.Stop()
},
)
@ -1075,10 +1131,18 @@ func main() {
if cfg.enableAutoReload {
checksum, err = config.GenerateChecksum(cfg.configFile)
if err != nil {
level.Error(logger).Log("msg", "Failed to generate initial checksum for configuration file", "err", err)
logger.Error("Failed to generate initial checksum for configuration file", "err", err)
}
}
callback := func(success bool) {
if success {
notifs.DeleteNotification(notifications.ConfigurationUnsuccessful)
return
}
notifs.AddNotification(notifications.ConfigurationUnsuccessful)
}
g.Add(
func() error {
<-reloadReady.C
@ -1086,18 +1150,18 @@ func main() {
for {
select {
case <-hup:
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
level.Error(logger).Log("msg", "Error reloading config", "err", err)
if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil {
logger.Error("Error reloading config", "err", err)
} else if cfg.enableAutoReload {
if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil {
checksum = currentChecksum
} else {
level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err)
logger.Error("Failed to generate checksum during configuration reload", "err", err)
}
}
case rc := <-webHandler.Reload():
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
level.Error(logger).Log("msg", "Error reloading config", "err", err)
if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil {
logger.Error("Error reloading config", "err", err)
rc <- err
} else {
rc <- nil
@ -1105,7 +1169,7 @@ func main() {
if currentChecksum, err := config.GenerateChecksum(cfg.configFile); err == nil {
checksum = currentChecksum
} else {
level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err)
logger.Error("Failed to generate checksum during configuration reload", "err", err)
}
}
}
@ -1115,16 +1179,14 @@ func main() {
}
currentChecksum, err := config.GenerateChecksum(cfg.configFile)
if err != nil {
level.Error(logger).Log("msg", "Failed to generate checksum during configuration reload", "err", err)
logger.Error("Failed to generate checksum during configuration reload", "err", err)
} else if currentChecksum == checksum {
continue
}
if currentChecksum == checksum {
continue
}
level.Info(logger).Log("msg", "Configuration file change detected, reloading the configuration.")
logger.Info("Configuration file change detected, reloading the configuration.")
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
level.Error(logger).Log("msg", "Error reloading config", "err", err)
if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, callback, reloaders...); err != nil {
logger.Error("Error reloading config", "err", err)
} else {
checksum = currentChecksum
}
@ -1153,14 +1215,15 @@ func main() {
return nil
}
if err := reloadConfig(cfg.configFile, cfg.enableExpandExternalLabels, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, reloaders...); err != nil {
if err := reloadConfig(cfg.configFile, cfg.tsdb.EnableExemplarStorage, logger, noStepSubqueryInterval, func(bool) {}, reloaders...); err != nil {
return fmt.Errorf("error loading config from %q: %w", cfg.configFile, err)
}
reloadReady.Close()
webHandler.SetReady(true)
level.Info(logger).Log("msg", "Server is ready to receive web requests.")
webHandler.SetReady(web.Ready)
notifs.DeleteNotification(notifications.StartingUp)
logger.Info("Server is ready to receive web requests.")
<-cancel
return nil
},
@ -1175,7 +1238,7 @@ func main() {
cancel := make(chan struct{})
g.Add(
func() error {
level.Info(logger).Log("msg", "Starting TSDB ...")
logger.Info("Starting TSDB ...")
if cfg.tsdb.WALSegmentSize != 0 {
if cfg.tsdb.WALSegmentSize < 10*1024*1024 || cfg.tsdb.WALSegmentSize > 256*1024*1024 {
return errors.New("flag 'storage.tsdb.wal-segment-size' must be set between 10MB and 256MB")
@ -1194,13 +1257,13 @@ func main() {
switch fsType := prom_runtime.Statfs(localStoragePath); fsType {
case "NFS_SUPER_MAGIC":
level.Warn(logger).Log("fs_type", fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.")
logger.Warn("This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.", "fs_type", fsType)
default:
level.Info(logger).Log("fs_type", fsType)
logger.Info("filesystem information", "fs_type", fsType)
}
level.Info(logger).Log("msg", "TSDB started")
level.Debug(logger).Log("msg", "TSDB options",
logger.Info("TSDB started")
logger.Debug("TSDB options",
"MinBlockDuration", cfg.tsdb.MinBlockDuration,
"MaxBlockDuration", cfg.tsdb.MaxBlockDuration,
"MaxBytes", cfg.tsdb.MaxBytes,
@ -1219,7 +1282,7 @@ func main() {
},
func(err error) {
if err := fanoutStorage.Close(); err != nil {
level.Error(logger).Log("msg", "Error stopping storage", "err", err)
logger.Error("Error stopping storage", "err", err)
}
close(cancel)
},
@ -1231,7 +1294,7 @@ func main() {
cancel := make(chan struct{})
g.Add(
func() error {
level.Info(logger).Log("msg", "Starting WAL storage ...")
logger.Info("Starting WAL storage ...")
if cfg.agent.WALSegmentSize != 0 {
if cfg.agent.WALSegmentSize < 10*1024*1024 || cfg.agent.WALSegmentSize > 256*1024*1024 {
return errors.New("flag 'storage.agent.wal-segment-size' must be set between 10MB and 256MB")
@ -1250,13 +1313,13 @@ func main() {
switch fsType := prom_runtime.Statfs(localStoragePath); fsType {
case "NFS_SUPER_MAGIC":
level.Warn(logger).Log("fs_type", fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.")
logger.Warn(fsType, "msg", "This filesystem is not supported and may lead to data corruption and data loss. Please carefully read https://prometheus.io/docs/prometheus/latest/storage/ to learn more about supported filesystems.")
default:
level.Info(logger).Log("fs_type", fsType)
logger.Info(fsType)
}
level.Info(logger).Log("msg", "Agent WAL storage started")
level.Debug(logger).Log("msg", "Agent WAL storage options",
logger.Info("Agent WAL storage started")
logger.Debug("Agent WAL storage options",
"WALSegmentSize", cfg.agent.WALSegmentSize,
"WALCompression", cfg.agent.WALCompression,
"StripeSize", cfg.agent.StripeSize,
@ -1274,7 +1337,7 @@ func main() {
},
func(e error) {
if err := fanoutStorage.Close(); err != nil {
level.Error(logger).Log("msg", "Error stopping storage", "err", err)
logger.Error("Error stopping storage", "err", err)
}
close(cancel)
},
@ -1308,7 +1371,7 @@ func main() {
<-reloadReady.C
notifierManager.Run(discoveryManagerNotify.SyncCh())
level.Info(logger).Log("msg", "Notifier manager stopped")
logger.Info("Notifier manager stopped")
return nil
},
func(err error) {
@ -1317,16 +1380,16 @@ func main() {
)
}
if err := g.Run(); err != nil {
level.Error(logger).Log("err", err)
logger.Error("Error running goroutines from run.Group", "err", err)
os.Exit(1)
}
level.Info(logger).Log("msg", "See you next time!")
logger.Info("See you next time!")
}
func openDBWithMetrics(dir string, logger log.Logger, reg prometheus.Registerer, opts *tsdb.Options, stats *tsdb.DBStats) (*tsdb.DB, error) {
func openDBWithMetrics(dir string, logger *slog.Logger, reg prometheus.Registerer, opts *tsdb.Options, stats *tsdb.DBStats) (*tsdb.DB, error) {
db, err := tsdb.Open(
dir,
log.With(logger, "component", "tsdb"),
logger.With("component", "tsdb"),
reg,
opts,
stats,
@ -1379,21 +1442,23 @@ type reloader struct {
reloader func(*config.Config) error
}
func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage bool, logger log.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, rls ...reloader) (err error) {
func reloadConfig(filename string, enableExemplarStorage bool, logger *slog.Logger, noStepSuqueryInterval *safePromQLNoStepSubqueryInterval, callback func(bool), rls ...reloader) (err error) {
start := time.Now()
timings := []interface{}{}
level.Info(logger).Log("msg", "Loading configuration file", "filename", filename)
timingsLogger := logger
logger.Info("Loading configuration file", "filename", filename)
defer func() {
if err == nil {
configSuccess.Set(1)
configSuccessTime.SetToCurrentTime()
callback(true)
} else {
configSuccess.Set(0)
callback(false)
}
}()
conf, err := config.LoadFile(filename, agentMode, expandExternalLabels, logger)
conf, err := config.LoadFile(filename, agentMode, logger)
if err != nil {
return fmt.Errorf("couldn't load configuration (--config.file=%q): %w", filename, err)
}
@ -1408,10 +1473,10 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b
for _, rl := range rls {
rstart := time.Now()
if err := rl.reloader(conf); err != nil {
level.Error(logger).Log("msg", "Failed to apply configuration", "err", err)
logger.Error("Failed to apply configuration", "err", err)
failed = true
}
timings = append(timings, rl.name, time.Since(rstart))
timingsLogger = timingsLogger.With((rl.name), time.Since(rstart))
}
if failed {
return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename)
@ -1419,7 +1484,7 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b
oldGoGC := debug.SetGCPercent(conf.Runtime.GoGC)
if oldGoGC != conf.Runtime.GoGC {
level.Info(logger).Log("msg", "updated GOGC", "old", oldGoGC, "new", conf.Runtime.GoGC)
logger.Info("updated GOGC", "old", oldGoGC, "new", conf.Runtime.GoGC)
}
// Write the new setting out to the ENV var for runtime API output.
if conf.Runtime.GoGC >= 0 {
@ -1429,8 +1494,7 @@ func reloadConfig(filename string, expandExternalLabels, enableExemplarStorage b
}
noStepSuqueryInterval.Set(conf.GlobalConfig.EvaluationInterval)
l := []interface{}{"msg", "Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start)}
level.Info(logger).Log(append(l, timings...)...)
timingsLogger.Info("Completed loading of configuration file", "filename", filename, "totalDuration", time.Since(start))
return nil
}
@ -1584,6 +1648,9 @@ func (s *readyStorage) Appender(ctx context.Context) storage.Appender {
type notReadyAppender struct{}
// SetOptions does nothing in this appender implementation.
func (n notReadyAppender) SetOptions(opts *storage.AppendOptions) {}
func (n notReadyAppender) Append(ref storage.SeriesRef, l labels.Labels, t int64, v float64) (storage.SeriesRef, error) {
return 0, tsdb.ErrNotReady
}
@ -1596,6 +1663,10 @@ func (n notReadyAppender) AppendHistogram(ref storage.SeriesRef, l labels.Labels
return 0, tsdb.ErrNotReady
}
func (n notReadyAppender) AppendHistogramCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64, h *histogram.Histogram, fh *histogram.FloatHistogram) (storage.SeriesRef, error) {
return 0, tsdb.ErrNotReady
}
func (n notReadyAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels, m metadata.Metadata) (storage.SeriesRef, error) {
return 0, tsdb.ErrNotReady
}
@ -1734,7 +1805,9 @@ type tsdbOptions struct {
EnableMemorySnapshotOnShutdown bool
EnableNativeHistograms bool
EnableDelayedCompaction bool
CompactionDelayMaxPercent int
EnableOverlappingCompaction bool
EnableOOONativeHistograms bool
}
func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
@ -1754,8 +1827,10 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
MaxExemplars: opts.MaxExemplars,
EnableMemorySnapshotOnShutdown: opts.EnableMemorySnapshotOnShutdown,
EnableNativeHistograms: opts.EnableNativeHistograms,
EnableOOONativeHistograms: opts.EnableOOONativeHistograms,
OutOfOrderTimeWindow: opts.OutOfOrderTimeWindow,
EnableDelayedCompaction: opts.EnableDelayedCompaction,
CompactionDelayMaxPercent: opts.CompactionDelayMaxPercent,
EnableOverlappingCompaction: opts.EnableOverlappingCompaction,
}
}
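
The main.go changes above replace github.com/go-kit/log with the standard library's log/slog, constructed through github.com/prometheus/common/promslog. A minimal sketch of the resulting pattern, assuming promslog's defaults (illustrative only, not part of this commit):

package main

import (
	"log/slog"

	"github.com/prometheus/common/promslog"
)

// newComponentLogger mirrors the post-change pattern: build a *slog.Logger via
// promslog and bind per-component context with With instead of go-kit's log.With.
func newComponentLogger(component string) *slog.Logger {
	logger := promslog.New(&promslog.Config{}) // assumption: logfmt to stderr by default
	return logger.With("component", component)
}

func main() {
	logger := newComponentLogger("example")
	logger.Info("Server is ready to receive web requests.")
	logger.Warn("Failed to set GOMAXPROCS automatically", "err", "illustrative error")
}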

View file

@ -31,9 +31,9 @@ import (
"time"
"github.com/alecthomas/kingpin/v2"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/config"
@ -125,6 +125,7 @@ func TestFailedStartupExitCode(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
t.Parallel()
fakeInputFile := "fake-input-file"
expectedExitStatus := 2
@ -211,83 +212,125 @@ func TestWALSegmentSizeBounds(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
t.Parallel()
for size, expectedExitStatus := range map[string]int{"9MB": 1, "257MB": 1, "10": 2, "1GB": 1, "12MB": 0} {
prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"))
for _, tc := range []struct {
size string
exitCode int
}{
{
size: "9MB",
exitCode: 1,
},
{
size: "257MB",
exitCode: 1,
},
{
size: "10",
exitCode: 2,
},
{
size: "1GB",
exitCode: 1,
},
{
size: "12MB",
exitCode: 0,
},
} {
t.Run(tc.size, func(t *testing.T) {
t.Parallel()
prom := exec.Command(promPath, "-test.main", "--storage.tsdb.wal-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"))
// Log stderr in case of failure.
stderr, err := prom.StderrPipe()
require.NoError(t, err)
go func() {
slurp, _ := io.ReadAll(stderr)
t.Log(string(slurp))
}()
// Log stderr in case of failure.
stderr, err := prom.StderrPipe()
require.NoError(t, err)
go func() {
slurp, _ := io.ReadAll(stderr)
t.Log(string(slurp))
}()
err = prom.Start()
require.NoError(t, err)
err = prom.Start()
require.NoError(t, err)
if expectedExitStatus == 0 {
done := make(chan error, 1)
go func() { done <- prom.Wait() }()
select {
case err := <-done:
require.Fail(t, "prometheus should be still running: %v", err)
case <-time.After(startupTime):
prom.Process.Kill()
<-done
if tc.exitCode == 0 {
done := make(chan error, 1)
go func() { done <- prom.Wait() }()
select {
case err := <-done:
require.Fail(t, "prometheus should be still running: %v", err)
case <-time.After(startupTime):
prom.Process.Kill()
<-done
}
return
}
continue
}
err = prom.Wait()
require.Error(t, err)
var exitError *exec.ExitError
require.ErrorAs(t, err, &exitError)
status := exitError.Sys().(syscall.WaitStatus)
require.Equal(t, expectedExitStatus, status.ExitStatus())
err = prom.Wait()
require.Error(t, err)
var exitError *exec.ExitError
require.ErrorAs(t, err, &exitError)
status := exitError.Sys().(syscall.WaitStatus)
require.Equal(t, tc.exitCode, status.ExitStatus())
})
}
}
func TestMaxBlockChunkSegmentSizeBounds(t *testing.T) {
t.Parallel()
if testing.Short() {
t.Skip("skipping test in short mode.")
}
t.Parallel()
for size, expectedExitStatus := range map[string]int{"512KB": 1, "1MB": 0} {
prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"))
for _, tc := range []struct {
size string
exitCode int
}{
{
size: "512KB",
exitCode: 1,
},
{
size: "1MB",
exitCode: 0,
},
} {
t.Run(tc.size, func(t *testing.T) {
t.Parallel()
prom := exec.Command(promPath, "-test.main", "--storage.tsdb.max-block-chunk-segment-size="+tc.size, "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig, "--storage.tsdb.path="+filepath.Join(t.TempDir(), "data"))
// Log stderr in case of failure.
stderr, err := prom.StderrPipe()
require.NoError(t, err)
go func() {
slurp, _ := io.ReadAll(stderr)
t.Log(string(slurp))
}()
// Log stderr in case of failure.
stderr, err := prom.StderrPipe()
require.NoError(t, err)
go func() {
slurp, _ := io.ReadAll(stderr)
t.Log(string(slurp))
}()
err = prom.Start()
require.NoError(t, err)
err = prom.Start()
require.NoError(t, err)
if expectedExitStatus == 0 {
done := make(chan error, 1)
go func() { done <- prom.Wait() }()
select {
case err := <-done:
require.Fail(t, "prometheus should be still running: %v", err)
case <-time.After(startupTime):
prom.Process.Kill()
<-done
if tc.exitCode == 0 {
done := make(chan error, 1)
go func() { done <- prom.Wait() }()
select {
case err := <-done:
require.Fail(t, "prometheus should be still running: %v", err)
case <-time.After(startupTime):
prom.Process.Kill()
<-done
}
return
}
continue
}
err = prom.Wait()
require.Error(t, err)
var exitError *exec.ExitError
require.ErrorAs(t, err, &exitError)
status := exitError.Sys().(syscall.WaitStatus)
require.Equal(t, expectedExitStatus, status.ExitStatus())
err = prom.Wait()
require.Error(t, err)
var exitError *exec.ExitError
require.ErrorAs(t, err, &exitError)
status := exitError.Sys().(syscall.WaitStatus)
require.Equal(t, tc.exitCode, status.ExitStatus())
})
}
}
@ -295,7 +338,7 @@ func TestTimeMetrics(t *testing.T) {
tmpDir := t.TempDir()
reg := prometheus.NewRegistry()
db, err := openDBWithMetrics(tmpDir, log.NewNopLogger(), reg, nil, nil)
db, err := openDBWithMetrics(tmpDir, promslog.NewNopLogger(), reg, nil, nil)
require.NoError(t, err)
defer func() {
require.NoError(t, db.Close())
@ -353,6 +396,8 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames
}
func TestAgentSuccessfulStartup(t *testing.T) {
t.Parallel()
prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+agentConfig)
require.NoError(t, prom.Start())
@ -371,6 +416,8 @@ func TestAgentSuccessfulStartup(t *testing.T) {
}
func TestAgentFailedStartupWithServerFlag(t *testing.T) {
t.Parallel()
prom := exec.Command(promPath, "-test.main", "--agent", "--storage.tsdb.path=.", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
output := bytes.Buffer{}
@ -398,6 +445,8 @@ func TestAgentFailedStartupWithServerFlag(t *testing.T) {
}
func TestAgentFailedStartupWithInvalidConfig(t *testing.T) {
t.Parallel()
prom := exec.Command(promPath, "-test.main", "--agent", "--web.listen-address=0.0.0.0:0", "--config.file="+promConfig)
require.NoError(t, prom.Start())
@ -419,6 +468,7 @@ func TestModeSpecificFlags(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
t.Parallel()
testcases := []struct {
mode string
@ -433,6 +483,7 @@ func TestModeSpecificFlags(t *testing.T) {
for _, tc := range testcases {
t.Run(fmt.Sprintf("%s mode with option %s", tc.mode, tc.arg), func(t *testing.T) {
t.Parallel()
args := []string{"-test.main", tc.arg, t.TempDir(), "--web.listen-address=0.0.0.0:0"}
if tc.mode == "agent" {
@ -484,6 +535,8 @@ func TestDocumentation(t *testing.T) {
if runtime.GOOS == "windows" {
t.SkipNow()
}
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
@ -508,6 +561,8 @@ func TestDocumentation(t *testing.T) {
}
func TestRwProtoMsgFlagParser(t *testing.T) {
t.Parallel()
defaultOpts := config.RemoteWriteProtoMsgs{
config.RemoteWriteProtoMsgV1, config.RemoteWriteProtoMsgV2,
}
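
The test refactors above repeatedly convert anonymous map iteration into named, parallel subtests. A generic sketch of that shape (hypothetical test, not taken from this commit; assumes Go 1.22+ per-iteration loop variables):

package main_test

import "testing"

func TestSegmentSizeBounds(t *testing.T) {
	t.Parallel()
	for _, tc := range []struct {
		size     string
		exitCode int
	}{
		{size: "9MB", exitCode: 1},
		{size: "12MB", exitCode: 0},
	} {
		t.Run(tc.size, func(t *testing.T) {
			t.Parallel()    // cases run concurrently and show up individually in test output
			_ = tc.exitCode // ... start the binary with tc.size and assert on the exit code ...
		})
	}
}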

View file

@ -34,6 +34,7 @@ func TestStartupInterrupt(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
t.Parallel()
port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t))

View file

@ -125,12 +125,61 @@ func (p *queryLogTest) query(t *testing.T) {
require.NoError(t, err)
require.Equal(t, 200, r.StatusCode)
case ruleOrigin:
time.Sleep(2 * time.Second)
// Poll the /api/v1/rules endpoint until a new rule evaluation is detected.
var lastEvalTime time.Time
for {
r, err := http.Get(fmt.Sprintf("http://%s:%d/api/v1/rules", p.host, p.port))
require.NoError(t, err)
rulesBody, err := io.ReadAll(r.Body)
require.NoError(t, err)
defer r.Body.Close()
// Parse the rules response to find the last evaluation time.
newEvalTime := parseLastEvaluation(rulesBody)
if newEvalTime.After(lastEvalTime) {
if !lastEvalTime.IsZero() {
break
}
lastEvalTime = newEvalTime
}
time.Sleep(100 * time.Millisecond)
}
default:
panic("can't query this origin")
}
}
// parseLastEvaluation extracts the last evaluation timestamp from the /api/v1/rules response.
func parseLastEvaluation(rulesBody []byte) time.Time {
var ruleResponse struct {
Status string `json:"status"`
Data struct {
Groups []struct {
Rules []struct {
LastEvaluation string `json:"lastEvaluation"`
} `json:"rules"`
} `json:"groups"`
} `json:"data"`
}
err := json.Unmarshal(rulesBody, &ruleResponse)
if err != nil {
return time.Time{}
}
for _, group := range ruleResponse.Data.Groups {
for _, rule := range group.Rules {
if evalTime, err := time.Parse(time.RFC3339Nano, rule.LastEvaluation); err == nil {
return evalTime
}
}
}
return time.Time{}
}
// queryString returns the expected queryString of this test.
func (p *queryLogTest) queryString() string {
switch p.origin {
@ -322,7 +371,7 @@ func (p *queryLogTest) run(t *testing.T) {
if p.exactQueryCount() {
require.Len(t, ql, qc)
} else {
require.Greater(t, len(ql), qc, "no queries logged")
require.GreaterOrEqual(t, len(ql), qc, "no queries logged")
}
p.validateLastQuery(t, ql)
qc = len(ql)
@ -353,7 +402,7 @@ func (p *queryLogTest) run(t *testing.T) {
if p.exactQueryCount() {
require.Len(t, ql, qc)
} else {
require.Greater(t, len(ql), qc, "no queries logged")
require.GreaterOrEqual(t, len(ql), qc, "no queries logged")
}
p.validateLastQuery(t, ql)
@ -393,6 +442,7 @@ func readQueryLog(t *testing.T, path string) []queryLogLine {
file, err := os.Open(path)
require.NoError(t, err)
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
var q queryLogLine
@ -406,6 +456,7 @@ func TestQueryLog(t *testing.T) {
if testing.Short() {
t.Skip("skipping test in short mode.")
}
t.Parallel()
cwd, err := os.Getwd()
require.NoError(t, err)
@ -424,6 +475,7 @@ func TestQueryLog(t *testing.T) {
}
t.Run(p.String(), func(t *testing.T) {
t.Parallel()
p.run(t)
})
}

View file

@ -21,9 +21,10 @@ import (
"math"
"time"
"github.com/go-kit/log"
"github.com/oklog/ulid"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/textparse"
"github.com/prometheus/prometheus/tsdb"
@ -120,7 +121,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
// also need to append samples throughout the whole block range. To allow that, we
// pretend that the block is twice as large here, but only really add samples in the
// original interval later.
w, err := tsdb.NewBlockWriter(log.NewNopLogger(), outputDir, 2*blockDuration)
w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), outputDir, 2*blockDuration)
if err != nil {
return fmt.Errorf("block writer: %w", err)
}

View file

@ -32,13 +32,13 @@ import (
"time"
"github.com/alecthomas/kingpin/v2"
"github.com/go-kit/log"
"github.com/google/pprof/profile"
"github.com/prometheus/client_golang/api"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil/promlint"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/common/version"
"github.com/prometheus/exporter-toolkit/web"
"gopkg.in/yaml.v2"
@ -58,6 +58,7 @@ import (
_ "github.com/prometheus/prometheus/plugins" // Register plugins.
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/promql/promqltest"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/scrape"
"github.com/prometheus/prometheus/util/documentcli"
)
@ -216,6 +217,7 @@ func main() {
"test-rule-file",
"The unit test file.",
).Required().ExistingFiles()
testRulesDebug := testRulesCmd.Flag("debug", "Enable unit test debugging.").Default("false").Bool()
testRulesDiff := testRulesCmd.Flag("diff", "[Experimental] Print colored differential output between expected & received output.").Default("false").Bool()
defaultDBPath := "data/"
@ -291,7 +293,7 @@ func main() {
promQLLabelsDeleteQuery := promQLLabelsDeleteCmd.Arg("query", "PromQL query.").Required().String()
promQLLabelsDeleteName := promQLLabelsDeleteCmd.Arg("name", "Name of the label to delete.").Required().String()
featureList := app.Flag("enable-feature", "Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details.").Default("").Strings()
featureList := app.Flag("enable-feature", "Comma separated feature names to enable. Currently unused.").Default("").Strings()
documentationCmd := app.Command("write-documentation", "Generate command line documentation. Internal use.").Hidden()
@ -321,24 +323,21 @@ func main() {
}
}
var noDefaultScrapePort bool
for _, f := range *featureList {
opts := strings.Split(f, ",")
for _, o := range opts {
switch o {
case "no-default-scrape-port":
noDefaultScrapePort = true
case "":
continue
default:
fmt.Printf(" WARNING: Unknown option for --enable-feature: %q\n", o)
fmt.Printf(" WARNING: --enable-feature is currently a no-op")
}
}
}
switch parsedCmd {
case sdCheckCmd.FullCommand():
os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort, prometheus.DefaultRegisterer))
os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, prometheus.DefaultRegisterer))
case checkConfigCmd.FullCommand():
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...))
@ -394,6 +393,7 @@ func main() {
},
*testRulesRun,
*testRulesDiff,
*testRulesDebug,
*testRulesFiles...),
)
@ -578,7 +578,7 @@ func checkFileExists(fn string) error {
func checkConfig(agentMode bool, filename string, checkSyntaxOnly bool) ([]string, error) {
fmt.Println("Checking", filename)
cfg, err := config.LoadFile(filename, agentMode, false, log.NewNopLogger())
cfg, err := config.LoadFile(filename, agentMode, promslog.NewNopLogger())
if err != nil {
return nil, err
}
@ -898,30 +898,30 @@ func compare(a, b compareRuleType) int {
func checkDuplicates(groups []rulefmt.RuleGroup) []compareRuleType {
var duplicates []compareRuleType
var rules compareRuleTypes
var cRules compareRuleTypes
for _, group := range groups {
for _, rule := range group.Rules {
rules = append(rules, compareRuleType{
cRules = append(cRules, compareRuleType{
metric: ruleMetric(rule),
label: labels.FromMap(rule.Labels),
label: rules.FromMaps(group.Labels, rule.Labels),
})
}
}
if len(rules) < 2 {
if len(cRules) < 2 {
return duplicates
}
sort.Sort(rules)
sort.Sort(cRules)
last := rules[0]
for i := 1; i < len(rules); i++ {
if compare(last, rules[i]) == 0 {
last := cRules[0]
for i := 1; i < len(cRules); i++ {
if compare(last, cRules[i]) == 0 {
// Don't add a duplicated rule multiple times.
if len(duplicates) == 0 || compare(last, duplicates[len(duplicates)-1]) != 0 {
duplicates = append(duplicates, rules[i])
duplicates = append(duplicates, cRules[i])
}
}
last = rules[i]
last = cRules[i]
}
return duplicates
@ -1185,7 +1185,7 @@ func importRules(url *url.URL, roundTripper http.RoundTripper, start, end, outpu
return fmt.Errorf("new api client error: %w", err)
}
ruleImporter := newRuleImporter(log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)), cfg, api)
ruleImporter := newRuleImporter(promslog.New(&promslog.Config{}), cfg, api)
errs := ruleImporter.loadGroups(ctx, files)
for _, err := range errs {
if err != nil {
@ -1219,7 +1219,7 @@ func checkTargetGroupsForScrapeConfig(targetGroups []*targetgroup.Group, scfg *c
lb := labels.NewBuilder(labels.EmptyLabels())
for _, tg := range targetGroups {
var failures []error
targets, failures = scrape.TargetsFromGroup(tg, scfg, false, targets, lb)
targets, failures = scrape.TargetsFromGroup(tg, scfg, targets, lb)
if len(failures) > 0 {
first := failures[0]
return first

View file

@ -146,7 +146,7 @@ func TestCheckSDFile(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
_, err := checkSDFile(test.file)
if test.err != "" {
require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error())
require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error())
return
}
require.NoError(t, err)
@ -228,7 +228,7 @@ func TestCheckTargetConfig(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
_, err := checkConfig(false, "testdata/"+test.file, false)
if test.err != "" {
require.Equalf(t, test.err, err.Error(), "Expected error %q, got %q", test.err, err.Error())
require.EqualErrorf(t, err, test.err, "Expected error %q, got %q", test.err, err.Error())
return
}
require.NoError(t, err)
@ -315,7 +315,7 @@ func TestCheckConfigSyntax(t *testing.T) {
expectedErrMsg = test.errWindows
}
if expectedErrMsg != "" {
require.Equalf(t, expectedErrMsg, err.Error(), "Expected error %q, got %q", test.err, err.Error())
require.EqualErrorf(t, err, expectedErrMsg, "Expected error %q, got %q", test.err, err.Error())
return
}
require.NoError(t, err)
@ -345,7 +345,7 @@ func TestAuthorizationConfig(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
_, err := checkConfig(false, "testdata/"+test.file, false)
if test.err != "" {
require.Contains(t, err.Error(), test.err, "Expected error to contain %q, got %q", test.err, err.Error())
require.ErrorContains(t, err, test.err, "Expected error to contain %q, got %q", test.err, err.Error())
return
}
require.NoError(t, err)

View file

@ -16,12 +16,12 @@ package main
import (
"context"
"fmt"
"log/slog"
"time"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/timestamp"
@ -38,7 +38,7 @@ type queryRangeAPI interface {
}
type ruleImporter struct {
logger log.Logger
logger *slog.Logger
config ruleImporterConfig
apiClient queryRangeAPI
@ -57,8 +57,8 @@ type ruleImporterConfig struct {
// newRuleImporter creates a new rule importer that can be used to parse and evaluate recording rule files and create new series
// written to disk in blocks.
func newRuleImporter(logger log.Logger, config ruleImporterConfig, apiClient queryRangeAPI) *ruleImporter {
level.Info(logger).Log("backfiller", "new rule importer", "start", config.start.Format(time.RFC822), "end", config.end.Format(time.RFC822))
func newRuleImporter(logger *slog.Logger, config ruleImporterConfig, apiClient queryRangeAPI) *ruleImporter {
logger.Info("new rule importer", "component", "backfiller", "start", config.start.Format(time.RFC822), "end", config.end.Format(time.RFC822))
return &ruleImporter{
logger: logger,
config: config,
@ -80,10 +80,10 @@ func (importer *ruleImporter) loadGroups(_ context.Context, filenames []string)
// importAll evaluates all the recording rules and creates new time series and writes them to disk in blocks.
func (importer *ruleImporter) importAll(ctx context.Context) (errs []error) {
for name, group := range importer.groups {
level.Info(importer.logger).Log("backfiller", "processing group", "name", name)
importer.logger.Info("processing group", "component", "backfiller", "name", name)
for i, r := range group.Rules() {
level.Info(importer.logger).Log("backfiller", "processing rule", "id", i, "name", r.Name())
importer.logger.Info("processing rule", "component", "backfiller", "id", i, "name", r.Name())
if err := importer.importRule(ctx, r.Query().String(), r.Name(), r.Labels(), importer.config.start, importer.config.end, int64(importer.config.maxBlockDuration/time.Millisecond), group); err != nil {
errs = append(errs, err)
}
@ -124,7 +124,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName
return fmt.Errorf("query range: %w", err)
}
if warnings != nil {
level.Warn(importer.logger).Log("msg", "Range query returned warnings.", "warnings", warnings)
importer.logger.Warn("Range query returned warnings.", "warnings", warnings)
}
// To prevent races with compaction, a block writer only allows appending samples
@ -133,7 +133,7 @@ func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName
// also need to append samples throughout the whole block range. To allow that, we
// pretend that the block is twice as large here, but only really add samples in the
// original interval later.
w, err := tsdb.NewBlockWriter(log.NewNopLogger(), importer.config.outputDir, 2*blockDuration)
w, err := tsdb.NewBlockWriter(promslog.NewNopLogger(), importer.config.outputDir, 2*blockDuration)
if err != nil {
return fmt.Errorf("new block writer: %w", err)
}

View file

@ -21,9 +21,9 @@ import (
"testing"
"time"
"github.com/go-kit/log"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/labels"
@ -161,7 +161,7 @@ func TestBackfillRuleIntegration(t *testing.T) {
}
func newTestRuleImporter(_ context.Context, start time.Time, tmpDir string, testSamples model.Matrix, maxBlockDuration time.Duration) (*ruleImporter, error) {
logger := log.NewNopLogger()
logger := promslog.NewNopLogger()
cfg := ruleImporterConfig{
outputDir: tmpDir,
start: start.Add(-10 * time.Hour),

View file

@ -20,9 +20,9 @@ import (
"os"
"time"
"github.com/go-kit/log"
"github.com/google/go-cmp/cmp"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
@ -38,10 +38,10 @@ type sdCheckResult struct {
}
// CheckSD performs service discovery for the given job name and reports the results.
func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool, registerer prometheus.Registerer) int {
logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))
func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, registerer prometheus.Registerer) int {
logger := promslog.New(&promslog.Config{})
cfg, err := config.LoadFile(sdConfigFiles, false, false, logger)
cfg, err := config.LoadFile(sdConfigFiles, false, logger)
if err != nil {
fmt.Fprintln(os.Stderr, "Cannot load config", err)
return failureExitCode
@ -114,7 +114,7 @@ outerLoop:
}
results := []sdCheckResult{}
for _, tgs := range sdCheckResults {
results = append(results, getSDCheckResult(tgs, scrapeConfig, noDefaultScrapePort)...)
results = append(results, getSDCheckResult(tgs, scrapeConfig)...)
}
res, err := json.MarshalIndent(results, "", " ")
@ -127,7 +127,7 @@ outerLoop:
return successExitCode
}
func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.ScrapeConfig, noDefaultScrapePort bool) []sdCheckResult {
func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.ScrapeConfig) []sdCheckResult {
sdCheckResults := []sdCheckResult{}
lb := labels.NewBuilder(labels.EmptyLabels())
for _, targetGroup := range targetGroups {
@ -144,7 +144,7 @@ func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.Sc
}
}
res, orig, err := scrape.PopulateLabels(lb, scrapeConfig, noDefaultScrapePort)
res, orig, err := scrape.PopulateLabels(lb, scrapeConfig)
result := sdCheckResult{
DiscoveredLabels: orig,
Labels: res,

View file

@ -70,5 +70,5 @@ func TestSDCheckResult(t *testing.T) {
},
}
testutil.RequireEqual(t, expectedSDCheckResult, getSDCheckResult(targetGroups, scrapeConfig, true))
testutil.RequireEqual(t, expectedSDCheckResult, getSDCheckResult(targetGroups, scrapeConfig))
}

View file

@ -6,7 +6,7 @@ scrape_configs:
alerting:
alertmanagers:
- scheme: http
api_version: v1
api_version: v2
file_sd_configs:
- files:
- nonexistent_file.yml

View file

@ -69,13 +69,13 @@ tests:
eval_time: 2m
exp_samples:
- labels: "test_histogram_repeat"
histogram: "{{count:2 sum:3 buckets:[2]}}"
histogram: "{{count:2 sum:3 counter_reset_hint:not_reset buckets:[2]}}"
- expr: test_histogram_increase
eval_time: 2m
exp_samples:
- labels: "test_histogram_increase"
histogram: "{{count:4 sum:5.6 buckets:[4]}}"
histogram: "{{count:4 sum:5.6 counter_reset_hint:not_reset buckets:[4]}}"
# Ensure a value is stale as soon as it is marked as such.
- expr: test_stale

View file

@ -20,6 +20,7 @@ import (
"errors"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"runtime"
@ -32,9 +33,10 @@ import (
"time"
"github.com/alecthomas/units"
"github.com/go-kit/log"
"go.uber.org/atomic"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/storage"
@ -60,7 +62,7 @@ type writeBenchmark struct {
memprof *os.File
blockprof *os.File
mtxprof *os.File
logger log.Logger
logger *slog.Logger
}
func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) error {
@ -68,7 +70,7 @@ func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) err
outPath: outPath,
samplesFile: samplesFile,
numMetrics: numMetrics,
logger: log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)),
logger: promslog.New(&promslog.Config{}),
}
if b.outPath == "" {
dir, err := os.MkdirTemp("", "tsdb_bench")
@ -87,9 +89,7 @@ func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) err
dir := filepath.Join(b.outPath, "storage")
l := log.With(b.logger, "ts", log.DefaultTimestampUTC, "caller", log.DefaultCaller)
st, err := tsdb.Open(dir, l, nil, &tsdb.Options{
st, err := tsdb.Open(dir, b.logger, nil, &tsdb.Options{
RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond),
MinBlockDuration: int64(2 * time.Hour / time.Millisecond),
}, tsdb.NewDBStats())
@ -367,25 +367,25 @@ func printBlocks(blocks []tsdb.BlockReader, writeHeader, humanReadable bool) {
fmt.Fprintf(tw,
"%v\t%v\t%v\t%v\t%v\t%v\t%v\t%v\n",
meta.ULID,
getFormatedTime(meta.MinTime, humanReadable),
getFormatedTime(meta.MaxTime, humanReadable),
getFormattedTime(meta.MinTime, humanReadable),
getFormattedTime(meta.MaxTime, humanReadable),
time.Duration(meta.MaxTime-meta.MinTime)*time.Millisecond,
meta.Stats.NumSamples,
meta.Stats.NumChunks,
meta.Stats.NumSeries,
getFormatedBytes(b.Size(), humanReadable),
getFormattedBytes(b.Size(), humanReadable),
)
}
}
func getFormatedTime(timestamp int64, humanReadable bool) string {
func getFormattedTime(timestamp int64, humanReadable bool) string {
if humanReadable {
return time.Unix(timestamp/1000, 0).UTC().String()
}
return strconv.FormatInt(timestamp, 10)
}
func getFormatedBytes(bytes int64, humanReadable bool) string {
func getFormattedBytes(bytes int64, humanReadable bool) string {
if humanReadable {
return units.Base2Bytes(bytes).String()
}

View file

@ -26,13 +26,13 @@ import (
"strings"
"time"
"github.com/go-kit/log"
"github.com/google/go-cmp/cmp"
"github.com/grafana/regexp"
"github.com/nsf/jsondiff"
"gopkg.in/yaml.v2"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
@ -46,11 +46,11 @@ import (
// RulesUnitTest does unit testing of rules based on the unit testing files provided.
// More info about the file format can be found in the docs.
func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int {
return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, files...)
func RulesUnitTest(queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int {
return RulesUnitTestResult(io.Discard, queryOpts, runStrings, diffFlag, debug, files...)
}
func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag bool, files ...string) int {
func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts, runStrings []string, diffFlag, debug bool, files ...string) int {
failed := false
junit := &junitxml.JUnitXML{}
@ -60,7 +60,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts,
}
for _, f := range files {
if errs := ruleUnitTest(f, queryOpts, run, diffFlag, junit.Suite(f)); errs != nil {
if errs := ruleUnitTest(f, queryOpts, run, diffFlag, debug, junit.Suite(f)); errs != nil {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
@ -82,7 +82,7 @@ func RulesUnitTestResult(results io.Writer, queryOpts promqltest.LazyLoaderOpts,
return successExitCode
}
func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag bool, ts *junitxml.TestSuite) []error {
func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *regexp.Regexp, diffFlag, debug bool, ts *junitxml.TestSuite) []error {
b, err := os.ReadFile(filename)
if err != nil {
ts.Abort(err)
@ -131,7 +131,7 @@ func ruleUnitTest(filename string, queryOpts promqltest.LazyLoaderOpts, run *reg
if t.Interval == 0 {
t.Interval = unitTestInp.EvaluationInterval
}
ers := t.test(evalInterval, groupOrderMap, queryOpts, diffFlag, unitTestInp.RuleFiles...)
ers := t.test(testname, evalInterval, groupOrderMap, queryOpts, diffFlag, debug, unitTestInp.RuleFiles...)
if ers != nil {
for _, e := range ers {
tc.Fail(e.Error())
@ -198,7 +198,14 @@ type testGroup struct {
}
// test performs the unit tests.
func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag bool, ruleFiles ...string) (outErr []error) {
func (tg *testGroup) test(testname string, evalInterval time.Duration, groupOrderMap map[string]int, queryOpts promqltest.LazyLoaderOpts, diffFlag, debug bool, ruleFiles ...string) (outErr []error) {
if debug {
testStart := time.Now()
fmt.Printf("DEBUG: Starting test %s\n", testname)
defer func() {
fmt.Printf("DEBUG: Test %s finished, took %v\n", testname, time.Since(testStart))
}()
}
// Setup testing suite.
suite, err := promqltest.NewLazyLoader(tg.seriesLoadingString(), queryOpts)
if err != nil {
@ -218,7 +225,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
Appendable: suite.Storage(),
Context: context.Background(),
NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {},
Logger: log.NewNopLogger(),
Logger: promslog.NewNopLogger(),
}
m := rules.NewManager(opts)
groupsMap, ers := m.LoadGroups(time.Duration(tg.Interval), tg.ExternalLabels, tg.ExternalURL, nil, ruleFiles...)
@ -482,6 +489,32 @@ Outer:
}
}
if debug {
ts := tg.maxEvalTime()
// Potentially a test can be specified at a time with fractional seconds,
// which PromQL cannot represent, so round up to the next whole second.
ts = (ts + time.Second).Truncate(time.Second)
expr := fmt.Sprintf(`{__name__=~".+"}[%v]`, ts)
q, err := suite.QueryEngine().NewInstantQuery(context.Background(), suite.Queryable(), nil, expr, mint.Add(ts))
if err != nil {
fmt.Printf("DEBUG: Failed querying, expr: %q, err: %v\n", expr, err)
return errs
}
res := q.Exec(suite.Context())
if res.Err != nil {
fmt.Printf("DEBUG: Failed query exec, expr: %q, err: %v\n", expr, res.Err)
return errs
}
switch v := res.Value.(type) {
case promql.Matrix:
fmt.Printf("DEBUG: Dump of all data (input_series and rules) at %v:\n", ts)
fmt.Println(v.String())
default:
fmt.Printf("DEBUG: Got unexpected type %T\n", v)
return errs
}
}
if len(errs) > 0 {
return errs
}

View file

@ -141,14 +141,14 @@ func TestRulesUnitTest(t *testing.T) {
reuseCount[tt.want] += len(tt.args.files)
}
t.Run(tt.name, func(t *testing.T) {
if got := RulesUnitTest(tt.queryOpts, nil, false, tt.args.files...); got != tt.want {
if got := RulesUnitTest(tt.queryOpts, nil, false, false, tt.args.files...); got != tt.want {
t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want)
}
})
}
t.Run("Junit xml output ", func(t *testing.T) {
var buf bytes.Buffer
if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, reuseFiles...); got != 1 {
if got := RulesUnitTestResult(&buf, promqltest.LazyLoaderOpts{}, nil, false, false, reuseFiles...); got != 1 {
t.Errorf("RulesUnitTestResults() = %v, want 1", got)
}
var test junitxml.JUnitXML
@ -230,7 +230,7 @@ func TestRulesUnitTestRun(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := RulesUnitTest(tt.queryOpts, tt.args.run, false, tt.args.files...)
got := RulesUnitTest(tt.queryOpts, tt.args.run, false, false, tt.args.files...)
require.Equal(t, tt.want, got)
})
}

View file

@ -16,6 +16,8 @@ package config
import (
"errors"
"fmt"
"log/slog"
"mime"
"net/url"
"os"
"path/filepath"
@ -25,8 +27,6 @@ import (
"time"
"github.com/alecthomas/units"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/regexp"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -73,7 +73,7 @@ const (
)
// Load parses the YAML input s into a Config.
func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, error) {
func Load(s string, logger *slog.Logger) (*Config, error) {
cfg := &Config{}
// If the entire config body is empty the UnmarshalYAML method is
// never called. We thus have to set the DefaultConfig at the entry
@ -85,10 +85,6 @@ func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, erro
return nil, err
}
if !expandExternalLabels {
return cfg, nil
}
b := labels.NewScratchBuilder(0)
cfg.GlobalConfig.ExternalLabels.Range(func(v labels.Label) {
newV := os.Expand(v.Value, func(s string) string {
@ -98,26 +94,28 @@ func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, erro
if v := os.Getenv(s); v != "" {
return v
}
level.Warn(logger).Log("msg", "Empty environment variable", "name", s)
logger.Warn("Empty environment variable", "name", s)
return ""
})
if newV != v.Value {
level.Debug(logger).Log("msg", "External label replaced", "label", v.Name, "input", v.Value, "output", newV)
logger.Debug("External label replaced", "label", v.Name, "input", v.Value, "output", newV)
}
// Note newV can be blank. https://github.com/prometheus/prometheus/issues/11024
b.Add(v.Name, newV)
})
cfg.GlobalConfig.ExternalLabels = b.Labels()
if !b.Labels().IsEmpty() {
cfg.GlobalConfig.ExternalLabels = b.Labels()
}
return cfg, nil
}
// LoadFile parses the given YAML file into a Config.
func LoadFile(filename string, agentMode, expandExternalLabels bool, logger log.Logger) (*Config, error) {
func LoadFile(filename string, agentMode bool, logger *slog.Logger) (*Config, error) {
content, err := os.ReadFile(filename)
if err != nil {
return nil, err
}
cfg, err := Load(string(content), expandExternalLabels, logger)
cfg, err := Load(string(content), logger)
if err != nil {
return nil, fmt.Errorf("parsing YAML file %s: %w", filename, err)
}
@ -166,13 +164,13 @@ var (
// DefaultScrapeConfig is the default scrape configuration.
DefaultScrapeConfig = ScrapeConfig{
// ScrapeTimeout, ScrapeInterval and ScrapeProtocols default to the configured globals.
ScrapeClassicHistograms: false,
MetricsPath: "/metrics",
Scheme: "http",
HonorLabels: false,
HonorTimestamps: true,
HTTPClientConfig: config.DefaultHTTPClientConfig,
EnableCompression: true,
AlwaysScrapeClassicHistograms: false,
MetricsPath: "/metrics",
Scheme: "http",
HonorLabels: false,
HonorTimestamps: true,
HTTPClientConfig: config.DefaultHTTPClientConfig,
EnableCompression: true,
}
// DefaultAlertmanagerConfig is the default alertmanager configuration.
@ -183,13 +181,18 @@ var (
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
DefaultRemoteWriteHTTPClientConfig = config.HTTPClientConfig{
FollowRedirects: true,
EnableHTTP2: false,
}
// DefaultRemoteWriteConfig is the default remote write configuration.
DefaultRemoteWriteConfig = RemoteWriteConfig{
RemoteTimeout: model.Duration(30 * time.Second),
ProtobufMessage: RemoteWriteProtoMsgV1,
QueueConfig: DefaultQueueConfig,
MetadataConfig: DefaultMetadataConfig,
HTTPClientConfig: config.DefaultHTTPClientConfig,
HTTPClientConfig: DefaultRemoteWriteHTTPClientConfig,
}
// DefaultQueueConfig is the default remote queue configuration.
@ -476,9 +479,22 @@ func (s ScrapeProtocol) Validate() error {
return nil
}
// HeaderMediaType returns the MIME mediaType for a particular ScrapeProtocol.
func (s ScrapeProtocol) HeaderMediaType() string {
if _, ok := ScrapeProtocolsHeaders[s]; !ok {
return ""
}
mediaType, _, err := mime.ParseMediaType(ScrapeProtocolsHeaders[s])
if err != nil {
return ""
}
return mediaType
}
var (
PrometheusProto ScrapeProtocol = "PrometheusProto"
PrometheusText0_0_4 ScrapeProtocol = "PrometheusText0.0.4"
PrometheusText1_0_0 ScrapeProtocol = "PrometheusText1.0.0"
OpenMetricsText0_0_1 ScrapeProtocol = "OpenMetricsText0.0.1"
OpenMetricsText1_0_0 ScrapeProtocol = "OpenMetricsText1.0.0"
UTF8NamesHeader string = model.EscapingKey + "=" + model.AllowUTF8
@ -486,6 +502,7 @@ var (
ScrapeProtocolsHeaders = map[ScrapeProtocol]string{
PrometheusProto: "application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited",
PrometheusText0_0_4: "text/plain;version=0.0.4",
PrometheusText1_0_0: "text/plain;version=1.0.0;escaping=allow-utf-8",
OpenMetricsText0_0_1: "application/openmetrics-text;version=0.0.1",
OpenMetricsText1_0_0: "application/openmetrics-text;version=1.0.0",
}
@ -495,6 +512,7 @@ var (
DefaultScrapeProtocols = []ScrapeProtocol{
OpenMetricsText1_0_0,
OpenMetricsText0_0_1,
PrometheusText1_0_0,
PrometheusText0_0_4,
}
@ -506,6 +524,7 @@ var (
PrometheusProto,
OpenMetricsText1_0_0,
OpenMetricsText0_0_1,
PrometheusText1_0_0,
PrometheusText0_0_4,
}
)
@ -632,10 +651,17 @@ type ScrapeConfig struct {
// The protocols to negotiate during a scrape. It tells clients what
// protocols are accepted by Prometheus and with what preference (most wanted is first).
// Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1,
// OpenMetricsText1.0.0, PrometheusText0.0.4.
// OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4.
ScrapeProtocols []ScrapeProtocol `yaml:"scrape_protocols,omitempty"`
// Whether to scrape a classic histogram that is also exposed as a native histogram.
ScrapeClassicHistograms bool `yaml:"scrape_classic_histograms,omitempty"`
// The fallback protocol to use if the target does not provide a Content-Type,
// or provides one that is blank or not one of the expected values.
// Supported values (case sensitive): PrometheusProto, OpenMetricsText0.0.1,
// OpenMetricsText1.0.0, PrometheusText1.0.0, PrometheusText0.0.4.
ScrapeFallbackProtocol ScrapeProtocol `yaml:"fallback_scrape_protocol,omitempty"`
// Whether to scrape a classic histogram, even if it is also exposed as a native histogram.
AlwaysScrapeClassicHistograms bool `yaml:"always_scrape_classic_histograms,omitempty"`
// Whether to convert all scraped classic histograms into a native histogram with custom buckets.
ConvertClassicHistogramsToNHCB bool `yaml:"convert_classic_histograms_to_nhcb,omitempty"`
// File to which scrape failures are logged.
ScrapeFailureLogFile string `yaml:"scrape_failure_log_file,omitempty"`
// The HTTP resource path on which to fetch metrics from targets.
@ -783,6 +809,12 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
return fmt.Errorf("%w for scrape config with job name %q", err, c.JobName)
}
if c.ScrapeFallbackProtocol != "" {
if err := c.ScrapeFallbackProtocol.Validate(); err != nil {
return fmt.Errorf("invalid fallback_scrape_protocol for scrape config with job name %q: %w", c.JobName, err)
}
}
switch globalConfig.MetricNameValidationScheme {
case LegacyValidationConfig:
case "", UTF8ValidationConfig:
@ -958,6 +990,7 @@ func (a AlertmanagerConfigs) ToMap() map[string]*AlertmanagerConfig {
// AlertmanagerAPIVersion represents a version of the
// github.com/prometheus/alertmanager/api, e.g. 'v1' or 'v2'.
// 'v1' is no longer supported.
type AlertmanagerAPIVersion string
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -987,7 +1020,7 @@ const (
)
var SupportedAlertmanagerAPIVersions = []AlertmanagerAPIVersion{
AlertmanagerAPIVersionV1, AlertmanagerAPIVersionV2,
AlertmanagerAPIVersionV2,
}
// AlertmanagerConfig configures how Alertmanagers can be discovered and communicated with.
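
A brief, illustrative sketch of the new config surface added above: the fallback_scrape_protocol field, the PrometheusText1.0.0 protocol, and HeaderMediaType. The job name and target below are made up, and the snippet assumes the post-change Load signature shown in this diff:

package main

import (
	"fmt"

	"github.com/prometheus/common/promslog"

	"github.com/prometheus/prometheus/config"
)

func main() {
	// Hypothetical scrape config exercising the new fallback_scrape_protocol field.
	yml := `
scrape_configs:
  - job_name: example
    fallback_scrape_protocol: PrometheusText1.0.0
    static_configs:
      - targets: ["localhost:9100"]
`
	cfg, err := config.Load(yml, promslog.NewNopLogger())
	if err != nil {
		panic(err)
	}
	fmt.Println(cfg.ScrapeConfigs[0].ScrapeFallbackProtocol) // PrometheusText1.0.0
	// HeaderMediaType strips parameters from the protocol's Content-Type header value.
	fmt.Println(config.PrometheusText1_0_0.HeaderMediaType()) // text/plain
}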

View file

@ -24,10 +24,10 @@ import (
"time"
"github.com/alecthomas/units"
"github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@ -142,7 +142,7 @@ var expectedConf = &Config{
},
},
FollowRedirects: true,
EnableHTTP2: true,
EnableHTTP2: false,
},
},
{
@ -158,7 +158,7 @@ var expectedConf = &Config{
KeyFile: filepath.FromSlash("testdata/valid_key_file"),
},
FollowRedirects: true,
EnableHTTP2: true,
EnableHTTP2: false,
},
Headers: map[string]string{"name": "value"},
},
@ -206,19 +206,20 @@ var expectedConf = &Config{
{
JobName: "prometheus",
HonorLabels: true,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFailureLogFile: "testdata/fail_prom.log",
HonorLabels: true,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
LabelLimit: globLabelLimit,
LabelNameLengthLimit: globLabelNameLengthLimit,
LabelValueLengthLimit: globLabelValueLengthLimit,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
ScrapeFallbackProtocol: PrometheusText0_0_4,
ScrapeFailureLogFile: "testdata/fail_prom.log",
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
@ -1500,8 +1501,13 @@ var expectedConf = &Config{
},
}
func TestYAMLNotLongerSupportedAMApi(t *testing.T) {
_, err := LoadFile("testdata/config_with_no_longer_supported_am_api_config.yml", false, promslog.NewNopLogger())
require.Error(t, err)
}
func TestYAMLRoundtrip(t *testing.T) {
want, err := LoadFile("testdata/roundtrip.good.yml", false, false, log.NewNopLogger())
want, err := LoadFile("testdata/roundtrip.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
@ -1514,7 +1520,7 @@ func TestYAMLRoundtrip(t *testing.T) {
}
func TestRemoteWriteRetryOnRateLimit(t *testing.T) {
want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, false, log.NewNopLogger())
want, err := LoadFile("testdata/remote_write_retry_on_rate_limit.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
@ -1529,7 +1535,7 @@ func TestRemoteWriteRetryOnRateLimit(t *testing.T) {
func TestOTLPSanitizeResourceAttributes(t *testing.T) {
t.Run("good config", func(t *testing.T) {
want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml"), false, false, log.NewNopLogger())
want, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.good.yml"), false, promslog.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
@ -1541,7 +1547,7 @@ func TestOTLPSanitizeResourceAttributes(t *testing.T) {
})
t.Run("bad config", func(t *testing.T) {
_, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml"), false, false, log.NewNopLogger())
_, err := LoadFile(filepath.Join("testdata", "otlp_sanitize_resource_attributes.bad.yml"), false, promslog.NewNopLogger())
require.ErrorContains(t, err, `duplicated promoted OTel resource attribute "k8s.job.name"`)
require.ErrorContains(t, err, `empty promoted OTel resource attribute`)
})
@ -1550,16 +1556,17 @@ func TestOTLPSanitizeResourceAttributes(t *testing.T) {
func TestLoadConfig(t *testing.T) {
// Parse a valid file that sets a global scrape timeout. This tests whether parsing
// an overwritten default field in the global config permanently changes the default.
_, err := LoadFile("testdata/global_timeout.good.yml", false, false, log.NewNopLogger())
_, err := LoadFile("testdata/global_timeout.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
c, err := LoadFile("testdata/conf.good.yml", false, false, log.NewNopLogger())
c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
require.Equal(t, expectedConf, c)
}
func TestScrapeIntervalLarger(t *testing.T) {
c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, false, log.NewNopLogger())
c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
require.Len(t, c.ScrapeConfigs, 1)
for _, sc := range c.ScrapeConfigs {
@ -1569,7 +1576,7 @@ func TestScrapeIntervalLarger(t *testing.T) {
// YAML marshaling must not reveal authentication credentials.
func TestElideSecrets(t *testing.T) {
c, err := LoadFile("testdata/conf.good.yml", false, false, log.NewNopLogger())
c, err := LoadFile("testdata/conf.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
secretRe := regexp.MustCompile(`\\u003csecret\\u003e|<secret>`)
@ -1586,31 +1593,31 @@ func TestElideSecrets(t *testing.T) {
func TestLoadConfigRuleFilesAbsolutePath(t *testing.T) {
// Parse a valid file that sets rule files with an absolute path.
c, err := LoadFile(ruleFilesConfigFile, false, false, log.NewNopLogger())
c, err := LoadFile(ruleFilesConfigFile, false, promslog.NewNopLogger())
require.NoError(t, err)
require.Equal(t, ruleFilesExpectedConf, c)
}
func TestKubernetesEmptyAPIServer(t *testing.T) {
_, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, false, log.NewNopLogger())
_, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
}
func TestKubernetesWithKubeConfig(t *testing.T) {
_, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, false, log.NewNopLogger())
_, err := LoadFile("testdata/kubernetes_kubeconfig_without_apiserver.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
}
func TestKubernetesSelectors(t *testing.T) {
_, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, false, log.NewNopLogger())
_, err := LoadFile("testdata/kubernetes_selectors_endpoints.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
_, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, false, log.NewNopLogger())
_, err = LoadFile("testdata/kubernetes_selectors_node.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
_, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, false, log.NewNopLogger())
_, err = LoadFile("testdata/kubernetes_selectors_ingress.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
_, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, false, log.NewNopLogger())
_, err = LoadFile("testdata/kubernetes_selectors_pod.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
_, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, false, log.NewNopLogger())
_, err = LoadFile("testdata/kubernetes_selectors_service.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
}
@ -2080,12 +2087,20 @@ var expectedErrors = []struct {
},
{
filename: "scrape_config_files_scrape_protocols.bad.yml",
errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4] for scrape config with job name "node"`,
errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols.bad.yml: scrape_protocols: unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0] for scrape config with job name "node"`,
},
{
filename: "scrape_config_files_scrape_protocols2.bad.yml",
errMsg: `parsing YAML file testdata/scrape_config_files_scrape_protocols2.bad.yml: duplicated protocol in scrape_protocols, got [OpenMetricsText1.0.0 PrometheusProto OpenMetricsText1.0.0] for scrape config with job name "node"`,
},
{
filename: "scrape_config_files_fallback_scrape_protocol1.bad.yml",
errMsg: `parsing YAML file testdata/scrape_config_files_fallback_scrape_protocol1.bad.yml: invalid fallback_scrape_protocol for scrape config with job name "node": unknown scrape protocol prometheusproto, supported: [OpenMetricsText0.0.1 OpenMetricsText1.0.0 PrometheusProto PrometheusText0.0.4 PrometheusText1.0.0]`,
},
{
filename: "scrape_config_files_fallback_scrape_protocol2.bad.yml",
errMsg: `unmarshal errors`,
},
}
func TestBadConfigs(t *testing.T) {
@ -2094,9 +2109,8 @@ func TestBadConfigs(t *testing.T) {
model.NameValidationScheme = model.UTF8Validation
}()
for _, ee := range expectedErrors {
_, err := LoadFile("testdata/"+ee.filename, false, false, log.NewNopLogger())
require.Error(t, err, "%s", ee.filename)
require.Contains(t, err.Error(), ee.errMsg,
_, err := LoadFile("testdata/"+ee.filename, false, promslog.NewNopLogger())
require.ErrorContains(t, err, ee.errMsg,
"Expected error for %s to contain %q but got: %s", ee.filename, ee.errMsg, err)
}
}
@ -2126,7 +2140,7 @@ func TestBadStaticConfigsYML(t *testing.T) {
}
func TestEmptyConfig(t *testing.T) {
c, err := Load("", false, log.NewNopLogger())
c, err := Load("", promslog.NewNopLogger())
require.NoError(t, err)
exp := DefaultConfig
require.Equal(t, exp, *c)
@ -2136,38 +2150,34 @@ func TestExpandExternalLabels(t *testing.T) {
// Clean up any TEST env variable that could exist on the system.
os.Setenv("TEST", "")
c, err := LoadFile("testdata/external_labels.good.yml", false, false, log.NewNopLogger())
require.NoError(t, err)
testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foo${TEST}bar", "foo", "${TEST}", "qux", "foo$${TEST}", "xyz", "foo$$bar"), c.GlobalConfig.ExternalLabels)
c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger())
c, err := LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "foobar", "foo", "", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels)
os.Setenv("TEST", "TestValue")
c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger())
c, err = LoadFile("testdata/external_labels.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
testutil.RequireEqual(t, labels.FromStrings("bar", "foo", "baz", "fooTestValuebar", "foo", "TestValue", "qux", "foo${TEST}", "xyz", "foo$bar"), c.GlobalConfig.ExternalLabels)
}
func TestAgentMode(t *testing.T) {
_, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, false, log.NewNopLogger())
_, err := LoadFile("testdata/agent_mode.with_alert_manager.yml", true, promslog.NewNopLogger())
require.ErrorContains(t, err, "field alerting is not allowed in agent mode")
_, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, false, log.NewNopLogger())
_, err = LoadFile("testdata/agent_mode.with_alert_relabels.yml", true, promslog.NewNopLogger())
require.ErrorContains(t, err, "field alerting is not allowed in agent mode")
_, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, false, log.NewNopLogger())
_, err = LoadFile("testdata/agent_mode.with_rule_files.yml", true, promslog.NewNopLogger())
require.ErrorContains(t, err, "field rule_files is not allowed in agent mode")
_, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, false, log.NewNopLogger())
_, err = LoadFile("testdata/agent_mode.with_remote_reads.yml", true, promslog.NewNopLogger())
require.ErrorContains(t, err, "field remote_read is not allowed in agent mode")
c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, false, log.NewNopLogger())
c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, promslog.NewNopLogger())
require.NoError(t, err)
require.Empty(t, c.RemoteWriteConfigs)
c, err = LoadFile("testdata/agent_mode.good.yml", true, false, log.NewNopLogger())
c, err = LoadFile("testdata/agent_mode.good.yml", true, promslog.NewNopLogger())
require.NoError(t, err)
require.Len(t, c.RemoteWriteConfigs, 1)
require.Equal(
@ -2178,7 +2188,7 @@ func TestAgentMode(t *testing.T) {
}
func TestEmptyGlobalBlock(t *testing.T) {
c, err := Load("global:\n", false, log.NewNopLogger())
c, err := Load("global:\n", promslog.NewNopLogger())
require.NoError(t, err)
exp := DefaultConfig
exp.Runtime = DefaultRuntimeConfig
@ -2333,7 +2343,7 @@ func TestGetScrapeConfigs(t *testing.T) {
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
c, err := LoadFile(tc.configFile, false, false, log.NewNopLogger())
c, err := LoadFile(tc.configFile, false, promslog.NewNopLogger())
require.NoError(t, err)
scfgs, err := c.GetScrapeConfigs()
@ -2351,7 +2361,7 @@ func kubernetesSDHostURL() config.URL {
}
func TestScrapeConfigDisableCompression(t *testing.T) {
want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, false, log.NewNopLogger())
want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, promslog.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
@ -2398,7 +2408,7 @@ func TestScrapeConfigNameValidationSettings(t *testing.T) {
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, false, log.NewNopLogger())
want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, promslog.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
@ -2411,3 +2421,54 @@ func TestScrapeConfigNameValidationSettings(t *testing.T) {
})
}
}
func TestScrapeProtocolHeader(t *testing.T) {
tests := []struct {
name string
proto ScrapeProtocol
expectedValue string
}{
{
name: "blank",
proto: ScrapeProtocol(""),
expectedValue: "",
},
{
name: "invalid",
proto: ScrapeProtocol("invalid"),
expectedValue: "",
},
{
name: "prometheus protobuf",
proto: PrometheusProto,
expectedValue: "application/vnd.google.protobuf",
},
{
name: "prometheus text 0.0.4",
proto: PrometheusText0_0_4,
expectedValue: "text/plain",
},
{
name: "prometheus text 1.0.0",
proto: PrometheusText1_0_0,
expectedValue: "text/plain",
},
{
name: "openmetrics 0.0.1",
proto: OpenMetricsText0_0_1,
expectedValue: "application/openmetrics-text",
},
{
name: "openmetrics 1.0.0",
proto: OpenMetricsText1_0_0,
expectedValue: "application/openmetrics-text",
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
mediaType := tc.proto.HeaderMediaType()
require.Equal(t, tc.expectedValue, mediaType)
})
}
}

View file

@ -74,6 +74,8 @@ scrape_configs:
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
fallback_scrape_protocol: PrometheusText0.0.4
scrape_failure_log_file: fail_prom.log
file_sd_configs:
- files:

View file

@ -0,0 +1,7 @@
alerting:
alertmanagers:
- scheme: http
api_version: v1
file_sd_configs:
- files:
- nonexistent_file.yml
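For contrast, a minimal sketch of an alertmanager block that still parses under the new validation, assuming the same static file discovery as the fixture above; per the constants earlier in this diff only api_version v2 remains supported:

alerting:
  alertmanagers:
    - scheme: http
      api_version: v2
      file_sd_configs:
        - files:
            - nonexistent_file.yml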

View file

@ -0,0 +1,5 @@
scrape_configs:
- job_name: node
fallback_scrape_protocol: "prometheusproto"
static_configs:
- targets: ['localhost:8080']

View file

@ -0,0 +1,5 @@
scrape_configs:
- job_name: node
fallback_scrape_protocol: ["OpenMetricsText1.0.0", "PrometheusText0.0.4"]
static_configs:
- targets: ['localhost:8080']
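Both bad fixtures above fail validation: the first names an unknown protocol and the second passes a list where a single protocol is expected. A minimal sketch of the valid form, reusing the PrometheusText0.0.4 value already exercised in conf.good.yml:

scrape_configs:
  - job_name: node
    fallback_scrape_protocol: PrometheusText0.0.4
    static_configs:
      - targets: ['localhost:8080']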

View file

@ -233,7 +233,7 @@ type Config interface {
}
type DiscovererOptions struct {
Logger log.Logger
Logger *slog.Logger
// A registerer for the Discoverer's metrics.
Registerer prometheus.Registerer

View file

@ -17,6 +17,7 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"strings"
@ -29,11 +30,11 @@ import (
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
@ -146,9 +147,9 @@ func (c *EC2SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
// the Discoverer interface.
type EC2Discovery struct {
*refresh.Discovery
logger log.Logger
logger *slog.Logger
cfg *EC2SDConfig
ec2 *ec2.EC2
ec2 ec2iface.EC2API
// azToAZID maps this account's availability zones to their underlying AZ
// ID, e.g. eu-west-2a -> euw2-az2. Refreshes are performed sequentially, so
@ -157,14 +158,14 @@ type EC2Discovery struct {
}
// NewEC2Discovery returns a new EC2Discovery which periodically refreshes its targets.
func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) {
func NewEC2Discovery(conf *EC2SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*EC2Discovery, error) {
m, ok := metrics.(*ec2Metrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
}
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
d := &EC2Discovery{
logger: logger,
@ -182,7 +183,7 @@ func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, metrics discovery.Dis
return d, nil
}
func (d *EC2Discovery) ec2Client(context.Context) (*ec2.EC2, error) {
func (d *EC2Discovery) ec2Client(context.Context) (ec2iface.EC2API, error) {
if d.ec2 != nil {
return d.ec2, nil
}
@ -254,8 +255,8 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error
// Prometheus requires a reload if AWS adds a new AZ to the region.
if d.azToAZID == nil {
if err := d.refreshAZIDs(ctx); err != nil {
level.Debug(d.logger).Log(
"msg", "Unable to describe availability zones",
d.logger.Debug(
"Unable to describe availability zones",
"err", err)
}
}
@ -296,8 +297,8 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error
labels[ec2LabelAZ] = model.LabelValue(*inst.Placement.AvailabilityZone)
azID, ok := d.azToAZID[*inst.Placement.AvailabilityZone]
if !ok && d.azToAZID != nil {
level.Debug(d.logger).Log(
"msg", "Availability zone ID not found",
d.logger.Debug(
"Availability zone ID not found",
"az", *inst.Placement.AvailabilityZone)
}
labels[ec2LabelAZID] = model.LabelValue(azID)

434
discovery/aws/ec2_test.go Normal file
View file

@ -0,0 +1,434 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package aws
import (
"context"
"errors"
"testing"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/request"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
// Helper function to get pointers to literals.
// NOTE: this is common between a few tests. In the future it might be worth moving this out into a separate package.
func strptr(str string) *string {
return &str
}
func boolptr(b bool) *bool {
return &b
}
func int64ptr(i int64) *int64 {
return &i
}
// Struct for test data.
type ec2DataStore struct {
region string
azToAZID map[string]string
ownerID string
instances []*ec2.Instance
}
// The tests themselves.
func TestMain(m *testing.M) {
goleak.VerifyTestMain(m)
}
func TestEC2DiscoveryRefreshAZIDs(t *testing.T) {
ctx := context.Background()
// iterate through the test cases
for _, tt := range []struct {
name string
shouldFail bool
ec2Data *ec2DataStore
}{
{
name: "Normal",
shouldFail: false,
ec2Data: &ec2DataStore{
azToAZID: map[string]string{
"azname-a": "azid-1",
"azname-b": "azid-2",
"azname-c": "azid-3",
},
},
},
{
name: "HandleError",
shouldFail: true,
ec2Data: &ec2DataStore{},
},
} {
t.Run(tt.name, func(t *testing.T) {
client := newMockEC2Client(tt.ec2Data)
d := &EC2Discovery{
ec2: client,
}
err := d.refreshAZIDs(ctx)
if tt.shouldFail {
require.Error(t, err)
} else {
require.NoError(t, err)
require.Equal(t, client.ec2Data.azToAZID, d.azToAZID)
}
})
}
}
func TestEC2DiscoveryRefresh(t *testing.T) {
ctx := context.Background()
// iterate through the test cases
for _, tt := range []struct {
name string
ec2Data *ec2DataStore
expected []*targetgroup.Group
}{
{
name: "NoPrivateIp",
ec2Data: &ec2DataStore{
region: "region-noprivateip",
azToAZID: map[string]string{
"azname-a": "azid-1",
"azname-b": "azid-2",
"azname-c": "azid-3",
},
instances: []*ec2.Instance{
{
InstanceId: strptr("instance-id-noprivateip"),
},
},
},
expected: []*targetgroup.Group{
{
Source: "region-noprivateip",
},
},
},
{
name: "NoVpc",
ec2Data: &ec2DataStore{
region: "region-novpc",
azToAZID: map[string]string{
"azname-a": "azid-1",
"azname-b": "azid-2",
"azname-c": "azid-3",
},
ownerID: "owner-id-novpc",
instances: []*ec2.Instance{
{
// set every possible option and test them here
Architecture: strptr("architecture-novpc"),
ImageId: strptr("ami-novpc"),
InstanceId: strptr("instance-id-novpc"),
InstanceLifecycle: strptr("instance-lifecycle-novpc"),
InstanceType: strptr("instance-type-novpc"),
Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")},
Platform: strptr("platform-novpc"),
PrivateDnsName: strptr("private-dns-novpc"),
PrivateIpAddress: strptr("1.2.3.4"),
PublicDnsName: strptr("public-dns-novpc"),
PublicIpAddress: strptr("42.42.42.2"),
State: &ec2.InstanceState{Name: strptr("running")},
// test tags once and for all
Tags: []*ec2.Tag{
{Key: strptr("tag-1-key"), Value: strptr("tag-1-value")},
{Key: strptr("tag-2-key"), Value: strptr("tag-2-value")},
nil,
{Value: strptr("tag-4-value")},
{Key: strptr("tag-5-key")},
},
},
},
},
expected: []*targetgroup.Group{
{
Source: "region-novpc",
Targets: []model.LabelSet{
{
"__address__": model.LabelValue("1.2.3.4:4242"),
"__meta_ec2_ami": model.LabelValue("ami-novpc"),
"__meta_ec2_architecture": model.LabelValue("architecture-novpc"),
"__meta_ec2_availability_zone": model.LabelValue("azname-b"),
"__meta_ec2_availability_zone_id": model.LabelValue("azid-2"),
"__meta_ec2_instance_id": model.LabelValue("instance-id-novpc"),
"__meta_ec2_instance_lifecycle": model.LabelValue("instance-lifecycle-novpc"),
"__meta_ec2_instance_type": model.LabelValue("instance-type-novpc"),
"__meta_ec2_instance_state": model.LabelValue("running"),
"__meta_ec2_owner_id": model.LabelValue("owner-id-novpc"),
"__meta_ec2_platform": model.LabelValue("platform-novpc"),
"__meta_ec2_private_dns_name": model.LabelValue("private-dns-novpc"),
"__meta_ec2_private_ip": model.LabelValue("1.2.3.4"),
"__meta_ec2_public_dns_name": model.LabelValue("public-dns-novpc"),
"__meta_ec2_public_ip": model.LabelValue("42.42.42.2"),
"__meta_ec2_region": model.LabelValue("region-novpc"),
"__meta_ec2_tag_tag_1_key": model.LabelValue("tag-1-value"),
"__meta_ec2_tag_tag_2_key": model.LabelValue("tag-2-value"),
},
},
},
},
},
{
name: "Ipv4",
ec2Data: &ec2DataStore{
region: "region-ipv4",
azToAZID: map[string]string{
"azname-a": "azid-1",
"azname-b": "azid-2",
"azname-c": "azid-3",
},
instances: []*ec2.Instance{
{
// just the minimum needed for the refresh to work
ImageId: strptr("ami-ipv4"),
InstanceId: strptr("instance-id-ipv4"),
InstanceType: strptr("instance-type-ipv4"),
Placement: &ec2.Placement{AvailabilityZone: strptr("azname-c")},
PrivateIpAddress: strptr("5.6.7.8"),
State: &ec2.InstanceState{Name: strptr("running")},
SubnetId: strptr("azid-3"),
VpcId: strptr("vpc-ipv4"),
// network interfaces
NetworkInterfaces: []*ec2.InstanceNetworkInterface{
// interface without subnet -> should be ignored
{
Ipv6Addresses: []*ec2.InstanceIpv6Address{
{
Ipv6Address: strptr("2001:db8:1::1"),
IsPrimaryIpv6: boolptr(true),
},
},
},
// interface with subnet, no IPv6
{
Ipv6Addresses: []*ec2.InstanceIpv6Address{},
SubnetId: strptr("azid-3"),
},
// interface with another subnet, no IPv6
{
Ipv6Addresses: []*ec2.InstanceIpv6Address{},
SubnetId: strptr("azid-1"),
},
},
},
},
},
expected: []*targetgroup.Group{
{
Source: "region-ipv4",
Targets: []model.LabelSet{
{
"__address__": model.LabelValue("5.6.7.8:4242"),
"__meta_ec2_ami": model.LabelValue("ami-ipv4"),
"__meta_ec2_availability_zone": model.LabelValue("azname-c"),
"__meta_ec2_availability_zone_id": model.LabelValue("azid-3"),
"__meta_ec2_instance_id": model.LabelValue("instance-id-ipv4"),
"__meta_ec2_instance_state": model.LabelValue("running"),
"__meta_ec2_instance_type": model.LabelValue("instance-type-ipv4"),
"__meta_ec2_owner_id": model.LabelValue(""),
"__meta_ec2_primary_subnet_id": model.LabelValue("azid-3"),
"__meta_ec2_private_ip": model.LabelValue("5.6.7.8"),
"__meta_ec2_region": model.LabelValue("region-ipv4"),
"__meta_ec2_subnet_id": model.LabelValue(",azid-3,azid-1,"),
"__meta_ec2_vpc_id": model.LabelValue("vpc-ipv4"),
},
},
},
},
},
{
name: "Ipv6",
ec2Data: &ec2DataStore{
region: "region-ipv6",
azToAZID: map[string]string{
"azname-a": "azid-1",
"azname-b": "azid-2",
"azname-c": "azid-3",
},
instances: []*ec2.Instance{
{
// just the minimum needed for the refresh to work
ImageId: strptr("ami-ipv6"),
InstanceId: strptr("instance-id-ipv6"),
InstanceType: strptr("instance-type-ipv6"),
Placement: &ec2.Placement{AvailabilityZone: strptr("azname-b")},
PrivateIpAddress: strptr("9.10.11.12"),
State: &ec2.InstanceState{Name: strptr("running")},
SubnetId: strptr("azid-2"),
VpcId: strptr("vpc-ipv6"),
// network interfaces
NetworkInterfaces: []*ec2.InstanceNetworkInterface{
// interface without primary IPv6, index 2
{
Attachment: &ec2.InstanceNetworkInterfaceAttachment{
DeviceIndex: int64ptr(3),
},
Ipv6Addresses: []*ec2.InstanceIpv6Address{
{
Ipv6Address: strptr("2001:db8:2::1:1"),
IsPrimaryIpv6: boolptr(false),
},
},
SubnetId: strptr("azid-2"),
},
// interface with primary IPv6, index 1
{
Attachment: &ec2.InstanceNetworkInterfaceAttachment{
DeviceIndex: int64ptr(1),
},
Ipv6Addresses: []*ec2.InstanceIpv6Address{
{
Ipv6Address: strptr("2001:db8:2::2:1"),
IsPrimaryIpv6: boolptr(false),
},
{
Ipv6Address: strptr("2001:db8:2::2:2"),
IsPrimaryIpv6: boolptr(true),
},
},
SubnetId: strptr("azid-2"),
},
// interface with primary IPv6, index 3
{
Attachment: &ec2.InstanceNetworkInterfaceAttachment{
DeviceIndex: int64ptr(3),
},
Ipv6Addresses: []*ec2.InstanceIpv6Address{
{
Ipv6Address: strptr("2001:db8:2::3:1"),
IsPrimaryIpv6: boolptr(true),
},
},
SubnetId: strptr("azid-1"),
},
// interface without primary IPv6, index 0
{
Attachment: &ec2.InstanceNetworkInterfaceAttachment{
DeviceIndex: int64ptr(0),
},
Ipv6Addresses: []*ec2.InstanceIpv6Address{},
SubnetId: strptr("azid-3"),
},
},
},
},
},
expected: []*targetgroup.Group{
{
Source: "region-ipv6",
Targets: []model.LabelSet{
{
"__address__": model.LabelValue("9.10.11.12:4242"),
"__meta_ec2_ami": model.LabelValue("ami-ipv6"),
"__meta_ec2_availability_zone": model.LabelValue("azname-b"),
"__meta_ec2_availability_zone_id": model.LabelValue("azid-2"),
"__meta_ec2_instance_id": model.LabelValue("instance-id-ipv6"),
"__meta_ec2_instance_state": model.LabelValue("running"),
"__meta_ec2_instance_type": model.LabelValue("instance-type-ipv6"),
"__meta_ec2_ipv6_addresses": model.LabelValue(",2001:db8:2::1:1,2001:db8:2::2:1,2001:db8:2::2:2,2001:db8:2::3:1,"),
"__meta_ec2_owner_id": model.LabelValue(""),
"__meta_ec2_primary_ipv6_addresses": model.LabelValue(",,2001:db8:2::2:2,,2001:db8:2::3:1,"),
"__meta_ec2_primary_subnet_id": model.LabelValue("azid-2"),
"__meta_ec2_private_ip": model.LabelValue("9.10.11.12"),
"__meta_ec2_region": model.LabelValue("region-ipv6"),
"__meta_ec2_subnet_id": model.LabelValue(",azid-2,azid-1,azid-3,"),
"__meta_ec2_vpc_id": model.LabelValue("vpc-ipv6"),
},
},
},
},
},
} {
t.Run(tt.name, func(t *testing.T) {
client := newMockEC2Client(tt.ec2Data)
d := &EC2Discovery{
ec2: client,
cfg: &EC2SDConfig{
Port: 4242,
Region: client.ec2Data.region,
},
}
g, err := d.refresh(ctx)
require.NoError(t, err)
require.Equal(t, tt.expected, g)
})
}
}
// EC2 client mock.
type mockEC2Client struct {
ec2iface.EC2API
ec2Data ec2DataStore
}
func newMockEC2Client(ec2Data *ec2DataStore) *mockEC2Client {
client := mockEC2Client{
ec2Data: *ec2Data,
}
return &client
}
func (m *mockEC2Client) DescribeAvailabilityZonesWithContext(ctx aws.Context, input *ec2.DescribeAvailabilityZonesInput, opts ...request.Option) (*ec2.DescribeAvailabilityZonesOutput, error) {
if len(m.ec2Data.azToAZID) == 0 {
return nil, errors.New("No AZs found")
}
azs := make([]*ec2.AvailabilityZone, len(m.ec2Data.azToAZID))
i := 0
for k, v := range m.ec2Data.azToAZID {
azs[i] = &ec2.AvailabilityZone{
ZoneName: strptr(k),
ZoneId: strptr(v),
}
i++
}
return &ec2.DescribeAvailabilityZonesOutput{
AvailabilityZones: azs,
}, nil
}
func (m *mockEC2Client) DescribeInstancesPagesWithContext(ctx aws.Context, input *ec2.DescribeInstancesInput, fn func(*ec2.DescribeInstancesOutput, bool) bool, opts ...request.Option) error {
r := ec2.Reservation{}
r.SetInstances(m.ec2Data.instances)
r.SetOwnerId(m.ec2Data.ownerID)
o := ec2.DescribeInstancesOutput{}
o.SetReservations([]*ec2.Reservation{&r})
_ = fn(&o, true)
return nil
}

View file

@ -17,6 +17,7 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"strings"
@ -29,10 +30,10 @@ import (
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/lightsail"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
@ -130,14 +131,14 @@ type LightsailDiscovery struct {
}
// NewLightsailDiscovery returns a new LightsailDiscovery which periodically refreshes its targets.
func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) {
func NewLightsailDiscovery(conf *LightsailSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*LightsailDiscovery, error) {
m, ok := metrics.(*lightsailMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
}
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
d := &LightsailDiscovery{

View file

@ -17,6 +17,7 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"math/rand"
"net"
"net/http"
@ -35,10 +36,9 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4"
cache "github.com/Code-Hex/go-generics-cache"
"github.com/Code-Hex/go-generics-cache/policy/lru"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/promslog"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@ -175,7 +175,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
type Discovery struct {
*refresh.Discovery
logger log.Logger
logger *slog.Logger
cfg *SDConfig
port int
cache *cache.Cache[string, *armnetwork.Interface]
@ -183,14 +183,14 @@ type Discovery struct {
}
// NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets.
func NewDiscovery(cfg *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(cfg *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*azureMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
}
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
l := cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5000)))
d := &Discovery{
@ -228,26 +228,26 @@ type azureClient struct {
vm *armcompute.VirtualMachinesClient
vmss *armcompute.VirtualMachineScaleSetsClient
vmssvm *armcompute.VirtualMachineScaleSetVMsClient
logger log.Logger
logger *slog.Logger
}
var _ client = &azureClient{}
// createAzureClient is a helper function for creating an Azure compute client to ARM.
func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) {
cloudConfiguration, err := CloudConfigurationFromName(cfg.Environment)
// createAzureClient is a helper method for creating an Azure compute client to ARM.
func (d *Discovery) createAzureClient() (client, error) {
cloudConfiguration, err := CloudConfigurationFromName(d.cfg.Environment)
if err != nil {
return &azureClient{}, err
}
var c azureClient
c.logger = logger
c.logger = d.logger
telemetry := policy.TelemetryOptions{
ApplicationID: userAgent,
}
credential, err := newCredential(cfg, policy.ClientOptions{
credential, err := newCredential(*d.cfg, policy.ClientOptions{
Cloud: cloudConfiguration,
Telemetry: telemetry,
})
@ -255,7 +255,7 @@ func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) {
return &azureClient{}, err
}
client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, "azure_sd")
client, err := config_util.NewClientFromConfig(d.cfg.HTTPClientConfig, "azure_sd")
if err != nil {
return &azureClient{}, err
}
@ -267,22 +267,22 @@ func createAzureClient(cfg SDConfig, logger log.Logger) (client, error) {
},
}
c.vm, err = armcompute.NewVirtualMachinesClient(cfg.SubscriptionID, credential, options)
c.vm, err = armcompute.NewVirtualMachinesClient(d.cfg.SubscriptionID, credential, options)
if err != nil {
return &azureClient{}, err
}
c.nic, err = armnetwork.NewInterfacesClient(cfg.SubscriptionID, credential, options)
c.nic, err = armnetwork.NewInterfacesClient(d.cfg.SubscriptionID, credential, options)
if err != nil {
return &azureClient{}, err
}
c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(cfg.SubscriptionID, credential, options)
c.vmss, err = armcompute.NewVirtualMachineScaleSetsClient(d.cfg.SubscriptionID, credential, options)
if err != nil {
return &azureClient{}, err
}
c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(cfg.SubscriptionID, credential, options)
c.vmssvm, err = armcompute.NewVirtualMachineScaleSetVMsClient(d.cfg.SubscriptionID, credential, options)
if err != nil {
return &azureClient{}, err
}
@ -337,35 +337,27 @@ type virtualMachine struct {
}
// Create a new azureResource object from an ID string.
func newAzureResourceFromID(id string, logger log.Logger) (*arm.ResourceID, error) {
func newAzureResourceFromID(id string, logger *slog.Logger) (*arm.ResourceID, error) {
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
resourceID, err := arm.ParseResourceID(id)
if err != nil {
err := fmt.Errorf("invalid ID '%s': %w", id, err)
level.Error(logger).Log("err", err)
logger.Error("Failed to parse resource ID", "err", err)
return &arm.ResourceID{}, err
}
return resourceID, nil
}
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
defer level.Debug(d.logger).Log("msg", "Azure discovery completed")
client, err := createAzureClient(*d.cfg, d.logger)
if err != nil {
d.metrics.failuresCount.Inc()
return nil, fmt.Errorf("could not create Azure client: %w", err)
}
func (d *Discovery) refreshAzureClient(ctx context.Context, client client) ([]*targetgroup.Group, error) {
machines, err := client.getVMs(ctx, d.cfg.ResourceGroup)
if err != nil {
d.metrics.failuresCount.Inc()
return nil, fmt.Errorf("could not get virtual machines: %w", err)
}
level.Debug(d.logger).Log("msg", "Found virtual machines during Azure discovery.", "count", len(machines))
d.logger.Debug("Found virtual machines during Azure discovery.", "count", len(machines))
// Load the vms managed by scale sets.
scaleSets, err := client.getScaleSets(ctx, d.cfg.ResourceGroup)
@ -418,6 +410,18 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
return []*targetgroup.Group{&tg}, nil
}
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
defer d.logger.Debug("Azure discovery completed")
client, err := d.createAzureClient()
if err != nil {
d.metrics.failuresCount.Inc()
return nil, fmt.Errorf("could not create Azure client: %w", err)
}
return d.refreshAzureClient(ctx, client)
}
func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualMachine) (model.LabelSet, error) {
r, err := newAzureResourceFromID(vm.ID, d.logger)
if err != nil {
@ -459,7 +463,7 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM
}
if err != nil {
if errors.Is(err, errorNotFound) {
level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err)
d.logger.Warn("Network interface does not exist", "name", nicID, "err", err)
} else {
return nil, err
}
@ -480,7 +484,7 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM
// yet support this. On deallocated machines, this value happens to be nil so it
// is a cheap and easy way to determine if a machine is allocated or not.
if networkInterface.Properties.Primary == nil {
level.Debug(d.logger).Log("msg", "Skipping deallocated virtual machine", "machine", vm.Name)
d.logger.Debug("Skipping deallocated virtual machine", "machine", vm.Name)
return nil, nil
}
@ -724,7 +728,7 @@ func (d *Discovery) addToCache(nicID string, netInt *armnetwork.Interface) {
rs := time.Duration(random) * time.Second
exptime := time.Duration(d.cfg.RefreshInterval*10) + rs
d.cache.Set(nicID, netInt, cache.WithExpiration(exptime))
level.Debug(d.logger).Log("msg", "Adding nic", "nic", nicID, "time", exptime.Seconds())
d.logger.Debug("Adding nic", "nic", nicID, "time", exptime.Seconds())
}
// getFromCache will get the network Interface for the specified nicID

View file

@ -15,19 +15,34 @@ package azure
import (
"context"
"fmt"
"log/slog"
"net/http"
"slices"
"strings"
"testing"
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
azfake "github.com/Azure/azure-sdk-for-go/sdk/azcore/fake"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
fake "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5/fake"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4"
fakenetwork "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4/fake"
cache "github.com/Code-Hex/go-generics-cache"
"github.com/Code-Hex/go-generics-cache/policy/lru"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
const defaultMockNetworkID string = "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Network/networkInterfaces/{networkInterfaceName}"
func TestMain(m *testing.M) {
goleak.VerifyTestMain(m,
goleak.IgnoreTopFunction("github.com/Code-Hex/go-generics-cache.(*janitor).run.func1"),
@ -96,13 +111,12 @@ func TestVMToLabelSet(t *testing.T) {
vmType := "type"
location := "westeurope"
computerName := "computer_name"
networkID := "/subscriptions/00000000-0000-0000-0000-000000000000/network1"
ipAddress := "10.20.30.40"
primary := true
networkProfile := armcompute.NetworkProfile{
NetworkInterfaces: []*armcompute.NetworkInterfaceReference{
{
ID: &networkID,
ID: to.Ptr(defaultMockNetworkID),
Properties: &armcompute.NetworkInterfaceReferenceProperties{Primary: &primary},
},
},
@ -139,7 +153,7 @@ func TestVMToLabelSet(t *testing.T) {
Location: location,
OsType: "Linux",
Tags: map[string]*string{},
NetworkInterfaces: []string{networkID},
NetworkInterfaces: []string{defaultMockNetworkID},
Size: size,
}
@ -150,11 +164,12 @@ func TestVMToLabelSet(t *testing.T) {
cfg := DefaultSDConfig
d := &Discovery{
cfg: &cfg,
logger: log.NewNopLogger(),
logger: promslog.NewNopLogger(),
cache: cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5))),
}
network := armnetwork.Interface{
Name: &networkID,
Name: to.Ptr(defaultMockNetworkID),
ID: to.Ptr(defaultMockNetworkID),
Properties: &armnetwork.InterfacePropertiesFormat{
Primary: &primary,
IPConfigurations: []*armnetwork.InterfaceIPConfiguration{
@ -164,9 +179,9 @@ func TestVMToLabelSet(t *testing.T) {
},
},
}
client := &mockAzureClient{
networkInterface: &network,
}
client := createMockAzureClient(t, nil, nil, nil, network, nil)
labelSet, err := d.vmToLabelSet(context.Background(), client, actualVM)
require.NoError(t, err)
require.Len(t, labelSet, 11)
@ -475,34 +490,372 @@ func TestNewAzureResourceFromID(t *testing.T) {
}
}
func TestAzureRefresh(t *testing.T) {
tests := []struct {
scenario string
vmResp []armcompute.VirtualMachinesClientListAllResponse
vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse
vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse
interfacesResp armnetwork.Interface
expectedTG []*targetgroup.Group
}{
{
scenario: "VMs, VMSS and VMSSVMs in Multiple Responses",
vmResp: []armcompute.VirtualMachinesClientListAllResponse{
{
VirtualMachineListResult: armcompute.VirtualMachineListResult{
Value: []*armcompute.VirtualMachine{
defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm1"), to.Ptr("vm1")),
defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm2"), to.Ptr("vm2")),
},
},
},
{
VirtualMachineListResult: armcompute.VirtualMachineListResult{
Value: []*armcompute.VirtualMachine{
defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm3"), to.Ptr("vm3")),
defaultVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm4"), to.Ptr("vm4")),
},
},
},
},
vmssResp: []armcompute.VirtualMachineScaleSetsClientListAllResponse{
{
VirtualMachineScaleSetListWithLinkResult: armcompute.VirtualMachineScaleSetListWithLinkResult{
Value: []*armcompute.VirtualMachineScaleSet{
{
ID: to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1"),
Name: to.Ptr("vmScaleSet1"),
Location: to.Ptr("australiaeast"),
Type: to.Ptr("Microsoft.Compute/virtualMachineScaleSets"),
},
},
},
},
},
vmssvmResp: []armcompute.VirtualMachineScaleSetVMsClientListResponse{
{
VirtualMachineScaleSetVMListResult: armcompute.VirtualMachineScaleSetVMListResult{
Value: []*armcompute.VirtualMachineScaleSetVM{
defaultVMSSVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm1"), to.Ptr("vmScaleSet1_vm1")),
defaultVMSSVMWithIDAndName(to.Ptr("/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm2"), to.Ptr("vmScaleSet1_vm2")),
},
},
},
},
interfacesResp: armnetwork.Interface{
ID: to.Ptr(defaultMockNetworkID),
Properties: &armnetwork.InterfacePropertiesFormat{
Primary: to.Ptr(true),
IPConfigurations: []*armnetwork.InterfaceIPConfiguration{
{Properties: &armnetwork.InterfaceIPConfigurationPropertiesFormat{
PrivateIPAddress: to.Ptr("10.0.0.1"),
}},
},
},
},
expectedTG: []*targetgroup.Group{
{
Targets: []model.LabelSet{
{
"__address__": "10.0.0.1:80",
"__meta_azure_machine_computer_name": "computer_name",
"__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm1",
"__meta_azure_machine_location": "australiaeast",
"__meta_azure_machine_name": "vm1",
"__meta_azure_machine_os_type": "Linux",
"__meta_azure_machine_private_ip": "10.0.0.1",
"__meta_azure_machine_resource_group": "{resourceGroup}",
"__meta_azure_machine_size": "size",
"__meta_azure_machine_tag_prometheus": "",
"__meta_azure_subscription_id": "",
"__meta_azure_tenant_id": "",
},
{
"__address__": "10.0.0.1:80",
"__meta_azure_machine_computer_name": "computer_name",
"__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm2",
"__meta_azure_machine_location": "australiaeast",
"__meta_azure_machine_name": "vm2",
"__meta_azure_machine_os_type": "Linux",
"__meta_azure_machine_private_ip": "10.0.0.1",
"__meta_azure_machine_resource_group": "{resourceGroup}",
"__meta_azure_machine_size": "size",
"__meta_azure_machine_tag_prometheus": "",
"__meta_azure_subscription_id": "",
"__meta_azure_tenant_id": "",
},
{
"__address__": "10.0.0.1:80",
"__meta_azure_machine_computer_name": "computer_name",
"__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm3",
"__meta_azure_machine_location": "australiaeast",
"__meta_azure_machine_name": "vm3",
"__meta_azure_machine_os_type": "Linux",
"__meta_azure_machine_private_ip": "10.0.0.1",
"__meta_azure_machine_resource_group": "{resourceGroup}",
"__meta_azure_machine_size": "size",
"__meta_azure_machine_tag_prometheus": "",
"__meta_azure_subscription_id": "",
"__meta_azure_tenant_id": "",
},
{
"__address__": "10.0.0.1:80",
"__meta_azure_machine_computer_name": "computer_name",
"__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/vm4",
"__meta_azure_machine_location": "australiaeast",
"__meta_azure_machine_name": "vm4",
"__meta_azure_machine_os_type": "Linux",
"__meta_azure_machine_private_ip": "10.0.0.1",
"__meta_azure_machine_resource_group": "{resourceGroup}",
"__meta_azure_machine_size": "size",
"__meta_azure_machine_tag_prometheus": "",
"__meta_azure_subscription_id": "",
"__meta_azure_tenant_id": "",
},
{
"__address__": "10.0.0.1:80",
"__meta_azure_machine_computer_name": "computer_name",
"__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm1",
"__meta_azure_machine_location": "australiaeast",
"__meta_azure_machine_name": "vmScaleSet1_vm1",
"__meta_azure_machine_os_type": "Linux",
"__meta_azure_machine_private_ip": "10.0.0.1",
"__meta_azure_machine_resource_group": "{resourceGroup}",
"__meta_azure_machine_scale_set": "vmScaleSet1",
"__meta_azure_machine_size": "size",
"__meta_azure_machine_tag_prometheus": "",
"__meta_azure_subscription_id": "",
"__meta_azure_tenant_id": "",
},
{
"__address__": "10.0.0.1:80",
"__meta_azure_machine_computer_name": "computer_name",
"__meta_azure_machine_id": "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/vmScaleSet1/virtualMachines/vmScaleSet1_vm2",
"__meta_azure_machine_location": "australiaeast",
"__meta_azure_machine_name": "vmScaleSet1_vm2",
"__meta_azure_machine_os_type": "Linux",
"__meta_azure_machine_private_ip": "10.0.0.1",
"__meta_azure_machine_resource_group": "{resourceGroup}",
"__meta_azure_machine_scale_set": "vmScaleSet1",
"__meta_azure_machine_size": "size",
"__meta_azure_machine_tag_prometheus": "",
"__meta_azure_subscription_id": "",
"__meta_azure_tenant_id": "",
},
},
},
},
},
}
for _, tc := range tests {
t.Run(tc.scenario, func(t *testing.T) {
t.Parallel()
azureSDConfig := &DefaultSDConfig
azureClient := createMockAzureClient(t, tc.vmResp, tc.vmssResp, tc.vmssvmResp, tc.interfacesResp, nil)
reg := prometheus.NewRegistry()
refreshMetrics := discovery.NewRefreshMetrics(reg)
metrics := azureSDConfig.NewDiscovererMetrics(reg, refreshMetrics)
sd, err := NewDiscovery(azureSDConfig, nil, metrics)
require.NoError(t, err)
tg, err := sd.refreshAzureClient(context.Background(), azureClient)
require.NoError(t, err)
sortTargetsByID(tg[0].Targets)
require.Equal(t, tc.expectedTG, tg)
})
}
}
type mockAzureClient struct {
networkInterface *armnetwork.Interface
azureClient
}
var _ client = &mockAzureClient{}
func createMockAzureClient(t *testing.T, vmResp []armcompute.VirtualMachinesClientListAllResponse, vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse, vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse, interfaceResp armnetwork.Interface, logger *slog.Logger) client {
t.Helper()
mockVMServer := defaultMockVMServer(vmResp)
mockVMSSServer := defaultMockVMSSServer(vmssResp)
mockVMScaleSetVMServer := defaultMockVMSSVMServer(vmssvmResp)
mockInterfaceServer := defaultMockInterfaceServer(interfaceResp)
func (*mockAzureClient) getVMs(ctx context.Context, resourceGroup string) ([]virtualMachine, error) {
return nil, nil
}
vmClient, err := armcompute.NewVirtualMachinesClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{
ClientOptions: azcore.ClientOptions{
Transport: fake.NewVirtualMachinesServerTransport(&mockVMServer),
},
})
require.NoError(t, err)
func (*mockAzureClient) getScaleSets(ctx context.Context, resourceGroup string) ([]armcompute.VirtualMachineScaleSet, error) {
return nil, nil
}
vmssClient, err := armcompute.NewVirtualMachineScaleSetsClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{
ClientOptions: azcore.ClientOptions{
Transport: fake.NewVirtualMachineScaleSetsServerTransport(&mockVMSSServer),
},
})
require.NoError(t, err)
func (*mockAzureClient) getScaleSetVMs(ctx context.Context, scaleSet armcompute.VirtualMachineScaleSet) ([]virtualMachine, error) {
return nil, nil
}
vmssvmClient, err := armcompute.NewVirtualMachineScaleSetVMsClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{
ClientOptions: azcore.ClientOptions{
Transport: fake.NewVirtualMachineScaleSetVMsServerTransport(&mockVMScaleSetVMServer),
},
})
require.NoError(t, err)
func (m *mockAzureClient) getVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) {
if networkInterfaceID == "" {
return nil, fmt.Errorf("parameter networkInterfaceID cannot be empty")
interfacesClient, err := armnetwork.NewInterfacesClient("fake-subscription-id", &azfake.TokenCredential{}, &arm.ClientOptions{
ClientOptions: azcore.ClientOptions{
Transport: fakenetwork.NewInterfacesServerTransport(&mockInterfaceServer),
},
})
require.NoError(t, err)
return &mockAzureClient{
azureClient: azureClient{
vm: vmClient,
vmss: vmssClient,
vmssvm: vmssvmClient,
nic: interfacesClient,
logger: logger,
},
}
return m.networkInterface, nil
}
func (m *mockAzureClient) getVMScaleSetVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID, scaleSetName, instanceID string) (*armnetwork.Interface, error) {
if scaleSetName == "" {
return nil, fmt.Errorf("parameter virtualMachineScaleSetName cannot be empty")
func defaultMockInterfaceServer(interfaceResp armnetwork.Interface) fakenetwork.InterfacesServer {
return fakenetwork.InterfacesServer{
Get: func(ctx context.Context, resourceGroupName, networkInterfaceName string, options *armnetwork.InterfacesClientGetOptions) (resp azfake.Responder[armnetwork.InterfacesClientGetResponse], errResp azfake.ErrorResponder) {
resp.SetResponse(http.StatusOK, armnetwork.InterfacesClientGetResponse{Interface: interfaceResp}, nil)
return
},
GetVirtualMachineScaleSetNetworkInterface: func(ctx context.Context, resourceGroupName, virtualMachineScaleSetName, virtualmachineIndex, networkInterfaceName string, options *armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceOptions) (resp azfake.Responder[armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceResponse], errResp azfake.ErrorResponder) {
resp.SetResponse(http.StatusOK, armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceResponse{Interface: interfaceResp}, nil)
return
},
}
return m.networkInterface, nil
}
func defaultMockVMServer(vmResp []armcompute.VirtualMachinesClientListAllResponse) fake.VirtualMachinesServer {
return fake.VirtualMachinesServer{
NewListAllPager: func(options *armcompute.VirtualMachinesClientListAllOptions) (resp azfake.PagerResponder[armcompute.VirtualMachinesClientListAllResponse]) {
for _, page := range vmResp {
resp.AddPage(http.StatusOK, page, nil)
}
return
},
}
}
func defaultMockVMSSServer(vmssResp []armcompute.VirtualMachineScaleSetsClientListAllResponse) fake.VirtualMachineScaleSetsServer {
return fake.VirtualMachineScaleSetsServer{
NewListAllPager: func(options *armcompute.VirtualMachineScaleSetsClientListAllOptions) (resp azfake.PagerResponder[armcompute.VirtualMachineScaleSetsClientListAllResponse]) {
for _, page := range vmssResp {
resp.AddPage(http.StatusOK, page, nil)
}
return
},
}
}
func defaultMockVMSSVMServer(vmssvmResp []armcompute.VirtualMachineScaleSetVMsClientListResponse) fake.VirtualMachineScaleSetVMsServer {
return fake.VirtualMachineScaleSetVMsServer{
NewListPager: func(resourceGroupName, virtualMachineScaleSetName string, options *armcompute.VirtualMachineScaleSetVMsClientListOptions) (resp azfake.PagerResponder[armcompute.VirtualMachineScaleSetVMsClientListResponse]) {
for _, page := range vmssvmResp {
resp.AddPage(http.StatusOK, page, nil)
}
return
},
}
}
func defaultVMWithIDAndName(id, name *string) *armcompute.VirtualMachine {
vmSize := armcompute.VirtualMachineSizeTypes("size")
osType := armcompute.OperatingSystemTypesLinux
defaultID := "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachine/testVM"
defaultName := "testVM"
if id == nil {
id = &defaultID
}
if name == nil {
name = &defaultName
}
return &armcompute.VirtualMachine{
ID: id,
Name: name,
Type: to.Ptr("Microsoft.Compute/virtualMachines"),
Location: to.Ptr("australiaeast"),
Properties: &armcompute.VirtualMachineProperties{
OSProfile: &armcompute.OSProfile{
ComputerName: to.Ptr("computer_name"),
},
StorageProfile: &armcompute.StorageProfile{
OSDisk: &armcompute.OSDisk{
OSType: &osType,
},
},
NetworkProfile: &armcompute.NetworkProfile{
NetworkInterfaces: []*armcompute.NetworkInterfaceReference{
{
ID: to.Ptr(defaultMockNetworkID),
},
},
},
HardwareProfile: &armcompute.HardwareProfile{
VMSize: &vmSize,
},
},
Tags: map[string]*string{
"prometheus": new(string),
},
}
}
func defaultVMSSVMWithIDAndName(id, name *string) *armcompute.VirtualMachineScaleSetVM {
vmSize := armcompute.VirtualMachineSizeTypes("size")
osType := armcompute.OperatingSystemTypesLinux
defaultID := "/subscriptions/00000000-0000-0000-0000-00000000000/resourceGroups/{resourceGroup}/providers/Microsoft.Compute/virtualMachineScaleSets/testVMScaleSet/virtualMachines/testVM"
defaultName := "testVM"
if id == nil {
id = &defaultID
}
if name == nil {
name = &defaultName
}
return &armcompute.VirtualMachineScaleSetVM{
ID: id,
Name: name,
Type: to.Ptr("Microsoft.Compute/virtualMachines"),
InstanceID: to.Ptr("123"),
Location: to.Ptr("australiaeast"),
Properties: &armcompute.VirtualMachineScaleSetVMProperties{
OSProfile: &armcompute.OSProfile{
ComputerName: to.Ptr("computer_name"),
},
StorageProfile: &armcompute.StorageProfile{
OSDisk: &armcompute.OSDisk{
OSType: &osType,
},
},
NetworkProfile: &armcompute.NetworkProfile{
NetworkInterfaces: []*armcompute.NetworkInterfaceReference{
{ID: to.Ptr(defaultMockNetworkID)},
},
},
HardwareProfile: &armcompute.HardwareProfile{
VMSize: &vmSize,
},
},
Tags: map[string]*string{
"prometheus": new(string),
},
}
}
func sortTargetsByID(targets []model.LabelSet) {
slices.SortFunc(targets, func(a, b model.LabelSet) int {
return strings.Compare(string(a["__meta_azure_machine_id"]), string(b["__meta_azure_machine_id"]))
})
}

View file

@ -17,17 +17,17 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"strings"
"time"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
consul "github.com/hashicorp/consul/api"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
@ -113,8 +113,11 @@ type SDConfig struct {
Services []string `yaml:"services,omitempty"`
// A list of tags used to filter instances inside a service. Services must contain all tags in the list.
ServiceTags []string `yaml:"tags,omitempty"`
// Desired node metadata.
// Desired node metadata. As of Consul 1.14, consider `filter` instead.
NodeMeta map[string]string `yaml:"node_meta,omitempty"`
// Consul filter string
// See https://www.consul.io/api-docs/catalog#filtering-1, for syntax
Filter string `yaml:"filter,omitempty"`
HTTPClientConfig config.HTTPClientConfig `yaml:",inline"`
}
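A minimal sketch of the new filter option in a consul_sd_configs block, reusing the filter expression from TestFilterOption further down; the job name and server address are assumptions for illustration, and node_meta stays available although filter is the suggested replacement from Consul 1.14 on:

scrape_configs:
  - job_name: consul-services
    consul_sd_configs:
      - server: 'localhost:8500'
        services: ['test']
        filter: 'NodeMeta.rack_name == "2304"'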
@ -174,22 +177,23 @@ type Discovery struct {
watchedServices []string // Set of services which will be discovered.
watchedTags []string // Tags used to filter instances of a service.
watchedNodeMeta map[string]string
watchedFilter string
allowStale bool
refreshInterval time.Duration
finalizer func()
logger log.Logger
logger *slog.Logger
metrics *consulMetrics
}
// NewDiscovery returns a new Discovery for the given config.
func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*consulMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
}
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
wrapper, err := config.NewClientFromConfig(conf.HTTPClientConfig, "consul_sd", config.WithIdleConnTimeout(2*watchTimeout))
@ -218,6 +222,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere
watchedServices: conf.Services,
watchedTags: conf.ServiceTags,
watchedNodeMeta: conf.NodeMeta,
watchedFilter: conf.Filter,
allowStale: conf.AllowStale,
refreshInterval: time.Duration(conf.RefreshInterval),
clientDatacenter: conf.Datacenter,
@ -282,7 +287,7 @@ func (d *Discovery) getDatacenter() error {
info, err := d.client.Agent().Self()
if err != nil {
level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err)
d.logger.Error("Error retrieving datacenter name", "err", err)
d.metrics.rpcFailuresCount.Inc()
return err
}
@ -290,12 +295,12 @@ func (d *Discovery) getDatacenter() error {
dc, ok := info["Config"]["Datacenter"].(string)
if !ok {
err := fmt.Errorf("invalid value '%v' for Config.Datacenter", info["Config"]["Datacenter"])
level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err)
d.logger.Error("Error retrieving datacenter name", "err", err)
return err
}
d.clientDatacenter = dc
d.logger = log.With(d.logger, "datacenter", dc)
d.logger = d.logger.With("datacenter", dc)
return nil
}
@ -361,13 +366,14 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
// entire list of services.
func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.Group, lastIndex *uint64, services map[string]func()) {
catalog := d.client.Catalog()
level.Debug(d.logger).Log("msg", "Watching services", "tags", strings.Join(d.watchedTags, ","))
d.logger.Debug("Watching services", "tags", strings.Join(d.watchedTags, ","), "filter", d.watchedFilter)
opts := &consul.QueryOptions{
WaitIndex: *lastIndex,
WaitTime: watchTimeout,
AllowStale: d.allowStale,
NodeMeta: d.watchedNodeMeta,
Filter: d.watchedFilter,
}
t0 := time.Now()
srvs, meta, err := catalog.Services(opts.WithContext(ctx))
@ -382,7 +388,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
}
if err != nil {
level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err)
d.logger.Error("Error refreshing service list", "err", err)
d.metrics.rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return
@ -445,7 +451,7 @@ type consulService struct {
discovery *Discovery
client *consul.Client
tagSeparator string
logger log.Logger
logger *slog.Logger
rpcFailuresCount prometheus.Counter
serviceRPCDuration prometheus.Observer
}
@ -490,7 +496,7 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G
// Get updates for a service.
func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Group, health *consul.Health, lastIndex *uint64) {
level.Debug(srv.logger).Log("msg", "Watching service", "service", srv.name, "tags", strings.Join(srv.tags, ","))
srv.logger.Debug("Watching service", "service", srv.name, "tags", strings.Join(srv.tags, ","))
opts := &consul.QueryOptions{
WaitIndex: *lastIndex,
@ -513,7 +519,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr
}
if err != nil {
level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err)
srv.logger.Error("Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err)
srv.rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return
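A minimal sketch, for orientation, of the logging change this file (and most of this diff) applies: go-kit level calls become direct *slog.Logger calls, and contextual fields are attached with With. The message and field values below are placeholders.

package main

import (
	"errors"

	"github.com/prometheus/common/promslog"
)

func main() {
	logger := promslog.NewNopLogger() // replaces log.NewNopLogger()
	err := errors.New("boom")

	// Before: level.Error(logger).Log("msg", "Error refreshing service list", "err", err)
	logger.Error("Error refreshing service list", "err", err)

	// Before: logger = log.With(logger, "datacenter", dc)
	logger = logger.With("datacenter", "dc1")
	logger.Debug("Watching services", "tags", "web,prod", "filter", `NodeMeta.rack_name == "2304"`)
}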

View file

@ -21,10 +21,10 @@ import (
"testing"
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
"gopkg.in/yaml.v2"
@ -252,6 +252,8 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) {
case "/v1/catalog/services?index=1&wait=120000ms":
time.Sleep(5 * time.Second)
response = ServicesTestAnswer
case "/v1/catalog/services?filter=NodeMeta.rack_name+%3D%3D+%222304%22&index=1&wait=120000ms":
response = ServicesTestAnswer
default:
t.Errorf("Unhandled consul call: %s", r.URL)
}
@ -270,7 +272,7 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) {
}
func newDiscovery(t *testing.T, config *SDConfig) *Discovery {
logger := log.NewNopLogger()
logger := promslog.NewNopLogger()
metrics := NewTestMetrics(t, config, prometheus.NewRegistry())
@ -369,6 +371,27 @@ func TestAllOptions(t *testing.T) {
<-ch
}
// Watch the test service with a specific tag and node-meta via the Filter parameter.
func TestFilterOption(t *testing.T) {
stub, config := newServer(t)
defer stub.Close()
config.Services = []string{"test"}
config.Filter = `NodeMeta.rack_name == "2304"`
config.Token = "fake-token"
d := newDiscovery(t, config)
ctx, cancel := context.WithCancel(context.Background())
ch := make(chan []*targetgroup.Group)
go func() {
d.Run(ctx, ch)
close(ch)
}()
checkOneTarget(t, <-ch)
cancel()
}
func TestGetDatacenterShouldReturnError(t *testing.T) {
for _, tc := range []struct {
handler func(http.ResponseWriter, *http.Request)
@ -407,7 +430,7 @@ func TestGetDatacenterShouldReturnError(t *testing.T) {
err = d.getDatacenter()
// An error should be returned.
require.Equal(t, tc.errMessage, err.Error())
require.EqualError(t, err, tc.errMessage)
// Should still be empty.
require.Equal(t, "", d.clientDatacenter)
}
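The assertion change above is a small hardening: require.EqualError also fails, rather than panicking on a nil error's Error method, when no error was returned. A standalone sketch:

package example

import (
	"errors"
	"testing"

	"github.com/stretchr/testify/require"
)

func TestEqualErrorSketch(t *testing.T) {
	err := errors.New("some error")
	// Old form: require.Equal(t, "some error", err.Error()), which panics if err is nil.
	// New form: checks err != nil and compares the message in one assertion.
	require.EqualError(t, err, "some error")
}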

View file

@ -16,6 +16,7 @@ package digitalocean
import (
"context"
"fmt"
"log/slog"
"net"
"net/http"
"strconv"
@ -23,7 +24,6 @@ import (
"time"
"github.com/digitalocean/godo"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -111,7 +111,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*digitaloceanMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")

View file

@ -19,9 +19,9 @@ import (
"net/url"
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery"
@ -57,7 +57,7 @@ func TestDigitalOceanSDRefresh(t *testing.T) {
defer metrics.Unregister()
defer refreshMetrics.Unregister()
d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
endpoint, err := url.Parse(sdmock.Mock.Endpoint())
require.NoError(t, err)

View file

@ -15,9 +15,9 @@ package discovery
import (
"context"
"log/slog"
"reflect"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@ -47,7 +47,7 @@ type DiscovererMetrics interface {
// DiscovererOptions provides options for a Discoverer.
type DiscovererOptions struct {
Logger log.Logger
Logger *slog.Logger
Metrics DiscovererMetrics
@ -109,7 +109,7 @@ func (c *Configs) SetDirectory(dir string) {
// UnmarshalYAML implements yaml.Unmarshaler.
func (c *Configs) UnmarshalYAML(unmarshal func(interface{}) error) error {
cfgTyp := getConfigType(configsType)
cfgTyp := reflect.StructOf(configFields)
cfgPtr := reflect.New(cfgTyp)
cfgVal := cfgPtr.Elem()
@ -124,7 +124,7 @@ func (c *Configs) UnmarshalYAML(unmarshal func(interface{}) error) error {
// MarshalYAML implements yaml.Marshaler.
func (c Configs) MarshalYAML() (interface{}, error) {
cfgTyp := getConfigType(configsType)
cfgTyp := reflect.StructOf(configFields)
cfgPtr := reflect.New(cfgTyp)
cfgVal := cfgPtr.Elem()
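A self-contained sketch of what reflect.StructOf does in the two hunks above, assuming configFields is a package-level []reflect.StructField with one entry per registered discovery mechanism; the single field below is invented for illustration.

package main

import (
	"fmt"
	"reflect"
)

func main() {
	// Stand-in for the configFields slice that discovery registration builds up.
	configFields := []reflect.StructField{
		{
			Name: "StaticConfigs",
			Type: reflect.TypeOf([]map[string]interface{}{}),
			Tag:  `yaml:"static_configs,omitempty"`,
		},
	}

	// Mirror the three lines in the hunk: build an anonymous struct type,
	// allocate a value of it, and work with the addressable element.
	cfgTyp := reflect.StructOf(configFields)
	cfgPtr := reflect.New(cfgTyp)
	cfgVal := cfgPtr.Elem()

	fmt.Println(cfgTyp.NumField(), cfgVal.CanSet())
}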

View file

@ -1,4 +1,4 @@
// Copyright 2019 The Prometheus Authors
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@ -11,25 +11,26 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package testutil
package discovery
import (
"testing"
"github.com/go-kit/log"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
)
type logger struct {
t *testing.T
}
func TestConfigsCustomUnMarshalMarshal(t *testing.T) {
input := `static_configs:
- targets:
- foo:1234
- bar:4321
`
cfg := &Configs{}
err := yaml.UnmarshalStrict([]byte(input), cfg)
require.NoError(t, err)
// NewLogger returns a gokit compatible Logger which calls t.Log.
func NewLogger(t *testing.T) log.Logger {
return logger{t: t}
}
// Log implements log.Logger.
func (t logger) Log(keyvals ...interface{}) error {
t.t.Log(keyvals...)
return nil
output, err := yaml.Marshal(cfg)
require.NoError(t, err)
require.Equal(t, input, string(output))
}

View file

@ -17,17 +17,17 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"strings"
"sync"
"time"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/miekg/dns"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
@ -111,21 +111,21 @@ type Discovery struct {
names []string
port int
qtype uint16
logger log.Logger
logger *slog.Logger
metrics *dnsMetrics
lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error)
lookupFn func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error)
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*dnsMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
}
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
qtype := dns.TypeSRV
@ -174,7 +174,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
for _, name := range d.names {
go func(n string) {
if err := d.refreshOne(ctx, n, ch); err != nil && !errors.Is(err, context.Canceled) {
level.Error(d.logger).Log("msg", "Error refreshing DNS targets", "err", err)
d.logger.Error("Error refreshing DNS targets", "err", err)
}
wg.Done()
}(name)
@ -238,7 +238,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ
// CNAME responses can occur with "Type: A" dns_sd_config requests.
continue
default:
level.Warn(d.logger).Log("msg", "Invalid record", "record", record)
d.logger.Warn("Invalid record", "record", record)
continue
}
tg.Targets = append(tg.Targets, model.LabelSet{
@ -288,7 +288,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ
// error will be generic-looking, because trying to return all the errors
// returned by the combination of all name permutations and servers is a
// nightmare.
func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
func lookupWithSearchPath(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) {
conf, err := dns.ClientConfigFromFile(resolvConf)
if err != nil {
return nil, fmt.Errorf("could not load resolv.conf: %w", err)
@ -337,14 +337,14 @@ func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Ms
// A non-viable answer is "anything else", which encompasses both various
// system-level problems (like network timeouts) and also
// valid-but-unexpected DNS responses (SERVFAIL, REFUSED, etc).
func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logger log.Logger) (*dns.Msg, error) {
func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logger *slog.Logger) (*dns.Msg, error) {
client := &dns.Client{}
for _, server := range conf.Servers {
servAddr := net.JoinHostPort(server, conf.Port)
msg, err := askServerForName(name, qtype, client, servAddr, true)
if err != nil {
level.Warn(logger).Log("msg", "DNS resolution failed", "server", server, "name", name, "err", err)
logger.Warn("DNS resolution failed", "server", server, "name", name, "err", err)
continue
}

View file

@ -16,11 +16,11 @@ package dns
import (
"context"
"fmt"
"log/slog"
"net"
"testing"
"time"
"github.com/go-kit/log"
"github.com/miekg/dns"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
@ -40,7 +40,7 @@ func TestDNS(t *testing.T) {
testCases := []struct {
name string
config SDConfig
lookup func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error)
lookup func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error)
expected []*targetgroup.Group
}{
@ -52,7 +52,7 @@ func TestDNS(t *testing.T) {
Port: 80,
Type: "A",
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) {
return nil, fmt.Errorf("some error")
},
expected: []*targetgroup.Group{},
@ -65,7 +65,7 @@ func TestDNS(t *testing.T) {
Port: 80,
Type: "A",
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.A{A: net.IPv4(192, 0, 2, 2)},
@ -97,7 +97,7 @@ func TestDNS(t *testing.T) {
Port: 80,
Type: "AAAA",
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.AAAA{AAAA: net.IPv6loopback},
@ -128,7 +128,7 @@ func TestDNS(t *testing.T) {
Type: "SRV",
RefreshInterval: model.Duration(time.Minute),
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.SRV{Port: 3306, Target: "db1.example.com."},
@ -167,7 +167,7 @@ func TestDNS(t *testing.T) {
Names: []string{"_mysql._tcp.db.example.com."},
RefreshInterval: model.Duration(time.Minute),
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.SRV{Port: 3306, Target: "db1.example.com."},
@ -198,7 +198,7 @@ func TestDNS(t *testing.T) {
Names: []string{"_mysql._tcp.db.example.com."},
RefreshInterval: model.Duration(time.Minute),
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) {
return &dns.Msg{}, nil
},
expected: []*targetgroup.Group{
@ -215,7 +215,7 @@ func TestDNS(t *testing.T) {
Port: 25,
RefreshInterval: model.Duration(time.Minute),
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
lookup: func(name string, qtype uint16, logger *slog.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.MX{Preference: 0, Mx: "smtp1.example.com."},

View file

@ -17,13 +17,13 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"net/http"
"net/url"
"strconv"
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -126,7 +126,7 @@ type Discovery struct {
}
// NewDiscovery creates a new Eureka discovery for the given role.
func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*eurekaMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")

View file

@ -19,6 +19,7 @@ import (
"errors"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"strings"
@ -26,12 +27,11 @@ import (
"time"
"github.com/fsnotify/fsnotify"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/regexp"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/discovery"
@ -175,20 +175,20 @@ type Discovery struct {
// and how many target groups they contained.
// This is used to detect deleted target groups.
lastRefresh map[string]int
logger log.Logger
logger *slog.Logger
metrics *fileMetrics
}
// NewDiscovery returns a new file discovery for the given paths.
func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
fm, ok := metrics.(*fileMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
}
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
disc := &Discovery{
@ -210,7 +210,7 @@ func (d *Discovery) listFiles() []string {
for _, p := range d.paths {
files, err := filepath.Glob(p)
if err != nil {
level.Error(d.logger).Log("msg", "Error expanding glob", "glob", p, "err", err)
d.logger.Error("Error expanding glob", "glob", p, "err", err)
continue
}
paths = append(paths, files...)
@ -231,7 +231,7 @@ func (d *Discovery) watchFiles() {
p = "./"
}
if err := d.watcher.Add(p); err != nil {
level.Error(d.logger).Log("msg", "Error adding file watch", "path", p, "err", err)
d.logger.Error("Error adding file watch", "path", p, "err", err)
}
}
}
@ -240,7 +240,7 @@ func (d *Discovery) watchFiles() {
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
watcher, err := fsnotify.NewWatcher()
if err != nil {
level.Error(d.logger).Log("msg", "Error adding file watcher", "err", err)
d.logger.Error("Error adding file watcher", "err", err)
d.metrics.fileWatcherErrorsCount.Inc()
return
}
@ -280,7 +280,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
case err := <-d.watcher.Errors:
if err != nil {
level.Error(d.logger).Log("msg", "Error watching file", "err", err)
d.logger.Error("Error watching file", "err", err)
}
}
}
@ -300,7 +300,7 @@ func (d *Discovery) deleteTimestamp(filename string) {
// stop shuts down the file watcher.
func (d *Discovery) stop() {
level.Debug(d.logger).Log("msg", "Stopping file discovery...", "paths", fmt.Sprintf("%v", d.paths))
d.logger.Debug("Stopping file discovery...", "paths", fmt.Sprintf("%v", d.paths))
done := make(chan struct{})
defer close(done)
@ -320,10 +320,10 @@ func (d *Discovery) stop() {
}
}()
if err := d.watcher.Close(); err != nil {
level.Error(d.logger).Log("msg", "Error closing file watcher", "paths", fmt.Sprintf("%v", d.paths), "err", err)
d.logger.Error("Error closing file watcher", "paths", fmt.Sprintf("%v", d.paths), "err", err)
}
level.Debug(d.logger).Log("msg", "File discovery stopped")
d.logger.Debug("File discovery stopped")
}
// refresh reads all files matching the discovery's patterns and sends the respective
@ -339,7 +339,7 @@ func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group)
if err != nil {
d.metrics.fileSDReadErrorsCount.Inc()
level.Error(d.logger).Log("msg", "Error reading file", "path", p, "err", err)
d.logger.Error("Error reading file", "path", p, "err", err)
// Prevent deletion down below.
ref[p] = d.lastRefresh[p]
continue
@ -356,7 +356,7 @@ func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group)
for f, n := range d.lastRefresh {
m, ok := ref[f]
if !ok || n > m {
level.Debug(d.logger).Log("msg", "file_sd refresh found file that should be removed", "file", f)
d.logger.Debug("file_sd refresh found file that should be removed", "file", f)
d.deleteTimestamp(f)
for i := m; i < n; i++ {
select {

View file

@ -17,12 +17,12 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net/http"
"strconv"
"strings"
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"golang.org/x/oauth2/google"
@ -129,7 +129,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*gceMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")

View file

@ -15,12 +15,12 @@ package hetzner
import (
"context"
"log/slog"
"net"
"net/http"
"strconv"
"time"
"github.com/go-kit/log"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -58,7 +58,7 @@ type hcloudDiscovery struct {
}
// newHcloudDiscovery returns a new hcloudDiscovery which periodically refreshes its targets.
func newHcloudDiscovery(conf *SDConfig, _ log.Logger) (*hcloudDiscovery, error) {
func newHcloudDiscovery(conf *SDConfig, _ *slog.Logger) (*hcloudDiscovery, error) {
d := &hcloudDiscovery{
port: conf.Port,
}

View file

@ -18,8 +18,8 @@ import (
"fmt"
"testing"
"github.com/go-kit/log"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
)
@ -43,7 +43,7 @@ func TestHCloudSDRefresh(t *testing.T) {
cfg.HTTPClientConfig.BearerToken = hcloudTestToken
cfg.hcloudEndpoint = suite.Mock.Endpoint()
d, err := newHcloudDiscovery(&cfg, log.NewNopLogger())
d, err := newHcloudDiscovery(&cfg, promslog.NewNopLogger())
require.NoError(t, err)
targetGroups, err := d.refresh(context.Background())

View file

@ -17,9 +17,9 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"time"
"github.com/go-kit/log"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@ -135,7 +135,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
m, ok := metrics.(*hetznerMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
@ -157,7 +157,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.Discovere
), nil
}
func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) {
func newRefresher(conf *SDConfig, l *slog.Logger) (refresher, error) {
switch conf.Role {
case HetznerRoleHcloud:
if conf.hcloudEndpoint == "" {

View file

@ -18,13 +18,13 @@ import (
"encoding/json"
"fmt"
"io"
"log/slog"
"net"
"net/http"
"strconv"
"strings"
"time"
"github.com/go-kit/log"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@ -51,7 +51,7 @@ type robotDiscovery struct {
}
// newRobotDiscovery returns a new robotDiscovery which periodically refreshes its targets.
func newRobotDiscovery(conf *SDConfig, _ log.Logger) (*robotDiscovery, error) {
func newRobotDiscovery(conf *SDConfig, _ *slog.Logger) (*robotDiscovery, error) {
d := &robotDiscovery{
port: conf.Port,
endpoint: conf.robotEndpoint,

View file

@ -18,9 +18,9 @@ import (
"fmt"
"testing"
"github.com/go-kit/log"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
)
@ -42,7 +42,7 @@ func TestRobotSDRefresh(t *testing.T) {
cfg.HTTPClientConfig.BasicAuth = &config.BasicAuth{Username: robotTestUsername, Password: robotTestPassword}
cfg.robotEndpoint = suite.Mock.Endpoint()
d, err := newRobotDiscovery(&cfg, log.NewNopLogger())
d, err := newRobotDiscovery(&cfg, promslog.NewNopLogger())
require.NoError(t, err)
targetGroups, err := d.refresh(context.Background())
@ -91,12 +91,11 @@ func TestRobotSDRefreshHandleError(t *testing.T) {
cfg := DefaultSDConfig
cfg.robotEndpoint = suite.Mock.Endpoint()
d, err := newRobotDiscovery(&cfg, log.NewNopLogger())
d, err := newRobotDiscovery(&cfg, promslog.NewNopLogger())
require.NoError(t, err)
targetGroups, err := d.refresh(context.Background())
require.Error(t, err)
require.Equal(t, "non 2xx status '401' response during hetzner service discovery with role robot", err.Error())
require.EqualError(t, err, "non 2xx status '401' response during hetzner service discovery with role robot")
require.Empty(t, targetGroups)
}

View file

@ -19,17 +19,18 @@ import (
"errors"
"fmt"
"io"
"log/slog"
"net/http"
"net/url"
"strconv"
"strings"
"time"
"github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/common/version"
"github.com/prometheus/prometheus/discovery"
@ -40,8 +41,8 @@ import (
var (
// DefaultSDConfig is the default HTTP SD configuration.
DefaultSDConfig = SDConfig{
RefreshInterval: model.Duration(60 * time.Second),
HTTPClientConfig: config.DefaultHTTPClientConfig,
RefreshInterval: model.Duration(60 * time.Second),
}
userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`)
@ -114,14 +115,14 @@ type Discovery struct {
}
// NewDiscovery returns a new HTTP discovery for the given config.
func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPClientOption, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger *slog.Logger, clientOpts []config.HTTPClientOption, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*httpMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
}
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
client, err := config.NewClientFromConfig(conf.HTTPClientConfig, "http", clientOpts...)

View file

@ -21,11 +21,11 @@ import (
"testing"
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery"
@ -49,7 +49,7 @@ func TestHTTPValidRefresh(t *testing.T) {
require.NoError(t, metrics.Register())
defer metrics.Unregister()
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics)
d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics)
require.NoError(t, err)
ctx := context.Background()
@ -94,7 +94,7 @@ func TestHTTPInvalidCode(t *testing.T) {
require.NoError(t, metrics.Register())
defer metrics.Unregister()
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics)
d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics)
require.NoError(t, err)
ctx := context.Background()
@ -123,7 +123,7 @@ func TestHTTPInvalidFormat(t *testing.T) {
require.NoError(t, metrics.Register())
defer metrics.Unregister()
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics)
d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics)
require.NoError(t, err)
ctx := context.Background()
@ -442,7 +442,7 @@ func TestSourceDisappeared(t *testing.T) {
require.NoError(t, metrics.Register())
defer metrics.Unregister()
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, metrics)
d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), nil, metrics)
require.NoError(t, err)
for _, test := range cases {
ctx := context.Background()

View file

@ -16,9 +16,9 @@ package ionos
import (
"errors"
"fmt"
"log/slog"
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -43,7 +43,7 @@ func init() {
type Discovery struct{}
// NewDiscovery returns a new refresh.Discovery for IONOS Cloud.
func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
m, ok := metrics.(*ionosMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")

View file

@ -16,13 +16,13 @@ package ionos
import (
"context"
"fmt"
"log/slog"
"net"
"net/http"
"strconv"
"strings"
"time"
"github.com/go-kit/log"
ionoscloud "github.com/ionos-cloud/sdk-go/v6"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -60,7 +60,7 @@ type serverDiscovery struct {
datacenterID string
}
func newServerDiscovery(conf *SDConfig, _ log.Logger) (*serverDiscovery, error) {
func newServerDiscovery(conf *SDConfig, _ *slog.Logger) (*serverDiscovery, error) {
d := &serverDiscovery{
port: conf.Port,
datacenterID: conf.DatacenterID,

View file

@ -17,13 +17,13 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
@ -33,7 +33,7 @@ import (
// Endpoints discovers new endpoint targets.
type Endpoints struct {
logger log.Logger
logger *slog.Logger
endpointsInf cache.SharedIndexInformer
serviceInf cache.SharedInformer
@ -49,9 +49,9 @@ type Endpoints struct {
}
// NewEndpoints returns a new endpoints discovery.
func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints {
func NewEndpoints(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints {
if l == nil {
l = log.NewNopLogger()
l = promslog.NewNopLogger()
}
epAddCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleAdd)
@ -92,26 +92,23 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca
},
})
if err != nil {
level.Error(l).Log("msg", "Error adding endpoints event handler.", "err", err)
l.Error("Error adding endpoints event handler.", "err", err)
}
serviceUpdate := func(o interface{}) {
svc, err := convertToService(o)
if err != nil {
level.Error(e.logger).Log("msg", "converting to Service object failed", "err", err)
e.logger.Error("converting to Service object failed", "err", err)
return
}
ep := &apiv1.Endpoints{}
ep.Namespace = svc.Namespace
ep.Name = svc.Name
obj, exists, err := e.endpointsStore.Get(ep)
obj, exists, err := e.endpointsStore.GetByKey(namespacedName(svc.Namespace, svc.Name))
if exists && err == nil {
e.enqueue(obj.(*apiv1.Endpoints))
}
if err != nil {
level.Error(e.logger).Log("msg", "retrieving endpoints failed", "err", err)
e.logger.Error("retrieving endpoints failed", "err", err)
}
}
_, err = e.serviceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
@ -131,7 +128,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca
},
})
if err != nil {
level.Error(l).Log("msg", "Error adding services event handler.", "err", err)
l.Error("Error adding services event handler.", "err", err)
}
_, err = e.podInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
UpdateFunc: func(old, cur interface{}) {
@ -154,7 +151,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca
},
})
if err != nil {
level.Error(l).Log("msg", "Error adding pods event handler.", "err", err)
l.Error("Error adding pods event handler.", "err", err)
}
if e.withNodeMetadata {
_, err = e.nodeInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
@ -167,12 +164,15 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca
e.enqueueNode(node.Name)
},
DeleteFunc: func(o interface{}) {
node := o.(*apiv1.Node)
e.enqueueNode(node.Name)
nodeName, err := nodeName(o)
if err != nil {
l.Error("Error getting Node name", "err", err)
}
e.enqueueNode(nodeName)
},
})
if err != nil {
level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err)
l.Error("Error adding nodes event handler.", "err", err)
}
}
@ -182,7 +182,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca
func (e *Endpoints) enqueueNode(nodeName string) {
endpoints, err := e.endpointsInf.GetIndexer().ByIndex(nodeIndex, nodeName)
if err != nil {
level.Error(e.logger).Log("msg", "Error getting endpoints for node", "node", nodeName, "err", err)
e.logger.Error("Error getting endpoints for node", "node", nodeName, "err", err)
return
}
@ -194,7 +194,7 @@ func (e *Endpoints) enqueueNode(nodeName string) {
func (e *Endpoints) enqueuePod(podNamespacedName string) {
endpoints, err := e.endpointsInf.GetIndexer().ByIndex(podIndex, podNamespacedName)
if err != nil {
level.Error(e.logger).Log("msg", "Error getting endpoints for pod", "pod", podNamespacedName, "err", err)
e.logger.Error("Error getting endpoints for pod", "pod", podNamespacedName, "err", err)
return
}
@ -223,7 +223,7 @@ func (e *Endpoints) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) {
if !errors.Is(ctx.Err(), context.Canceled) {
level.Error(e.logger).Log("msg", "endpoints informer unable to sync cache")
e.logger.Error("endpoints informer unable to sync cache")
}
return
}
@ -247,13 +247,13 @@ func (e *Endpoints) process(ctx context.Context, ch chan<- []*targetgroup.Group)
namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
level.Error(e.logger).Log("msg", "splitting key failed", "key", key)
e.logger.Error("splitting key failed", "key", key)
return true
}
o, exists, err := e.endpointsStore.GetByKey(key)
if err != nil {
level.Error(e.logger).Log("msg", "getting object from store failed", "key", key)
e.logger.Error("getting object from store failed", "key", key)
return true
}
if !exists {
@ -262,7 +262,7 @@ func (e *Endpoints) process(ctx context.Context, ch chan<- []*targetgroup.Group)
}
eps, err := convertToEndpoints(o)
if err != nil {
level.Error(e.logger).Log("msg", "converting to Endpoints object failed", "err", err)
e.logger.Error("converting to Endpoints object failed", "err", err)
return true
}
send(ctx, ch, e.buildEndpoints(eps))
@ -361,16 +361,19 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
target = target.Merge(podLabels(pod))
// Attach potential container port labels matching the endpoint port.
for _, c := range pod.Spec.Containers {
containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
for i, c := range containers {
for _, cport := range c.Ports {
if port.Port == cport.ContainerPort {
ports := strconv.FormatUint(uint64(port.Port), 10)
isInit := i >= len(pod.Spec.Containers)
target[podContainerNameLabel] = lv(c.Name)
target[podContainerImageLabel] = lv(c.Image)
target[podContainerPortNameLabel] = lv(cport.Name)
target[podContainerPortNumberLabel] = lv(ports)
target[podContainerPortProtocolLabel] = lv(string(port.Protocol))
target[podContainerIsInit] = lv(strconv.FormatBool(isInit))
break
}
}
@ -397,10 +400,10 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
v := eps.Labels[apiv1.EndpointsOverCapacity]
if v == "truncated" {
level.Warn(e.logger).Log("msg", "Number of endpoints in one Endpoints object exceeds 1000 and has been truncated, please use \"role: endpointslice\" instead", "endpoint", eps.Name)
e.logger.Warn("Number of endpoints in one Endpoints object exceeds 1000 and has been truncated, please use \"role: endpointslice\" instead", "endpoint", eps.Name)
}
if v == "warning" {
level.Warn(e.logger).Log("msg", "Number of endpoints in one Endpoints object exceeds 1000, please use \"role: endpointslice\" instead", "endpoint", eps.Name)
e.logger.Warn("Number of endpoints in one Endpoints object exceeds 1000, please use \"role: endpointslice\" instead", "endpoint", eps.Name)
}
// For all seen pods, check all container ports. If they were not covered
@ -411,7 +414,8 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
continue
}
for _, c := range pe.pod.Spec.Containers {
containers := append(pe.pod.Spec.Containers, pe.pod.Spec.InitContainers...)
for i, c := range containers {
for _, cport := range c.Ports {
hasSeenPort := func() bool {
for _, eport := range pe.servicePorts {
@ -428,6 +432,7 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10))
ports := strconv.FormatUint(uint64(cport.ContainerPort), 10)
isInit := i >= len(pe.pod.Spec.Containers)
target := model.LabelSet{
model.AddressLabel: lv(a),
podContainerNameLabel: lv(c.Name),
@ -435,6 +440,7 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
podContainerPortNameLabel: lv(cport.Name),
podContainerPortNumberLabel: lv(ports),
podContainerPortProtocolLabel: lv(string(cport.Protocol)),
podContainerIsInit: lv(strconv.FormatBool(isInit)),
}
tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod)))
}
@ -448,13 +454,10 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod {
if ref == nil || ref.Kind != "Pod" {
return nil
}
p := &apiv1.Pod{}
p.Namespace = ref.Namespace
p.Name = ref.Name
obj, exists, err := e.podStore.Get(p)
obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name))
if err != nil {
level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err)
e.logger.Error("resolving pod ref failed", "err", err)
return nil
}
if !exists {
@ -464,31 +467,27 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod {
}
func (e *Endpoints) addServiceLabels(ns, name string, tg *targetgroup.Group) {
svc := &apiv1.Service{}
svc.Namespace = ns
svc.Name = name
obj, exists, err := e.serviceStore.Get(svc)
obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name))
if err != nil {
level.Error(e.logger).Log("msg", "retrieving service failed", "err", err)
e.logger.Error("retrieving service failed", "err", err)
return
}
if !exists {
return
}
svc = obj.(*apiv1.Service)
svc := obj.(*apiv1.Service)
tg.Labels = tg.Labels.Merge(serviceLabels(svc))
}
func addNodeLabels(tg model.LabelSet, nodeInf cache.SharedInformer, logger log.Logger, nodeName *string) model.LabelSet {
func addNodeLabels(tg model.LabelSet, nodeInf cache.SharedInformer, logger *slog.Logger, nodeName *string) model.LabelSet {
if nodeName == nil {
return tg
}
obj, exists, err := nodeInf.GetStore().GetByKey(*nodeName)
if err != nil {
level.Error(logger).Log("msg", "Error getting node", "node", *nodeName, "err", err)
logger.Error("Error getting node", "node", *nodeName, "err", err)
return tg
}
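A compact, self-contained sketch of the sidecar handling added above: regular and init containers are scanned in one pass, and a match at an index past the regular containers is flagged as an init container. containerForPort is a name invented for this sketch.

package main

import (
	"fmt"
	"strconv"

	apiv1 "k8s.io/api/core/v1"
)

// containerForPort searches regular containers first, then init containers
// (sidecars), and reports whether the match came from an init container,
// mirroring the __meta_kubernetes_pod_container_init label above.
func containerForPort(pod *apiv1.Pod, port int32) (name, isInit string, ok bool) {
	containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
	for i, c := range containers {
		for _, cport := range c.Ports {
			if cport.ContainerPort == port {
				return c.Name, strconv.FormatBool(i >= len(pod.Spec.Containers)), true
			}
		}
	}
	return "", "", false
}

func main() {
	pod := &apiv1.Pod{}
	pod.Spec.Containers = []apiv1.Container{{Name: "c1", Ports: []apiv1.ContainerPort{{ContainerPort: 9000}}}}
	pod.Spec.InitContainers = []apiv1.Container{{Name: "ic2", Ports: []apiv1.ContainerPort{{ContainerPort: 9111}}}}

	name, isInit, _ := containerForPort(pod, 9111)
	fmt.Println(name, isInit) // ic2 true
}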

View file

@ -18,10 +18,12 @@ import (
"testing"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/cache"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
@ -244,6 +246,7 @@ func TestEndpointsDiscoveryAdd(t *testing.T) {
"__meta_kubernetes_pod_container_port_number": "9000",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_uid": "deadbeef",
"__meta_kubernetes_pod_container_init": "false",
},
{
"__address__": "1.2.3.4:9001",
@ -259,6 +262,7 @@ func TestEndpointsDiscoveryAdd(t *testing.T) {
"__meta_kubernetes_pod_container_port_number": "9001",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_uid": "deadbeef",
"__meta_kubernetes_pod_container_init": "false",
},
},
Labels: model.LabelSet{
@ -821,6 +825,7 @@ func TestEndpointsDiscoveryNamespaces(t *testing.T) {
"__meta_kubernetes_pod_container_port_number": "9000",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_uid": "deadbeef",
"__meta_kubernetes_pod_container_init": "false",
},
},
Labels: model.LabelSet{
@ -1078,6 +1083,7 @@ func TestEndpointsDiscoveryUpdatePod(t *testing.T) {
"__meta_kubernetes_pod_container_port_number": "9000",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_uid": "deadbeef",
"__meta_kubernetes_pod_container_init": "false",
},
},
Labels: model.LabelSet{
@ -1089,3 +1095,186 @@ func TestEndpointsDiscoveryUpdatePod(t *testing.T) {
},
}.Run(t)
}
func TestEndpointsDiscoverySidecarContainer(t *testing.T) {
objs := []runtime.Object{
&v1.Endpoints{
ObjectMeta: metav1.ObjectMeta{
Name: "testsidecar",
Namespace: "default",
},
Subsets: []v1.EndpointSubset{
{
Addresses: []v1.EndpointAddress{
{
IP: "4.3.2.1",
TargetRef: &v1.ObjectReference{
Kind: "Pod",
Name: "testpod",
Namespace: "default",
},
},
},
Ports: []v1.EndpointPort{
{
Name: "testport",
Port: 9000,
Protocol: v1.ProtocolTCP,
},
{
Name: "initport",
Port: 9111,
Protocol: v1.ProtocolTCP,
},
},
},
},
},
&v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "testpod",
Namespace: "default",
UID: types.UID("deadbeef"),
},
Spec: v1.PodSpec{
NodeName: "testnode",
InitContainers: []v1.Container{
{
Name: "ic1",
Image: "ic1:latest",
Ports: []v1.ContainerPort{
{
Name: "initport",
ContainerPort: 1111,
Protocol: v1.ProtocolTCP,
},
},
},
{
Name: "ic2",
Image: "ic2:latest",
Ports: []v1.ContainerPort{
{
Name: "initport",
ContainerPort: 9111,
Protocol: v1.ProtocolTCP,
},
},
},
},
Containers: []v1.Container{
{
Name: "c1",
Image: "c1:latest",
Ports: []v1.ContainerPort{
{
Name: "mainport",
ContainerPort: 9000,
Protocol: v1.ProtocolTCP,
},
},
},
},
},
Status: v1.PodStatus{
HostIP: "2.3.4.5",
PodIP: "4.3.2.1",
},
},
}
n, _ := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, objs...)
k8sDiscoveryTest{
discovery: n,
expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{
"endpoints/default/testsidecar": {
Targets: []model.LabelSet{
{
"__address__": "4.3.2.1:9000",
"__meta_kubernetes_endpoint_address_target_kind": "Pod",
"__meta_kubernetes_endpoint_address_target_name": "testpod",
"__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",
"__meta_kubernetes_pod_container_image": "c1:latest",
"__meta_kubernetes_pod_container_name": "c1",
"__meta_kubernetes_pod_container_port_name": "mainport",
"__meta_kubernetes_pod_container_port_number": "9000",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_host_ip": "2.3.4.5",
"__meta_kubernetes_pod_ip": "4.3.2.1",
"__meta_kubernetes_pod_name": "testpod",
"__meta_kubernetes_pod_node_name": "testnode",
"__meta_kubernetes_pod_phase": "",
"__meta_kubernetes_pod_ready": "unknown",
"__meta_kubernetes_pod_uid": "deadbeef",
"__meta_kubernetes_pod_container_init": "false",
},
{
"__address__": "4.3.2.1:9111",
"__meta_kubernetes_endpoint_address_target_kind": "Pod",
"__meta_kubernetes_endpoint_address_target_name": "testpod",
"__meta_kubernetes_endpoint_port_name": "initport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",
"__meta_kubernetes_pod_container_image": "ic2:latest",
"__meta_kubernetes_pod_container_name": "ic2",
"__meta_kubernetes_pod_container_port_name": "initport",
"__meta_kubernetes_pod_container_port_number": "9111",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_host_ip": "2.3.4.5",
"__meta_kubernetes_pod_ip": "4.3.2.1",
"__meta_kubernetes_pod_name": "testpod",
"__meta_kubernetes_pod_node_name": "testnode",
"__meta_kubernetes_pod_phase": "",
"__meta_kubernetes_pod_ready": "unknown",
"__meta_kubernetes_pod_uid": "deadbeef",
"__meta_kubernetes_pod_container_init": "true",
},
{
"__address__": "4.3.2.1:1111",
"__meta_kubernetes_pod_container_image": "ic1:latest",
"__meta_kubernetes_pod_container_name": "ic1",
"__meta_kubernetes_pod_container_port_name": "initport",
"__meta_kubernetes_pod_container_port_number": "1111",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_host_ip": "2.3.4.5",
"__meta_kubernetes_pod_ip": "4.3.2.1",
"__meta_kubernetes_pod_name": "testpod",
"__meta_kubernetes_pod_node_name": "testnode",
"__meta_kubernetes_pod_phase": "",
"__meta_kubernetes_pod_ready": "unknown",
"__meta_kubernetes_pod_uid": "deadbeef",
"__meta_kubernetes_pod_container_init": "true",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpoints_name": "testsidecar",
"__meta_kubernetes_namespace": "default",
},
Source: "endpoints/default/testsidecar",
},
},
}.Run(t)
}
func BenchmarkResolvePodRef(b *testing.B) {
indexer := cache.NewIndexer(cache.DeletionHandlingMetaNamespaceKeyFunc, nil)
e := &Endpoints{
podStore: indexer,
}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
p := e.resolvePodRef(&v1.ObjectReference{
Kind: "Pod",
Name: "testpod",
Namespace: "foo",
})
require.Nil(b, p)
}
}

View file

@ -17,13 +17,13 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
apiv1 "k8s.io/api/core/v1"
v1 "k8s.io/api/discovery/v1"
"k8s.io/client-go/tools/cache"
@ -35,7 +35,7 @@ import (
// EndpointSlice discovers new endpoint targets.
type EndpointSlice struct {
logger log.Logger
logger *slog.Logger
endpointSliceInf cache.SharedIndexInformer
serviceInf cache.SharedInformer
@ -51,9 +51,9 @@ type EndpointSlice struct {
}
// NewEndpointSlice returns a new endpointslice discovery.
func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice {
func NewEndpointSlice(l *slog.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice {
if l == nil {
l = log.NewNopLogger()
l = promslog.NewNopLogger()
}
epslAddCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleAdd)
@ -92,13 +92,13 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod
},
})
if err != nil {
level.Error(l).Log("msg", "Error adding endpoint slices event handler.", "err", err)
l.Error("Error adding endpoint slices event handler.", "err", err)
}
serviceUpdate := func(o interface{}) {
svc, err := convertToService(o)
if err != nil {
level.Error(e.logger).Log("msg", "converting to Service object failed", "err", err)
e.logger.Error("converting to Service object failed", "err", err)
return
}
@ -108,7 +108,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod
for _, obj := range e.endpointSliceStore.List() {
esa, err := e.getEndpointSliceAdaptor(obj)
if err != nil {
level.Error(e.logger).Log("msg", "converting to EndpointSlice object failed", "err", err)
e.logger.Error("converting to EndpointSlice object failed", "err", err)
continue
}
if lv, exists := esa.labels()[esa.labelServiceName()]; exists && lv == svc.Name {
@ -131,7 +131,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod
},
})
if err != nil {
level.Error(l).Log("msg", "Error adding services event handler.", "err", err)
l.Error("Error adding services event handler.", "err", err)
}
if e.withNodeMetadata {
@ -145,12 +145,15 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod
e.enqueueNode(node.Name)
},
DeleteFunc: func(o interface{}) {
node := o.(*apiv1.Node)
e.enqueueNode(node.Name)
nodeName, err := nodeName(o)
if err != nil {
l.Error("Error getting Node name", "err", err)
}
e.enqueueNode(nodeName)
},
})
if err != nil {
level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err)
l.Error("Error adding nodes event handler.", "err", err)
}
}
@ -160,7 +163,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod
func (e *EndpointSlice) enqueueNode(nodeName string) {
endpoints, err := e.endpointSliceInf.GetIndexer().ByIndex(nodeIndex, nodeName)
if err != nil {
level.Error(e.logger).Log("msg", "Error getting endpoints for node", "node", nodeName, "err", err)
e.logger.Error("Error getting endpoints for node", "node", nodeName, "err", err)
return
}
@ -188,7 +191,7 @@ func (e *EndpointSlice) Run(ctx context.Context, ch chan<- []*targetgroup.Group)
}
if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) {
if !errors.Is(ctx.Err(), context.Canceled) {
level.Error(e.logger).Log("msg", "endpointslice informer unable to sync cache")
e.logger.Error("endpointslice informer unable to sync cache")
}
return
}
@ -212,13 +215,13 @@ func (e *EndpointSlice) process(ctx context.Context, ch chan<- []*targetgroup.Gr
namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
level.Error(e.logger).Log("msg", "splitting key failed", "key", key)
e.logger.Error("splitting key failed", "key", key)
return true
}
o, exists, err := e.endpointSliceStore.GetByKey(key)
if err != nil {
level.Error(e.logger).Log("msg", "getting object from store failed", "key", key)
e.logger.Error("getting object from store failed", "key", key)
return true
}
if !exists {
@ -228,7 +231,7 @@ func (e *EndpointSlice) process(ctx context.Context, ch chan<- []*targetgroup.Gr
esa, err := e.getEndpointSliceAdaptor(o)
if err != nil {
level.Error(e.logger).Log("msg", "converting to EndpointSlice object failed", "err", err)
e.logger.Error("converting to EndpointSlice object failed", "err", err)
return true
}
@ -377,19 +380,23 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou
target = target.Merge(podLabels(pod))
// Attach potential container port labels matching the endpoint port.
for _, c := range pod.Spec.Containers {
containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
for i, c := range containers {
for _, cport := range c.Ports {
if port.port() == nil {
continue
}
if *port.port() == cport.ContainerPort {
ports := strconv.FormatUint(uint64(*port.port()), 10)
isInit := i >= len(pod.Spec.Containers)
target[podContainerNameLabel] = lv(c.Name)
target[podContainerImageLabel] = lv(c.Image)
target[podContainerPortNameLabel] = lv(cport.Name)
target[podContainerPortNumberLabel] = lv(ports)
target[podContainerPortProtocolLabel] = lv(string(cport.Protocol))
target[podContainerIsInit] = lv(strconv.FormatBool(isInit))
break
}
}
@ -417,7 +424,8 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou
continue
}
for _, c := range pe.pod.Spec.Containers {
containers := append(pe.pod.Spec.Containers, pe.pod.Spec.InitContainers...)
for i, c := range containers {
for _, cport := range c.Ports {
hasSeenPort := func() bool {
for _, eport := range pe.servicePorts {
@ -437,6 +445,7 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou
a := net.JoinHostPort(pe.pod.Status.PodIP, strconv.FormatUint(uint64(cport.ContainerPort), 10))
ports := strconv.FormatUint(uint64(cport.ContainerPort), 10)
isInit := i >= len(pe.pod.Spec.Containers)
target := model.LabelSet{
model.AddressLabel: lv(a),
podContainerNameLabel: lv(c.Name),
@ -444,6 +453,7 @@ func (e *EndpointSlice) buildEndpointSlice(eps endpointSliceAdaptor) *targetgrou
podContainerPortNameLabel: lv(cport.Name),
podContainerPortNumberLabel: lv(ports),
podContainerPortProtocolLabel: lv(string(cport.Protocol)),
podContainerIsInit: lv(strconv.FormatBool(isInit)),
}
tg.Targets = append(tg.Targets, target.Merge(podLabels(pe.pod)))
}
@ -457,13 +467,10 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod {
if ref == nil || ref.Kind != "Pod" {
return nil
}
p := &apiv1.Pod{}
p.Namespace = ref.Namespace
p.Name = ref.Name
obj, exists, err := e.podStore.Get(p)
obj, exists, err := e.podStore.GetByKey(namespacedName(ref.Namespace, ref.Name))
if err != nil {
level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err)
e.logger.Error("resolving pod ref failed", "err", err)
return nil
}
if !exists {
@ -474,27 +481,27 @@ func (e *EndpointSlice) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod {
func (e *EndpointSlice) addServiceLabels(esa endpointSliceAdaptor, tg *targetgroup.Group) {
var (
svc = &apiv1.Service{}
found bool
name string
)
svc.Namespace = esa.namespace()
ns := esa.namespace()
// Every EndpointSlice object has the Service it belongs to in the
// kubernetes.io/service-name label.
svc.Name, found = esa.labels()[esa.labelServiceName()]
name, found = esa.labels()[esa.labelServiceName()]
if !found {
return
}
obj, exists, err := e.serviceStore.Get(svc)
obj, exists, err := e.serviceStore.GetByKey(namespacedName(ns, name))
if err != nil {
level.Error(e.logger).Log("msg", "retrieving service failed", "err", err)
e.logger.Error("retrieving service failed", "err", err)
return
}
if !exists {
return
}
svc = obj.(*apiv1.Service)
svc := obj.(*apiv1.Service)
tg.Labels = tg.Labels.Merge(serviceLabels(svc))
}
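The store lookups above swap a throwaway object passed to Store.Get for a key-based GetByKey. A minimal sketch of the pattern, assuming namespacedName simply joins namespace and name with a slash, as the default cache key function does; the helper here is a local stand-in for the one in the kubernetes discovery package.

package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/cache"
)

// namespacedName is a stand-in for the package helper referenced in the diff.
func namespacedName(namespace, name string) string {
	return namespace + "/" + name
}

func main() {
	store := cache.NewStore(cache.DeletionHandlingMetaNamespaceKeyFunc)
	_ = store.Add(&apiv1.Service{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "testsidecar"}})

	// Key-based lookup; no temporary &apiv1.Service{} with Namespace/Name set.
	obj, exists, err := store.GetByKey(namespacedName("default", "testsidecar"))
	if err != nil || !exists {
		fmt.Println("lookup failed:", exists, err)
		return
	}
	svc := obj.(*apiv1.Service)
	fmt.Println(svc.Name)
}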

View file

@ -291,6 +291,7 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) {
"__meta_kubernetes_pod_phase": "",
"__meta_kubernetes_pod_ready": "unknown",
"__meta_kubernetes_pod_uid": "deadbeef",
"__meta_kubernetes_pod_container_init": "false",
},
{
"__address__": "1.2.3.4:9001",
@ -306,6 +307,7 @@ func TestEndpointSliceDiscoveryAdd(t *testing.T) {
"__meta_kubernetes_pod_phase": "",
"__meta_kubernetes_pod_ready": "unknown",
"__meta_kubernetes_pod_uid": "deadbeef",
"__meta_kubernetes_pod_container_init": "false",
},
},
Labels: model.LabelSet{
@ -986,6 +988,7 @@ func TestEndpointSliceDiscoveryNamespaces(t *testing.T) {
"__meta_kubernetes_pod_phase": "",
"__meta_kubernetes_pod_ready": "unknown",
"__meta_kubernetes_pod_uid": "deadbeef",
"__meta_kubernetes_pod_container_init": "false",
},
},
Labels: model.LabelSet{
@ -1199,3 +1202,165 @@ func TestEndpointSliceInfIndexersCount(t *testing.T) {
})
}
}
func TestEndpointSliceDiscoverySidecarContainer(t *testing.T) {
objs := []runtime.Object{
&v1.EndpointSlice{
ObjectMeta: metav1.ObjectMeta{
Name: "testsidecar",
Namespace: "default",
},
AddressType: v1.AddressTypeIPv4,
Ports: []v1.EndpointPort{
{
Name: strptr("testport"),
Port: int32ptr(9000),
Protocol: protocolptr(corev1.ProtocolTCP),
},
{
Name: strptr("initport"),
Port: int32ptr(9111),
Protocol: protocolptr(corev1.ProtocolTCP),
},
},
Endpoints: []v1.Endpoint{
{
Addresses: []string{"4.3.2.1"},
TargetRef: &corev1.ObjectReference{
Kind: "Pod",
Name: "testpod",
Namespace: "default",
},
},
},
},
&corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "testpod",
Namespace: "default",
UID: types.UID("deadbeef"),
},
Spec: corev1.PodSpec{
NodeName: "testnode",
InitContainers: []corev1.Container{
{
Name: "ic1",
Image: "ic1:latest",
Ports: []corev1.ContainerPort{
{
Name: "initport",
ContainerPort: 1111,
Protocol: corev1.ProtocolTCP,
},
},
},
{
Name: "ic2",
Image: "ic2:latest",
Ports: []corev1.ContainerPort{
{
Name: "initport",
ContainerPort: 9111,
Protocol: corev1.ProtocolTCP,
},
},
},
},
Containers: []corev1.Container{
{
Name: "c1",
Image: "c1:latest",
Ports: []corev1.ContainerPort{
{
Name: "mainport",
ContainerPort: 9000,
Protocol: corev1.ProtocolTCP,
},
},
},
},
},
Status: corev1.PodStatus{
HostIP: "2.3.4.5",
PodIP: "4.3.2.1",
},
},
}
n, _ := makeDiscovery(RoleEndpointSlice, NamespaceDiscovery{}, objs...)
k8sDiscoveryTest{
discovery: n,
expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{
"endpointslice/default/testsidecar": {
Targets: []model.LabelSet{
{
"__address__": "4.3.2.1:9000",
"__meta_kubernetes_endpointslice_address_target_kind": "Pod",
"__meta_kubernetes_endpointslice_address_target_name": "testpod",
"__meta_kubernetes_endpointslice_port": "9000",
"__meta_kubernetes_endpointslice_port_name": "testport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
"__meta_kubernetes_pod_container_image": "c1:latest",
"__meta_kubernetes_pod_container_name": "c1",
"__meta_kubernetes_pod_container_port_name": "mainport",
"__meta_kubernetes_pod_container_port_number": "9000",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_host_ip": "2.3.4.5",
"__meta_kubernetes_pod_ip": "4.3.2.1",
"__meta_kubernetes_pod_name": "testpod",
"__meta_kubernetes_pod_node_name": "testnode",
"__meta_kubernetes_pod_phase": "",
"__meta_kubernetes_pod_ready": "unknown",
"__meta_kubernetes_pod_uid": "deadbeef",
"__meta_kubernetes_pod_container_init": "false",
},
{
"__address__": "4.3.2.1:9111",
"__meta_kubernetes_endpointslice_address_target_kind": "Pod",
"__meta_kubernetes_endpointslice_address_target_name": "testpod",
"__meta_kubernetes_endpointslice_port": "9111",
"__meta_kubernetes_endpointslice_port_name": "initport",
"__meta_kubernetes_endpointslice_port_protocol": "TCP",
"__meta_kubernetes_pod_container_image": "ic2:latest",
"__meta_kubernetes_pod_container_name": "ic2",
"__meta_kubernetes_pod_container_port_name": "initport",
"__meta_kubernetes_pod_container_port_number": "9111",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_host_ip": "2.3.4.5",
"__meta_kubernetes_pod_ip": "4.3.2.1",
"__meta_kubernetes_pod_name": "testpod",
"__meta_kubernetes_pod_node_name": "testnode",
"__meta_kubernetes_pod_phase": "",
"__meta_kubernetes_pod_ready": "unknown",
"__meta_kubernetes_pod_uid": "deadbeef",
"__meta_kubernetes_pod_container_init": "true",
},
{
"__address__": "4.3.2.1:1111",
"__meta_kubernetes_pod_container_image": "ic1:latest",
"__meta_kubernetes_pod_container_name": "ic1",
"__meta_kubernetes_pod_container_port_name": "initport",
"__meta_kubernetes_pod_container_port_number": "1111",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_host_ip": "2.3.4.5",
"__meta_kubernetes_pod_ip": "4.3.2.1",
"__meta_kubernetes_pod_name": "testpod",
"__meta_kubernetes_pod_node_name": "testnode",
"__meta_kubernetes_pod_phase": "",
"__meta_kubernetes_pod_ready": "unknown",
"__meta_kubernetes_pod_uid": "deadbeef",
"__meta_kubernetes_pod_container_init": "true",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_endpointslice_address_type": "IPv4",
"__meta_kubernetes_endpointslice_name": "testsidecar",
"__meta_kubernetes_namespace": "default",
},
Source: "endpointslice/default/testsidecar",
},
},
}.Run(t)
}

View file

@ -17,10 +17,9 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"strings"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
v1 "k8s.io/api/networking/v1"
@ -32,14 +31,14 @@ import (
// Ingress implements discovery of Kubernetes ingress.
type Ingress struct {
logger log.Logger
logger *slog.Logger
informer cache.SharedInformer
store cache.Store
queue *workqueue.Type
}
// NewIngress returns a new ingress discovery.
func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress {
func NewIngress(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress {
ingressAddCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleAdd)
ingressUpdateCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleUpdate)
ingressDeleteCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleDelete)
@ -66,7 +65,7 @@ func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.C
},
})
if err != nil {
level.Error(l).Log("msg", "Error adding ingresses event handler.", "err", err)
l.Error("Error adding ingresses event handler.", "err", err)
}
return s
}
@ -86,7 +85,7 @@ func (i *Ingress) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
if !cache.WaitForCacheSync(ctx.Done(), i.informer.HasSynced) {
if !errors.Is(ctx.Err(), context.Canceled) {
level.Error(i.logger).Log("msg", "ingress informer unable to sync cache")
i.logger.Error("ingress informer unable to sync cache")
}
return
}
@ -127,7 +126,7 @@ func (i *Ingress) process(ctx context.Context, ch chan<- []*targetgroup.Group) b
case *v1.Ingress:
ia = newIngressAdaptorFromV1(ingress)
default:
level.Error(i.logger).Log("msg", "converting to Ingress object failed", "err",
i.logger.Error("converting to Ingress object failed", "err",
fmt.Errorf("received unexpected object: %v", o))
return true
}
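Note (editorial, not part of the diff): the same mechanical migration recurs in every discovery component below — the go-kit logger type and its level helpers are replaced by the standard library's log/slog, with github.com/prometheus/common/promslog supplying the no-op fallback. A minimal sketch of the before/after pattern; the Example component is hypothetical and only illustrates the rewrite:

// Sketch only: illustrates the go-kit/log -> log/slog rewrite applied across
// these files. The Example type is hypothetical; the call shapes mirror the
// diff (Error/Warn/Debug methods, With for contextual fields).
package example

import (
	"log/slog"

	"github.com/prometheus/common/promslog"
)

type Example struct {
	logger *slog.Logger
}

// Before: func NewExample(l log.Logger) *Example, with l = log.NewNopLogger() as fallback.
func NewExample(l *slog.Logger) *Example {
	if l == nil {
		l = promslog.NewNopLogger()
	}
	return &Example{logger: l}
}

func (e *Example) handle(err error) {
	// Before: level.Error(e.logger).Log("msg", "Error adding event handler.", "err", err)
	e.logger.Error("Error adding event handler.", "err", err)

	// Before: log.With(e.logger, "role", "node")
	_ = e.logger.With("role", "node")
}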

View file

@ -17,6 +17,7 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"os"
"reflect"
"strings"
@ -25,11 +26,10 @@ import (
"github.com/prometheus/prometheus/util/strutil"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/common/version"
apiv1 "k8s.io/api/core/v1"
disv1 "k8s.io/api/discovery/v1"
@ -260,7 +260,7 @@ type Discovery struct {
sync.RWMutex
client kubernetes.Interface
role Role
logger log.Logger
logger *slog.Logger
namespaceDiscovery *NamespaceDiscovery
discoverers []discovery.Discoverer
selectors roleSelector
@ -285,14 +285,14 @@ func (d *Discovery) getNamespaces() []string {
}
// New creates a new Kubernetes discovery for the given role.
func New(l log.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Discovery, error) {
func New(l *slog.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Discovery, error) {
m, ok := metrics.(*kubernetesMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
}
if l == nil {
l = log.NewNopLogger()
l = promslog.NewNopLogger()
}
var (
kcfg *rest.Config
@ -324,7 +324,7 @@ func New(l log.Logger, metrics discovery.DiscovererMetrics, conf *SDConfig) (*Di
ownNamespace = string(ownNamespaceContents)
}
level.Info(l).Log("msg", "Using pod service account via in-cluster config")
l.Info("Using pod service account via in-cluster config")
default:
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "kubernetes_sd")
if err != nil {
@ -446,7 +446,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
go nodeInf.Run(ctx.Done())
}
eps := NewEndpointSlice(
log.With(d.logger, "role", "endpointslice"),
d.logger.With("role", "endpointslice"),
informer,
d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
d.mustNewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled),
@ -506,7 +506,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
eps := NewEndpoints(
log.With(d.logger, "role", "endpoint"),
d.logger.With("role", "endpoint"),
d.newEndpointsByNodeInformer(elw),
d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
d.mustNewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled),
@ -540,7 +540,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
},
}
pod := NewPod(
log.With(d.logger, "role", "pod"),
d.logger.With("role", "pod"),
d.newPodsByNodeInformer(plw),
nodeInformer,
d.metrics.eventCount,
@ -564,7 +564,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
},
}
svc := NewService(
log.With(d.logger, "role", "service"),
d.logger.With("role", "service"),
d.mustNewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
d.metrics.eventCount,
)
@ -589,7 +589,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
informer = d.mustNewSharedInformer(ilw, &networkv1.Ingress{}, resyncDisabled)
ingress := NewIngress(
log.With(d.logger, "role", "ingress"),
d.logger.With("role", "ingress"),
informer,
d.metrics.eventCount,
)
@ -598,11 +598,11 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
case RoleNode:
nodeInformer := d.newNodeInformer(ctx)
node := NewNode(log.With(d.logger, "role", "node"), nodeInformer, d.metrics.eventCount)
node := NewNode(d.logger.With("role", "node"), nodeInformer, d.metrics.eventCount)
d.discoverers = append(d.discoverers, node)
go node.informer.Run(ctx.Done())
default:
level.Error(d.logger).Log("msg", "unknown Kubernetes discovery kind", "role", d.role)
d.logger.Error("unknown Kubernetes discovery kind", "role", d.role)
}
var wg sync.WaitGroup
@ -804,3 +804,13 @@ func addObjectMetaLabels(labelSet model.LabelSet, objectMeta metav1.ObjectMeta,
func namespacedName(namespace, name string) string {
return namespace + "/" + name
}
// nodeName knows how to handle the cache.DeletedFinalStateUnknown tombstone.
// It assumes the MetaNamespaceKeyFunc keyFunc is used, which uses the node name as the tombstone key.
func nodeName(o interface{}) (string, error) {
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(o)
if err != nil {
return "", err
}
return key, nil
}
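Note (editorial, not part of the diff): the helper above works because cache.DeletionHandlingMetaNamespaceKeyFunc unwraps a cache.DeletedFinalStateUnknown tombstone and returns its Key, and since Nodes are cluster-scoped the MetaNamespaceKeyFunc key is simply the node name. A self-contained sketch of both paths, assuming the k8s.io/client-go and k8s.io/api modules used elsewhere in this repository:

// Sketch only: shows why nodeName resolves a live Node and a deletion
// tombstone to the same value.
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/cache"
)

func nodeName(o interface{}) (string, error) {
	key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(o)
	if err != nil {
		return "", err
	}
	return key, nil
}

func main() {
	node := &apiv1.Node{ObjectMeta: metav1.ObjectMeta{Name: "worker-0"}}

	name, _ := nodeName(node)
	fmt.Println(name) // "worker-0": cluster-scoped objects get no namespace prefix

	tombstone := cache.DeletedFinalStateUnknown{Key: "worker-0", Obj: node}
	name, _ = nodeName(tombstone)
	fmt.Println(name) // "worker-0" again, recovered from the tombstone's Key
}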

View file

@ -20,10 +20,12 @@ import (
"testing"
"time"
"github.com/go-kit/log"
prom_testutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
apiv1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/version"
"k8s.io/apimachinery/pkg/watch"
@ -71,7 +73,7 @@ func makeDiscoveryWithVersion(role Role, nsDiscovery NamespaceDiscovery, k8sVer
d := &Discovery{
client: clientset,
logger: log.NewNopLogger(),
logger: promslog.NewNopLogger(),
role: role,
namespaceDiscovery: &nsDiscovery,
ownNamespace: "own-ns",
@ -320,3 +322,18 @@ func TestFailuresCountMetric(t *testing.T) {
})
}
}
func TestNodeName(t *testing.T) {
node := &apiv1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
},
}
name, err := nodeName(node)
require.NoError(t, err)
require.Equal(t, "foo", name)
name, err = nodeName(cache.DeletedFinalStateUnknown{Key: "bar"})
require.NoError(t, err)
require.Equal(t, "bar", name)
}

View file

@ -17,13 +17,13 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
@ -38,16 +38,16 @@ const (
// Node discovers Kubernetes nodes.
type Node struct {
logger log.Logger
logger *slog.Logger
informer cache.SharedInformer
store cache.Store
queue *workqueue.Type
}
// NewNode returns a new node discovery.
func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node {
func NewNode(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node {
if l == nil {
l = log.NewNopLogger()
l = promslog.NewNopLogger()
}
nodeAddCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleAdd)
@ -76,13 +76,13 @@ func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.Coun
},
})
if err != nil {
level.Error(l).Log("msg", "Error adding nodes event handler.", "err", err)
l.Error("Error adding nodes event handler.", "err", err)
}
return n
}
func (n *Node) enqueue(obj interface{}) {
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
key, err := nodeName(obj)
if err != nil {
return
}
@ -96,7 +96,7 @@ func (n *Node) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
if !cache.WaitForCacheSync(ctx.Done(), n.informer.HasSynced) {
if !errors.Is(ctx.Err(), context.Canceled) {
level.Error(n.logger).Log("msg", "node informer unable to sync cache")
n.logger.Error("node informer unable to sync cache")
}
return
}
@ -133,7 +133,7 @@ func (n *Node) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool
}
node, err := convertToNode(o)
if err != nil {
level.Error(n.logger).Log("msg", "converting to Node object failed", "err", err)
n.logger.Error("converting to Node object failed", "err", err)
return true
}
send(ctx, ch, n.buildNode(node))
@ -181,7 +181,7 @@ func (n *Node) buildNode(node *apiv1.Node) *targetgroup.Group {
addr, addrMap, err := nodeAddress(node)
if err != nil {
level.Warn(n.logger).Log("msg", "No node address found", "err", err)
n.logger.Warn("No node address found", "err", err)
return nil
}
addr = net.JoinHostPort(addr, strconv.FormatInt(int64(node.Status.DaemonEndpoints.KubeletEndpoint.Port), 10))

View file

@ -17,14 +17,14 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"strings"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/cache"
@ -44,14 +44,14 @@ type Pod struct {
nodeInf cache.SharedInformer
withNodeMetadata bool
store cache.Store
logger log.Logger
logger *slog.Logger
queue *workqueue.Type
}
// NewPod creates a new pod discovery.
func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod {
func NewPod(l *slog.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod {
if l == nil {
l = log.NewNopLogger()
l = promslog.NewNopLogger()
}
podAddCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleAdd)
@ -81,7 +81,7 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo
},
})
if err != nil {
level.Error(l).Log("msg", "Error adding pods event handler.", "err", err)
l.Error("Error adding pods event handler.", "err", err)
}
if p.withNodeMetadata {
@ -95,12 +95,15 @@ func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInfo
p.enqueuePodsForNode(node.Name)
},
DeleteFunc: func(o interface{}) {
node := o.(*apiv1.Node)
p.enqueuePodsForNode(node.Name)
nodeName, err := nodeName(o)
if err != nil {
l.Error("Error getting Node name", "err", err)
}
p.enqueuePodsForNode(nodeName)
},
})
if err != nil {
level.Error(l).Log("msg", "Error adding pods event handler.", "err", err)
l.Error("Error adding pods event handler.", "err", err)
}
}
@ -127,7 +130,7 @@ func (p *Pod) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) {
if !errors.Is(ctx.Err(), context.Canceled) {
level.Error(p.logger).Log("msg", "pod informer unable to sync cache")
p.logger.Error("pod informer unable to sync cache")
}
return
}
@ -164,7 +167,7 @@ func (p *Pod) process(ctx context.Context, ch chan<- []*targetgroup.Group) bool
}
pod, err := convertToPod(o)
if err != nil {
level.Error(p.logger).Log("msg", "converting to Pod object failed", "err", err)
p.logger.Error("converting to Pod object failed", "err", err)
return true
}
send(ctx, ch, p.buildPod(pod))
@ -246,7 +249,7 @@ func (p *Pod) findPodContainerStatus(statuses *[]apiv1.ContainerStatus, containe
func (p *Pod) findPodContainerID(statuses *[]apiv1.ContainerStatus, containerName string) string {
cStatus, err := p.findPodContainerStatus(statuses, containerName)
if err != nil {
level.Debug(p.logger).Log("msg", "cannot find container ID", "err", err)
p.logger.Debug("cannot find container ID", "err", err)
return ""
}
return cStatus.ContainerID
@ -315,7 +318,7 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group {
func (p *Pod) enqueuePodsForNode(nodeName string) {
pods, err := p.podInf.GetIndexer().ByIndex(nodeIndex, nodeName)
if err != nil {
level.Error(p.logger).Log("msg", "Error getting pods for node", "node", nodeName, "err", err)
p.logger.Error("Error getting pods for node", "node", nodeName, "err", err)
return
}

View file

@ -17,13 +17,13 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
@ -33,16 +33,16 @@ import (
// Service implements discovery of Kubernetes services.
type Service struct {
logger log.Logger
logger *slog.Logger
informer cache.SharedInformer
store cache.Store
queue *workqueue.Type
}
// NewService returns a new service discovery.
func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service {
func NewService(l *slog.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service {
if l == nil {
l = log.NewNopLogger()
l = promslog.NewNopLogger()
}
svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd)
@ -71,7 +71,7 @@ func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.C
},
})
if err != nil {
level.Error(l).Log("msg", "Error adding services event handler.", "err", err)
l.Error("Error adding services event handler.", "err", err)
}
return s
}
@ -91,7 +91,7 @@ func (s *Service) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
if !cache.WaitForCacheSync(ctx.Done(), s.informer.HasSynced) {
if !errors.Is(ctx.Err(), context.Canceled) {
level.Error(s.logger).Log("msg", "service informer unable to sync cache")
s.logger.Error("service informer unable to sync cache")
}
return
}
@ -128,7 +128,7 @@ func (s *Service) process(ctx context.Context, ch chan<- []*targetgroup.Group) b
}
eps, err := convertToService(o)
if err != nil {
level.Error(s.logger).Log("msg", "converting to Service object failed", "err", err)
s.logger.Error("converting to Service object failed", "err", err)
return true
}
send(ctx, ch, s.buildService(eps))

View file

@ -17,13 +17,13 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"net/http"
"strconv"
"strings"
"time"
"github.com/go-kit/log"
"github.com/linode/linodego"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@ -138,7 +138,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*linodeMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")

View file

@ -19,10 +19,10 @@ import (
"net/url"
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery"
@ -238,7 +238,7 @@ func TestLinodeSDRefresh(t *testing.T) {
defer metrics.Unregister()
defer refreshMetrics.Unregister()
d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
endpoint, err := url.Parse(sdmock.Endpoint())
require.NoError(t, err)

View file

@ -16,14 +16,14 @@ package discovery
import (
"context"
"fmt"
"log/slog"
"reflect"
"sync"
"time"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
@ -81,9 +81,9 @@ func CreateAndRegisterSDMetrics(reg prometheus.Registerer) (map[string]Discovere
}
// NewManager is the Discovery Manager constructor.
func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, sdMetrics map[string]DiscovererMetrics, options ...func(*Manager)) *Manager {
func NewManager(ctx context.Context, logger *slog.Logger, registerer prometheus.Registerer, sdMetrics map[string]DiscovererMetrics, options ...func(*Manager)) *Manager {
if logger == nil {
logger = log.NewNopLogger()
logger = promslog.NewNopLogger()
}
mgr := &Manager{
logger: logger,
@ -104,7 +104,7 @@ func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Re
if metrics, err := NewManagerMetrics(registerer, mgr.name); err == nil {
mgr.metrics = metrics
} else {
level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err)
logger.Error("Failed to create discovery manager metrics", "manager", mgr.name, "err", err)
return nil
}
@ -141,7 +141,7 @@ func HTTPClientOptions(opts ...config.HTTPClientOption) func(*Manager) {
// Manager maintains a set of discovery providers and sends each update to a map channel.
// Targets are grouped by the target set name.
type Manager struct {
logger log.Logger
logger *slog.Logger
name string
httpOpts []config.HTTPClientOption
mtx sync.RWMutex
@ -294,7 +294,7 @@ func (m *Manager) StartCustomProvider(ctx context.Context, name string, worker D
}
func (m *Manager) startProvider(ctx context.Context, p *Provider) {
level.Debug(m.logger).Log("msg", "Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs))
m.logger.Debug("Starting provider", "provider", p.name, "subs", fmt.Sprintf("%v", p.subs))
ctx, cancel := context.WithCancel(ctx)
updates := make(chan []*targetgroup.Group)
@ -328,7 +328,7 @@ func (m *Manager) updater(ctx context.Context, p *Provider, updates chan []*targ
case tgs, ok := <-updates:
m.metrics.ReceivedUpdates.Inc()
if !ok {
level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name)
m.logger.Debug("Discoverer channel closed", "provider", p.name)
// Wait for provider cancellation to ensure targets are cleaned up when expected.
<-ctx.Done()
return
@ -364,7 +364,7 @@ func (m *Manager) sender() {
case m.syncCh <- m.allGroups():
default:
m.metrics.DelayedUpdates.Inc()
level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle")
m.logger.Debug("Discovery receiver's channel was full so will retry the next cycle")
select {
case m.triggerSend <- struct{}{}:
default:
@ -458,12 +458,12 @@ func (m *Manager) registerProviders(cfgs Configs, setName string) int {
}
typ := cfg.Name()
d, err := cfg.NewDiscoverer(DiscovererOptions{
Logger: log.With(m.logger, "discovery", typ, "config", setName),
Logger: m.logger.With("discovery", typ, "config", setName),
HTTPClientOptions: m.httpOpts,
Metrics: m.sdMetrics[typ],
})
if err != nil {
level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName)
m.logger.Error("Cannot create service discovery", "err", err, "type", typ, "config", setName)
failed++
return
}
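Note (editorial, not part of the diff): for downstream users of the discovery Manager, the constructor now expects a *slog.Logger; passing nil falls back to promslog.NewNopLogger() as shown above. A hedged sketch of the new wiring, assuming the exported helpers visible in this diff (CreateAndRegisterSDMetrics, NewManager, Run); lifecycle and error handling are simplified:

// Sketch only: one plausible way to construct the discovery Manager after the
// slog migration.
package main

import (
	"context"
	"log/slog"
	"os"

	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery"
)

func main() {
	// Any *slog.Logger works; a nil logger selects promslog.NewNopLogger().
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))

	reg := prometheus.NewRegistry()
	sdMetrics, err := discovery.CreateAndRegisterSDMetrics(reg)
	if err != nil {
		logger.Error("failed to register SD metrics", "err", err)
		os.Exit(1)
	}

	// NewManager returns nil if its own metrics cannot be registered.
	mgr := discovery.NewManager(context.Background(), logger, reg, sdMetrics)
	if mgr == nil {
		logger.Error("failed to create discovery manager")
		os.Exit(1)
	}
	go func() { _ = mgr.Run() }()
}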

View file

@ -22,10 +22,10 @@ import (
"testing"
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
client_testutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery/targetgroup"
@ -675,7 +675,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
@ -791,7 +791,7 @@ func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -828,7 +828,7 @@ func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -868,7 +868,7 @@ func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testi
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -911,7 +911,7 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -979,7 +979,7 @@ func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -1023,7 +1023,7 @@ func TestDiscovererConfigs(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -1060,7 +1060,7 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -1141,7 +1141,7 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -1202,7 +1202,7 @@ func TestGaugeFailedConfigs(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -1454,7 +1454,7 @@ func TestTargetSetTargetGroupsUpdateDuringApplyConfig(t *testing.T) {
reg := prometheus.NewRegistry()
_, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -1551,7 +1551,7 @@ func TestUnregisterMetrics(t *testing.T) {
refreshMetrics, sdMetrics := NewTestMetrics(t, reg)
discoveryManager := NewManager(ctx, log.NewNopLogger(), reg, sdMetrics)
discoveryManager := NewManager(ctx, promslog.NewNopLogger(), reg, sdMetrics)
// discoveryManager will be nil if there was an error configuring metrics.
require.NotNil(t, discoveryManager)
// Unregister all metrics.

View file

@ -19,6 +19,7 @@ import (
"errors"
"fmt"
"io"
"log/slog"
"math/rand"
"net"
"net/http"
@ -27,7 +28,6 @@ import (
"strings"
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -140,7 +140,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Marathon Discovery.
func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(conf SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*marathonMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")

View file

@ -16,6 +16,7 @@ package moby
import (
"context"
"fmt"
"log/slog"
"net"
"net/http"
"net/url"
@ -28,7 +29,6 @@ import (
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/network"
"github.com/docker/docker/client"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -128,7 +128,7 @@ type DockerDiscovery struct {
}
// NewDockerDiscovery returns a new DockerDiscovery which periodically refreshes its targets.
func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*DockerDiscovery, error) {
func NewDockerDiscovery(conf *DockerSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*DockerDiscovery, error) {
m, ok := metrics.(*dockerMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")

View file

@ -19,9 +19,9 @@ import (
"sort"
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@ -48,7 +48,7 @@ host: %s
defer metrics.Unregister()
defer refreshMetrics.Unregister()
d, err := NewDockerDiscovery(&cfg, log.NewNopLogger(), metrics)
d, err := NewDockerDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()
@ -226,7 +226,7 @@ host: %s
require.NoError(t, metrics.Register())
defer metrics.Unregister()
defer refreshMetrics.Unregister()
d, err := NewDockerDiscovery(&cfg, log.NewNopLogger(), metrics)
d, err := NewDockerDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()

View file

@ -16,13 +16,13 @@ package moby
import (
"context"
"fmt"
"log/slog"
"net/http"
"net/url"
"time"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/client"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -125,7 +125,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(conf *DockerSwarmSDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*dockerswarmMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")

View file

@ -98,7 +98,7 @@ func (m *SDMock) SetupHandlers() {
if len(query) == 2 {
h := sha1.New()
h.Write([]byte(query[1]))
// Avoing long filenames for Windows.
// Avoiding long filenames for Windows.
f += "__" + base64.URLEncoding.EncodeToString(h.Sum(nil))[:10]
}
}

View file

@ -18,9 +18,9 @@ import (
"fmt"
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@ -48,7 +48,7 @@ host: %s
defer metrics.Unregister()
defer refreshMetrics.Unregister()
d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()

View file

@ -18,9 +18,9 @@ import (
"fmt"
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@ -48,7 +48,7 @@ host: %s
defer metrics.Unregister()
defer refreshMetrics.Unregister()
d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()
@ -349,7 +349,7 @@ filters:
defer metrics.Unregister()
defer refreshMetrics.Unregister()
d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()

View file

@ -18,9 +18,9 @@ import (
"fmt"
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@ -48,7 +48,7 @@ host: %s
defer metrics.Unregister()
defer refreshMetrics.Unregister()
d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
ctx := context.Background()

View file

@ -17,12 +17,12 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"strconv"
"strings"
"time"
"github.com/go-kit/log"
nomad "github.com/hashicorp/nomad/api"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@ -121,7 +121,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
m, ok := metrics.(*nomadMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")

View file

@ -21,9 +21,9 @@ import (
"net/url"
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery"
@ -160,7 +160,7 @@ func TestNomadSDRefresh(t *testing.T) {
defer metrics.Unregister()
defer refreshMetrics.Unregister()
d, err := NewDiscovery(&cfg, log.NewNopLogger(), metrics)
d, err := NewDiscovery(&cfg, promslog.NewNopLogger(), metrics)
require.NoError(t, err)
tgs, err := d.refresh(context.Background())

View file

@ -16,10 +16,10 @@ package openstack
import (
"context"
"fmt"
"log/slog"
"net"
"strconv"
"github.com/go-kit/log"
"github.com/gophercloud/gophercloud"
"github.com/gophercloud/gophercloud/openstack"
"github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/hypervisors"
@ -43,14 +43,14 @@ type HypervisorDiscovery struct {
provider *gophercloud.ProviderClient
authOpts *gophercloud.AuthOptions
region string
logger log.Logger
logger *slog.Logger
port int
availability gophercloud.Availability
}
// newHypervisorDiscovery returns a new hypervisor discovery.
func newHypervisorDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions,
port int, region string, availability gophercloud.Availability, l log.Logger,
port int, region string, availability gophercloud.Availability, l *slog.Logger,
) *HypervisorDiscovery {
return &HypervisorDiscovery{
provider: provider, authOpts: opts,

View file

@ -93,6 +93,5 @@ func TestOpenstackSDHypervisorRefreshWithDoneContext(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
cancel()
_, err := hypervisor.refresh(ctx)
require.Error(t, err)
require.Contains(t, err.Error(), context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled)
require.ErrorContains(t, err, context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled)
}

View file

@ -16,17 +16,17 @@ package openstack
import (
"context"
"fmt"
"log/slog"
"net"
"strconv"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/gophercloud/gophercloud"
"github.com/gophercloud/gophercloud/openstack"
"github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/floatingips"
"github.com/gophercloud/gophercloud/openstack/compute/v2/servers"
"github.com/gophercloud/gophercloud/pagination"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
@ -52,7 +52,7 @@ type InstanceDiscovery struct {
provider *gophercloud.ProviderClient
authOpts *gophercloud.AuthOptions
region string
logger log.Logger
logger *slog.Logger
port int
allTenants bool
availability gophercloud.Availability
@ -60,10 +60,10 @@ type InstanceDiscovery struct {
// NewInstanceDiscovery returns a new instance discovery.
func newInstanceDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions,
port int, region string, allTenants bool, availability gophercloud.Availability, l log.Logger,
port int, region string, allTenants bool, availability gophercloud.Availability, l *slog.Logger,
) *InstanceDiscovery {
if l == nil {
l = log.NewNopLogger()
l = promslog.NewNopLogger()
}
return &InstanceDiscovery{
provider: provider, authOpts: opts,
@ -134,7 +134,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group,
for _, s := range instanceList {
if len(s.Addresses) == 0 {
level.Info(i.logger).Log("msg", "Got no IP address", "instance", s.ID)
i.logger.Info("Got no IP address", "instance", s.ID)
continue
}
@ -151,7 +151,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group,
if !nameOk {
flavorID, idOk := s.Flavor["id"].(string)
if !idOk {
level.Warn(i.logger).Log("msg", "Invalid type for both flavor original_name and flavor id, expected string")
i.logger.Warn("Invalid type for both flavor original_name and flavor id, expected string")
continue
}
labels[openstackLabelInstanceFlavor] = model.LabelValue(flavorID)
@ -171,22 +171,22 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group,
for pool, address := range s.Addresses {
md, ok := address.([]interface{})
if !ok {
level.Warn(i.logger).Log("msg", "Invalid type for address, expected array")
i.logger.Warn("Invalid type for address, expected array")
continue
}
if len(md) == 0 {
level.Debug(i.logger).Log("msg", "Got no IP address", "instance", s.ID)
i.logger.Debug("Got no IP address", "instance", s.ID)
continue
}
for _, address := range md {
md1, ok := address.(map[string]interface{})
if !ok {
level.Warn(i.logger).Log("msg", "Invalid type for address, expected dict")
i.logger.Warn("Invalid type for address, expected dict")
continue
}
addr, ok := md1["addr"].(string)
if !ok {
level.Warn(i.logger).Log("msg", "Invalid type for address, expected string")
i.logger.Warn("Invalid type for address, expected string")
continue
}
if _, ok := floatingIPPresent[addr]; ok {

View file

@ -134,6 +134,5 @@ func TestOpenstackSDInstanceRefreshWithDoneContext(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
cancel()
_, err := hypervisor.refresh(ctx)
require.Error(t, err)
require.Contains(t, err.Error(), context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled)
require.ErrorContains(t, err, context.Canceled.Error(), "%q doesn't contain %q", err, context.Canceled)
}

View file

@ -17,10 +17,10 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net/http"
"time"
"github.com/go-kit/log"
"github.com/gophercloud/gophercloud"
"github.com/gophercloud/gophercloud/openstack"
"github.com/mwitkow/go-conntrack"
@ -142,7 +142,7 @@ type refresher interface {
}
// NewDiscovery returns a new OpenStack Discoverer which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, l log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
func NewDiscovery(conf *SDConfig, l *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
m, ok := metrics.(*openstackMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")
@ -163,7 +163,7 @@ func NewDiscovery(conf *SDConfig, l log.Logger, metrics discovery.DiscovererMetr
), nil
}
func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) {
func newRefresher(conf *SDConfig, l *slog.Logger) (refresher, error) {
var opts gophercloud.AuthOptions
if conf.IdentityEndpoint == "" {
var err error

View file

@ -16,13 +16,12 @@ package ovhcloud
import (
"context"
"fmt"
"log/slog"
"net/netip"
"net/url"
"path"
"strconv"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/ovh/go-ovh/ovh"
"github.com/prometheus/common/model"
@ -55,10 +54,10 @@ type dedicatedServer struct {
type dedicatedServerDiscovery struct {
*refresh.Discovery
config *SDConfig
logger log.Logger
logger *slog.Logger
}
func newDedicatedServerDiscovery(conf *SDConfig, logger log.Logger) *dedicatedServerDiscovery {
func newDedicatedServerDiscovery(conf *SDConfig, logger *slog.Logger) *dedicatedServerDiscovery {
return &dedicatedServerDiscovery{config: conf, logger: logger}
}
@ -115,10 +114,7 @@ func (d *dedicatedServerDiscovery) refresh(context.Context) ([]*targetgroup.Grou
for _, dedicatedServerName := range dedicatedServerList {
dedicatedServer, err := getDedicatedServerDetails(client, dedicatedServerName)
if err != nil {
err := level.Warn(d.logger).Log("msg", fmt.Sprintf("%s: Could not get details of %s", d.getSource(), dedicatedServerName), "err", err.Error())
if err != nil {
return nil, err
}
d.logger.Warn(fmt.Sprintf("%s: Could not get details of %s", d.getSource(), dedicatedServerName), "err", err.Error())
continue
}
dedicatedServerDetailedList = append(dedicatedServerDetailedList, *dedicatedServer)

View file

@ -21,8 +21,8 @@ import (
"os"
"testing"
"github.com/go-kit/log"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
)
@ -41,7 +41,7 @@ application_secret: %s
consumer_key: %s`, mock.URL, ovhcloudApplicationKeyTest, ovhcloudApplicationSecretTest, ovhcloudConsumerKeyTest)
require.NoError(t, yaml.UnmarshalStrict([]byte(cfgString), &cfg))
d, err := newRefresher(&cfg, log.NewNopLogger())
d, err := newRefresher(&cfg, promslog.NewNopLogger())
require.NoError(t, err)
ctx := context.Background()
targetGroups, err := d.refresh(ctx)

View file

@ -17,10 +17,10 @@ import (
"context"
"errors"
"fmt"
"log/slog"
"net/netip"
"time"
"github.com/go-kit/log"
"github.com/ovh/go-ovh/ovh"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
@ -137,7 +137,7 @@ func parseIPList(ipList []string) ([]netip.Addr, error) {
return ipAddresses, nil
}
func newRefresher(conf *SDConfig, logger log.Logger) (refresher, error) {
func newRefresher(conf *SDConfig, logger *slog.Logger) (refresher, error) {
switch conf.Service {
case "vps":
return newVpsDiscovery(conf, logger), nil
@ -148,7 +148,7 @@ func newRefresher(conf *SDConfig, logger log.Logger) (refresher, error) {
}
// NewDiscovery returns a new OVHcloud Discoverer which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
func NewDiscovery(conf *SDConfig, logger *slog.Logger, metrics discovery.DiscovererMetrics) (*refresh.Discovery, error) {
m, ok := metrics.(*ovhcloudMetrics)
if !ok {
return nil, fmt.Errorf("invalid discovery metrics type")

View file

@ -20,11 +20,11 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/util/testutil"
)
var (
@ -121,7 +121,7 @@ func TestParseIPs(t *testing.T) {
func TestDiscoverer(t *testing.T) {
conf, _ := getMockConf("vps")
logger := testutil.NewLogger(t)
logger := promslog.NewNopLogger()
reg := prometheus.NewRegistry()
refreshMetrics := discovery.NewRefreshMetrics(reg)

View file

@ -16,13 +16,12 @@ package ovhcloud
import (
"context"
"fmt"
"log/slog"
"net/netip"
"net/url"
"path"
"strconv"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/ovh/go-ovh/ovh"
"github.com/prometheus/common/model"
@ -68,10 +67,10 @@ type virtualPrivateServer struct {
type vpsDiscovery struct {
*refresh.Discovery
config *SDConfig
logger log.Logger
logger *slog.Logger
}
func newVpsDiscovery(conf *SDConfig, logger log.Logger) *vpsDiscovery {
func newVpsDiscovery(conf *SDConfig, logger *slog.Logger) *vpsDiscovery {
return &vpsDiscovery{config: conf, logger: logger}
}
@ -133,10 +132,7 @@ func (d *vpsDiscovery) refresh(context.Context) ([]*targetgroup.Group, error) {
for _, vpsName := range vpsList {
vpsDetailed, err := getVpsDetails(client, vpsName)
if err != nil {
err := level.Warn(d.logger).Log("msg", fmt.Sprintf("%s: Could not get details of %s", d.getSource(), vpsName), "err", err.Error())
if err != nil {
return nil, err
}
d.logger.Warn(fmt.Sprintf("%s: Could not get details of %s", d.getSource(), vpsName), "err", err.Error())
continue
}
vpsDetailedList = append(vpsDetailedList, *vpsDetailed)

View file

@ -23,8 +23,8 @@ import (
yaml "gopkg.in/yaml.v2"
"github.com/go-kit/log"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
)
@ -43,7 +43,7 @@ consumer_key: %s`, mock.URL, ovhcloudApplicationKeyTest, ovhcloudApplicationSecr
require.NoError(t, yaml.UnmarshalStrict([]byte(cfgString), &cfg))
d, err := newRefresher(&cfg, log.NewNopLogger())
d, err := newRefresher(&cfg, promslog.NewNopLogger())
require.NoError(t, err)
ctx := context.Background()
targetGroups, err := d.refresh(ctx)

Some files were not shown because too many files have changed in this diff.