Merge branch 'main' into remote-write-1.1

Callum Styan 2023-12-15 10:32:54 -08:00
commit b36ea85700
337 changed files with 11800 additions and 6402 deletions

.github/CODEOWNERS

@ -6,3 +6,5 @@
/tsdb @jesusvazquez
/promql @roidelapluie
/cmd/promtool @dgl
/documentation/prometheus-mixin @metalmatze


@ -1,4 +1,8 @@
<!--
Please give your PR a title in the form "area: short description". For example "tsdb: reduce disk usage by 95%"
If your PR is to fix an issue, put "Fixes #issue-number" in the description.
Don't forget!
- Please sign CNCF's Developer Certificate of Origin and sign-off your commits by adding the -s / --signoff flag to `git commit`. See https://github.com/apps/dco for more information.
@ -7,8 +11,6 @@
- Where possible use only exported APIs for tests to simplify the review and make it as close as possible to an actual library usage.
- No tests are needed for internal implementation changes.
- Performance improvements would need a benchmark test to prove it.
- All exposed objects should have a comment.


@ -12,8 +12,8 @@ jobs:
name: lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
- uses: bufbuild/buf-setup-action@eb60cd0de4f14f1f57cf346916b8cd69a9e7ed0b # v1.26.1
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: bufbuild/buf-setup-action@382440cdb8ec7bc25a68d7b4711163d95f7cc3aa # v1.28.1
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: bufbuild/buf-lint-action@bd48f53224baaaf0fc55de9a913e7680ca6dbea4 # v1.0.3


@ -12,8 +12,8 @@ jobs:
runs-on: ubuntu-latest
if: github.repository_owner == 'prometheus'
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
- uses: bufbuild/buf-setup-action@eb60cd0de4f14f1f57cf346916b8cd69a9e7ed0b # v1.26.1
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: bufbuild/buf-setup-action@382440cdb8ec7bc25a68d7b4711163d95f7cc3aa # v1.28.1
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: bufbuild/buf-lint-action@bd48f53224baaaf0fc55de9a913e7680ca6dbea4 # v1.0.3


@ -13,7 +13,7 @@ jobs:
container:
image: quay.io/prometheus/golang-builder:1.21-base
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: ./.github/promci/actions/setup_environment
- run: make GO_ONLY=1 SKIP_GOLANGCI_LINT=1
@ -35,7 +35,7 @@ jobs:
image: quay.io/prometheus/golang-builder:1.21-base
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: ./.github/promci/actions/setup_environment
with:
@ -52,10 +52,10 @@ jobs:
name: Go tests on Windows
runs-on: windows-latest
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0
with:
go-version: '>=1.21 <1.22'
go-version: 1.21.x
- run: |
$TestTargets = go list ./... | Where-Object { $_ -NotMatch "(github.com/prometheus/prometheus/discovery.*|github.com/prometheus/prometheus/config|github.com/prometheus/prometheus/web)"}
go test $TestTargets -vet=off -v
@ -68,7 +68,7 @@ jobs:
container:
image: quay.io/prometheus/golang-builder:1.20-base
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: make build
- run: go test ./tsdb/...
- run: go test ./tsdb/ -test.tsdb-isolation=false
@ -81,7 +81,7 @@ jobs:
container:
image: quay.io/prometheus/golang-builder:1.20-base
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: go install ./cmd/promtool/.
- run: go install github.com/google/go-jsonnet/cmd/jsonnet@latest
- run: go install github.com/google/go-jsonnet/cmd/jsonnetfmt@latest
@ -104,7 +104,7 @@ jobs:
matrix:
thread: [ 0, 1, 2 ]
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: ./.github/promci/actions/build
with:
@ -127,7 +127,7 @@ jobs:
# Whenever the Go version is updated here, .promu.yml
# should also be updated.
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: ./.github/promci/actions/build
with:
@ -138,12 +138,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- name: Install Go
uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0
with:
cache: false
go-version: 1.20.x
go-version: 1.21.x
- name: Install snmp_exporter/generator dependencies
run: sudo apt-get update && sudo apt-get -y install libsnmp-dev
if: github.repository == 'prometheus/snmp_exporter'
@ -151,7 +151,8 @@ jobs:
uses: golangci/golangci-lint-action@3a919529898de77ec3da873e3063ca4b10e7f5cc # v3.7.0
with:
args: --verbose
version: v1.54.2
# Make sure to sync this with Makefile.common and scripts/golangci-lint.yml.
version: v1.55.2
fuzzing:
uses: ./.github/workflows/fuzzing.yml
if: github.event_name == 'pull_request'
@ -164,7 +165,7 @@ jobs:
needs: [test_ui, test_go, test_windows, golangci, codeql, build_all]
if: github.event_name == 'push' && github.event.ref == 'refs/heads/main'
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: ./.github/promci/actions/publish_main
with:
@ -178,7 +179,7 @@ jobs:
needs: [test_ui, test_go, test_windows, golangci, codeql, build_all]
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v2.')
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- uses: ./.github/promci/actions/publish_release
with:
@ -193,7 +194,7 @@ jobs:
needs: [test_ui, codeql]
steps:
- name: Checkout
uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: prometheus/promci@3cb0c3871f223bd5ce1226995bd52ffb314798b6 # v0.1.0
- name: Install nodejs
uses: actions/setup-node@5e21ff4d9bc1a8cf6de233a3057d20ec6b3fb69d # v3.8.1


@ -24,18 +24,18 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- uses: actions/setup-go@93397bea11091df50f3d7e59dc26a7711a8bcfbe # v4.1.0
with:
go-version: '>=1.21 <1.22'
go-version: 1.21.x
- name: Initialize CodeQL
uses: github/codeql-action/init@00e563ead9f72a8461b24876bee2d0c2e8bd2ee8 # v2.21.5
uses: github/codeql-action/init@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8
with:
languages: ${{ matrix.language }}
- name: Autobuild
uses: github/codeql-action/autobuild@00e563ead9f72a8461b24876bee2d0c2e8bd2ee8 # v2.21.5
uses: github/codeql-action/autobuild@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@00e563ead9f72a8461b24876bee2d0c2e8bd2ee8 # v2.21.5
uses: github/codeql-action/analyze@407ffafae6a767df3e0230c3df91b6443ae8df75 # v2.22.8


@ -16,7 +16,7 @@ jobs:
runs-on: ubuntu-latest
if: github.repository_owner == 'prometheus'
steps:
- uses: dessant/lock-threads@be8aa5be94131386884a6da4189effda9b14aa21 # v4.0.1
- uses: dessant/lock-threads@1bf7ec25051fe7c00bdd17e6a7cf3d7bfb7dc771 # v5.0.1
with:
process-only: 'issues'
issue-inactive-days: '180'


@ -13,7 +13,7 @@ jobs:
container:
image: quay.io/prometheus/golang-builder
steps:
- uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
- run: ./scripts/sync_repo_files.sh
env:
GITHUB_TOKEN: ${{ secrets.PROMBOT_GITHUB_TOKEN }}


@ -21,12 +21,12 @@ jobs:
steps:
- name: "Checkout code"
uses: actions/checkout@a12a3943b4bdde767164f792f33f40b04645d846 # tag=v3.0.0
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # tag=v4.1.1
with:
persist-credentials: false
- name: "Run analysis"
uses: ossf/scorecard-action@08b4669551908b1024bb425080c797723083c031 # tag=v2.2.0
uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # tag=v2.3.1
with:
results_file: results.sarif
results_format: sarif
@ -45,6 +45,6 @@ jobs:
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@5f532563584d71fdef14ee64d17bafb34f751ce5 # tag=v1.0.26
uses: github/codeql-action/upload-sarif@407ffafae6a767df3e0230c3df91b6443ae8df75 # tag=v2.22.8
with:
sarif_file: results.sarif


@ -6,6 +6,7 @@ run:
skip-dirs:
# Copied it from a different source
- storage/remote/otlptranslator/prometheusremotewrite
- storage/remote/otlptranslator/prometheus
output:
sort-results: true
@ -15,11 +16,14 @@ linters:
- depguard
- errorlint
- gocritic
- godot
- gofumpt
- goimports
- misspell
- nolintlint
- predeclared
- revive
- testifylint
- unconvert
- unused
@ -32,24 +36,12 @@ issues:
- path: _test.go
linters:
- errcheck
- path: discovery/
linters:
- errorlint
- path: scrape/
linters:
- errorlint
- path: storage/
linters:
- errorlint
- path: tsdb/
linters:
- errorlint
- path: util/
linters:
- errorlint
- path: web/
- path: "tsdb/head_wal.go"
linters:
- errorlint
- linters:
- godot
source: "^// ==="
linters-settings:
depguard:
@ -66,6 +58,8 @@ linters-settings:
desc: "Use corresponding 'os' or 'io' functions instead."
- pkg: "regexp"
desc: "Use github.com/grafana/regexp instead of regexp"
- pkg: "github.com/pkg/errors"
desc: "Use 'errors' or 'fmt' instead of github.com/pkg/errors"
errcheck:
exclude-functions:
# Don't flag lines such as "io.Copy(io.Discard, resp.Body)".
@ -84,8 +78,55 @@ linters-settings:
gofumpt:
extra-rules: true
revive:
# By default, revive will enable only the linting rules that are named in the configuration file.
# So, it's needed to explicitly set in configuration all required rules.
# The following configuration enables all the rules from the defaults.toml
# https://github.com/mgechev/revive/blob/master/defaults.toml
rules:
# https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md#unused-parameter
- name: unused-parameter
severity: warning
# https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md
- name: blank-imports
- name: context-as-argument
arguments:
# allow functions with test or bench signatures
- allowTypesBefore: "*testing.T,testing.TB"
- name: context-keys-type
- name: dot-imports
# A lot of false positives: incorrectly identifies channel draining as "empty code block".
# See https://github.com/mgechev/revive/issues/386
- name: empty-block
disabled: true
- name: error-naming
- name: error-return
- name: error-strings
- name: errorf
- name: exported
- name: increment-decrement
- name: indent-error-flow
- name: package-comments
- name: range
- name: receiver-naming
- name: redefines-builtin-id
- name: superfluous-else
- name: time-naming
- name: unexported-return
- name: unreachable-code
- name: unused-parameter
disabled: true
- name: var-declaration
- name: var-naming
testifylint:
disable:
- float-compare
- go-require
enable:
- bool-compare
- compares
- empty
- error-is-as
- error-nil
- expected-actual
- len
- require-error
- suite-dont-use-pkg
- suite-extra-assert-call


@ -1,5 +1,44 @@
# Changelog
## unreleased
* [ENHANCEMENT] TSDB: Make the wlog watcher read segments synchronously when not tailing. #13224
* [BUGFIX] Agent: Participate in notify calls. #13223
## 2.48.0 / 2023-11-16
* [CHANGE] Remote-write: respect Retry-After header on 5xx errors. #12677
* [FEATURE] Alerting: Add AWS SigV4 authentication support for Alertmanager endpoints. #12774
* [FEATURE] Promtool: Add support for histograms in the TSDB dump command. #12775
* [FEATURE] PromQL: Add warnings (and annotations) to PromQL query results. #12152 #12982 #12988 #13012
* [FEATURE] Remote-write: Add Azure AD OAuth authentication support for remote write requests. #12572
* [ENHANCEMENT] Remote-write: Add a header to count retried remote write requests. #12729
* [ENHANCEMENT] TSDB: Improve query performance by re-using iterator when moving between series. #12757
* [ENHANCEMENT] UI: Move /targets page discovered labels to expandable section #12824
* [ENHANCEMENT] TSDB: Optimize WBL loading by not sending empty buffers over channel. #12808
* [ENHANCEMENT] TSDB: Replay WBL mmap markers concurrently. #12801
* [ENHANCEMENT] Promtool: Add support for specifying series matchers in the TSDB analyze command. #12842
* [ENHANCEMENT] PromQL: Prevent Prometheus from overallocating memory on subquery with large amount of steps. #12734
* [ENHANCEMENT] PromQL: Add warning when monotonicity is forced in the input to histogram_quantile. #12931
* [ENHANCEMENT] Scraping: Optimize sample appending by reducing garbage. #12939
* [ENHANCEMENT] Storage: Reduce memory allocations in queries that merge series sets. #12938
* [ENHANCEMENT] UI: Show group interval in rules display. #12943
* [ENHANCEMENT] Scraping: Save memory when scraping by delaying creation of buffer. #12953
* [ENHANCEMENT] Agent: Allow ingestion of out-of-order samples. #12897
* [ENHANCEMENT] Promtool: Improve support for native histograms in TSDB analyze command. #12869
* [ENHANCEMENT] Scraping: Add configuration option for tracking staleness of scraped timestamps. #13060
* [BUGFIX] SD: Ensure that discovery managers are properly canceled. #10569
* [BUGFIX] TSDB: Fix PostingsForMatchers race with creating new series. #12558
* [BUGFIX] TSDB: Fix handling of explicit counter reset header in histograms. #12772
* [BUGFIX] SD: Validate HTTP client configuration in HTTP, EC2, Azure, Uyuni, PuppetDB, and Lightsail SDs. #12762 #12811 #12812 #12815 #12814 #12816
* [BUGFIX] TSDB: Fix counter reset edgecases causing native histogram panics. #12838
* [BUGFIX] TSDB: Fix duplicate sample detection at chunk size limit. #12874
* [BUGFIX] Promtool: Fix errors not being reported in check rules command. #12715
* [BUGFIX] TSDB: Avoid panics reported in logs when head initialization takes a long time. #12876
* [BUGFIX] TSDB: Ensure that WBL is repaired when possible. #12406
* [BUGFIX] Storage: Fix crash caused by incorrect mixed samples handling. #13055
* [BUGFIX] TSDB: Fix compactor failures by adding min time to histogram chunks. #13062
## 2.47.1 / 2023-10-04
* [BUGFIX] Fix duplicate sample detection at chunk size limit #12874


@ -7,7 +7,7 @@ Julien Pivotto (<roidelapluie@prometheus.io> / @roidelapluie) and Levi Harrison
* `discovery`
* `k8s`: Frederic Branczyk (<fbranczyk@gmail.com> / @brancz)
* `documentation`
* `prometheus-mixin`: Björn Rabenstein (<beorn@grafana.com> / @beorn7)
* `prometheus-mixin`: Matthias Loibl (<mail@matthiasloibl.com> / @metalmatze)
* `storage`
* `remote`: Chris Marchbanks (<csmarchbanks@gmail.com> / @csmarchbanks), Callum Styan (<callumstyan@gmail.com> / @cstyan), Bartłomiej Płotka (<bwplotka@gmail.com> / @bwplotka), Tom Wilkie (<tom.wilkie@gmail.com> / @tomwilkie)
* `tsdb`: Ganesh Vernekar (<ganesh@grafana.com> / @codesome), Bartłomiej Płotka (<bwplotka@gmail.com> / @bwplotka), Jesús Vázquez (<jesus.vazquez@grafana.com> / @jesusvazquez)


@ -93,9 +93,9 @@ endif
# If we only want to only test go code we have to change the test target
# which is called by all.
ifeq ($(GO_ONLY),1)
test: common-test
test: common-test check-go-mod-version
else
test: common-test ui-build-module ui-test ui-lint
test: common-test ui-build-module ui-test ui-lint check-go-mod-version
endif
.PHONY: npm_licenses
@ -138,3 +138,17 @@ bench_tsdb: $(PROMU)
cli-documentation:
$(GO) run ./cmd/prometheus/ --write-documentation > docs/command-line/prometheus.md
$(GO) run ./cmd/promtool/ write-documentation > docs/command-line/promtool.md
.PHONY: check-go-mod-version
check-go-mod-version:
@echo ">> checking go.mod version matching"
@./scripts/check-go-mod-version.sh
.PHONY: update-all-go-deps
update-all-go-deps:
@$(MAKE) update-go-deps
@echo ">> updating Go dependencies in ./documentation/examples/remote_storage/"
@cd ./documentation/examples/remote_storage/ && for m in $$($(GO) list -mod=readonly -m -f '{{ if and (not .Indirect) (not .Main)}}{{.Path}}{{end}}' all); do \
$(GO) get -d $$m; \
done
@cd ./documentation/examples/remote_storage/ && $(GO) mod tidy


@ -61,7 +61,7 @@ PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_
SKIP_GOLANGCI_LINT :=
GOLANGCI_LINT :=
GOLANGCI_LINT_OPTS ?=
GOLANGCI_LINT_VERSION ?= v1.54.2
GOLANGCI_LINT_VERSION ?= v1.55.2
# golangci-lint only supports linux, darwin and windows platforms on i386/amd64.
# windows isn't included here because of the path separator being different.
ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))


@ -53,7 +53,8 @@ Release cadence of first pre-releases being cut is 6 weeks.
| v2.46 | 2023-07-12 | Julien Pivotto (GitHub: @roidelapluie) |
| v2.47 | 2023-08-23 | Bryan Boreham (GitHub: @bboreham) |
| v2.48 | 2023-10-04 | Levi Harrison (GitHub: @LeviHarrison) |
| v2.49 | 2023-11-15 | **searching for volunteer** |
| v2.49 | 2023-12-05 | Bartek Plotka (GitHub: @bwplotka) |
| v2.50 | 2024-01-16 | **searching for volunteer** |
If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.
@ -108,14 +109,16 @@ This is also a good time to consider any experimental features and feature
flags for promotion to stable or for deprecation or ultimately removal. Do any
of these in pull requests, one per feature.
> NOTE: As a validation step check if all security alerts are closed after this step: https://github.com/prometheus/prometheus/security/dependabot. Sometimes it's ok
> if not critical and e.g. fix is not released yet (or it does not relate to
> upgrading) or when we are unaffected.
#### Manually updating Go dependencies
This is usually only needed for `+incompatible` and `v0.0.0` non-semver updates.
```bash
make update-go-deps
git add go.mod go.sum
git commit -m "Update dependencies"
make update-all-go-deps
```
#### Manually updating React dependencies


@ -1 +1 @@
2.47.1
2.48.0


@ -12,7 +12,6 @@
// limitations under the License.
// The main package for the Prometheus server executable.
// nolint:revive // Many unsued function arguments in this file by design.
package main
import (
@ -64,6 +63,7 @@ import (
"github.com/prometheus/prometheus/notifier"
_ "github.com/prometheus/prometheus/plugins" // Register plugins.
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/scrape"
"github.com/prometheus/prometheus/storage"
@ -200,12 +200,21 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
case "no-default-scrape-port":
c.scrape.NoDefaultPort = true
level.Info(logger).Log("msg", "No default port will be appended to scrape targets' addresses.")
case "promql-experimental-functions":
parser.EnableExperimentalFunctions = true
level.Info(logger).Log("msg", "Experimental PromQL functions enabled.")
case "native-histograms":
c.tsdb.EnableNativeHistograms = true
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultNativeHistogramScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultNativeHistogramScrapeProtocols
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "created-timestamp-zero-ingestion":
c.scrape.EnableCreatedTimestampZeroIngestion = true
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
level.Info(logger).Log("msg", "Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "":
continue
case "promql-at-modifier", "promql-negative-offset":
@ -420,7 +429,7 @@ func main() {
a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-at-modifier, promql-negative-offset, promql-per-step-stats, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-at-modifier, promql-negative-offset, promql-per-step-stats, promql-experimental-functions, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
Default("").StringsVar(&cfg.featureList)
promlogflag.AddFlags(a, &cfg.promlogConfig)
@ -611,14 +620,52 @@ func main() {
discoveryManagerNotify discoveryManager
)
// Kubernetes client metrics are used by Kubernetes SD.
// They are registered here in the main function, because SD mechanisms
// can only register metrics specific to a SD instance.
// Kubernetes client metrics are the same for the whole process -
// they are not specific to an SD instance.
err = discovery.RegisterK8sClientMetricsWithPrometheus(prometheus.DefaultRegisterer)
if err != nil {
level.Error(logger).Log("msg", "failed to register Kubernetes client metrics", "err", err)
os.Exit(1)
}
if cfg.enableNewSDManager {
discovery.RegisterMetrics()
discoveryManagerScrape = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), discovery.Name("scrape"))
discoveryManagerNotify = discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), discovery.Name("notify"))
{
discMgr := discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, discovery.Name("scrape"))
if discMgr == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager scrape")
os.Exit(1)
}
discoveryManagerScrape = discMgr
}
{
discMgr := discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, discovery.Name("notify"))
if discMgr == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager notify")
os.Exit(1)
}
discoveryManagerNotify = discMgr
}
} else {
legacymanager.RegisterMetrics()
discoveryManagerScrape = legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), legacymanager.Name("scrape"))
discoveryManagerNotify = legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), legacymanager.Name("notify"))
{
discMgr := legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, legacymanager.Name("scrape"))
if discMgr == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager scrape")
os.Exit(1)
}
discoveryManagerScrape = discMgr
}
{
discMgr := legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, legacymanager.Name("notify"))
if discMgr == nil {
level.Error(logger).Log("msg", "failed to create a discovery manager notify")
os.Exit(1)
}
discoveryManagerNotify = discMgr
}
}
scrapeManager, err := scrape.NewManager(
@ -1124,6 +1171,7 @@ func main() {
)
localStorage.Set(db, 0)
db.SetWriteNotified(remoteStorage)
close(dbOpen)
<-cancel
return nil
@ -1284,7 +1332,7 @@ func startsOrEndsWithQuote(s string) bool {
strings.HasSuffix(s, "\"") || strings.HasSuffix(s, "'")
}
// compileCORSRegexString compiles given string and adds anchors
// compileCORSRegexString compiles given string and adds anchors.
func compileCORSRegexString(s string) (*regexp.Regexp, error) {
r, err := relabel.NewRegexp(s)
if err != nil {
@ -1445,6 +1493,10 @@ func (n notReadyAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels,
return 0, tsdb.ErrNotReady
}
func (n notReadyAppender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) {
return 0, tsdb.ErrNotReady
}
func (n notReadyAppender) Commit() error { return tsdb.ErrNotReady }
func (n notReadyAppender) Rollback() error { return tsdb.ErrNotReady }
@ -1583,7 +1635,6 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
RetentionDuration: int64(time.Duration(opts.RetentionDuration) / time.Millisecond),
MaxBytes: int64(opts.MaxBytes),
NoLockfile: opts.NoLockfile,
AllowOverlappingCompaction: true,
WALCompression: wlog.ParseCompressionType(opts.WALCompression, opts.WALCompressionType),
HeadChunksWriteQueueSize: opts.HeadChunksWriteQueueSize,
SamplesPerChunk: opts.SamplesPerChunk,


@ -346,7 +346,7 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames
continue
}
require.Equal(t, 1, len(g.GetMetric()))
require.Len(t, g.GetMetric(), 1)
if _, ok := res[m]; ok {
t.Error("expected only one metric family for", m)
t.FailNow()


@ -284,7 +284,7 @@ func (p *queryLogTest) run(t *testing.T) {
if !p.enabledAtStart {
p.query(t)
require.Equal(t, 0, len(readQueryLog(t, queryLogFile.Name())))
require.Empty(t, readQueryLog(t, queryLogFile.Name()))
p.setQueryLog(t, queryLogFile.Name())
p.reloadConfig(t)
}
@ -309,7 +309,7 @@ func (p *queryLogTest) run(t *testing.T) {
p.query(t)
ql = readQueryLog(t, queryLogFile.Name())
require.Equal(t, qc, len(ql))
require.Len(t, ql, qc)
qc = len(ql)
p.setQueryLog(t, queryLogFile.Name())
@ -320,7 +320,7 @@ func (p *queryLogTest) run(t *testing.T) {
ql = readQueryLog(t, queryLogFile.Name())
if p.exactQueryCount() {
require.Equal(t, qc, len(ql))
require.Len(t, ql, qc)
} else {
require.Greater(t, len(ql), qc, "no queries logged")
}
@ -340,7 +340,7 @@ func (p *queryLogTest) run(t *testing.T) {
require.NoError(t, os.Rename(queryLogFile.Name(), newFile.Name()))
ql = readQueryLog(t, newFile.Name())
if p.exactQueryCount() {
require.Equal(t, qc, len(ql))
require.Len(t, ql, qc)
}
p.validateLastQuery(t, ql)
qc = len(ql)
@ -351,7 +351,7 @@ func (p *queryLogTest) run(t *testing.T) {
ql = readQueryLog(t, newFile.Name())
if p.exactQueryCount() {
require.Equal(t, qc, len(ql))
require.Len(t, ql, qc)
} else {
require.Greater(t, len(ql), qc, "no queries logged")
}


@ -44,7 +44,7 @@ func sortSamples(samples []backfillSample) {
})
}
func queryAllSeries(t testing.TB, q storage.Querier, expectedMinTime, expectedMaxTime int64) []backfillSample { // nolint:revive
func queryAllSeries(t testing.TB, q storage.Querier, expectedMinTime, expectedMaxTime int64) []backfillSample {
ss := q.Select(context.Background(), false, nil, labels.MustNewMatcher(labels.MatchRegexp, "", ".*"))
samples := []backfillSample{}
for ss.Next() {
@ -61,7 +61,7 @@ func queryAllSeries(t testing.TB, q storage.Querier, expectedMinTime, expectedMa
func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime, expectedBlockDuration int64, expectedSamples []backfillSample, expectedNumBlocks int) {
blocks := db.Blocks()
require.Equal(t, expectedNumBlocks, len(blocks), "did not create correct number of blocks")
require.Len(t, blocks, expectedNumBlocks, "did not create correct number of blocks")
for i, block := range blocks {
require.Equal(t, block.MinTime()/expectedBlockDuration, (block.MaxTime()-1)/expectedBlockDuration, "block %d contains data outside of one aligned block duration", i)


@ -36,6 +36,7 @@ import (
"github.com/google/pprof/profile"
"github.com/prometheus/client_golang/api"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/client_golang/prometheus/testutil/promlint"
config_util "github.com/prometheus/common/config"
@ -198,6 +199,7 @@ func main() {
testCmd := app.Command("test", "Unit testing.")
testRulesCmd := testCmd.Command("rules", "Unit tests for rules.")
testRulesRun := testRulesCmd.Flag("run", "If set, will only run test groups whose names match the regular expression. Can be specified multiple times.").Strings()
testRulesFiles := testRulesCmd.Arg(
"test-rule-file",
"The unit test file.",
@ -316,7 +318,7 @@ func main() {
switch parsedCmd {
case sdCheckCmd.FullCommand():
os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort))
os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort, prometheus.DefaultRegisterer))
case checkConfigCmd.FullCommand():
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...))
@ -366,6 +368,7 @@ func main() {
EnableAtModifier: true,
EnableNegativeOffset: true,
},
*testRulesRun,
*testRulesFiles...),
)
@ -411,8 +414,7 @@ func checkExperimental(f bool) {
}
}
// nolint:revive
var lintError = fmt.Errorf("lint error")
var errLint = fmt.Errorf("lint error")
type lintConfig struct {
all bool
@ -764,7 +766,7 @@ func checkRulesFromStdin(ls lintConfig) (bool, bool) {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
hasErrors = hasErrors || !errors.Is(e, lintError)
hasErrors = hasErrors || !errors.Is(e, errLint)
}
if hasErrors {
return failed, hasErrors
@ -777,7 +779,7 @@ func checkRulesFromStdin(ls lintConfig) (bool, bool) {
}
failed = true
for _, err := range errs {
hasErrors = hasErrors || !errors.Is(err, lintError)
hasErrors = hasErrors || !errors.Is(err, errLint)
}
} else {
fmt.Printf(" SUCCESS: %d rules found\n", n)
@ -798,7 +800,7 @@ func checkRules(files []string, ls lintConfig) (bool, bool) {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
hasErrors = hasErrors || !errors.Is(e, lintError)
hasErrors = hasErrors || !errors.Is(e, errLint)
}
if hasErrors {
continue
@ -811,7 +813,7 @@ func checkRules(files []string, ls lintConfig) (bool, bool) {
}
failed = true
for _, err := range errs {
hasErrors = hasErrors || !errors.Is(err, lintError)
hasErrors = hasErrors || !errors.Is(err, errLint)
}
} else {
fmt.Printf(" SUCCESS: %d rules found\n", n)
@ -838,7 +840,7 @@ func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings lintConfig) (int, []e
})
}
errMessage += "Might cause inconsistency while recording expressions"
return 0, []error{fmt.Errorf("%w %s", lintError, errMessage)}
return 0, []error{fmt.Errorf("%w %s", errLint, errMessage)}
}
}


@ -56,7 +56,7 @@ func TestQueryRange(t *testing.T) {
defer s.Close()
urlObject, err := url.Parse(s.URL)
require.Equal(t, nil, err)
require.NoError(t, err)
p := &promqlPrinter{}
exitCode := QueryRange(urlObject, http.DefaultTransport, map[string]string{}, "up", "0", "300", 0, p)
@ -79,7 +79,7 @@ func TestQueryInstant(t *testing.T) {
defer s.Close()
urlObject, err := url.Parse(s.URL)
require.Equal(t, nil, err)
require.NoError(t, err)
p := &promqlPrinter{}
exitCode := QueryInstant(urlObject, http.DefaultTransport, "up", "300", p)


@ -35,7 +35,7 @@ type mockQueryRangeAPI struct {
samples model.Matrix
}
func (mockAPI mockQueryRangeAPI) QueryRange(_ context.Context, query string, r v1.Range, opts ...v1.Option) (model.Value, v1.Warnings, error) { // nolint:revive
func (mockAPI mockQueryRangeAPI) QueryRange(_ context.Context, query string, r v1.Range, opts ...v1.Option) (model.Value, v1.Warnings, error) {
return mockAPI.samples, v1.Warnings{}, nil
}
@ -91,13 +91,13 @@ func TestBackfillRuleIntegration(t *testing.T) {
for _, err := range errs {
require.NoError(t, err)
}
require.Equal(t, 3, len(ruleImporter.groups))
require.Len(t, ruleImporter.groups, 3)
group1 := ruleImporter.groups[path1+";group0"]
require.NotNil(t, group1)
const defaultInterval = 60
require.Equal(t, defaultInterval*time.Second, group1.Interval())
gRules := group1.Rules()
require.Equal(t, 1, len(gRules))
require.Len(t, gRules, 1)
require.Equal(t, "rule1", gRules[0].Name())
require.Equal(t, "ruleExpr", gRules[0].Query().String())
require.Equal(t, 1, gRules[0].Labels().Len())
@ -106,7 +106,7 @@ func TestBackfillRuleIntegration(t *testing.T) {
require.NotNil(t, group2)
require.Equal(t, defaultInterval*time.Second, group2.Interval())
g2Rules := group2.Rules()
require.Equal(t, 2, len(g2Rules))
require.Len(t, g2Rules, 2)
require.Equal(t, "grp2_rule1", g2Rules[0].Name())
require.Equal(t, "grp2_rule1_expr", g2Rules[0].Query().String())
require.Equal(t, 0, g2Rules[0].Labels().Len())
@ -122,7 +122,7 @@ func TestBackfillRuleIntegration(t *testing.T) {
require.NoError(t, err)
blocks := db.Blocks()
require.Equal(t, (i+1)*tt.expectedBlockCount, len(blocks))
require.Len(t, blocks, (i+1)*tt.expectedBlockCount)
q, err := db.Querier(math.MinInt64, math.MaxInt64)
require.NoError(t, err)


@ -22,6 +22,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
@ -37,7 +38,7 @@ type sdCheckResult struct {
}
// CheckSD performs service discovery for the given job name and reports the results.
func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool) int {
func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool, registerer prometheus.Registerer) int {
logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))
cfg, err := config.LoadFile(sdConfigFiles, false, false, logger)
@ -77,7 +78,7 @@ func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefault
defer cancel()
for _, cfg := range scrapeConfig.ServiceDiscoveryConfigs {
d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{Logger: logger})
d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{Logger: logger, Registerer: registerer})
if err != nil {
fmt.Fprintln(os.Stderr, "Could not create new discoverer", err)
return failureExitCode


@ -35,7 +35,7 @@ func TestSDCheckResult(t *testing.T) {
}}
reg, err := relabel.NewRegexp("(.*)")
require.Nil(t, err)
require.NoError(t, err)
scrapeConfig := &config.ScrapeConfig{
ScrapeInterval: model.Duration(1 * time.Minute),

cmd/promtool/testdata/rules_run.yml (new file)

@ -0,0 +1,30 @@
rule_files:
- rules.yml
evaluation_interval: 1m
# Minimal test cases to check focus on a rule group.
tests:
- name: correct test
input_series:
- series: test
values: 1
promql_expr_test:
- expr: test
eval_time: 0
exp_samples:
- value: 1
labels: test
- name: wrong test
input_series:
- series: test
values: 0
promql_expr_test:
- expr: test
eval_time: 0
exp_samples:
- value: 1
labels: test

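This new test fixture pairs with the `--run` flag added to `promtool test rules` in this commit: the `correct` group passes while the `wrong` group deliberately fails, so filtering by group name changes the exit code (roughly, `promtool test rules --run correct ./testdata/rules_run.yml` succeeds, while running the whole file fails). A minimal sketch in Go terms, using the updated `RulesUnitTest` signature shown further below; the wrapper function itself is hypothetical and not part of the commit:

```go
package main

import "github.com/prometheus/prometheus/promql"

// exampleRunFilter is an illustrative helper: only test groups whose names
// match "correct" are executed, so the deliberately failing "wrong" group in
// rules_run.yml is skipped and the returned exit code is 0. Passing nil (or
// both group names) instead runs everything and returns 1.
func exampleRunFilter() int {
	return RulesUnitTest(promql.LazyLoaderOpts{}, []string{"correct"}, "./testdata/rules_run.yml")
}
```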

@ -16,6 +16,7 @@ package main
import (
"bufio"
"context"
"errors"
"fmt"
"io"
"os"
@ -643,10 +644,15 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
for _, chk := range chks {
// Load the actual data of the chunk.
chk, err := chunkr.Chunk(chk)
chk, iterable, err := chunkr.ChunkOrIterable(chk)
if err != nil {
return err
}
// Chunks within blocks should not need to be re-written, so an
// iterable is not expected to be returned from the chunk reader.
if iterable != nil {
return errors.New("ChunkOrIterable should not return an iterable when reading a block")
}
switch chk.Encoding() {
case chunkenc.EncXOR:
floatChunkSamplesCount = append(floatChunkSamplesCount, chk.NumSamples())


@ -26,6 +26,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/prometheus/common/model"
"gopkg.in/yaml.v2"
@ -39,11 +40,16 @@ import (
// RulesUnitTest does unit testing of rules based on the unit testing files provided.
// More info about the file format can be found in the docs.
func RulesUnitTest(queryOpts promql.LazyLoaderOpts, files ...string) int {
func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, files ...string) int {
failed := false
var run *regexp.Regexp
if runStrings != nil {
run = regexp.MustCompile(strings.Join(runStrings, "|"))
}
for _, f := range files {
if errs := ruleUnitTest(f, queryOpts); errs != nil {
if errs := ruleUnitTest(f, queryOpts, run); errs != nil {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
@ -61,7 +67,7 @@ func RulesUnitTest(queryOpts promql.LazyLoaderOpts, files ...string) int {
return successExitCode
}
func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts) []error {
func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts, run *regexp.Regexp) []error {
fmt.Println("Unit Testing: ", filename)
b, err := os.ReadFile(filename)
@ -96,6 +102,10 @@ func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts) []error {
// Testing.
var errs []error
for _, t := range unitTestInp.Tests {
if !matchesRun(t.TestGroupName, run) {
continue
}
if t.Interval == 0 {
t.Interval = unitTestInp.EvaluationInterval
}
@ -111,6 +121,14 @@ func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts) []error {
return nil
}
func matchesRun(name string, run *regexp.Regexp) bool {
if run == nil {
return true
}
return run.MatchString(name)
}
// unitTestFile holds the contents of a single unit test file.
type unitTestFile struct {
RuleFiles []string `yaml:"rule_files"`


@ -125,7 +125,60 @@ func TestRulesUnitTest(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := RulesUnitTest(tt.queryOpts, tt.args.files...); got != tt.want {
if got := RulesUnitTest(tt.queryOpts, nil, tt.args.files...); got != tt.want {
t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want)
}
})
}
}
func TestRulesUnitTestRun(t *testing.T) {
type args struct {
run []string
files []string
}
tests := []struct {
name string
args args
queryOpts promql.LazyLoaderOpts
want int
}{
{
name: "Test all without run arg",
args: args{
run: nil,
files: []string{"./testdata/rules_run.yml"},
},
want: 1,
},
{
name: "Test all with run arg",
args: args{
run: []string{"correct", "wrong"},
files: []string{"./testdata/rules_run.yml"},
},
want: 1,
},
{
name: "Test correct",
args: args{
run: []string{"correct"},
files: []string{"./testdata/rules_run.yml"},
},
want: 0,
},
{
name: "Test wrong",
args: args{
run: []string{"wrong"},
files: []string{"./testdata/rules_run.yml"},
},
want: 1,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := RulesUnitTest(tt.queryOpts, tt.args.run, tt.args.files...); got != tt.want {
t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want)
}
})


@ -158,6 +158,7 @@ var (
HonorLabels: false,
HonorTimestamps: true,
HTTPClientConfig: config.DefaultHTTPClientConfig,
EnableCompression: true,
}
// DefaultAlertmanagerConfig is the default alertmanager configuration.
@ -453,12 +454,19 @@ var (
OpenMetricsText1_0_0: "application/openmetrics-text;version=1.0.0",
}
// DefaultScrapeProtocols is the set of scrape protocols that will be proposed
// to scrape target, ordered by priority.
DefaultScrapeProtocols = []ScrapeProtocol{
OpenMetricsText1_0_0,
OpenMetricsText0_0_1,
PrometheusText0_0_4,
}
DefaultNativeHistogramScrapeProtocols = []ScrapeProtocol{
// DefaultProtoFirstScrapeProtocols is like DefaultScrapeProtocols, but it
// favors protobuf Prometheus exposition format.
// Used by default for certain feature-flags like
// "native-histograms" and "created-timestamp-zero-ingestion".
DefaultProtoFirstScrapeProtocols = []ScrapeProtocol{
PrometheusProto,
OpenMetricsText1_0_0,
OpenMetricsText0_0_1,
@ -563,6 +571,8 @@ type ScrapeConfig struct {
HonorLabels bool `yaml:"honor_labels,omitempty"`
// Indicator whether the scraped timestamps should be respected.
HonorTimestamps bool `yaml:"honor_timestamps"`
// Indicator whether to track the staleness of the scraped timestamps.
TrackTimestampsStaleness bool `yaml:"track_timestamps_staleness"`
// A set of query parameters with which the target is scraped.
Params url.Values `yaml:"params,omitempty"`
// How frequently to scrape the targets of this scrape config.
@ -580,6 +590,8 @@ type ScrapeConfig struct {
MetricsPath string `yaml:"metrics_path,omitempty"`
// The URL scheme with which to fetch metrics from targets.
Scheme string `yaml:"scheme,omitempty"`
// Indicator whether to request compressed response from the target.
EnableCompression bool `yaml:"enable_compression"`
// An uncompressed response body larger than this many bytes will cause the
// scrape to fail. 0 means no limit.
BodySizeLimit units.Base2Bytes `yaml:"body_size_limit,omitempty"`


@ -186,6 +186,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -288,6 +289,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(50 * time.Second),
ScrapeTimeout: model.Duration(5 * time.Second),
EnableCompression: true,
BodySizeLimit: 10 * units.MiB,
SampleLimit: 1000,
TargetLimit: 35,
@ -384,6 +386,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -438,6 +441,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: model.Duration(10 * time.Second),
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -470,6 +474,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -508,6 +513,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -546,6 +552,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -573,6 +580,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -609,6 +617,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -642,6 +651,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -682,6 +692,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -712,6 +723,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -745,6 +757,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -771,6 +784,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -800,6 +814,7 @@ var expectedConf = &Config{
HonorTimestamps: false,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -829,6 +844,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -858,6 +874,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -884,6 +901,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -918,6 +936,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -951,6 +970,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -980,6 +1000,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -1009,6 +1030,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -1042,6 +1064,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -1078,6 +1101,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -1133,6 +1157,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -1159,6 +1184,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -1196,6 +1222,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -1239,6 +1266,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -1273,6 +1301,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -1301,6 +1330,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -1332,6 +1362,7 @@ var expectedConf = &Config{
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EnableCompression: true,
BodySizeLimit: globBodySizeLimit,
SampleLimit: globSampleLimit,
TargetLimit: globTargetLimit,
@ -1426,8 +1457,8 @@ func TestRemoteWriteRetryOnRateLimit(t *testing.T) {
got := &Config{}
require.NoError(t, yaml.UnmarshalStrict(out, got))
require.Equal(t, true, got.RemoteWriteConfigs[0].QueueConfig.RetryOnRateLimit)
require.Equal(t, false, got.RemoteWriteConfigs[1].QueueConfig.RetryOnRateLimit)
require.True(t, got.RemoteWriteConfigs[0].QueueConfig.RetryOnRateLimit)
require.False(t, got.RemoteWriteConfigs[1].QueueConfig.RetryOnRateLimit)
}
func TestLoadConfig(t *testing.T) {
@ -1444,9 +1475,9 @@ func TestLoadConfig(t *testing.T) {
func TestScrapeIntervalLarger(t *testing.T) {
c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, false, log.NewNopLogger())
require.NoError(t, err)
require.Equal(t, 1, len(c.ScrapeConfigs))
require.Len(t, c.ScrapeConfigs, 1)
for _, sc := range c.ScrapeConfigs {
require.Equal(t, true, sc.ScrapeInterval >= sc.ScrapeTimeout)
require.GreaterOrEqual(t, sc.ScrapeInterval, sc.ScrapeTimeout)
}
}
@ -1462,7 +1493,7 @@ func TestElideSecrets(t *testing.T) {
yamlConfig := string(config)
matches := secretRe.FindAllStringIndex(yamlConfig, -1)
require.Equal(t, 22, len(matches), "wrong number of secret matches found")
require.Len(t, matches, 22, "wrong number of secret matches found")
require.NotContains(t, yamlConfig, "mysecret",
"yaml marshal reveals authentication credentials.")
}
@ -2032,7 +2063,7 @@ func TestAgentMode(t *testing.T) {
c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, false, log.NewNopLogger())
require.NoError(t, err)
require.Len(t, c.RemoteWriteConfigs, 0)
require.Empty(t, c.RemoteWriteConfigs)
c, err = LoadFile("testdata/agent_mode.good.yml", true, false, log.NewNopLogger())
require.NoError(t, err)
@ -2060,9 +2091,10 @@ func TestGetScrapeConfigs(t *testing.T) {
ScrapeTimeout: scrapeTimeout,
ScrapeProtocols: DefaultGlobalConfig.ScrapeProtocols,
MetricsPath: "/metrics",
Scheme: "http",
HTTPClientConfig: config.DefaultHTTPClientConfig,
MetricsPath: "/metrics",
Scheme: "http",
EnableCompression: true,
HTTPClientConfig: config.DefaultHTTPClientConfig,
ServiceDiscoveryConfigs: discovery.Configs{
discovery.StaticConfig{
{
@ -2118,6 +2150,8 @@ func TestGetScrapeConfigs(t *testing.T) {
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
EnableCompression: true,
HTTPClientConfig: config.HTTPClientConfig{
TLSConfig: config.TLSConfig{
CertFile: filepath.FromSlash("testdata/scrape_configs/valid_cert_file"),
@ -2158,6 +2192,8 @@ func TestGetScrapeConfigs(t *testing.T) {
MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme,
EnableCompression: true,
ServiceDiscoveryConfigs: discovery.Configs{
&vultr.SDConfig{
HTTPClientConfig: config.HTTPClientConfig{
@ -2210,3 +2246,16 @@ func kubernetesSDHostURL() config.URL {
tURL, _ := url.Parse("https://localhost:1234")
return config.URL{URL: tURL}
}
func TestScrapeConfigDisableCompression(t *testing.T) {
want, err := LoadFile("testdata/scrape_config_disable_compression.good.yml", false, false, log.NewNopLogger())
require.NoError(t, err)
out, err := yaml.Marshal(want)
require.NoError(t, err)
got := &Config{}
require.NoError(t, yaml.UnmarshalStrict(out, got))
require.False(t, got.ScrapeConfigs[0].EnableCompression)
}


@ -0,0 +1,5 @@
scrape_configs:
- job_name: prometheus
static_configs:
- targets: ['localhost:8080']
enable_compression: false


@ -234,6 +234,11 @@ type Config interface {
type DiscovererOptions struct {
Logger log.Logger
// A registerer for the Discoverer's metrics.
Registerer prometheus.Registerer
HTTPClientOptions []config.HTTPClientOption
}
```
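The `Registerer` added to `DiscovererOptions` lets each SD mechanism register its metrics with the registerer handed down by the discovery manager instead of relying on process-global registration (compare the Azure change below, which drops its `prometheus.MustRegister` call). Below is a minimal sketch of how a hypothetical mechanism could thread that registerer through to `refresh.NewDiscovery`, following the same `refresh.Options` pattern the EC2 and Lightsail hunks adopt; the `mysd` names and types are illustrative, not part of this commit:

```go
package mysd

import (
	"context"
	"time"

	"github.com/prometheus/common/model"

	"github.com/prometheus/prometheus/discovery"
	"github.com/prometheus/prometheus/discovery/refresh"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

// SDConfig and Discovery are illustrative stand-ins for a real mechanism;
// only the refresh.Options wiring mirrors this commit.
type SDConfig struct {
	RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
}

type Discovery struct {
	*refresh.Discovery
	cfg *SDConfig
}

// NewDiscoverer passes the manager-provided registerer on to the refresh
// loop rather than registering metrics on the global default registerer.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
	d := &Discovery{cfg: c}
	d.Discovery = refresh.NewDiscovery(
		refresh.Options{
			Logger:   opts.Logger,
			Mech:     "mysd",
			Interval: time.Duration(c.RefreshInterval),
			RefreshF: d.refresh,
			Registry: opts.Registerer,
		},
	)
	return d, nil
}

func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
	// Real discovery logic would go here.
	return nil, nil
}
```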


@ -30,6 +30,7 @@ import (
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -101,7 +102,7 @@ func (*EC2SDConfig) Name() string { return "ec2" }
// NewDiscoverer returns a Discoverer for the EC2 Config.
func (c *EC2SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewEC2Discovery(c, opts.Logger), nil
return NewEC2Discovery(c, opts.Logger, opts.Registerer), nil
}
// UnmarshalYAML implements the yaml.Unmarshaler interface for the EC2 Config.
@ -147,7 +148,7 @@ type EC2Discovery struct {
}
// NewEC2Discovery returns a new EC2Discovery which periodically refreshes its targets.
func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger) *EC2Discovery {
func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, reg prometheus.Registerer) *EC2Discovery {
if logger == nil {
logger = log.NewNopLogger()
}
@ -156,10 +157,13 @@ func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger) *EC2Discovery {
cfg: conf,
}
d.Discovery = refresh.NewDiscovery(
logger,
"ec2",
time.Duration(d.cfg.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "ec2",
Interval: time.Duration(d.cfg.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d
}


@ -29,6 +29,7 @@ import (
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/lightsail"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -84,7 +85,7 @@ func (*LightsailSDConfig) Name() string { return "lightsail" }
// NewDiscoverer returns a Discoverer for the Lightsail Config.
func (c *LightsailSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewLightsailDiscovery(c, opts.Logger), nil
return NewLightsailDiscovery(c, opts.Logger, opts.Registerer), nil
}
// UnmarshalYAML implements the yaml.Unmarshaler interface for the Lightsail Config.
@ -121,7 +122,7 @@ type LightsailDiscovery struct {
}
// NewLightsailDiscovery returns a new LightsailDiscovery which periodically refreshes its targets.
func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger) *LightsailDiscovery {
func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger, reg prometheus.Registerer) *LightsailDiscovery {
if logger == nil {
logger = log.NewNopLogger()
}
@ -129,10 +130,13 @@ func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger) *Lightsai
cfg: conf,
}
d.Discovery = refresh.NewDiscovery(
logger,
"lightsail",
time.Duration(d.cfg.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "lightsail",
Interval: time.Duration(d.cfg.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d
}

View file

@ -17,6 +17,7 @@ import (
"context"
"errors"
"fmt"
"math/rand"
"net"
"net/http"
"strings"
@ -27,13 +28,17 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2"
cache "github.com/Code-Hex/go-generics-cache"
"github.com/Code-Hex/go-generics-cache/policy/lru"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@ -74,12 +79,6 @@ var (
AuthenticationMethod: authMethodOAuth,
HTTPClientConfig: config_util.DefaultHTTPClientConfig,
}
failuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_azure_failures_total",
Help: "Number of Azure service discovery refresh failures.",
})
)
var environments = map[string]cloud.Configuration{
@ -96,7 +95,7 @@ func CloudConfigurationFromName(name string) (cloud.Configuration, error) {
name = strings.ToUpper(name)
env, ok := environments[name]
if !ok {
return env, fmt.Errorf("There is no cloud configuration matching the name %q", name)
return env, fmt.Errorf("there is no cloud configuration matching the name %q", name)
}
return env, nil
@ -104,7 +103,6 @@ func CloudConfigurationFromName(name string) (cloud.Configuration, error) {
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(failuresCount)
}
// SDConfig is the configuration for Azure based service discovery.
@ -127,7 +125,7 @@ func (*SDConfig) Name() string { return "azure" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger), nil
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
func validateAuthParam(param, name string) error {
@ -145,7 +143,6 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
if err != nil {
return err
}
if err = validateAuthParam(c.SubscriptionID, "subscription_id"); err != nil {
return err
}
@ -171,28 +168,49 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
type Discovery struct {
*refresh.Discovery
logger log.Logger
cfg *SDConfig
port int
logger log.Logger
cfg *SDConfig
port int
cache *cache.Cache[string, *armnetwork.Interface]
failuresCount prometheus.Counter
cacheHitCount prometheus.Counter
}
// NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets.
func NewDiscovery(cfg *SDConfig, logger log.Logger) *Discovery {
func NewDiscovery(cfg *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
l := cache.New(cache.AsLRU[string, *armnetwork.Interface](lru.WithCapacity(5000)))
d := &Discovery{
cfg: cfg,
port: cfg.Port,
logger: logger,
cache: l,
failuresCount: prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_azure_failures_total",
Help: "Number of Azure service discovery refresh failures.",
}),
cacheHitCount: prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_azure_cache_hit_total",
Help: "Number of cache hit during refresh.",
}),
}
d.Discovery = refresh.NewDiscovery(
logger,
"azure",
time.Duration(cfg.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "azure",
Interval: time.Duration(cfg.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
Metrics: []prometheus.Collector{d.failuresCount, d.cacheHitCount},
},
)
return d
return d, nil
}
// azureClient represents multiple Azure Resource Manager providers.
@ -281,7 +299,7 @@ func newCredential(cfg SDConfig, policyClientOptions policy.ClientOptions) (azco
return credential, nil
}
// virtualMachine represents an Azure virtual machine (which can also be created by a VMSS)
// virtualMachine represents an Azure virtual machine (which can also be created by a VMSS).
type virtualMachine struct {
ID string
Name string
@ -290,6 +308,7 @@ type virtualMachine struct {
Location string
OsType string
ScaleSet string
InstanceID string
Tags map[string]*string
NetworkInterfaces []string
Size string
@ -314,14 +333,14 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
client, err := createAzureClient(*d.cfg)
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("could not create Azure client: %w", err)
}
client.logger = d.logger
machines, err := client.getVMs(ctx, d.cfg.ResourceGroup)
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("could not get virtual machines: %w", err)
}
@ -330,14 +349,14 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
// Load the vms managed by scale sets.
scaleSets, err := client.getScaleSets(ctx, d.cfg.ResourceGroup)
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("could not get virtual machine scale sets: %w", err)
}
for _, scaleSet := range scaleSets {
scaleSetVms, err := client.getScaleSetVMs(ctx, scaleSet)
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("could not get virtual machine scale set vms: %w", err)
}
machines = append(machines, scaleSetVms...)
@ -385,15 +404,36 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
// Get the IP address information via separate call to the network provider.
for _, nicID := range vm.NetworkInterfaces {
networkInterface, err := client.getNetworkInterfaceByID(ctx, nicID)
if err != nil {
if errors.Is(err, errorNotFound) {
level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err)
var networkInterface *armnetwork.Interface
if v, ok := d.getFromCache(nicID); ok {
networkInterface = v
d.cacheHitCount.Add(1)
} else {
if vm.ScaleSet == "" {
networkInterface, err = client.getVMNetworkInterfaceByID(ctx, nicID)
if err != nil {
if errors.Is(err, errorNotFound) {
level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err)
} else {
ch <- target{labelSet: nil, err: err}
}
// Get out of this routine because we cannot continue without a network interface.
return
}
d.addToCache(nicID, networkInterface)
} else {
ch <- target{labelSet: nil, err: err}
networkInterface, err = client.getVMScaleSetVMNetworkInterfaceByID(ctx, nicID, vm.ScaleSet, vm.InstanceID)
if err != nil {
if errors.Is(err, errorNotFound) {
level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err)
} else {
ch <- target{labelSet: nil, err: err}
}
// Get out of this routine because we cannot continue without a network interface.
return
}
d.addToCache(nicID, networkInterface)
}
// Get out of this routine because we cannot continue without a network interface.
return
}
if networkInterface.Properties == nil {
@ -440,7 +480,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
var tg targetgroup.Group
for tgt := range ch {
if tgt.err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("unable to complete Azure service discovery: %w", tgt.err)
}
if tgt.labelSet != nil {
@ -601,6 +641,7 @@ func mapFromVMScaleSetVM(vm armcompute.VirtualMachineScaleSetVM, scaleSetName st
Location: *(vm.Location),
OsType: osType,
ScaleSet: scaleSetName,
InstanceID: *(vm.InstanceID),
Tags: tags,
NetworkInterfaces: networkInterfaces,
Size: size,
@ -609,22 +650,58 @@ func mapFromVMScaleSetVM(vm armcompute.VirtualMachineScaleSetVM, scaleSetName st
var errorNotFound = errors.New("network interface does not exist")
// getNetworkInterfaceByID gets the network interface.
// getVMNetworkInterfaceByID gets the network interface.
// If a 404 is returned from the Azure API, `errorNotFound` is returned.
func (client *azureClient) getNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) {
func (client *azureClient) getVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) {
r, err := newAzureResourceFromID(networkInterfaceID, client.logger)
if err != nil {
return nil, fmt.Errorf("could not parse network interface ID: %w", err)
}
resp, err := client.nic.Get(ctx, r.ResourceGroupName, r.Name, nil)
resp, err := client.nic.Get(ctx, r.ResourceGroupName, r.Name, &armnetwork.InterfacesClientGetOptions{Expand: to.Ptr("IPConfigurations/PublicIPAddress")})
if err != nil {
var responseError *azcore.ResponseError
if errors.As(err, &responseError) && responseError.StatusCode == http.StatusNotFound {
return nil, errorNotFound
}
return nil, fmt.Errorf("Failed to retrieve Interface %v with error: %w", networkInterfaceID, err)
return nil, fmt.Errorf("failed to retrieve Interface %v with error: %w", networkInterfaceID, err)
}
return &resp.Interface, nil
}
// getVMScaleSetVMNetworkInterfaceByID gets the network interface.
// If a 404 is returned from the Azure API, `errorNotFound` is returned.
func (client *azureClient) getVMScaleSetVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID, scaleSetName, instanceID string) (*armnetwork.Interface, error) {
r, err := newAzureResourceFromID(networkInterfaceID, client.logger)
if err != nil {
return nil, fmt.Errorf("could not parse network interface ID: %w", err)
}
resp, err := client.nic.GetVirtualMachineScaleSetNetworkInterface(ctx, r.ResourceGroupName, scaleSetName, instanceID, r.Name, &armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceOptions{Expand: to.Ptr("IPConfigurations/PublicIPAddress")})
if err != nil {
var responseError *azcore.ResponseError
if errors.As(err, &responseError) && responseError.StatusCode == http.StatusNotFound {
return nil, errorNotFound
}
return nil, fmt.Errorf("failed to retrieve Interface %v with error: %w", networkInterfaceID, err)
}
return &resp.Interface, nil
}
// addToCache will add the network interface information for the specified nicID.
func (d *Discovery) addToCache(nicID string, netInt *armnetwork.Interface) {
random := rand.Int63n(int64(time.Duration(d.cfg.RefreshInterval * 3).Seconds()))
rs := time.Duration(random) * time.Second
exptime := time.Duration(d.cfg.RefreshInterval*10) + rs
d.cache.Set(nicID, netInt, cache.WithExpiration(exptime))
level.Debug(d.logger).Log("msg", "Adding nic", "nic", nicID, "time", exptime.Seconds())
}
// getFromCache will get the network Interface for the specified nicID.
// If the cache is disabled, nothing will happen.
func (d *Discovery) getFromCache(nicID string) (*armnetwork.Interface, bool) {
net, found := d.cache.Get(nicID)
return net, found
}
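To make the expiry arithmetic in addToCache concrete: an entry lives for ten refresh intervals plus a random jitter of up to three intervals, so with an assumed 5-minute refresh_interval it expires somewhere between 50 and 65 minutes. A small standalone sketch of the same computation:

```go
package main

import (
	"fmt"
	"math/rand"
	"time"
)

func main() {
	refreshInterval := 5 * time.Minute // assumed value for illustration

	// Jitter: a random whole number of seconds in [0, 3*refreshInterval).
	random := rand.Int63n(int64((refreshInterval * 3).Seconds()))
	rs := time.Duration(random) * time.Second

	// Base lifetime of ten refresh intervals, plus the jitter.
	exptime := refreshInterval*10 + rs
	fmt.Println("cache entry expires after", exptime)
}
```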

View file

@ -142,6 +142,7 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) {
vmSize := armcompute.VirtualMachineSizeTypes(size)
osType := armcompute.OperatingSystemTypesLinux
vmType := "type"
instanceID := "123"
location := "westeurope"
computerName := "computer_name"
networkProfile := armcompute.NetworkProfile{
@ -166,6 +167,7 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) {
ID: &id,
Name: &name,
Type: &vmType,
InstanceID: &instanceID,
Location: &location,
Tags: nil,
Properties: properties,
@ -182,6 +184,7 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) {
Tags: map[string]*string{},
NetworkInterfaces: []string{},
ScaleSet: scaleSet,
InstanceID: instanceID,
Size: size,
}
@ -197,6 +200,7 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
vmSize := armcompute.VirtualMachineSizeTypes(size)
osType := armcompute.OperatingSystemTypesLinux
vmType := "type"
instanceID := "123"
location := "westeurope"
computerName := "computer_name"
tags := map[string]*string{
@ -224,6 +228,7 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
ID: &id,
Name: &name,
Type: &vmType,
InstanceID: &instanceID,
Location: &location,
Tags: tags,
Properties: properties,
@ -240,6 +245,7 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
Tags: tags,
NetworkInterfaces: []string{},
ScaleSet: scaleSet,
InstanceID: instanceID,
Size: size,
}
@ -269,7 +275,7 @@ func TestNewAzureResourceFromID(t *testing.T) {
},
} {
actual, err := newAzureResourceFromID(tc.id, nil)
require.Nil(t, err)
require.NoError(t, err)
require.Equal(t, tc.expected.Name, actual.Name)
require.Equal(t, tc.expected.ResourceGroupName, actual.ResourceGroupName)
}

View file

@ -50,7 +50,7 @@ const (
tagsLabel = model.MetaLabelPrefix + "consul_tags"
// serviceLabel is the name of the label containing the service name.
serviceLabel = model.MetaLabelPrefix + "consul_service"
// healthLabel is the name of the label containing the health of the service instance
// healthLabel is the name of the label containing the health of the service instance.
healthLabel = model.MetaLabelPrefix + "consul_health"
// serviceAddressLabel is the name of the label containing the (optional) service address.
serviceAddressLabel = model.MetaLabelPrefix + "consul_service_address"
@ -71,41 +71,18 @@ const (
namespace = "prometheus"
)
var (
rpcFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_consul_rpc_failures_total",
Help: "The number of Consul RPC call failures.",
})
rpcDuration = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Name: "sd_consul_rpc_duration_seconds",
Help: "The duration of a Consul RPC call in seconds.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
},
[]string{"endpoint", "call"},
)
// Initialize metric vectors.
servicesRPCDuration = rpcDuration.WithLabelValues("catalog", "services")
serviceRPCDuration = rpcDuration.WithLabelValues("catalog", "service")
// DefaultSDConfig is the default Consul SD configuration.
DefaultSDConfig = SDConfig{
TagSeparator: ",",
Scheme: "http",
Server: "localhost:8500",
AllowStale: true,
RefreshInterval: model.Duration(30 * time.Second),
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
)
// DefaultSDConfig is the default Consul SD configuration.
var DefaultSDConfig = SDConfig{
TagSeparator: ",",
Scheme: "http",
Server: "localhost:8500",
AllowStale: true,
RefreshInterval: model.Duration(30 * time.Second),
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(rpcFailuresCount, rpcDuration)
}
// SDConfig is the configuration for Consul service discovery.
@ -147,7 +124,7 @@ func (*SDConfig) Name() string { return "consul" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -184,22 +161,27 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
// Discovery retrieves target information from a Consul server
// and updates them via watches.
type Discovery struct {
client *consul.Client
clientDatacenter string
clientNamespace string
clientPartition string
tagSeparator string
watchedServices []string // Set of services which will be discovered.
watchedTags []string // Tags used to filter instances of a service.
watchedNodeMeta map[string]string
allowStale bool
refreshInterval time.Duration
finalizer func()
logger log.Logger
client *consul.Client
clientDatacenter string
clientNamespace string
clientPartition string
tagSeparator string
watchedServices []string // Set of services which will be discovered.
watchedTags []string // Tags used to filter instances of a service.
watchedNodeMeta map[string]string
allowStale bool
refreshInterval time.Duration
finalizer func()
logger log.Logger
rpcFailuresCount prometheus.Counter
rpcDuration *prometheus.SummaryVec
servicesRPCDuration prometheus.Observer
serviceRPCDuration prometheus.Observer
metricRegisterer discovery.MetricRegisterer
}
// NewDiscovery returns a new Discovery for the given config.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@ -237,7 +219,35 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
clientPartition: conf.Partition,
finalizer: wrapper.CloseIdleConnections,
logger: logger,
rpcFailuresCount: prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_consul_rpc_failures_total",
Help: "The number of Consul RPC call failures.",
}),
rpcDuration: prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Name: "sd_consul_rpc_duration_seconds",
Help: "The duration of a Consul RPC call in seconds.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
},
[]string{"endpoint", "call"},
),
}
cd.metricRegisterer = discovery.NewMetricRegisterer(
reg,
[]prometheus.Collector{
cd.rpcFailuresCount,
cd.rpcDuration,
},
)
// Initialize metric vectors.
cd.servicesRPCDuration = cd.rpcDuration.WithLabelValues("catalog", "services")
cd.serviceRPCDuration = cd.rpcDuration.WithLabelValues("catalog", "service")
return cd, nil
}
@ -293,7 +303,7 @@ func (d *Discovery) getDatacenter() error {
info, err := d.client.Agent().Self()
if err != nil {
level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err)
rpcFailuresCount.Inc()
d.rpcFailuresCount.Inc()
return err
}
@ -334,6 +344,13 @@ func (d *Discovery) initialize(ctx context.Context) {
// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
err := d.metricRegisterer.RegisterMetrics()
if err != nil {
level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
return
}
defer d.metricRegisterer.UnregisterMetrics()
if d.finalizer != nil {
defer d.finalizer()
}
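Putting the two hunks above together, the pattern for discoverers that do not use the refresh helper is: create the metrics as per-instance fields, wrap them in a MetricRegisterer built from the injected registerer, register them at the start of Run, and unregister them on exit. A hedged sketch with a hypothetical discoverer and metric name:

```go
package mysd

import (
	"context"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

// myDiscovery is a hypothetical discoverer that owns its metrics.
type myDiscovery struct {
	logger           log.Logger
	refreshFailures  prometheus.Counter
	metricRegisterer discovery.MetricRegisterer
}

func newMyDiscovery(logger log.Logger, reg prometheus.Registerer) *myDiscovery {
	d := &myDiscovery{
		logger: logger,
		refreshFailures: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "my_sd_refresh_failures_total", // hypothetical metric name
			Help: "Number of refresh failures of a hypothetical SD.",
		}),
	}
	// Metrics are tied to the injected registerer rather than the global registry.
	d.metricRegisterer = discovery.NewMetricRegisterer(reg, []prometheus.Collector{
		d.refreshFailures,
	})
	return d
}

// Run registers the metrics for the lifetime of the discoverer and
// unregisters them on exit, mirroring the Consul change above.
func (d *myDiscovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
	if err := d.metricRegisterer.RegisterMetrics(); err != nil {
		level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
		return
	}
	defer d.metricRegisterer.UnregisterMetrics()

	<-ctx.Done()
}
```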
@ -382,7 +399,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
t0 := time.Now()
srvs, meta, err := catalog.Services(opts.WithContext(ctx))
elapsed := time.Since(t0)
servicesRPCDuration.Observe(elapsed.Seconds())
d.servicesRPCDuration.Observe(elapsed.Seconds())
// Check the context before in order to exit early.
select {
@ -393,7 +410,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
if err != nil {
level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err)
rpcFailuresCount.Inc()
d.rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return
}
@ -449,13 +466,15 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
// consulService contains data belonging to the same service.
type consulService struct {
name string
tags []string
labels model.LabelSet
discovery *Discovery
client *consul.Client
tagSeparator string
logger log.Logger
name string
tags []string
labels model.LabelSet
discovery *Discovery
client *consul.Client
tagSeparator string
logger log.Logger
rpcFailuresCount prometheus.Counter
serviceRPCDuration prometheus.Observer
}
// Start watching a service.
@ -469,8 +488,10 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G
serviceLabel: model.LabelValue(name),
datacenterLabel: model.LabelValue(d.clientDatacenter),
},
tagSeparator: d.tagSeparator,
logger: d.logger,
tagSeparator: d.tagSeparator,
logger: d.logger,
rpcFailuresCount: d.rpcFailuresCount,
serviceRPCDuration: d.serviceRPCDuration,
}
go func() {
@ -508,7 +529,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr
t0 := time.Now()
serviceNodes, meta, err := health.ServiceMultipleTags(srv.name, srv.tags, false, opts.WithContext(ctx))
elapsed := time.Since(t0)
serviceRPCDuration.Observe(elapsed.Seconds())
srv.serviceRPCDuration.Observe(elapsed.Seconds())
// Check the context before in order to exit early.
select {
@ -520,7 +541,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr
if err != nil {
level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err)
rpcFailuresCount.Inc()
srv.rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return
}

View file

@ -22,6 +22,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@ -39,7 +40,7 @@ func TestConfiguredService(t *testing.T) {
conf := &SDConfig{
Services: []string{"configuredServiceName"},
}
consulDiscovery, err := NewDiscovery(conf, nil)
consulDiscovery, err := NewDiscovery(conf, nil, prometheus.NewRegistry())
if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
@ -56,7 +57,7 @@ func TestConfiguredServiceWithTag(t *testing.T) {
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http"},
}
consulDiscovery, err := NewDiscovery(conf, nil)
consulDiscovery, err := NewDiscovery(conf, nil, prometheus.NewRegistry())
if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
@ -151,7 +152,7 @@ func TestConfiguredServiceWithTags(t *testing.T) {
}
for _, tc := range cases {
consulDiscovery, err := NewDiscovery(tc.conf, nil)
consulDiscovery, err := NewDiscovery(tc.conf, nil, prometheus.NewRegistry())
if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
@ -165,7 +166,7 @@ func TestConfiguredServiceWithTags(t *testing.T) {
func TestNonConfiguredService(t *testing.T) {
conf := &SDConfig{}
consulDiscovery, err := NewDiscovery(conf, nil)
consulDiscovery, err := NewDiscovery(conf, nil, prometheus.NewRegistry())
if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
@ -262,19 +263,19 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) {
func newDiscovery(t *testing.T, config *SDConfig) *Discovery {
logger := log.NewNopLogger()
d, err := NewDiscovery(config, logger)
d, err := NewDiscovery(config, logger, prometheus.NewRegistry())
require.NoError(t, err)
return d
}
func checkOneTarget(t *testing.T, tg []*targetgroup.Group) {
require.Equal(t, 1, len(tg))
require.Len(t, tg, 1)
target := tg[0]
require.Equal(t, "test-dc", string(target.Labels["__meta_consul_dc"]))
require.Equal(t, target.Source, string(target.Labels["__meta_consul_service"]))
if target.Source == "test" {
// test service should have one node.
require.Greater(t, len(target.Targets), 0, "Test service should have one node")
require.NotEmpty(t, target.Targets, "Test service should have one node")
}
}
@ -313,7 +314,7 @@ func TestNoTargets(t *testing.T) {
}()
targets := (<-ch)[0].Targets
require.Equal(t, 0, len(targets))
require.Empty(t, targets)
cancel()
<-ch
}
@ -484,7 +485,7 @@ oauth2:
return
}
require.Equal(t, config, test.expected)
require.Equal(t, test.expected, config)
})
}
}

View file

@ -24,6 +24,7 @@ import (
"github.com/digitalocean/godo"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@ -75,7 +76,7 @@ func (*SDConfig) Name() string { return "digitalocean" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -103,7 +104,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
d := &Discovery{
port: conf.Port,
}
@ -125,10 +126,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
}
d.Discovery = refresh.NewDiscovery(
logger,
"digitalocean",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "digitalocean",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d, nil
}

View file

@ -20,6 +20,7 @@ import (
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
)
@ -46,7 +47,7 @@ func TestDigitalOceanSDRefresh(t *testing.T) {
cfg := DefaultSDConfig
cfg.HTTPClientConfig.BearerToken = tokenID
d, err := NewDiscovery(&cfg, log.NewNopLogger())
d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
endpoint, err := url.Parse(sdmock.Mock.Endpoint())
require.NoError(t, err)
@ -56,12 +57,12 @@ func TestDigitalOceanSDRefresh(t *testing.T) {
tgs, err := d.refresh(ctx)
require.NoError(t, err)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Equal(t, 4, len(tg.Targets))
require.Len(t, tg.Targets, 4)
for i, lbls := range []model.LabelSet{
{

View file

@ -21,7 +21,7 @@ import (
"testing"
)
// SDMock is the interface for the DigitalOcean mock
// SDMock is the interface for the DigitalOcean mock.
type SDMock struct {
t *testing.T
Server *httptest.Server
@ -35,18 +35,18 @@ func NewSDMock(t *testing.T) *SDMock {
}
}
// Endpoint returns the URI to the mock server
// Endpoint returns the URI to the mock server.
func (m *SDMock) Endpoint() string {
return m.Server.URL + "/"
}
// Setup creates the mock server
// Setup creates the mock server.
func (m *SDMock) Setup() {
m.Mux = http.NewServeMux()
m.Server = httptest.NewServer(m.Mux)
}
// ShutdownServer creates the mock server
// ShutdownServer shuts down the mock server.
func (m *SDMock) ShutdownServer() {
m.Server.Close()
}

View file

@ -18,6 +18,7 @@ import (
"reflect"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/prometheus/discovery/targetgroup"
@ -42,6 +43,15 @@ type Discoverer interface {
type DiscovererOptions struct {
Logger log.Logger
// A registerer for the Discoverer's metrics.
// Some Discoverers may ignore this registerer and use the global one instead.
// For now this will work, because the Prometheus `main` function uses the global registry.
// However, in the future the Prometheus `main` function will be updated to not use the global registry.
// Hence, if a discoverer wants its metrics to be visible via the Prometheus executable's
// `/metrics` endpoint, it should use this explicit registerer.
// TODO(ptodev): Update this comment once the Prometheus `main` function does not use the global registry.
Registerer prometheus.Registerer
// Extra HTTP client options to expose to Discoverers. This field may be
// ignored; Discoverer implementations must opt-in to reading it.
HTTPClientOptions []config.HTTPClientOption
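From the caller's side, a hedged sketch of filling in these options; the registry handling here is a placeholder, and whether HTTPClientOptions is honoured depends on the individual discoverer:

```go
package main

import (
	"context"

	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

// newDiscovererFromConfig passes an explicit registerer to any discovery.Config,
// so the discoverer's metrics end up on that registry instead of the global one.
func newDiscovererFromConfig(cfg discovery.Config, logger log.Logger, reg prometheus.Registerer) (discovery.Discoverer, error) {
	return cfg.NewDiscoverer(discovery.DiscovererOptions{
		Logger:     logger,
		Registerer: reg,
		// HTTPClientOptions is left empty; discoverers must opt in to reading it.
	})
}

// runDiscoverer consumes target group updates until the context is cancelled.
func runDiscoverer(ctx context.Context, d discovery.Discoverer) {
	updates := make(chan []*targetgroup.Group)
	go d.Run(ctx, updates)
	for tgs := range updates {
		_ = tgs // placeholder: hand the groups to a scrape manager or similar
	}
}
```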

View file

@ -42,35 +42,21 @@ const (
dnsSrvRecordPortLabel = dnsSrvRecordPrefix + "port"
dnsMxRecordPrefix = model.MetaLabelPrefix + "dns_mx_record_"
dnsMxRecordTargetLabel = dnsMxRecordPrefix + "target"
dnsNsRecordPrefix = model.MetaLabelPrefix + "dns_ns_record_"
dnsNsRecordTargetLabel = dnsNsRecordPrefix + "target"
// Constants for instrumentation.
namespace = "prometheus"
)
var (
dnsSDLookupsCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_dns_lookups_total",
Help: "The number of DNS-SD lookups.",
})
dnsSDLookupFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_dns_lookup_failures_total",
Help: "The number of DNS-SD lookup failures.",
})
// DefaultSDConfig is the default DNS SD configuration.
DefaultSDConfig = SDConfig{
RefreshInterval: model.Duration(30 * time.Second),
Type: "SRV",
}
)
// DefaultSDConfig is the default DNS SD configuration.
var DefaultSDConfig = SDConfig{
RefreshInterval: model.Duration(30 * time.Second),
Type: "SRV",
}
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(dnsSDLookupFailuresCount, dnsSDLookupsCount)
}
// SDConfig is the configuration for DNS based service discovery.
@ -86,7 +72,7 @@ func (*SDConfig) Name() string { return "dns" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(*c, opts.Logger), nil
return NewDiscovery(*c, opts.Logger, opts.Registerer)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -102,7 +88,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
switch strings.ToUpper(c.Type) {
case "SRV":
case "A", "AAAA", "MX":
case "A", "AAAA", "MX", "NS":
if c.Port == 0 {
return errors.New("a port is required in DNS-SD configs for all record types except SRV")
}
@ -116,16 +102,18 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
// the Discoverer interface.
type Discovery struct {
*refresh.Discovery
names []string
port int
qtype uint16
logger log.Logger
names []string
port int
qtype uint16
logger log.Logger
dnsSDLookupsCount prometheus.Counter
dnsSDLookupFailuresCount prometheus.Counter
lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error)
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery {
func NewDiscovery(conf SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@ -140,6 +128,8 @@ func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery {
qtype = dns.TypeSRV
case "MX":
qtype = dns.TypeMX
case "NS":
qtype = dns.TypeNS
}
d := &Discovery{
names: conf.Names,
@ -147,14 +137,32 @@ func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery {
port: conf.Port,
logger: logger,
lookupFn: lookupWithSearchPath,
dnsSDLookupsCount: prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_dns_lookups_total",
Help: "The number of DNS-SD lookups.",
}),
dnsSDLookupFailuresCount: prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_dns_lookup_failures_total",
Help: "The number of DNS-SD lookup failures.",
}),
}
d.Discovery = refresh.NewDiscovery(
logger,
"dns",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "dns",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: prometheus.NewRegistry(),
Metrics: []prometheus.Collector{d.dnsSDLookupsCount, d.dnsSDLookupFailuresCount},
},
)
return d
return d, nil
}
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
@ -187,9 +195,9 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targetgroup.Group) error {
response, err := d.lookupFn(name, d.qtype, d.logger)
dnsSDLookupsCount.Inc()
d.dnsSDLookupsCount.Inc()
if err != nil {
dnsSDLookupFailuresCount.Inc()
d.dnsSDLookupFailuresCount.Inc()
return err
}
@ -199,7 +207,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ
}
for _, record := range response.Answer {
var target, dnsSrvRecordTarget, dnsSrvRecordPort, dnsMxRecordTarget model.LabelValue
var target, dnsSrvRecordTarget, dnsSrvRecordPort, dnsMxRecordTarget, dnsNsRecordTarget model.LabelValue
switch addr := record.(type) {
case *dns.SRV:
@ -217,6 +225,13 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ
addr.Mx = strings.TrimRight(addr.Mx, ".")
target = hostPort(addr.Mx, d.port)
case *dns.NS:
dnsNsRecordTarget = model.LabelValue(addr.Ns)
// Remove the final dot from rooted DNS names to make them look more usual.
addr.Ns = strings.TrimRight(addr.Ns, ".")
target = hostPort(addr.Ns, d.port)
case *dns.A:
target = hostPort(addr.A.String(), d.port)
case *dns.AAAA:
@ -234,6 +249,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ
dnsSrvRecordTargetLabel: dnsSrvRecordTarget,
dnsSrvRecordPortLabel: dnsSrvRecordPort,
dnsMxRecordTargetLabel: dnsMxRecordTarget,
dnsNsRecordTargetLabel: dnsNsRecordTarget,
})
}

View file

@ -22,6 +22,7 @@ import (
"github.com/go-kit/log"
"github.com/miekg/dns"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
@ -81,6 +82,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "",
"__meta_dns_srv_record_port": "",
"__meta_dns_mx_record_target": "",
"__meta_dns_ns_record_target": "",
},
},
},
@ -112,6 +114,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "",
"__meta_dns_srv_record_port": "",
"__meta_dns_mx_record_target": "",
"__meta_dns_ns_record_target": "",
},
},
},
@ -143,6 +146,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "db1.example.com.",
"__meta_dns_srv_record_port": "3306",
"__meta_dns_mx_record_target": "",
"__meta_dns_ns_record_target": "",
},
{
"__address__": "db2.example.com:3306",
@ -150,6 +154,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "db2.example.com.",
"__meta_dns_srv_record_port": "3306",
"__meta_dns_mx_record_target": "",
"__meta_dns_ns_record_target": "",
},
},
},
@ -180,6 +185,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "db1.example.com.",
"__meta_dns_srv_record_port": "3306",
"__meta_dns_mx_record_target": "",
"__meta_dns_ns_record_target": "",
},
},
},
@ -227,6 +233,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "",
"__meta_dns_srv_record_port": "",
"__meta_dns_mx_record_target": "smtp1.example.com.",
"__meta_dns_ns_record_target": "",
},
{
"__address__": "smtp2.example.com:25",
@ -234,6 +241,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "",
"__meta_dns_srv_record_port": "",
"__meta_dns_mx_record_target": "smtp2.example.com.",
"__meta_dns_ns_record_target": "",
},
},
},
@ -245,7 +253,8 @@ func TestDNS(t *testing.T) {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
sd := NewDiscovery(tc.config, nil)
sd, err := NewDiscovery(tc.config, nil, prometheus.NewRegistry())
require.NoError(t, err)
sd.lookupFn = tc.lookup
tgs, err := sd.refresh(context.Background())

View file

@ -184,17 +184,17 @@ func TestFetchApps(t *testing.T) {
apps, err := fetchApps(context.TODO(), ts.URL, &http.Client{})
require.NoError(t, err)
require.Equal(t, len(apps.Applications), 2)
require.Equal(t, apps.Applications[0].Name, "CONFIG-SERVICE")
require.Equal(t, apps.Applications[1].Name, "META-SERVICE")
require.Len(t, apps.Applications, 2)
require.Equal(t, "CONFIG-SERVICE", apps.Applications[0].Name)
require.Equal(t, "META-SERVICE", apps.Applications[1].Name)
require.Equal(t, len(apps.Applications[1].Instances), 2)
require.Equal(t, apps.Applications[1].Instances[0].InstanceID, "meta-service002.test.com:meta-service:8080")
require.Equal(t, apps.Applications[1].Instances[0].Metadata.Items[0].XMLName.Local, "project")
require.Equal(t, apps.Applications[1].Instances[0].Metadata.Items[0].Content, "meta-service")
require.Equal(t, apps.Applications[1].Instances[0].Metadata.Items[1].XMLName.Local, "management.port")
require.Equal(t, apps.Applications[1].Instances[0].Metadata.Items[1].Content, "8090")
require.Equal(t, apps.Applications[1].Instances[1].InstanceID, "meta-service001.test.com:meta-service:8080")
require.Len(t, apps.Applications[1].Instances, 2)
require.Equal(t, "meta-service002.test.com:meta-service:8080", apps.Applications[1].Instances[0].InstanceID)
require.Equal(t, "project", apps.Applications[1].Instances[0].Metadata.Items[0].XMLName.Local)
require.Equal(t, "meta-service", apps.Applications[1].Instances[0].Metadata.Items[0].Content)
require.Equal(t, "management.port", apps.Applications[1].Instances[0].Metadata.Items[1].XMLName.Local)
require.Equal(t, "8090", apps.Applications[1].Instances[0].Metadata.Items[1].Content)
require.Equal(t, "meta-service001.test.com:meta-service:8080", apps.Applications[1].Instances[1].InstanceID)
}
func Test500ErrorHttpResponse(t *testing.T) {

View file

@ -23,6 +23,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -80,7 +81,7 @@ func (*SDConfig) Name() string { return "eureka" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -117,7 +118,7 @@ type Discovery struct {
}
// NewDiscovery creates a new Eureka discovery for the given role.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "eureka_sd")
if err != nil {
return nil, err
@ -128,10 +129,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
server: conf.Server,
}
d.Discovery = refresh.NewDiscovery(
logger,
"eureka",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "eureka",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d, nil
}

View file

@ -20,6 +20,7 @@ import (
"net/http/httptest"
"testing"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@ -35,7 +36,7 @@ func testUpdateServices(respHandler http.HandlerFunc) ([]*targetgroup.Group, err
Server: ts.URL,
}
md, err := NewDiscovery(&conf, nil)
md, err := NewDiscovery(&conf, nil, prometheus.NewRegistry())
if err != nil {
return nil, err
}
@ -55,7 +56,7 @@ func TestEurekaSDHandleError(t *testing.T) {
tgs, err := testUpdateServices(respHandler)
require.EqualError(t, err, errTesting)
require.Equal(t, len(tgs), 0)
require.Empty(t, tgs)
}
func TestEurekaSDEmptyList(t *testing.T) {
@ -72,7 +73,7 @@ func TestEurekaSDEmptyList(t *testing.T) {
)
tgs, err := testUpdateServices(respHandler)
require.NoError(t, err)
require.Equal(t, len(tgs), 1)
require.Len(t, tgs, 1)
}
func TestEurekaSDSendGroup(t *testing.T) {
@ -232,11 +233,11 @@ func TestEurekaSDSendGroup(t *testing.T) {
tgs, err := testUpdateServices(respHandler)
require.NoError(t, err)
require.Equal(t, len(tgs), 1)
require.Len(t, tgs, 1)
tg := tgs[0]
require.Equal(t, tg.Source, "eureka")
require.Equal(t, len(tg.Targets), 4)
require.Equal(t, "eureka", tg.Source)
require.Len(t, tg.Targets, 4)
tgt := tg.Targets[0]
require.Equal(t, tgt[model.AddressLabel], model.LabelValue("config-service001.test.com:8080"))

View file

@ -39,24 +39,6 @@ import (
)
var (
fileSDReadErrorsCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_file_read_errors_total",
Help: "The number of File-SD read errors.",
})
fileSDScanDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Name: "prometheus_sd_file_scan_duration_seconds",
Help: "The duration of the File-SD scan in seconds.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
})
fileSDTimeStamp = NewTimestampCollector()
fileWatcherErrorsCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_file_watcher_errors_total",
Help: "The number of File-SD errors caused by filesystem watch failures.",
})
patFileSDName = regexp.MustCompile(`^[^*]*(\*[^/]*)?\.(json|yml|yaml|JSON|YML|YAML)$`)
// DefaultSDConfig is the default file SD configuration.
@ -67,7 +49,6 @@ var (
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(fileSDReadErrorsCount, fileSDScanDuration, fileSDTimeStamp, fileWatcherErrorsCount)
}
// SDConfig is the configuration for file based discovery.
@ -81,7 +62,7 @@ func (*SDConfig) Name() string { return "file" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger), nil
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -187,10 +168,17 @@ type Discovery struct {
// This is used to detect deleted target groups.
lastRefresh map[string]int
logger log.Logger
fileSDReadErrorsCount prometheus.Counter
fileSDScanDuration prometheus.Summary
fileWatcherErrorsCount prometheus.Counter
fileSDTimeStamp *TimestampCollector
metricRegisterer discovery.MetricRegisterer
}
// NewDiscovery returns a new file discovery for the given paths.
func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@ -200,9 +188,35 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery {
interval: time.Duration(conf.RefreshInterval),
timestamps: make(map[string]float64),
logger: logger,
fileSDReadErrorsCount: prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_file_read_errors_total",
Help: "The number of File-SD read errors.",
}),
fileSDScanDuration: prometheus.NewSummary(
prometheus.SummaryOpts{
Name: "prometheus_sd_file_scan_duration_seconds",
Help: "The duration of the File-SD scan in seconds.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
}),
fileWatcherErrorsCount: prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_file_watcher_errors_total",
Help: "The number of File-SD errors caused by filesystem watch failures.",
}),
fileSDTimeStamp: NewTimestampCollector(),
}
fileSDTimeStamp.addDiscoverer(disc)
return disc
disc.fileSDTimeStamp.addDiscoverer(disc)
disc.metricRegisterer = discovery.NewMetricRegisterer(reg, []prometheus.Collector{
disc.fileSDReadErrorsCount,
disc.fileSDScanDuration,
disc.fileWatcherErrorsCount,
disc.fileSDTimeStamp,
})
return disc, nil
}
// listFiles returns a list of all files that match the configured patterns.
@ -239,10 +253,17 @@ func (d *Discovery) watchFiles() {
// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
err := d.metricRegisterer.RegisterMetrics()
if err != nil {
level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
return
}
defer d.metricRegisterer.UnregisterMetrics()
watcher, err := fsnotify.NewWatcher()
if err != nil {
level.Error(d.logger).Log("msg", "Error adding file watcher", "err", err)
fileWatcherErrorsCount.Inc()
d.fileWatcherErrorsCount.Inc()
return
}
d.watcher = watcher
@ -306,7 +327,7 @@ func (d *Discovery) stop() {
done := make(chan struct{})
defer close(done)
fileSDTimeStamp.removeDiscoverer(d)
d.fileSDTimeStamp.removeDiscoverer(d)
// Closing the watcher will deadlock unless all events and errors are drained.
go func() {
@ -332,13 +353,13 @@ func (d *Discovery) stop() {
func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group) {
t0 := time.Now()
defer func() {
fileSDScanDuration.Observe(time.Since(t0).Seconds())
d.fileSDScanDuration.Observe(time.Since(t0).Seconds())
}()
ref := map[string]int{}
for _, p := range d.listFiles() {
tgroups, err := d.readFile(p)
if err != nil {
fileSDReadErrorsCount.Inc()
d.fileSDReadErrorsCount.Inc()
level.Error(d.logger).Log("msg", "Error reading file", "path", p, "err", err)
// Prevent deletion down below.

View file

@ -24,6 +24,7 @@ import (
"testing"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
@ -143,7 +144,7 @@ func (t *testRunner) run(files ...string) {
ctx, cancel := context.WithCancel(context.Background())
t.cancelSD = cancel
go func() {
NewDiscovery(
d, err := NewDiscovery(
&SDConfig{
Files: files,
// Setting a high refresh interval to make sure that the tests only
@ -151,7 +152,11 @@ func (t *testRunner) run(files ...string) {
RefreshInterval: model.Duration(1 * time.Hour),
},
nil,
).Run(ctx, t.ch)
prometheus.NewRegistry(),
)
require.NoError(t, err)
d.Run(ctx, t.ch)
}()
}

View file

@ -23,6 +23,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"golang.org/x/oauth2/google"
"google.golang.org/api/compute/v1"
@ -86,7 +87,7 @@ func (*SDConfig) Name() string { return "gce" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(*c, opts.Logger)
return NewDiscovery(*c, opts.Logger, opts.Registerer)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -121,7 +122,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
d := &Discovery{
project: conf.Project,
zone: conf.Zone,
@ -141,10 +142,13 @@ func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
d.isvc = compute.NewInstancesService(d.svc)
d.Discovery = refresh.NewDiscovery(
logger,
"gce",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "gce",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d, nil
}

View file

@ -48,12 +48,12 @@ func TestHCloudSDRefresh(t *testing.T) {
targetGroups, err := d.refresh(context.Background())
require.NoError(t, err)
require.Equal(t, 1, len(targetGroups))
require.Len(t, targetGroups, 1)
targetGroup := targetGroups[0]
require.NotNil(t, targetGroup, "targetGroup should not be nil")
require.NotNil(t, targetGroup.Targets, "targetGroup.targets should not be nil")
require.Equal(t, 3, len(targetGroup.Targets))
require.Len(t, targetGroup.Targets, 3)
for i, labelSet := range []model.LabelSet{
{

View file

@ -21,6 +21,7 @@ import (
"github.com/go-kit/log"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -67,7 +68,7 @@ func (*SDConfig) Name() string { return "hetzner" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
type refresher interface {
@ -127,17 +128,20 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) {
r, err := newRefresher(conf, logger)
if err != nil {
return nil, err
}
return refresh.NewDiscovery(
logger,
"hetzner",
time.Duration(conf.RefreshInterval),
r.refresh,
refresh.Options{
Logger: logger,
Mech: "hetzner",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: r.refresh,
Registry: reg,
},
), nil
}

View file

@ -20,7 +20,7 @@ import (
"testing"
)
// SDMock is the interface for the Hetzner Cloud mock
// SDMock is the interface for the Hetzner Cloud mock.
type SDMock struct {
t *testing.T
Server *httptest.Server
@ -34,19 +34,19 @@ func NewSDMock(t *testing.T) *SDMock {
}
}
// Endpoint returns the URI to the mock server
// Endpoint returns the URI to the mock server.
func (m *SDMock) Endpoint() string {
return m.Server.URL + "/"
}
// Setup creates the mock server
// Setup creates the mock server.
func (m *SDMock) Setup() {
m.Mux = http.NewServeMux()
m.Server = httptest.NewServer(m.Mux)
m.t.Cleanup(m.Server.Close)
}
// ShutdownServer creates the mock server
// ShutdownServer shuts down the mock server.
func (m *SDMock) ShutdownServer() {
m.Server.Close()
}

View file

@ -47,12 +47,12 @@ func TestRobotSDRefresh(t *testing.T) {
targetGroups, err := d.refresh(context.Background())
require.NoError(t, err)
require.Equal(t, 1, len(targetGroups))
require.Len(t, targetGroups, 1)
targetGroup := targetGroups[0]
require.NotNil(t, targetGroup, "targetGroup should not be nil")
require.NotNil(t, targetGroup.Targets, "targetGroup.targets should not be nil")
require.Equal(t, 2, len(targetGroup.Targets))
require.Len(t, targetGroup.Targets, 2)
for i, labelSet := range []model.LabelSet{
{
@ -98,5 +98,5 @@ func TestRobotSDRefreshHandleError(t *testing.T) {
require.Error(t, err)
require.Equal(t, "non 2xx status '401' response during hetzner service discovery with role robot", err.Error())
require.Equal(t, 0, len(targetGroups))
require.Empty(t, targetGroups)
}

View file

@ -45,17 +45,10 @@ var (
}
userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`)
failuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_http_failures_total",
Help: "Number of HTTP service discovery refresh failures.",
})
)
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(failuresCount)
}
// SDConfig is the configuration for HTTP based discovery.
@ -70,7 +63,7 @@ func (*SDConfig) Name() string { return "http" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger, opts.HTTPClientOptions)
return NewDiscovery(c, opts.Logger, opts.HTTPClientOptions, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -112,10 +105,11 @@ type Discovery struct {
client *http.Client
refreshInterval time.Duration
tgLastLength int
failuresCount prometheus.Counter
}
// NewDiscovery returns a new HTTP discovery for the given config.
func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPClientOption) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPClientOption, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@ -130,13 +124,22 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPCli
url: conf.URL,
client: client,
refreshInterval: time.Duration(conf.RefreshInterval), // Stored to be sent as headers.
failuresCount: prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_http_failures_total",
Help: "Number of HTTP service discovery refresh failures.",
}),
}
d.Discovery = refresh.NewDiscovery(
logger,
"http",
time.Duration(conf.RefreshInterval),
d.Refresh,
refresh.Options{
Logger: logger,
Mech: "http",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.Refresh,
Registry: reg,
Metrics: []prometheus.Collector{d.failuresCount},
},
)
return d, nil
}
@ -152,7 +155,7 @@ func (d *Discovery) Refresh(ctx context.Context) ([]*targetgroup.Group, error) {
resp, err := d.client.Do(req.WithContext(ctx))
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, err
}
defer func() {
@ -161,31 +164,31 @@ func (d *Discovery) Refresh(ctx context.Context) ([]*targetgroup.Group, error) {
}()
if resp.StatusCode != http.StatusOK {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("server returned HTTP status %s", resp.Status)
}
if !matchContentType.MatchString(strings.TrimSpace(resp.Header.Get("Content-Type"))) {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("unsupported content type %q", resp.Header.Get("Content-Type"))
}
b, err := io.ReadAll(resp.Body)
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, err
}
var targetGroups []*targetgroup.Group
if err := json.Unmarshal(b, &targetGroups); err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, err
}
for i, tg := range targetGroups {
if tg == nil {
failuresCount.Inc()
d.failuresCount.Inc()
err = errors.New("nil target group item found")
return nil, err
}

View file

@ -41,7 +41,7 @@ func TestHTTPValidRefresh(t *testing.T) {
RefreshInterval: model.Duration(30 * time.Second),
}
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil)
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
@ -62,8 +62,8 @@ func TestHTTPValidRefresh(t *testing.T) {
Source: urlSource(ts.URL+"/http_sd.good.json", 0),
},
}
require.Equal(t, tgs, expectedTargets)
require.Equal(t, 0.0, getFailureCount())
require.Equal(t, expectedTargets, tgs)
require.Equal(t, 0.0, getFailureCount(d.failuresCount))
}
func TestHTTPInvalidCode(t *testing.T) {
@ -79,13 +79,13 @@ func TestHTTPInvalidCode(t *testing.T) {
RefreshInterval: model.Duration(30 * time.Second),
}
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil)
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
_, err = d.Refresh(ctx)
require.EqualError(t, err, "server returned HTTP status 400 Bad Request")
require.Equal(t, 1.0, getFailureCount())
require.Equal(t, 1.0, getFailureCount(d.failuresCount))
}
func TestHTTPInvalidFormat(t *testing.T) {
@ -101,18 +101,16 @@ func TestHTTPInvalidFormat(t *testing.T) {
RefreshInterval: model.Duration(30 * time.Second),
}
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil)
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
_, err = d.Refresh(ctx)
require.EqualError(t, err, `unsupported content type "text/plain; charset=utf-8"`)
require.Equal(t, 1.0, getFailureCount())
require.Equal(t, 1.0, getFailureCount(d.failuresCount))
}
var lastFailureCount float64
func getFailureCount() float64 {
func getFailureCount(failuresCount prometheus.Counter) float64 {
failureChan := make(chan prometheus.Metric)
go func() {
@ -129,10 +127,7 @@ func getFailureCount() float64 {
metric.Write(&counter)
}
// account for failures in prior tests
count := *counter.Counter.Value - lastFailureCount
lastFailureCount = *counter.Counter.Value
return count
return *counter.Counter.Value
}
func TestContentTypeRegex(t *testing.T) {
@ -417,7 +412,7 @@ func TestSourceDisappeared(t *testing.T) {
URL: ts.URL,
RefreshInterval: model.Duration(1 * time.Second),
}
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil)
d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry())
require.NoError(t, err)
for _, test := range cases {
ctx := context.Background()

View file

@ -14,15 +14,17 @@
package ionos
import (
"errors"
"time"
"github.com/go-kit/log"
"github.com/pkg/errors"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/client_golang/prometheus"
)
const (
@ -41,7 +43,7 @@ func init() {
type Discovery struct{}
// NewDiscovery returns a new refresh.Discovery for IONOS Cloud.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) {
if conf.ionosEndpoint == "" {
conf.ionosEndpoint = "https://api.ionos.com"
}
@ -52,10 +54,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error)
}
return refresh.NewDiscovery(
logger,
"ionos",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "ionos",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
), nil
}
@ -86,7 +91,7 @@ func (c SDConfig) Name() string {
// NewDiscoverer returns a new discovery.Discoverer for IONOS Cloud.
func (c SDConfig) NewDiscoverer(options discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(&c, options.Logger)
return NewDiscovery(&c, options.Logger, options.Registerer)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
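
The hunk above switches IONOS to the options-struct form of refresh.NewDiscovery. Below is a minimal sketch of that wiring, using only the field names visible in this diff (Logger, Mech, Interval, RefreshF, Registry); the function name, the "example" mechanism string and the placeholder refresh function are illustrative.

package example

import (
	"context"
	"time"

	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery/refresh"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

// newExampleRefreshDiscovery wires a placeholder refresh function into the
// options-struct form of refresh.NewDiscovery used in this commit.
func newExampleRefreshDiscovery(reg prometheus.Registerer) *refresh.Discovery {
	refreshF := func(ctx context.Context) ([]*targetgroup.Group, error) {
		// A real discovery would query its provider API here.
		return []*targetgroup.Group{}, nil
	}
	return refresh.NewDiscovery(refresh.Options{
		Logger:   log.NewNopLogger(),
		Mech:     "example",
		Interval: 60 * time.Second,
		RefreshF: refreshF,
		Registry: reg,
	})
}
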

View file

@ -48,12 +48,12 @@ func TestIONOSServerRefresh(t *testing.T) {
tgs, err := d.refresh(ctx)
require.NoError(t, err)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Equal(t, 2, len(tg.Targets))
require.Len(t, tg.Targets, 2)
for i, lbls := range []model.LabelSet{
{

View file

@ -11,7 +11,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// nolint:revive // Many legitimately empty blocks in this file.
package kubernetes
import (
@ -23,6 +22,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
@ -31,12 +31,6 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
)
var (
epAddCount = eventCount.WithLabelValues("endpoints", "add")
epUpdateCount = eventCount.WithLabelValues("endpoints", "update")
epDeleteCount = eventCount.WithLabelValues("endpoints", "delete")
)
// Endpoints discovers new endpoint targets.
type Endpoints struct {
logger log.Logger
@ -55,10 +49,19 @@ type Endpoints struct {
}
// NewEndpoints returns a new endpoints discovery.
func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer) *Endpoints {
func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints {
if l == nil {
l = log.NewNopLogger()
}
epAddCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleAdd)
epUpdateCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleUpdate)
epDeleteCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleDelete)
svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd)
svcUpdateCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleUpdate)
svcDeleteCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleDelete)
e := &Endpoints{
logger: l,
endpointsInf: eps,
@ -69,7 +72,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca
podStore: pod.GetStore(),
nodeInf: node,
withNodeMetadata: node != nil,
queue: workqueue.NewNamed("endpoints"),
queue: workqueue.NewNamed(RoleEndpoint.String()),
}
_, err := e.endpointsInf.AddEventHandler(cache.ResourceEventHandlerFuncs{

View file

@ -15,12 +15,14 @@ package kubernetes
import (
"context"
"errors"
"fmt"
"net"
"strconv"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
v1 "k8s.io/api/discovery/v1"
@ -32,12 +34,6 @@ import (
"github.com/prometheus/prometheus/util/strutil"
)
var (
epslAddCount = eventCount.WithLabelValues("endpointslice", "add")
epslUpdateCount = eventCount.WithLabelValues("endpointslice", "update")
epslDeleteCount = eventCount.WithLabelValues("endpointslice", "delete")
)
// EndpointSlice discovers new endpoint targets.
type EndpointSlice struct {
logger log.Logger
@ -56,10 +52,19 @@ type EndpointSlice struct {
}
// NewEndpointSlice returns a new endpointslice discovery.
func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer) *EndpointSlice {
func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice {
if l == nil {
l = log.NewNopLogger()
}
epslAddCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleAdd)
epslUpdateCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleUpdate)
epslDeleteCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleDelete)
svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd)
svcUpdateCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleUpdate)
svcDeleteCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleDelete)
e := &EndpointSlice{
logger: l,
endpointSliceInf: eps,
@ -70,7 +75,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod
podStore: pod.GetStore(),
nodeInf: node,
withNodeMetadata: node != nil,
queue: workqueue.NewNamed("endpointSlice"),
queue: workqueue.NewNamed(RoleEndpointSlice.String()),
}
_, err := e.endpointSliceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
@ -183,14 +188,14 @@ func (e *EndpointSlice) Run(ctx context.Context, ch chan<- []*targetgroup.Group)
cacheSyncs = append(cacheSyncs, e.nodeInf.HasSynced)
}
if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) {
if ctx.Err() != context.Canceled {
if !errors.Is(ctx.Err(), context.Canceled) {
level.Error(e.logger).Log("msg", "endpointslice informer unable to sync cache")
}
return
}
go func() {
for e.process(ctx, ch) { // nolint:revive
for e.process(ctx, ch) {
}
}()

View file

@ -20,7 +20,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// endpointSliceAdaptor is an adaptor for the different EndpointSlice versions
// endpointSliceAdaptor is an adaptor for the different EndpointSlice versions.
type endpointSliceAdaptor interface {
get() interface{}
getObjectMeta() metav1.ObjectMeta
@ -55,7 +55,7 @@ type endpointSliceEndpointConditionsAdaptor interface {
terminating() *bool
}
// Adaptor for k8s.io/api/discovery/v1
// Adaptor for k8s.io/api/discovery/v1.
type endpointSliceAdaptorV1 struct {
endpointSlice *v1.EndpointSlice
}
@ -108,7 +108,7 @@ func (e *endpointSliceAdaptorV1) labelServiceName() string {
return v1.LabelServiceName
}
// Adaptor for k8s.io/api/discovery/v1beta1
// Adaptor for k8s.io/api/discovery/v1beta1.
type endpointSliceAdaptorV1Beta1 struct {
endpointSlice *v1beta1.EndpointSlice
}

View file

@ -29,7 +29,7 @@ func Test_EndpointSliceAdaptor_v1(t *testing.T) {
require.Equal(t, endpointSlice.ObjectMeta.Namespace, adaptor.namespace())
require.Equal(t, endpointSlice.AddressType, v1.AddressType(adaptor.addressType()))
require.Equal(t, endpointSlice.Labels, adaptor.labels())
require.Equal(t, endpointSlice.Labels[v1.LabelServiceName], "testendpoints")
require.Equal(t, "testendpoints", endpointSlice.Labels[v1.LabelServiceName])
for i, endpointAdaptor := range adaptor.endpoints() {
require.Equal(t, endpointSlice.Endpoints[i].Addresses, endpointAdaptor.addresses())
@ -57,7 +57,7 @@ func Test_EndpointSliceAdaptor_v1beta1(t *testing.T) {
require.Equal(t, endpointSlice.ObjectMeta.Namespace, adaptor.namespace())
require.Equal(t, endpointSlice.AddressType, v1beta1.AddressType(adaptor.addressType()))
require.Equal(t, endpointSlice.Labels, adaptor.labels())
require.Equal(t, endpointSlice.Labels[v1beta1.LabelServiceName], "testendpoints")
require.Equal(t, "testendpoints", endpointSlice.Labels[v1beta1.LabelServiceName])
for i, endpointAdaptor := range adaptor.endpoints() {
require.Equal(t, endpointSlice.Endpoints[i].Addresses, endpointAdaptor.addresses())

View file

@ -21,6 +21,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
v1 "k8s.io/api/networking/v1"
"k8s.io/api/networking/v1beta1"
@ -30,12 +31,6 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
)
var (
ingressAddCount = eventCount.WithLabelValues("ingress", "add")
ingressUpdateCount = eventCount.WithLabelValues("ingress", "update")
ingressDeleteCount = eventCount.WithLabelValues("ingress", "delete")
)
// Ingress implements discovery of Kubernetes ingress.
type Ingress struct {
logger log.Logger
@ -45,8 +40,18 @@ type Ingress struct {
}
// NewIngress returns a new ingress discovery.
func NewIngress(l log.Logger, inf cache.SharedInformer) *Ingress {
s := &Ingress{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("ingress")}
func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress {
ingressAddCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleAdd)
ingressUpdateCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleUpdate)
ingressDeleteCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleDelete)
s := &Ingress{
logger: l,
informer: inf,
store: inf.GetStore(),
queue: workqueue.NewNamed(RoleIngress.String()),
}
_, err := s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
ingressAddCount.Inc()
@ -88,7 +93,7 @@ func (i *Ingress) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
go func() {
for i.process(ctx, ch) { // nolint:revive
for i.process(ctx, ch) {
}
}()

View file

@ -19,7 +19,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// ingressAdaptor is an adaptor for the different Ingress versions
// ingressAdaptor is an adaptor for the different Ingress versions.
type ingressAdaptor interface {
getObjectMeta() metav1.ObjectMeta
name() string
@ -36,7 +36,7 @@ type ingressRuleAdaptor interface {
host() string
}
// Adaptor for networking.k8s.io/v1
// Adaptor for networking.k8s.io/v1.
type ingressAdaptorV1 struct {
ingress *v1.Ingress
}
@ -90,7 +90,7 @@ func (i *ingressRuleAdaptorV1) paths() []string {
func (i *ingressRuleAdaptorV1) host() string { return i.rule.Host }
// Adaptor for networking.k8s.io/v1beta1
// Adaptor for networking.k8s.io/v1beta1.
type ingressAdaptorV1Beta1 struct {
ingress *v1beta1.Ingress
}

View file

@ -58,25 +58,15 @@ import (
const (
// metaLabelPrefix is the meta prefix used for all meta labels.
// in this discovery.
metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_"
namespaceLabel = metaLabelPrefix + "namespace"
metricsNamespace = "prometheus_sd_kubernetes"
presentValue = model.LabelValue("true")
metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_"
namespaceLabel = metaLabelPrefix + "namespace"
presentValue = model.LabelValue("true")
)
var (
// Http header
// Http header.
userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
// Custom events metric
eventCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricsNamespace,
Name: "events_total",
Help: "The number of Kubernetes events handled.",
},
[]string{"role", "event"},
)
// DefaultSDConfig is the default Kubernetes SD configuration
// DefaultSDConfig is the default Kubernetes SD configuration.
DefaultSDConfig = SDConfig{
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
@ -84,15 +74,6 @@ var (
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(eventCount)
// Initialize metric vectors.
for _, role := range []string{"endpointslice", "endpoints", "node", "pod", "service", "ingress"} {
for _, evt := range []string{"add", "delete", "update"} {
eventCount.WithLabelValues(role, evt)
}
}
(&clientGoRequestMetricAdapter{}).Register(prometheus.DefaultRegisterer)
(&clientGoWorkqueueMetricsProvider{}).Register(prometheus.DefaultRegisterer)
}
// Role is role of the service in Kubernetes.
@ -121,6 +102,16 @@ func (c *Role) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
}
func (c Role) String() string {
return string(c)
}
const (
MetricLabelRoleAdd = "add"
MetricLabelRoleDelete = "delete"
MetricLabelRoleUpdate = "update"
)
// SDConfig is the configuration for Kubernetes service discovery.
type SDConfig struct {
APIServer config.URL `yaml:"api_server,omitempty"`
@ -137,7 +128,7 @@ func (*SDConfig) Name() string { return "kubernetes" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return New(opts.Logger, c)
return New(opts.Logger, opts.Registerer, c)
}
// SetDirectory joins any relative file paths with dir.
@ -274,6 +265,8 @@ type Discovery struct {
selectors roleSelector
ownNamespace string
attachMetadata AttachMetadataConfig
eventCount *prometheus.CounterVec
metricRegisterer discovery.MetricRegisterer
}
func (d *Discovery) getNamespaces() []string {
@ -292,7 +285,7 @@ func (d *Discovery) getNamespaces() []string {
}
// New creates a new Kubernetes discovery for the given role.
func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
func New(l log.Logger, reg prometheus.Registerer, conf *SDConfig) (*Discovery, error) {
if l == nil {
l = log.NewNopLogger()
}
@ -346,7 +339,7 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
return nil, err
}
return &Discovery{
d := &Discovery{
client: c,
logger: l,
role: conf.Role,
@ -355,7 +348,37 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
selectors: mapSelector(conf.Selectors),
ownNamespace: ownNamespace,
attachMetadata: conf.AttachMetadata,
}, nil
eventCount: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: discovery.KubernetesMetricsNamespace,
Name: "events_total",
Help: "The number of Kubernetes events handled.",
},
[]string{"role", "event"},
),
}
d.metricRegisterer = discovery.NewMetricRegisterer(reg, []prometheus.Collector{d.eventCount})
// Initialize metric vectors.
for _, role := range []string{
RoleEndpointSlice.String(),
RoleEndpoint.String(),
RoleNode.String(),
RolePod.String(),
RoleService.String(),
RoleIngress.String(),
} {
for _, evt := range []string{
MetricLabelRoleAdd,
MetricLabelRoleDelete,
MetricLabelRoleUpdate,
} {
d.eventCount.WithLabelValues(role, evt)
}
}
return d, nil
}
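
New now builds the events_total counter vector per Discovery and hands it to a discovery.MetricRegisterer, and Run (further below) registers and unregisters it around the discovery's lifetime. A minimal sketch of that lifecycle, assuming only the MetricRegisterer methods shown in this diff; the exampleSD type is illustrative. Registering in Run rather than at construction time presumably keeps repeated instantiations against the same Registerer from colliding.

package example

import (
	"context"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery"
)

type exampleSD struct {
	logger           log.Logger
	eventCount       *prometheus.CounterVec
	metricRegisterer discovery.MetricRegisterer
}

// Run registers the discovery's collectors only for its own lifetime,
// mirroring the pattern used by the Kubernetes discovery in this commit.
func (d *exampleSD) Run(ctx context.Context) {
	if err := d.metricRegisterer.RegisterMetrics(); err != nil {
		level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
		return
	}
	defer d.metricRegisterer.UnregisterMetrics()

	<-ctx.Done()
}
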
func mapSelector(rawSelector []SelectorConfig) roleSelector {
@ -391,6 +414,14 @@ const resyncDisabled = 0
// Run implements the discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
d.Lock()
err := d.metricRegisterer.RegisterMetrics()
if err != nil {
level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
return
}
defer d.metricRegisterer.UnregisterMetrics()
namespaces := d.getNamespaces()
switch d.role {
@ -482,6 +513,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
cache.NewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
cache.NewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled),
nodeInf,
d.eventCount,
)
d.discoverers = append(d.discoverers, eps)
go eps.endpointSliceInf.Run(ctx.Done())
@ -541,6 +573,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
cache.NewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
cache.NewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled),
nodeInf,
d.eventCount,
)
d.discoverers = append(d.discoverers, eps)
go eps.endpointsInf.Run(ctx.Done())
@ -572,6 +605,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
log.With(d.logger, "role", "pod"),
d.newPodsByNodeInformer(plw),
nodeInformer,
d.eventCount,
)
d.discoverers = append(d.discoverers, pod)
go pod.podInf.Run(ctx.Done())
@ -594,6 +628,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
svc := NewService(
log.With(d.logger, "role", "service"),
cache.NewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
d.eventCount,
)
d.discoverers = append(d.discoverers, svc)
go svc.informer.Run(ctx.Done())
@ -651,13 +686,14 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
ingress := NewIngress(
log.With(d.logger, "role", "ingress"),
informer,
d.eventCount,
)
d.discoverers = append(d.discoverers, ingress)
go ingress.informer.Run(ctx.Done())
}
case RoleNode:
nodeInformer := d.newNodeInformer(ctx)
node := NewNode(log.With(d.logger, "role", "node"), nodeInformer)
node := NewNode(log.With(d.logger, "role", "node"), nodeInformer, d.eventCount)
d.discoverers = append(d.discoverers, node)
go node.informer.Run(ctx.Done())
default:

View file

@ -29,6 +29,8 @@ import (
"k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/tools/cache"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/testutil"
@ -49,13 +51,25 @@ func makeDiscoveryWithVersion(role Role, nsDiscovery NamespaceDiscovery, k8sVer
fakeDiscovery, _ := clientset.Discovery().(*fakediscovery.FakeDiscovery)
fakeDiscovery.FakedServerVersion = &version.Info{GitVersion: k8sVer}
return &Discovery{
d := &Discovery{
client: clientset,
logger: log.NewNopLogger(),
role: role,
namespaceDiscovery: &nsDiscovery,
ownNamespace: "own-ns",
}, clientset
eventCount: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: discovery.KubernetesMetricsNamespace,
Name: "events_total",
Help: "The number of Kubernetes events handled.",
},
[]string{"role", "event"},
),
}
d.metricRegisterer = discovery.NewMetricRegisterer(prometheus.NewRegistry(), []prometheus.Collector{d.eventCount})
return d, clientset
}
// makeDiscoveryWithMetadata creates a kubernetes.Discovery instance with the specified metadata config.

View file

@ -22,6 +22,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
@ -35,12 +36,6 @@ const (
NodeLegacyHostIP = "LegacyHostIP"
)
var (
nodeAddCount = eventCount.WithLabelValues("node", "add")
nodeUpdateCount = eventCount.WithLabelValues("node", "update")
nodeDeleteCount = eventCount.WithLabelValues("node", "delete")
)
// Node discovers Kubernetes nodes.
type Node struct {
logger log.Logger
@ -50,11 +45,22 @@ type Node struct {
}
// NewNode returns a new node discovery.
func NewNode(l log.Logger, inf cache.SharedInformer) *Node {
func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node {
if l == nil {
l = log.NewNopLogger()
}
n := &Node{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("node")}
nodeAddCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleAdd)
nodeUpdateCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleUpdate)
nodeDeleteCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleDelete)
n := &Node{
logger: l,
informer: inf,
store: inf.GetStore(),
queue: workqueue.NewNamed(RoleNode.String()),
}
_, err := n.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
nodeAddCount.Inc()
@ -96,7 +102,7 @@ func (n *Node) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
go func() {
for n.process(ctx, ch) { // nolint:revive
for n.process(ctx, ch) {
}
}()
@ -202,7 +208,7 @@ func (n *Node) buildNode(node *apiv1.Node) *targetgroup.Group {
// 5. NodeLegacyHostIP
// 6. NodeHostName
//
// Derived from k8s.io/kubernetes/pkg/util/node/node.go
// Derived from k8s.io/kubernetes/pkg/util/node/node.go.
func nodeAddress(node *apiv1.Node) (string, map[apiv1.NodeAddressType][]string, error) {
m := map[apiv1.NodeAddressType][]string{}
for _, a := range node.Status.Addresses {

View file

@ -23,6 +23,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -34,12 +35,6 @@ import (
const nodeIndex = "node"
var (
podAddCount = eventCount.WithLabelValues("pod", "add")
podUpdateCount = eventCount.WithLabelValues("pod", "update")
podDeleteCount = eventCount.WithLabelValues("pod", "delete")
)
// Pod discovers new pod targets.
type Pod struct {
podInf cache.SharedIndexInformer
@ -51,18 +46,22 @@ type Pod struct {
}
// NewPod creates a new pod discovery.
func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer) *Pod {
func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod {
if l == nil {
l = log.NewNopLogger()
}
podAddCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleAdd)
podDeleteCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleDelete)
podUpdateCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleUpdate)
p := &Pod{
podInf: pods,
nodeInf: nodes,
withNodeMetadata: nodes != nil,
store: pods.GetStore(),
logger: l,
queue: workqueue.NewNamed("pod"),
queue: workqueue.NewNamed(RolePod.String()),
}
_, err := p.podInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
@ -131,7 +130,7 @@ func (p *Pod) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
go func() {
for p.process(ctx, ch) { // nolint:revive
for p.process(ctx, ch) {
}
}()

View file

@ -22,6 +22,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
@ -30,12 +31,6 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
)
var (
svcAddCount = eventCount.WithLabelValues("service", "add")
svcUpdateCount = eventCount.WithLabelValues("service", "update")
svcDeleteCount = eventCount.WithLabelValues("service", "delete")
)
// Service implements discovery of Kubernetes services.
type Service struct {
logger log.Logger
@ -45,11 +40,22 @@ type Service struct {
}
// NewService returns a new service discovery.
func NewService(l log.Logger, inf cache.SharedInformer) *Service {
func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service {
if l == nil {
l = log.NewNopLogger()
}
s := &Service{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("service")}
svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd)
svcUpdateCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleUpdate)
svcDeleteCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleDelete)
s := &Service{
logger: l,
informer: inf,
store: inf.GetStore(),
queue: workqueue.NewNamed(RoleService.String()),
}
_, err := s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
svcAddCount.Inc()
@ -91,7 +97,7 @@ func (s *Service) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
go func() {
for s.process(ctx, ch) { // nolint:revive
for s.process(ctx, ch) {
}
}()

View file

@ -28,48 +28,6 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
)
var (
failedConfigs = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_sd_failed_configs",
Help: "Current number of service discovery configurations that failed to load.",
},
[]string{"name"},
)
discoveredTargets = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_sd_discovered_targets",
Help: "Current number of discovered targets.",
},
[]string{"name", "config"},
)
receivedUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_received_updates_total",
Help: "Total number of update events received from the SD providers.",
},
[]string{"name"},
)
delayedUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_updates_delayed_total",
Help: "Total number of update events that couldn't be sent immediately.",
},
[]string{"name"},
)
sentUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_updates_total",
Help: "Total number of update events sent to the SD consumers.",
},
[]string{"name"},
)
)
func RegisterMetrics() {
prometheus.MustRegister(failedConfigs, discoveredTargets, receivedUpdates, delayedUpdates, sentUpdates)
}
type poolKey struct {
setName string
provider string
@ -84,7 +42,7 @@ type provider struct {
}
// NewManager is the Discovery Manager constructor.
func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager)) *Manager {
func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, options ...func(*Manager)) *Manager {
if logger == nil {
logger = log.NewNopLogger()
}
@ -96,10 +54,21 @@ func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager
ctx: ctx,
updatert: 5 * time.Second,
triggerSend: make(chan struct{}, 1),
registerer: registerer,
}
for _, option := range options {
option(mgr)
}
// Register the metrics.
// We have to do this after setting all options, so that the name of the Manager is set.
if metrics, err := discovery.NewMetrics(registerer, mgr.name); err == nil {
mgr.metrics = metrics
} else {
level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err)
return nil
}
return mgr
}
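
NewManager now threads a prometheus.Registerer through to discovery.NewMetrics and returns nil when that fails, which is why the test changes later in this diff add require.NotNil checks. The fields accessed on m.metrics elsewhere in this diff suggest a per-manager bundle roughly like the following sketch; this shape is inferred, not the actual definition.

package discovery

import "github.com/prometheus/client_golang/prometheus"

// Metrics is a hypothetical sketch of the per-manager metrics bundle,
// inferred from the fields used in this diff (FailedConfigs,
// DiscoveredTargets, ReceivedUpdates, DelayedUpdates, SentUpdates); the real
// definition in the discovery package may differ.
type Metrics struct {
	FailedConfigs     prometheus.Gauge
	DiscoveredTargets *prometheus.GaugeVec
	ReceivedUpdates   prometheus.Counter
	DelayedUpdates    prometheus.Counter
	SentUpdates       prometheus.Counter
}
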
@ -135,9 +104,14 @@ type Manager struct {
// The triggerSend channel signals to the manager that new updates have been received from providers.
triggerSend chan struct{}
// A registerer for all service discovery metrics.
registerer prometheus.Registerer
metrics *discovery.Metrics
}
// Run starts the background processing
// Run starts the background processing.
func (m *Manager) Run() error {
go m.sender()
<-m.ctx.Done()
@ -157,7 +131,7 @@ func (m *Manager) ApplyConfig(cfg map[string]discovery.Configs) error {
for pk := range m.targets {
if _, ok := cfg[pk.setName]; !ok {
discoveredTargets.DeleteLabelValues(m.name, pk.setName)
m.metrics.DiscoveredTargets.DeleteLabelValues(m.name, pk.setName)
}
}
m.cancelDiscoverers()
@ -168,9 +142,9 @@ func (m *Manager) ApplyConfig(cfg map[string]discovery.Configs) error {
failedCount := 0
for name, scfg := range cfg {
failedCount += m.registerProviders(scfg, name)
discoveredTargets.WithLabelValues(m.name, name).Set(0)
m.metrics.DiscoveredTargets.WithLabelValues(name).Set(0)
}
failedConfigs.WithLabelValues(m.name).Set(float64(failedCount))
m.metrics.FailedConfigs.Set(float64(failedCount))
for _, prov := range m.providers {
m.startProvider(m.ctx, prov)
@ -207,7 +181,7 @@ func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targ
case <-ctx.Done():
return
case tgs, ok := <-updates:
receivedUpdates.WithLabelValues(m.name).Inc()
m.metrics.ReceivedUpdates.Inc()
if !ok {
level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name)
return
@ -236,11 +210,11 @@ func (m *Manager) sender() {
case <-ticker.C: // Some discoverers send updates too often so we throttle these with the ticker.
select {
case <-m.triggerSend:
sentUpdates.WithLabelValues(m.name).Inc()
m.metrics.SentUpdates.Inc()
select {
case m.syncCh <- m.allGroups():
default:
delayedUpdates.WithLabelValues(m.name).Inc()
m.metrics.DelayedUpdates.Inc()
level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle")
select {
case m.triggerSend <- struct{}{}:
@ -288,7 +262,7 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
}
}
for setName, v := range n {
discoveredTargets.WithLabelValues(m.name, setName).Set(float64(v))
m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v))
}
return tSets
}
@ -309,7 +283,8 @@ func (m *Manager) registerProviders(cfgs discovery.Configs, setName string) int
}
typ := cfg.Name()
d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{
Logger: log.With(m.logger, "discovery", typ, "config", setName),
Logger: log.With(m.logger, "discovery", typ, "config", setName),
Registerer: m.registerer,
})
if err != nil {
level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName)

View file

@ -22,6 +22,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
client_testutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@ -664,7 +665,8 @@ func TestTargetUpdatesOrder(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
var totalUpdatesCount int
@ -746,7 +748,8 @@ func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Grou
func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -774,7 +777,8 @@ func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) {
func TestDiscovererConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -798,7 +802,8 @@ func TestDiscovererConfigs(t *testing.T) {
func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -837,7 +842,8 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
func TestIdenticalConfigurationsAreCoalesced(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, nil)
discoveryManager := NewManager(ctx, nil, prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -868,7 +874,8 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) {
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -893,7 +900,8 @@ func (e errorConfig) NewDiscoverer(discovery.DiscovererOptions) (discovery.Disco
func TestGaugeFailedConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -907,7 +915,7 @@ func TestGaugeFailedConfigs(t *testing.T) {
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
failedCount := client_testutil.ToFloat64(failedConfigs)
failedCount := client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
if failedCount != 3 {
t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount)
}
@ -918,7 +926,7 @@ func TestGaugeFailedConfigs(t *testing.T) {
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
failedCount = client_testutil.ToFloat64(failedConfigs)
failedCount = client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
if failedCount != 0 {
t.Fatalf("Expected to get no failed config, got: %v", failedCount)
}
@ -1049,7 +1057,8 @@ func TestCoordinationWithReceiver(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
mgr := NewManager(ctx, nil)
mgr := NewManager(ctx, nil, prometheus.NewRegistry())
require.NotNil(t, mgr)
mgr.updatert = updateDelay
go mgr.Run()

View file

@ -51,6 +51,7 @@ const (
linodeLabelStatus = linodeLabel + "status"
linodeLabelTags = linodeLabel + "tags"
linodeLabelGroup = linodeLabel + "group"
linodeLabelGPUs = linodeLabel + "gpus"
linodeLabelHypervisor = linodeLabel + "hypervisor"
linodeLabelBackups = linodeLabel + "backups"
linodeLabelSpecsDiskBytes = linodeLabel + "specs_disk_bytes"
@ -66,24 +67,15 @@ const (
)
// DefaultSDConfig is the default Linode SD configuration.
var (
DefaultSDConfig = SDConfig{
TagSeparator: ",",
Port: 80,
RefreshInterval: model.Duration(60 * time.Second),
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
failuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_linode_failures_total",
Help: "Number of Linode service discovery refresh failures.",
})
)
var DefaultSDConfig = SDConfig{
TagSeparator: ",",
Port: 80,
RefreshInterval: model.Duration(60 * time.Second),
HTTPClientConfig: config.DefaultHTTPClientConfig,
}
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(failuresCount)
}
// SDConfig is the configuration for Linode based service discovery.
@ -100,7 +92,7 @@ func (*SDConfig) Name() string { return "linode" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -130,16 +122,22 @@ type Discovery struct {
pollCount int
lastResults []*targetgroup.Group
eventPollingEnabled bool
failuresCount prometheus.Counter
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
d := &Discovery{
port: conf.Port,
tagSeparator: conf.TagSeparator,
pollCount: 0,
lastRefreshTimestamp: time.Now().UTC(),
eventPollingEnabled: true,
failuresCount: prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_linode_failures_total",
Help: "Number of Linode service discovery refresh failures.",
}),
}
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "linode_sd")
@ -157,10 +155,14 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
d.client = &client
d.Discovery = refresh.NewDiscovery(
logger,
"linode",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "linode",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
Metrics: []prometheus.Collector{d.failuresCount},
},
)
return d, nil
}
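
Linode's failure counter moves from a package-level prometheus.MustRegister in init() to a field on Discovery that is handed to the refresh helper through the Metrics option. A minimal sketch of that hand-off, assuming only the refresh.Options fields shown in this diff; the function name and parameters are illustrative.

package example

import (
	"context"
	"time"

	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery/refresh"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

// newDiscoveryWithOwnMetrics hands an SD-owned collector to the refresh
// helper via the Metrics option, replacing a package-level registration.
func newDiscoveryWithOwnMetrics(
	reg prometheus.Registerer,
	failures prometheus.Counter,
	refreshF func(ctx context.Context) ([]*targetgroup.Group, error),
) *refresh.Discovery {
	return refresh.NewDiscovery(refresh.Options{
		Logger:   log.NewNopLogger(),
		Mech:     "linode",
		Interval: 60 * time.Second,
		RefreshF: refreshF,
		Registry: reg,
		Metrics:  []prometheus.Collector{failures},
	})
}
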
@ -221,14 +223,14 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro
// Gather all linode instances.
instances, err := d.client.ListInstances(ctx, &linodego.ListOptions{PageSize: 500})
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, err
}
// Gather detailed IP address info for all IPs on all linode instances.
detailedIPs, err := d.client.ListIPAddresses(ctx, &linodego.ListOptions{PageSize: 500})
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, err
}
@ -302,6 +304,7 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro
linodeLabelType: model.LabelValue(instance.Type),
linodeLabelStatus: model.LabelValue(instance.Status),
linodeLabelGroup: model.LabelValue(instance.Group),
linodeLabelGPUs: model.LabelValue(fmt.Sprintf("%d", instance.Specs.GPUs)),
linodeLabelHypervisor: model.LabelValue(instance.Hypervisor),
linodeLabelBackups: model.LabelValue(backupsStatus),
linodeLabelSpecsDiskBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Disk)<<20)),

View file

@ -20,6 +20,7 @@ import (
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@ -52,7 +53,7 @@ func TestLinodeSDRefresh(t *testing.T) {
Credentials: tokenID,
Type: "Bearer",
}
d, err := NewDiscovery(&cfg, log.NewNopLogger())
d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
endpoint, err := url.Parse(sdmock.Mock.Endpoint())
require.NoError(t, err)
@ -61,12 +62,12 @@ func TestLinodeSDRefresh(t *testing.T) {
tgs, err := d.refresh(context.Background())
require.NoError(t, err)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Equal(t, 4, len(tg.Targets))
require.Len(t, tg.Targets, 4)
for i, lbls := range []model.LabelSet{
{
@ -85,6 +86,7 @@ func TestLinodeSDRefresh(t *testing.T) {
"__meta_linode_status": model.LabelValue("running"),
"__meta_linode_tags": model.LabelValue(",monitoring,"),
"__meta_linode_group": model.LabelValue(""),
"__meta_linode_gpus": model.LabelValue("0"),
"__meta_linode_hypervisor": model.LabelValue("kvm"),
"__meta_linode_backups": model.LabelValue("disabled"),
"__meta_linode_specs_disk_bytes": model.LabelValue("85899345920"),
@ -109,6 +111,7 @@ func TestLinodeSDRefresh(t *testing.T) {
"__meta_linode_status": model.LabelValue("running"),
"__meta_linode_tags": model.LabelValue(",monitoring,"),
"__meta_linode_group": model.LabelValue(""),
"__meta_linode_gpus": model.LabelValue("0"),
"__meta_linode_hypervisor": model.LabelValue("kvm"),
"__meta_linode_backups": model.LabelValue("disabled"),
"__meta_linode_specs_disk_bytes": model.LabelValue("85899345920"),
@ -132,6 +135,7 @@ func TestLinodeSDRefresh(t *testing.T) {
"__meta_linode_status": model.LabelValue("running"),
"__meta_linode_tags": model.LabelValue(",monitoring,"),
"__meta_linode_group": model.LabelValue(""),
"__meta_linode_gpus": model.LabelValue("0"),
"__meta_linode_hypervisor": model.LabelValue("kvm"),
"__meta_linode_backups": model.LabelValue("disabled"),
"__meta_linode_specs_disk_bytes": model.LabelValue("53687091200"),
@ -155,6 +159,7 @@ func TestLinodeSDRefresh(t *testing.T) {
"__meta_linode_status": model.LabelValue("running"),
"__meta_linode_tags": model.LabelValue(",monitoring,"),
"__meta_linode_group": model.LabelValue(""),
"__meta_linode_gpus": model.LabelValue("0"),
"__meta_linode_hypervisor": model.LabelValue("kvm"),
"__meta_linode_backups": model.LabelValue("disabled"),
"__meta_linode_specs_disk_bytes": model.LabelValue("26843545600"),

View file

@ -20,7 +20,7 @@ import (
"testing"
)
// SDMock is the interface for the Linode mock
// SDMock is the interface for the Linode mock.
type SDMock struct {
t *testing.T
Server *httptest.Server
@ -34,18 +34,18 @@ func NewSDMock(t *testing.T) *SDMock {
}
}
// Endpoint returns the URI to the mock server
// Endpoint returns the URI to the mock server.
func (m *SDMock) Endpoint() string {
return m.Server.URL + "/"
}
// Setup creates the mock server
// Setup creates the mock server.
func (m *SDMock) Setup() {
m.Mux = http.NewServeMux()
m.Server = httptest.NewServer(m.Mux)
}
// ShutdownServer creates the mock server
// ShutdownServer creates the mock server.
func (m *SDMock) ShutdownServer() {
m.Server.Close()
}

View file

@ -28,48 +28,6 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
)
var (
failedConfigs = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_sd_failed_configs",
Help: "Current number of service discovery configurations that failed to load.",
},
[]string{"name"},
)
discoveredTargets = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_sd_discovered_targets",
Help: "Current number of discovered targets.",
},
[]string{"name", "config"},
)
receivedUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_received_updates_total",
Help: "Total number of update events received from the SD providers.",
},
[]string{"name"},
)
delayedUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_updates_delayed_total",
Help: "Total number of update events that couldn't be sent immediately.",
},
[]string{"name"},
)
sentUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_updates_total",
Help: "Total number of update events sent to the SD consumers.",
},
[]string{"name"},
)
)
func RegisterMetrics() {
prometheus.MustRegister(failedConfigs, discoveredTargets, receivedUpdates, delayedUpdates, sentUpdates)
}
type poolKey struct {
setName string
provider string
@ -92,7 +50,7 @@ type Provider struct {
newSubs map[string]struct{}
}
// Discoverer return the Discoverer of the provider
// Discoverer return the Discoverer of the provider.
func (p *Provider) Discoverer() Discoverer {
return p.d
}
@ -107,7 +65,7 @@ func (p *Provider) Config() interface{} {
}
// NewManager is the Discovery Manager constructor.
func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager)) *Manager {
func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, options ...func(*Manager)) *Manager {
if logger == nil {
logger = log.NewNopLogger()
}
@ -118,10 +76,21 @@ func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager
ctx: ctx,
updatert: 5 * time.Second,
triggerSend: make(chan struct{}, 1),
registerer: registerer,
}
for _, option := range options {
option(mgr)
}
// Register the metrics.
// We have to do this after setting all options, so that the name of the Manager is set.
if metrics, err := NewMetrics(registerer, mgr.name); err == nil {
mgr.metrics = metrics
} else {
level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err)
return nil
}
return mgr
}
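
A short caller-side sketch (not part of this commit): with the registerer parameter and the possible nil return, constructing a manager now looks roughly like the tests below, which pass prometheus.NewRegistry() and assert the result is non-nil. Names here are illustrative.

package example

import (
	"context"
	"errors"

	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery"
)

// newSDManager mirrors how the updated tests construct a manager: a fresh
// registry per instance and an explicit check for the nil return that now
// signals a metrics registration failure.
func newSDManager(ctx context.Context, logger log.Logger) (*discovery.Manager, error) {
	m := discovery.NewManager(ctx, logger, prometheus.NewRegistry())
	if m == nil {
		return nil, errors.New("failed to create discovery manager")
	}
	return m, nil
}
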
@ -170,6 +139,11 @@ type Manager struct {
// lastProvider counts providers registered during Manager's lifetime.
lastProvider uint
// A registerer for all service discovery metrics.
registerer prometheus.Registerer
metrics *Metrics
}
// Providers returns the currently configured SD providers.
@ -200,7 +174,7 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {
for name, scfg := range cfg {
failedCount += m.registerProviders(scfg, name)
}
failedConfigs.WithLabelValues(m.name).Set(float64(failedCount))
m.metrics.FailedConfigs.Set(float64(failedCount))
var (
wg sync.WaitGroup
@ -230,13 +204,13 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {
// Remove obsolete subs' targets.
if _, ok := prov.newSubs[s]; !ok {
delete(m.targets, poolKey{s, prov.name})
discoveredTargets.DeleteLabelValues(m.name, s)
m.metrics.DiscoveredTargets.DeleteLabelValues(m.name, s)
}
}
// Set metrics and targets for new subs.
for s := range prov.newSubs {
if _, ok := prov.subs[s]; !ok {
discoveredTargets.WithLabelValues(m.name, s).Set(0)
m.metrics.DiscoveredTargets.WithLabelValues(s).Set(0)
}
if l := len(refTargets); l > 0 {
m.targets[poolKey{s, prov.name}] = make(map[string]*targetgroup.Group, l)
@ -316,7 +290,7 @@ func (m *Manager) updater(ctx context.Context, p *Provider, updates chan []*targ
case <-ctx.Done():
return
case tgs, ok := <-updates:
receivedUpdates.WithLabelValues(m.name).Inc()
m.metrics.ReceivedUpdates.Inc()
if !ok {
level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name)
// Wait for provider cancellation to ensure targets are cleaned up when expected.
@ -349,11 +323,11 @@ func (m *Manager) sender() {
case <-ticker.C: // Some discoverers send updates too often, so we throttle these with the ticker.
select {
case <-m.triggerSend:
sentUpdates.WithLabelValues(m.name).Inc()
m.metrics.SentUpdates.Inc()
select {
case m.syncCh <- m.allGroups():
default:
delayedUpdates.WithLabelValues(m.name).Inc()
m.metrics.DelayedUpdates.Inc()
level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle")
select {
case m.triggerSend <- struct{}{}:
@ -405,7 +379,7 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
}
}
for setName, v := range n {
discoveredTargets.WithLabelValues(m.name, setName).Set(float64(v))
m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v))
}
return tSets
}
@ -428,6 +402,7 @@ func (m *Manager) registerProviders(cfgs Configs, setName string) int {
d, err := cfg.NewDiscoverer(DiscovererOptions{
Logger: log.With(m.logger, "discovery", typ, "config", setName),
HTTPClientOptions: m.httpOpts,
Registerer: m.registerer,
})
if err != nil {
level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName)

View file

@ -23,6 +23,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
client_testutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@ -664,7 +665,8 @@ func TestTargetUpdatesOrder(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
var totalUpdatesCount int
@ -778,7 +780,8 @@ func pk(provider, setName string, n int) poolKey {
func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -790,27 +793,28 @@ func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) {
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
require.Equal(t, 1, len(syncedTargets))
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets["prometheus"], 1)
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(discoveryManager.targets))
require.Len(t, discoveryManager.targets, 1)
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(discoveryManager.targets))
require.Equal(t, 1, len(syncedTargets))
require.Len(t, discoveryManager.targets, 1)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets["prometheus"], 1)
}
func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -822,12 +826,12 @@ func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) {
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
require.Equal(t, 1, len(syncedTargets))
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets["prometheus"], 1)
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(discoveryManager.targets))
require.Len(t, discoveryManager.targets, 1)
c["prometheus2"] = c["prometheus"]
delete(c, "prometheus")
@ -836,16 +840,17 @@ func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) {
syncedTargets = <-discoveryManager.SyncCh()
p = pk("static", "prometheus2", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(discoveryManager.targets))
require.Equal(t, 1, len(syncedTargets))
require.Len(t, discoveryManager.targets, 1)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus2"]))
require.Len(t, syncedTargets["prometheus2"], 1)
}
func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -860,30 +865,31 @@ func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testi
c["prometheus2"] = c["prometheus"]
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
require.Equal(t, 2, len(syncedTargets))
require.Len(t, syncedTargets, 2)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets["prometheus"], 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus2"]))
require.Len(t, syncedTargets["prometheus2"], 1)
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Equal(t, 2, len(discoveryManager.targets))
require.Len(t, discoveryManager.targets, 2)
delete(c, "prometheus")
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
p = pk("static", "prometheus2", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(discoveryManager.targets))
require.Equal(t, 1, len(syncedTargets))
require.Len(t, discoveryManager.targets, 1)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus2"]))
require.Len(t, syncedTargets["prometheus2"], 1)
}
func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -895,9 +901,9 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) {
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
require.Equal(t, 1, len(syncedTargets))
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets["prometheus"], 1)
var mu sync.Mutex
c["prometheus2"] = Configs{
@ -912,39 +918,40 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) {
// Original targets should be present as soon as possible.
syncedTargets = <-discoveryManager.SyncCh()
mu.Unlock()
require.Equal(t, 1, len(syncedTargets))
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets["prometheus"], 1)
// prometheus2 configs should be ready on second sync.
syncedTargets = <-discoveryManager.SyncCh()
require.Equal(t, 2, len(syncedTargets))
require.Len(t, syncedTargets, 2)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets["prometheus"], 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"bar:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus2"]))
require.Len(t, syncedTargets["prometheus2"], 1)
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
p = pk("lockstatic", "prometheus2", 1)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"bar:9090\"}", true)
require.Equal(t, 2, len(discoveryManager.targets))
require.Len(t, discoveryManager.targets, 2)
// Delete part of config and ensure only original targets exist.
delete(c, "prometheus2")
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
require.Equal(t, 1, len(discoveryManager.targets))
require.Len(t, discoveryManager.targets, 1)
verifyPresence(t, discoveryManager.targets, pk("static", "prometheus", 0), "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets))
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets["prometheus"], 1)
}
func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -959,31 +966,32 @@ func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) {
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"bar:9090\"}", true)
require.Equal(t, 1, len(discoveryManager.targets))
require.Equal(t, 1, len(syncedTargets))
require.Len(t, discoveryManager.targets, 1)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"bar:9090\"}", true)
require.Equal(t, 2, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets["prometheus"], 2)
c["prometheus"] = Configs{
staticConfig("foo:9090"),
}
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
require.Equal(t, 1, len(discoveryManager.targets))
require.Len(t, discoveryManager.targets, 1)
p = pk("static", "prometheus", 1)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"bar:9090\"}", false)
require.Equal(t, 1, len(discoveryManager.targets))
require.Equal(t, 1, len(syncedTargets))
require.Len(t, discoveryManager.targets, 1)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets["prometheus"], 1)
}
func TestDiscovererConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -1001,12 +1009,12 @@ func TestDiscovererConfigs(t *testing.T) {
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"bar:9090\"}", true)
p = pk("static", "prometheus", 1)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"baz:9090\"}", true)
require.Equal(t, 2, len(discoveryManager.targets))
require.Equal(t, 1, len(syncedTargets))
require.Len(t, discoveryManager.targets, 2)
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"bar:9090\"}", true)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"baz:9090\"}", true)
require.Equal(t, 3, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets["prometheus"], 3)
}
// TestTargetSetRecreatesEmptyStaticConfigs ensures that reloading a config file after
@ -1015,7 +1023,8 @@ func TestDiscovererConfigs(t *testing.T) {
func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -1029,9 +1038,9 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
syncedTargets := <-discoveryManager.SyncCh()
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets))
require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets["prometheus"], 1)
c["prometheus"] = Configs{
StaticConfig{{}},
@ -1052,8 +1061,8 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
if len(group.Targets) != 0 {
t.Fatalf("Invalid number of targets: expected 0, got %d", len(group.Targets))
}
require.Equal(t, 1, len(syncedTargets))
require.Equal(t, 1, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets, 1)
require.Len(t, syncedTargets["prometheus"], 1)
if lbls := syncedTargets["prometheus"][0].Labels; lbls != nil {
t.Fatalf("Unexpected Group: expected nil Labels, got %v", lbls)
}
@ -1062,7 +1071,8 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
func TestIdenticalConfigurationsAreCoalesced(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, nil)
discoveryManager := NewManager(ctx, nil, prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -1082,11 +1092,11 @@ func TestIdenticalConfigurationsAreCoalesced(t *testing.T) {
if len(discoveryManager.providers) != 1 {
t.Fatalf("Invalid number of providers: expected 1, got %d", len(discoveryManager.providers))
}
require.Equal(t, 2, len(syncedTargets))
require.Len(t, syncedTargets, 2)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus"]))
require.Len(t, syncedTargets["prometheus"], 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"foo:9090\"}", true)
require.Equal(t, 1, len(syncedTargets["prometheus2"]))
require.Len(t, syncedTargets["prometheus2"], 1)
}
func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) {
@ -1098,7 +1108,8 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) {
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -1144,7 +1155,8 @@ func (s lockStaticDiscoverer) Run(ctx context.Context, up chan<- []*targetgroup.
func TestGaugeFailedConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@ -1158,7 +1170,7 @@ func TestGaugeFailedConfigs(t *testing.T) {
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
failedCount := client_testutil.ToFloat64(failedConfigs)
failedCount := client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
if failedCount != 3 {
t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount)
}
@ -1169,7 +1181,7 @@ func TestGaugeFailedConfigs(t *testing.T) {
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
failedCount = client_testutil.ToFloat64(failedConfigs)
failedCount = client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
if failedCount != 0 {
t.Fatalf("Expected to get no failed config, got: %v", failedCount)
}
@ -1300,7 +1312,8 @@ func TestCoordinationWithReceiver(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
mgr := NewManager(ctx, nil)
mgr := NewManager(ctx, nil, prometheus.NewRegistry())
require.NotNil(t, mgr)
mgr.updatert = updateDelay
go mgr.Run()
@ -1392,10 +1405,11 @@ func (o onceProvider) Run(_ context.Context, ch chan<- []*targetgroup.Group) {
// TestTargetSetTargetGroupsUpdateDuringApplyConfig is used to detect races when
// ApplyConfig happens at the same time as targets update.
func TestTargetSetTargetGroupsUpdateDuringApplyConfig(*testing.T) {
func TestTargetSetTargetGroupsUpdateDuringApplyConfig(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()

View file

@ -28,6 +28,7 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -48,7 +49,7 @@ const (
// imageLabel is the label that is used for the docker image running the service.
imageLabel model.LabelName = metaLabelPrefix + "image"
// portIndexLabel is the integer port index when multiple ports are defined;
// e.g. PORT1 would have a value of '1'
// e.g. PORT1 would have a value of '1'.
portIndexLabel model.LabelName = metaLabelPrefix + "port_index"
// taskLabel contains the mesos task name of the app instance.
taskLabel model.LabelName = metaLabelPrefix + "task"
@ -83,7 +84,7 @@ func (*SDConfig) Name() string { return "marathon" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(*c, opts.Logger)
return NewDiscovery(*c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -132,7 +133,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Marathon Discovery.
func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "marathon_sd")
if err != nil {
return nil, err
@ -154,10 +155,13 @@ func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
appsClient: fetchApps,
}
d.Discovery = refresh.NewDiscovery(
logger,
"marathon",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "marathon",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d, nil
}
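Note: the hunks above (and the analogous ones for docker, dockerswarm, nomad, openstack, ovhcloud and puppetdb below) migrate refresh.NewDiscovery from positional arguments to a refresh.Options struct. The following is a minimal sketch, not part of this diff, of the new wiring for a hypothetical SD; the "example" mechanism name, the fixed 60-second interval, and the stub refresh function are illustrative assumptions.

package example

import (
	"context"
	"time"

	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery/refresh"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

// newDiscovery sketches the post-change wiring: options are passed as a
// struct, including the per-manager Registerer, instead of positionally.
func newDiscovery(logger log.Logger, reg prometheus.Registerer) *refresh.Discovery {
	refreshF := func(ctx context.Context) ([]*targetgroup.Group, error) {
		// A real SD would query its backend here.
		return []*targetgroup.Group{{Source: "example"}}, nil
	}
	return refresh.NewDiscovery(
		refresh.Options{
			Logger:   logger,
			Mech:     "example",
			Interval: 60 * time.Second,
			RefreshF: refreshF,
			Registry: reg,
		},
	)
}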

View file

@ -21,6 +21,7 @@ import (
"net/http/httptest"
"testing"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
@ -36,7 +37,7 @@ func testConfig() SDConfig {
}
func testUpdateServices(client appListClient) ([]*targetgroup.Group, error) {
md, err := NewDiscovery(testConfig(), nil)
md, err := NewDiscovery(testConfig(), nil, prometheus.NewRegistry())
if err != nil {
return nil, err
}
@ -129,7 +130,7 @@ func TestMarathonSDSendGroup(t *testing.T) {
}
func TestMarathonSDRemoveApp(t *testing.T) {
md, err := NewDiscovery(testConfig(), nil)
md, err := NewDiscovery(testConfig(), nil, prometheus.NewRegistry())
if err != nil {
t.Fatalf("%s", err)
}

101
discovery/metrics.go Normal file
View file

@ -0,0 +1,101 @@
// Copyright 2016 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package discovery
import (
"fmt"
"github.com/prometheus/client_golang/prometheus"
)
var (
clientGoRequestMetrics = &clientGoRequestMetricAdapter{}
clientGoWorkloadMetrics = &clientGoWorkqueueMetricsProvider{}
)
func init() {
clientGoRequestMetrics.RegisterWithK8sGoClient()
clientGoWorkloadMetrics.RegisterWithK8sGoClient()
}
// Metrics to be used with a discovery manager.
type Metrics struct {
FailedConfigs prometheus.Gauge
DiscoveredTargets *prometheus.GaugeVec
ReceivedUpdates prometheus.Counter
DelayedUpdates prometheus.Counter
SentUpdates prometheus.Counter
}
func NewMetrics(registerer prometheus.Registerer, sdManagerName string) (*Metrics, error) {
m := &Metrics{}
m.FailedConfigs = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "prometheus_sd_failed_configs",
Help: "Current number of service discovery configurations that failed to load.",
ConstLabels: prometheus.Labels{"name": sdManagerName},
},
)
m.DiscoveredTargets = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "prometheus_sd_discovered_targets",
Help: "Current number of discovered targets.",
ConstLabels: prometheus.Labels{"name": sdManagerName},
},
[]string{"config"},
)
m.ReceivedUpdates = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_received_updates_total",
Help: "Total number of update events received from the SD providers.",
ConstLabels: prometheus.Labels{"name": sdManagerName},
},
)
m.DelayedUpdates = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_updates_delayed_total",
Help: "Total number of update events that couldn't be sent immediately.",
ConstLabels: prometheus.Labels{"name": sdManagerName},
},
)
m.SentUpdates = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_updates_total",
Help: "Total number of update events sent to the SD consumers.",
ConstLabels: prometheus.Labels{"name": sdManagerName},
},
)
metrics := []prometheus.Collector{
m.FailedConfigs,
m.DiscoveredTargets,
m.ReceivedUpdates,
m.DelayedUpdates,
m.SentUpdates,
}
for _, collector := range metrics {
err := registerer.Register(collector)
if err != nil {
return nil, fmt.Errorf("failed to register discovery manager metrics: %w", err)
}
}
return m, nil
}
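Note: a minimal usage sketch, not part of this diff, for the NewMetrics constructor above. The "scrape" manager name is illustrative, and the example assumes a dedicated registry per SD manager, matching how the tests now pass prometheus.NewRegistry() to NewManager.

package discovery_test

import (
	"log"

	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery"
)

func ExampleNewMetrics() {
	// One registerer per SD manager; "scrape" is an illustrative manager name.
	reg := prometheus.NewRegistry()
	m, err := discovery.NewMetrics(reg, "scrape")
	if err != nil {
		log.Fatal(err)
	}
	m.ReceivedUpdates.Inc() // e.g. bumped for every update received from a provider
}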

View file

@ -11,10 +11,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package kubernetes
package discovery
import (
"context"
"fmt"
"net/url"
"time"
@ -23,13 +24,22 @@ import (
"k8s.io/client-go/util/workqueue"
)
const workqueueMetricsNamespace = metricsNamespace + "_workqueue"
// This file registers metrics used by the Kubernetes Go client (k8s.io/client-go).
// Unfortunately, k8s.io/client-go metrics are global.
// If we instantiate multiple k8s SD instances, their k8s.io/client-go metrics will overlap.
// To prevent us from displaying misleading metrics, we register k8s.io/client-go metrics
// outside of the Kubernetes SD.
const (
KubernetesMetricsNamespace = "prometheus_sd_kubernetes"
workqueueMetricsNamespace = KubernetesMetricsNamespace + "_workqueue"
)
var (
// Metrics for client-go's HTTP requests.
clientGoRequestResultMetricVec = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metricsNamespace,
Namespace: KubernetesMetricsNamespace,
Name: "http_request_total",
Help: "Total number of HTTP requests to the Kubernetes API by status code.",
},
@ -37,7 +47,7 @@ var (
)
clientGoRequestLatencyMetricVec = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: metricsNamespace,
Namespace: KubernetesMetricsNamespace,
Name: "http_request_duration_seconds",
Help: "Summary of latencies for HTTP requests to the Kubernetes API by endpoint.",
Objectives: map[float64]float64{},
@ -45,7 +55,7 @@ var (
[]string{"endpoint"},
)
// Definition of metrics for client-go workflow metrics provider
// Definition of metrics for the client-go workqueue metrics provider.
clientGoWorkqueueDepthMetricVec = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: workqueueMetricsNamespace,
@ -106,20 +116,41 @@ func (noopMetric) Dec() {}
func (noopMetric) Observe(float64) {}
func (noopMetric) Set(float64) {}
// Definition of client-go metrics adapters for HTTP requests observation
// Definition of client-go metrics adapters for HTTP request observation.
type clientGoRequestMetricAdapter struct{}
func (f *clientGoRequestMetricAdapter) Register(registerer prometheus.Registerer) {
// Returns all of the Prometheus metrics derived from k8s.io/client-go.
// This may be used to register and unregister the metrics.
func clientGoMetrics() []prometheus.Collector {
return []prometheus.Collector{
clientGoRequestResultMetricVec,
clientGoRequestLatencyMetricVec,
clientGoWorkqueueDepthMetricVec,
clientGoWorkqueueAddsMetricVec,
clientGoWorkqueueLatencyMetricVec,
clientGoWorkqueueUnfinishedWorkSecondsMetricVec,
clientGoWorkqueueLongestRunningProcessorMetricVec,
clientGoWorkqueueWorkDurationMetricVec,
}
}
func RegisterK8sClientMetricsWithPrometheus(registerer prometheus.Registerer) error {
for _, collector := range clientGoMetrics() {
err := registerer.Register(collector)
if err != nil {
return fmt.Errorf("failed to register Kubernetes Go Client metrics: %w", err)
}
}
return nil
}
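Note: a minimal sketch, not part of this diff, of exposing the shared client-go collectors through RegisterK8sClientMetricsWithPrometheus. The adapters themselves are hooked into k8s.io/client-go once via the discovery package's init (see discovery/metrics.go above); this helper only registers the resulting collectors with a registry of the caller's choosing.

package main

import (
	"log"

	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery"
)

func main() {
	// Register the shared client-go collectors on a single registry.
	// Registering them a second time on the same registry would fail with a
	// duplicate-registration error, which is why this happens exactly once,
	// outside of the Kubernetes SD.
	reg := prometheus.NewRegistry()
	if err := discovery.RegisterK8sClientMetricsWithPrometheus(reg); err != nil {
		log.Fatalln("registering client-go metrics:", err)
	}
}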
func (f *clientGoRequestMetricAdapter) RegisterWithK8sGoClient() {
metrics.Register(
metrics.RegisterOpts{
RequestLatency: f,
RequestResult: f,
},
)
registerer.MustRegister(
clientGoRequestResultMetricVec,
clientGoRequestLatencyMetricVec,
)
}
func (clientGoRequestMetricAdapter) Increment(_ context.Context, code, _, _ string) {
@ -130,19 +161,11 @@ func (clientGoRequestMetricAdapter) Observe(_ context.Context, _ string, u url.U
clientGoRequestLatencyMetricVec.WithLabelValues(u.EscapedPath()).Observe(latency.Seconds())
}
// Definition of client-go workqueue metrics provider definition
// Definition of the client-go workqueue metrics provider.
type clientGoWorkqueueMetricsProvider struct{}
func (f *clientGoWorkqueueMetricsProvider) Register(registerer prometheus.Registerer) {
func (f *clientGoWorkqueueMetricsProvider) RegisterWithK8sGoClient() {
workqueue.SetProvider(f)
registerer.MustRegister(
clientGoWorkqueueDepthMetricVec,
clientGoWorkqueueAddsMetricVec,
clientGoWorkqueueLatencyMetricVec,
clientGoWorkqueueWorkDurationMetricVec,
clientGoWorkqueueUnfinishedWorkSecondsMetricVec,
clientGoWorkqueueLongestRunningProcessorMetricVec,
)
}
func (f *clientGoWorkqueueMetricsProvider) NewDepthMetric(name string) workqueue.GaugeMetric {

View file

@ -26,6 +26,7 @@ import (
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/client"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -80,7 +81,7 @@ func (*DockerSDConfig) Name() string { return "docker" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *DockerSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDockerDiscovery(c, opts.Logger)
return NewDockerDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -114,7 +115,7 @@ type DockerDiscovery struct {
}
// NewDockerDiscovery returns a new DockerDiscovery which periodically refreshes its targets.
func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger) (*DockerDiscovery, error) {
func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger, reg prometheus.Registerer) (*DockerDiscovery, error) {
var err error
d := &DockerDiscovery{
@ -165,10 +166,13 @@ func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger) (*DockerDiscove
}
d.Discovery = refresh.NewDiscovery(
logger,
"docker",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "docker",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d, nil
}

View file

@ -19,6 +19,7 @@ import (
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@ -37,19 +38,19 @@ host: %s
var cfg DockerSDConfig
require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg))
d, err := NewDockerDiscovery(&cfg, log.NewNopLogger())
d, err := NewDockerDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
tgs, err := d.refresh(ctx)
require.NoError(t, err)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Equal(t, 3, len(tg.Targets))
require.Len(t, tg.Targets, 3)
for i, lbls := range []model.LabelSet{
{

View file

@ -23,6 +23,7 @@ import (
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/client"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@ -74,7 +75,7 @@ func (*DockerSwarmSDConfig) Name() string { return "dockerswarm" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *DockerSwarmSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -117,7 +118,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
var err error
d := &Discovery{
@ -168,10 +169,13 @@ func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger) (*Discovery, err
}
d.Discovery = refresh.NewDiscovery(
logger,
"dockerswarm",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "dockerswarm",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d, nil
}

View file

@ -29,7 +29,7 @@ import (
"github.com/prometheus/prometheus/util/strutil"
)
// SDMock is the interface for the DigitalOcean mock
// SDMock is the interface for the DigitalOcean mock.
type SDMock struct {
t *testing.T
Server *httptest.Server
@ -47,12 +47,12 @@ func NewSDMock(t *testing.T, directory string) *SDMock {
}
}
// Endpoint returns the URI to the mock server
// Endpoint returns the URI to the mock server.
func (m *SDMock) Endpoint() string {
return m.Server.URL + "/"
}
// Setup creates the mock server
// Setup creates the mock server.
func (m *SDMock) Setup() {
m.Mux = http.NewServeMux()
m.Server = httptest.NewServer(m.Mux)

View file

@ -19,6 +19,7 @@ import (
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@ -38,19 +39,19 @@ host: %s
var cfg DockerSwarmSDConfig
require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg))
d, err := NewDiscovery(&cfg, log.NewNopLogger())
d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
tgs, err := d.refresh(ctx)
require.NoError(t, err)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Equal(t, 5, len(tg.Targets))
require.Len(t, tg.Targets, 5)
for i, lbls := range []model.LabelSet{
{

View file

@ -19,6 +19,7 @@ import (
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@ -38,19 +39,19 @@ host: %s
var cfg DockerSwarmSDConfig
require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg))
d, err := NewDiscovery(&cfg, log.NewNopLogger())
d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
tgs, err := d.refresh(ctx)
require.NoError(t, err)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Equal(t, 15, len(tg.Targets))
require.Len(t, tg.Targets, 15)
for i, lbls := range []model.LabelSet{
{
@ -332,19 +333,19 @@ filters:
var cfg DockerSwarmSDConfig
require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg))
d, err := NewDiscovery(&cfg, log.NewNopLogger())
d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
tgs, err := d.refresh(ctx)
require.NoError(t, err)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg, "tg should not be nil")
require.NotNil(t, tg.Targets, "tg.targets should not be nil")
require.Equal(t, 4, len(tg.Targets))
require.Len(t, tg.Targets, 4)
for i, lbls := range []model.LabelSet{
{

View file

@ -19,6 +19,7 @@ import (
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@ -38,19 +39,19 @@ host: %s
var cfg DockerSwarmSDConfig
require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg))
d, err := NewDiscovery(&cfg, log.NewNopLogger())
d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
tgs, err := d.refresh(ctx)
require.NoError(t, err)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Equal(t, 27, len(tg.Targets))
require.Len(t, tg.Targets, 27)
for i, lbls := range []model.LabelSet{
{

View file

@ -49,27 +49,18 @@ const (
)
// DefaultSDConfig is the default nomad SD configuration.
var (
DefaultSDConfig = SDConfig{
AllowStale: true,
HTTPClientConfig: config.DefaultHTTPClientConfig,
Namespace: "default",
RefreshInterval: model.Duration(60 * time.Second),
Region: "global",
Server: "http://localhost:4646",
TagSeparator: ",",
}
failuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_nomad_failures_total",
Help: "Number of nomad service discovery refresh failures.",
})
)
var DefaultSDConfig = SDConfig{
AllowStale: true,
HTTPClientConfig: config.DefaultHTTPClientConfig,
Namespace: "default",
RefreshInterval: model.Duration(60 * time.Second),
Region: "global",
Server: "http://localhost:4646",
TagSeparator: ",",
}
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(failuresCount)
}
// SDConfig is the configuration for nomad based service discovery.
@ -88,7 +79,7 @@ func (*SDConfig) Name() string { return "nomad" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -121,10 +112,11 @@ type Discovery struct {
region string
server string
tagSeparator string
failuresCount prometheus.Counter
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
d := &Discovery{
allowStale: conf.AllowStale,
namespace: conf.Namespace,
@ -132,6 +124,11 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
region: conf.Region,
server: conf.Server,
tagSeparator: conf.TagSeparator,
failuresCount: prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_nomad_failures_total",
Help: "Number of nomad service discovery refresh failures.",
}),
}
HTTPClient, err := config.NewClientFromConfig(conf.HTTPClientConfig, "nomad_sd")
@ -153,10 +150,14 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
d.client = client
d.Discovery = refresh.NewDiscovery(
logger,
"nomad",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "nomad",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
Metrics: []prometheus.Collector{d.failuresCount},
},
)
return d, nil
}
@ -167,7 +168,7 @@ func (d *Discovery) refresh(context.Context) ([]*targetgroup.Group, error) {
}
stubs, _, err := d.client.Services().List(opts)
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, err
}
@ -179,7 +180,7 @@ func (d *Discovery) refresh(context.Context) ([]*targetgroup.Group, error) {
for _, service := range stub.Services {
instances, _, err := d.client.Services().Get(service.ServiceName, opts)
if err != nil {
failuresCount.Inc()
d.failuresCount.Inc()
return nil, fmt.Errorf("failed to fetch services: %w", err)
}

View file

@ -22,6 +22,7 @@ import (
"testing"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
)
@ -30,7 +31,7 @@ type NomadSDTestSuite struct {
Mock *SDMock
}
// SDMock is the interface for the nomad mock
// SDMock is the interface for the nomad mock.
type SDMock struct {
t *testing.T
Server *httptest.Server
@ -127,7 +128,7 @@ func TestConfiguredService(t *testing.T) {
conf := &SDConfig{
Server: "http://localhost:4646",
}
_, err := NewDiscovery(conf, nil)
_, err := NewDiscovery(conf, nil, prometheus.NewRegistry())
require.NoError(t, err)
}
@ -141,18 +142,18 @@ func TestNomadSDRefresh(t *testing.T) {
cfg := DefaultSDConfig
cfg.Server = endpoint.String()
d, err := NewDiscovery(&cfg, log.NewNopLogger())
d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
tgs, err := d.refresh(context.Background())
require.NoError(t, err)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Equal(t, 1, len(tg.Targets))
require.Len(t, tg.Targets, 1)
lbls := model.LabelSet{
"__address__": model.LabelValue("127.0.0.1:30456"),

View file

@ -53,12 +53,12 @@ func TestOpenstackSDHypervisorRefresh(t *testing.T) {
hypervisor, _ := mock.openstackAuthSuccess()
ctx := context.Background()
tgs, err := hypervisor.refresh(ctx)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NoError(t, err)
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Equal(t, 2, len(tg.Targets))
require.Len(t, tg.Targets, 2)
for l, v := range map[string]string{
"__address__": "172.16.70.14:0",

View file

@ -145,16 +145,16 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group,
openstackLabelUserID: model.LabelValue(s.UserID),
}
flavorId, ok := s.Flavor["id"].(string)
flavorID, ok := s.Flavor["id"].(string)
if !ok {
level.Warn(i.logger).Log("msg", "Invalid type for flavor id, expected string")
continue
}
labels[openstackLabelInstanceFlavor] = model.LabelValue(flavorId)
labels[openstackLabelInstanceFlavor] = model.LabelValue(flavorID)
imageId, ok := s.Image["id"].(string)
imageID, ok := s.Image["id"].(string)
if ok {
labels[openstackLabelInstanceImage] = model.LabelValue(imageId)
labels[openstackLabelInstanceImage] = model.LabelValue(imageID)
}
for k, v := range s.Metadata {

View file

@ -61,12 +61,12 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
tgs, err := instance.refresh(ctx)
require.NoError(t, err)
require.Equal(t, 1, len(tgs))
require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
require.Equal(t, 4, len(tg.Targets))
require.Len(t, tg.Targets, 4)
for i, lbls := range []model.LabelSet{
{

View file

@ -20,7 +20,7 @@ import (
"testing"
)
// SDMock is the interface for the OpenStack mock
// SDMock is the interface for the OpenStack mock.
type SDMock struct {
t *testing.T
Server *httptest.Server
@ -34,12 +34,12 @@ func NewSDMock(t *testing.T) *SDMock {
}
}
// Endpoint returns the URI to the mock server
// Endpoint returns the URI to the mock server.
func (m *SDMock) Endpoint() string {
return m.Server.URL + "/"
}
// Setup creates the mock server
// Setup creates the mock server.
func (m *SDMock) Setup() {
m.Mux = http.NewServeMux()
m.Server = httptest.NewServer(m.Mux)
@ -60,7 +60,7 @@ func testHeader(t *testing.T, r *http.Request, header, expected string) {
}
}
// HandleVersionsSuccessfully mocks version call
// HandleVersionsSuccessfully mocks version call.
func (m *SDMock) HandleVersionsSuccessfully() {
m.Mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, `
@ -88,7 +88,7 @@ func (m *SDMock) HandleVersionsSuccessfully() {
})
}
// HandleAuthSuccessfully mocks auth call
// HandleAuthSuccessfully mocks auth call.
func (m *SDMock) HandleAuthSuccessfully() {
m.Mux.HandleFunc("/v3/auth/tokens", func(w http.ResponseWriter, r *http.Request) {
w.Header().Add("X-Subject-Token", tokenID)
@ -236,7 +236,7 @@ const hypervisorListBody = `
]
}`
// HandleHypervisorListSuccessfully mocks os-hypervisors detail call
// HandleHypervisorListSuccessfully mocks os-hypervisors detail call.
func (m *SDMock) HandleHypervisorListSuccessfully() {
m.Mux.HandleFunc("/os-hypervisors/detail", func(w http.ResponseWriter, r *http.Request) {
testMethod(m.t, r, "GET")
@ -533,7 +533,7 @@ const serverListBody = `
}
`
// HandleServerListSuccessfully mocks server detail call
// HandleServerListSuccessfully mocks server detail call.
func (m *SDMock) HandleServerListSuccessfully() {
m.Mux.HandleFunc("/servers/detail", func(w http.ResponseWriter, r *http.Request) {
testMethod(m.t, r, "GET")
@ -572,7 +572,7 @@ const listOutput = `
}
`
// HandleFloatingIPListSuccessfully mocks floating ips call
// HandleFloatingIPListSuccessfully mocks floating ips call.
func (m *SDMock) HandleFloatingIPListSuccessfully() {
m.Mux.HandleFunc("/os-floating-ips", func(w http.ResponseWriter, r *http.Request) {
testMethod(m.t, r, "GET")

View file

@ -24,6 +24,7 @@ import (
"github.com/gophercloud/gophercloud"
"github.com/gophercloud/gophercloud/openstack"
"github.com/mwitkow/go-conntrack"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -70,7 +71,7 @@ func (*SDConfig) Name() string { return "openstack" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -134,16 +135,19 @@ type refresher interface {
}
// NewDiscovery returns a new OpenStack Discoverer which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, l log.Logger) (*refresh.Discovery, error) {
func NewDiscovery(conf *SDConfig, l log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) {
r, err := newRefresher(conf, l)
if err != nil {
return nil, err
}
return refresh.NewDiscovery(
l,
"openstack",
time.Duration(conf.RefreshInterval),
r.refresh,
refresh.Options{
Logger: l,
Mech: "openstack",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: r.refresh,
Registry: reg,
},
), nil
}

View file

@ -47,11 +47,11 @@ consumer_key: %s`, mock.URL, ovhcloudApplicationKeyTest, ovhcloudApplicationSecr
targetGroups, err := d.refresh(ctx)
require.NoError(t, err)
require.Equal(t, 1, len(targetGroups))
require.Len(t, targetGroups, 1)
targetGroup := targetGroups[0]
require.NotNil(t, targetGroup)
require.NotNil(t, targetGroup.Targets)
require.Equal(t, 1, len(targetGroup.Targets))
require.Len(t, targetGroup.Targets, 1)
for i, lbls := range []model.LabelSet{
{

View file

@ -22,6 +22,7 @@ import (
"github.com/go-kit/log"
"github.com/ovh/go-ovh/ovh"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -93,7 +94,7 @@ func createClient(config *SDConfig) (*ovh.Client, error) {
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(options discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, options.Logger)
return NewDiscovery(c, options.Logger, options.Registerer)
}
func init() {
@ -140,16 +141,19 @@ func newRefresher(conf *SDConfig, logger log.Logger) (refresher, error) {
}
// NewDiscovery returns a new OVHcloud Discoverer which periodically refreshes its targets.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) {
r, err := newRefresher(conf, logger)
if err != nil {
return nil, err
}
return refresh.NewDiscovery(
logger,
"ovhcloud",
time.Duration(conf.RefreshInterval),
r.refresh,
refresh.Options{
Logger: logger,
Mech: "ovhcloud",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: r.refresh,
Registry: reg,
},
), nil
}

View file

@ -18,6 +18,7 @@ import (
"fmt"
"testing"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@ -122,7 +123,8 @@ func TestDiscoverer(t *testing.T) {
conf, _ := getMockConf("vps")
logger := testutil.NewLogger(t)
_, err := conf.NewDiscoverer(discovery.DiscovererOptions{
Logger: logger,
Logger: logger,
Registerer: prometheus.NewRegistry(),
})
require.NoError(t, err)

View file

@ -49,11 +49,11 @@ consumer_key: %s`, mock.URL, ovhcloudApplicationKeyTest, ovhcloudApplicationSecr
targetGroups, err := d.refresh(ctx)
require.NoError(t, err)
require.Equal(t, 1, len(targetGroups))
require.Len(t, targetGroups, 1)
targetGroup := targetGroups[0]
require.NotNil(t, targetGroup)
require.NotNil(t, targetGroup.Targets)
require.Equal(t, 1, len(targetGroup.Targets))
require.Len(t, targetGroup.Targets, 1)
for i, lbls := range []model.LabelSet{
{
"__address__": "192.0.2.1",

View file

@ -29,6 +29,7 @@ import (
"github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@ -83,7 +84,7 @@ func (*SDConfig) Name() string { return "puppetdb" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
return NewDiscovery(c, opts.Logger)
return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@ -130,7 +131,7 @@ type Discovery struct {
}
// NewDiscovery returns a new PuppetDB discovery for the given config.
func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@ -156,10 +157,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
}
d.Discovery = refresh.NewDiscovery(
logger,
"http",
time.Duration(conf.RefreshInterval),
d.refresh,
refresh.Options{
Logger: logger,
Mech: "http",
Interval: time.Duration(conf.RefreshInterval),
RefreshF: d.refresh,
Registry: reg,
},
)
return d, nil
}

Some files were not shown because too many files have changed in this diff.