diff --git a/.github/workflows/lock.yml b/.github/workflows/lock.yml
index 3f557a0895..e7e813e3b6 100644
--- a/.github/workflows/lock.yml
+++ b/.github/workflows/lock.yml
@@ -16,7 +16,7 @@ jobs:
runs-on: ubuntu-latest
if: github.repository_owner == 'prometheus'
steps:
- - uses: dessant/lock-threads@be8aa5be94131386884a6da4189effda9b14aa21 # v4.0.1
+ - uses: dessant/lock-threads@1bf7ec25051fe7c00bdd17e6a7cf3d7bfb7dc771 # v5.0.1
with:
process-only: 'issues'
issue-inactive-days: '180'
diff --git a/.golangci.yml b/.golangci.yml
index 4df572c198..166b2e0d48 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -23,6 +23,7 @@ linters:
- nolintlint
- predeclared
- revive
+ - testifylint
- unconvert
- unused
@@ -35,13 +36,9 @@ issues:
- path: _test.go
linters:
- errcheck
- - path: tsdb/
+ - path: "tsdb/head_wal.go"
linters:
- errorlint
- - path: tsdb/
- text: "import 'github.com/pkg/errors' is not allowed"
- linters:
- - depguard
- linters:
- godot
source: "^// ==="
@@ -81,8 +78,55 @@ linters-settings:
gofumpt:
extra-rules: true
revive:
+ # By default, revive will enable only the linting rules that are named in the configuration file.
+ # So, all required rules must be listed explicitly in the configuration.
+ # The following configuration enables all the rules from the defaults.toml
+ # https://github.com/mgechev/revive/blob/master/defaults.toml
rules:
- # https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md#unused-parameter
- - name: unused-parameter
- severity: warning
+ # https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md
+ - name: blank-imports
+ - name: context-as-argument
+ arguments:
+ # allow functions with test or bench signatures
+ - allowTypesBefore: "*testing.T,testing.TB"
+ - name: context-keys-type
+ - name: dot-imports
+ # A lot of false positives: incorrectly identifies channel draining as "empty code block".
+ # See https://github.com/mgechev/revive/issues/386
+ - name: empty-block
disabled: true
+ - name: error-naming
+ - name: error-return
+ - name: error-strings
+ - name: errorf
+ - name: exported
+ - name: increment-decrement
+ - name: indent-error-flow
+ - name: package-comments
+ - name: range
+ - name: receiver-naming
+ - name: redefines-builtin-id
+ - name: superfluous-else
+ - name: time-naming
+ - name: unexported-return
+ - name: unreachable-code
+ - name: unused-parameter
+ disabled: true
+ - name: var-declaration
+ - name: var-naming
+ testifylint:
+ disable:
+ - float-compare
+ - go-require
+ enable:
+ - bool-compare
+ - compares
+ - empty
+ - error-is-as
+ - error-nil
+ - expected-actual
+ - len
+ - require-error
+ - suite-dont-use-pkg
+ - suite-extra-assert-call
+
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 35f9f77682..71b8c97fe4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,21 +1,16 @@
# Changelog
-## 2.48.0-rc.2 / 2023-11-02
+## unreleased
-* [ENHANCEMENT] Scraping: Add configuration option for tracking staleness of scraped timestamps. #13060
-* [BUGFIX] Storage: Fix crash caused by incorrect mixed samples handling. #13055
-* [BUGFIX] TSDB: Fix compactor failures by adding min time to histogram chunks. #13062
+* [ENHANCEMENT] TSDB: Make the wlog watcher read segments synchronously when not tailing. #13224
+* [BUGFIX] Agent: Participate in notify calls. #13223
-## 2.48.0-rc.1 / 2023-10-24
-
-* [BUGFIX] PromQL: Reduce inefficiency introduced by warnings/annotations and temporarily remove possible non-counter warnings. #13012
-
-## 2.48.0-rc.0 / 2023-10-10
+## 2.48.0 / 2023-11-16
* [CHANGE] Remote-write: respect Retry-After header on 5xx errors. #12677
* [FEATURE] Alerting: Add AWS SigV4 authentication support for Alertmanager endpoints. #12774
* [FEATURE] Promtool: Add support for histograms in the TSDB dump command. #12775
-* [FEATURE] PromQL: Add warnings (and annotations) to PromQL query results. #12152 #12982 #12988
+* [FEATURE] PromQL: Add warnings (and annotations) to PromQL query results. #12152 #12982 #12988 #13012
* [FEATURE] Remote-write: Add Azure AD OAuth authentication support for remote write requests. #12572
* [ENHANCEMENT] Remote-write: Add a header to count retried remote write requests. #12729
* [ENHANCEMENT] TSDB: Improve query performance by re-using iterator when moving between series. #12757
@@ -31,6 +26,7 @@
* [ENHANCEMENT] Scraping: Save memory when scraping by delaying creation of buffer. #12953
* [ENHANCEMENT] Agent: Allow ingestion of out-of-order samples. #12897
* [ENHANCEMENT] Promtool: Improve support for native histograms in TSDB analyze command. #12869
+* [ENHANCEMENT] Scraping: Add configuration option for tracking staleness of scraped timestamps. #13060
* [BUGFIX] SD: Ensure that discovery managers are properly canceled. #10569
* [BUGFIX] TSDB: Fix PostingsForMatchers race with creating new series. #12558
* [BUGFIX] TSDB: Fix handling of explicit counter reset header in histograms. #12772
@@ -40,6 +36,8 @@
* [BUGFIX] Promtool: Fix errors not being reported in check rules command. #12715
* [BUGFIX] TSDB: Avoid panics reported in logs when head initialization takes a long time. #12876
* [BUGFIX] TSDB: Ensure that WBL is repaired when possible. #12406
+* [BUGFIX] Storage: Fix crash caused by incorrect mixed samples handling. #13055
+* [BUGFIX] TSDB: Fix compactor failures by adding min time to histogram chunks. #13062
## 2.47.1 / 2023-10-04
diff --git a/Makefile b/Makefile
index 0dd8673af3..ab229f9311 100644
--- a/Makefile
+++ b/Makefile
@@ -93,9 +93,9 @@ endif
# If we want to only test go code we have to change the test target
# which is called by all.
ifeq ($(GO_ONLY),1)
-test: common-test
+test: common-test check-go-mod-version
else
-test: common-test ui-build-module ui-test ui-lint
+test: common-test ui-build-module ui-test ui-lint check-go-mod-version
endif
.PHONY: npm_licenses
@@ -138,3 +138,17 @@ bench_tsdb: $(PROMU)
cli-documentation:
$(GO) run ./cmd/prometheus/ --write-documentation > docs/command-line/prometheus.md
$(GO) run ./cmd/promtool/ write-documentation > docs/command-line/promtool.md
+
+.PHONY: check-go-mod-version
+check-go-mod-version:
+ @echo ">> checking go.mod version matching"
+ @./scripts/check-go-mod-version.sh
+
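+# update-all-go-deps updates Go dependencies in the main module and in the
+# separate module under ./documentation/examples/remote_storage/.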
+.PHONY: update-all-go-deps
+update-all-go-deps:
+ @$(MAKE) update-go-deps
+ @echo ">> updating Go dependencies in ./documentation/examples/remote_storage/"
+ @cd ./documentation/examples/remote_storage/ && for m in $$($(GO) list -mod=readonly -m -f '{{ if and (not .Indirect) (not .Main)}}{{.Path}}{{end}}' all); do \
+ $(GO) get -d $$m; \
+ done
+ @cd ./documentation/examples/remote_storage/ && $(GO) mod tidy
diff --git a/Makefile.common b/Makefile.common
index 1338c61a76..f8d26a53ee 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -61,7 +61,7 @@ PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_
SKIP_GOLANGCI_LINT :=
GOLANGCI_LINT :=
GOLANGCI_LINT_OPTS ?=
-GOLANGCI_LINT_VERSION ?= v1.54.2
+GOLANGCI_LINT_VERSION ?= v1.55.2
# golangci-lint only supports linux, darwin and windows platforms on i386/amd64.
# windows isn't included here because of the path separator being different.
ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))
diff --git a/RELEASE.md b/RELEASE.md
index c39cc824ba..6ab2f63899 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -53,7 +53,8 @@ Release cadence of first pre-releases being cut is 6 weeks.
| v2.46 | 2023-07-12 | Julien Pivotto (GitHub: @roidelapluie) |
| v2.47 | 2023-08-23 | Bryan Boreham (GitHub: @bboreham) |
| v2.48 | 2023-10-04 | Levi Harrison (GitHub: @LeviHarrison) |
-| v2.49 | 2023-11-15 | **searching for volunteer** |
+| v2.49 | 2023-12-05 | Bartek Plotka (GitHub: @bwplotka) |
+| v2.50 | 2024-01-16 | **searching for volunteer** |
If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.
@@ -108,14 +109,16 @@ This is also a good time to consider any experimental features and feature
flags for promotion to stable or for deprecation or ultimately removal. Do any
of these in pull requests, one per feature.
+> NOTE: As a validation step, check that all security alerts are closed after
+> this step: https://github.com/prometheus/prometheus/security/dependabot.
+> Open alerts are sometimes acceptable, e.g. when they are not critical, when a
+> fix has not been released yet or does not relate to upgrading, or when we are
+> unaffected.
+
#### Manually updating Go dependencies
This is usually only needed for `+incompatible` and `v0.0.0` non-semver updates.
```bash
-make update-go-deps
-git add go.mod go.sum
-git commit -m "Update dependencies"
+make update-all-go-deps
```
#### Manually updating React dependencies
diff --git a/VERSION b/VERSION
index 10429c7ad0..9a9feb0847 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.48.0-rc.2
+2.48.0
diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go
index 4112cd842b..a8bd2f2b33 100644
--- a/cmd/prometheus/main.go
+++ b/cmd/prometheus/main.go
@@ -206,9 +206,15 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
case "native-histograms":
c.tsdb.EnableNativeHistograms = true
// Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
- config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultNativeHistogramScrapeProtocols
- config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultNativeHistogramScrapeProtocols
+ config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
+ config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
level.Info(logger).Log("msg", "Experimental native histogram support enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
+ case "created-timestamp-zero-ingestion":
+ c.scrape.EnableCreatedTimestampZeroIngestion = true
+ // Change relevant global variables. Hacky, but it's hard to pass a new option or default to unmarshallers.
+ config.DefaultConfig.GlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
+ config.DefaultGlobalConfig.ScrapeProtocols = config.DefaultProtoFirstScrapeProtocols
+ level.Info(logger).Log("msg", "Experimental created timestamp zero ingestion enabled. Changed default scrape_protocols to prefer PrometheusProto format.", "global.scrape_protocols", fmt.Sprintf("%v", config.DefaultGlobalConfig.ScrapeProtocols))
case "":
continue
case "promql-at-modifier", "promql-negative-offset":
@@ -614,14 +620,52 @@ func main() {
discoveryManagerNotify discoveryManager
)
+ // Kubernetes client metrics are used by Kubernetes SD.
+ // They are registered here in the main function, because SD mechanisms
+ // can only register metrics specific to an SD instance.
+ // Kubernetes client metrics are the same for the whole process -
+ // they are not specific to an SD instance.
+ err = discovery.RegisterK8sClientMetricsWithPrometheus(prometheus.DefaultRegisterer)
+ if err != nil {
+ level.Error(logger).Log("msg", "failed to register Kubernetes client metrics", "err", err)
+ os.Exit(1)
+ }
if cfg.enableNewSDManager {
- discovery.RegisterMetrics()
- discoveryManagerScrape = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), discovery.Name("scrape"))
- discoveryManagerNotify = discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), discovery.Name("notify"))
+ {
+ discMgr := discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, discovery.Name("scrape"))
+ if discMgr == nil {
+ level.Error(logger).Log("msg", "failed to create a discovery manager scrape")
+ os.Exit(1)
+ }
+ discoveryManagerScrape = discMgr
+ }
+
+ {
+ discMgr := discovery.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, discovery.Name("notify"))
+ if discMgr == nil {
+ level.Error(logger).Log("msg", "failed to create a discovery manager notify")
+ os.Exit(1)
+ }
+ discoveryManagerNotify = discMgr
+ }
} else {
- legacymanager.RegisterMetrics()
- discoveryManagerScrape = legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), legacymanager.Name("scrape"))
- discoveryManagerNotify = legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), legacymanager.Name("notify"))
+ {
+ discMgr := legacymanager.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), prometheus.DefaultRegisterer, legacymanager.Name("scrape"))
+ if discMgr == nil {
+ level.Error(logger).Log("msg", "failed to create a discovery manager scrape")
+ os.Exit(1)
+ }
+ discoveryManagerScrape = discMgr
+ }
+
+ {
+ discMgr := legacymanager.NewManager(ctxNotify, log.With(logger, "component", "discovery manager notify"), prometheus.DefaultRegisterer, legacymanager.Name("notify"))
+ if discMgr == nil {
+ level.Error(logger).Log("msg", "failed to create a discovery manager notify")
+ os.Exit(1)
+ }
+ discoveryManagerNotify = discMgr
+ }
}
scrapeManager, err := scrape.NewManager(
@@ -1127,6 +1171,7 @@ func main() {
)
localStorage.Set(db, 0)
+ db.SetWriteNotified(remoteStorage)
close(dbOpen)
<-cancel
return nil
@@ -1448,6 +1493,10 @@ func (n notReadyAppender) UpdateMetadata(ref storage.SeriesRef, l labels.Labels,
return 0, tsdb.ErrNotReady
}
+func (n notReadyAppender) AppendCTZeroSample(ref storage.SeriesRef, l labels.Labels, t, ct int64) (storage.SeriesRef, error) {
+ return 0, tsdb.ErrNotReady
+}
+
func (n notReadyAppender) Commit() error { return tsdb.ErrNotReady }
func (n notReadyAppender) Rollback() error { return tsdb.ErrNotReady }
@@ -1586,7 +1635,6 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
RetentionDuration: int64(time.Duration(opts.RetentionDuration) / time.Millisecond),
MaxBytes: int64(opts.MaxBytes),
NoLockfile: opts.NoLockfile,
- AllowOverlappingCompaction: true,
WALCompression: wlog.ParseCompressionType(opts.WALCompression, opts.WALCompressionType),
HeadChunksWriteQueueSize: opts.HeadChunksWriteQueueSize,
SamplesPerChunk: opts.SamplesPerChunk,
diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go
index e4f831939f..f4fe3855c6 100644
--- a/cmd/prometheus/main_test.go
+++ b/cmd/prometheus/main_test.go
@@ -346,7 +346,7 @@ func getCurrentGaugeValuesFor(t *testing.T, reg prometheus.Gatherer, metricNames
continue
}
- require.Equal(t, 1, len(g.GetMetric()))
+ require.Len(t, g.GetMetric(), 1)
if _, ok := res[m]; ok {
t.Error("expected only one metric family for", m)
t.FailNow()
diff --git a/cmd/prometheus/query_log_test.go b/cmd/prometheus/query_log_test.go
index f20f2a22c0..dd6b56672c 100644
--- a/cmd/prometheus/query_log_test.go
+++ b/cmd/prometheus/query_log_test.go
@@ -284,7 +284,7 @@ func (p *queryLogTest) run(t *testing.T) {
if !p.enabledAtStart {
p.query(t)
- require.Equal(t, 0, len(readQueryLog(t, queryLogFile.Name())))
+ require.Empty(t, readQueryLog(t, queryLogFile.Name()))
p.setQueryLog(t, queryLogFile.Name())
p.reloadConfig(t)
}
@@ -309,7 +309,7 @@ func (p *queryLogTest) run(t *testing.T) {
p.query(t)
ql = readQueryLog(t, queryLogFile.Name())
- require.Equal(t, qc, len(ql))
+ require.Len(t, ql, qc)
qc = len(ql)
p.setQueryLog(t, queryLogFile.Name())
@@ -320,7 +320,7 @@ func (p *queryLogTest) run(t *testing.T) {
ql = readQueryLog(t, queryLogFile.Name())
if p.exactQueryCount() {
- require.Equal(t, qc, len(ql))
+ require.Len(t, ql, qc)
} else {
require.Greater(t, len(ql), qc, "no queries logged")
}
@@ -340,7 +340,7 @@ func (p *queryLogTest) run(t *testing.T) {
require.NoError(t, os.Rename(queryLogFile.Name(), newFile.Name()))
ql = readQueryLog(t, newFile.Name())
if p.exactQueryCount() {
- require.Equal(t, qc, len(ql))
+ require.Len(t, ql, qc)
}
p.validateLastQuery(t, ql)
qc = len(ql)
@@ -351,7 +351,7 @@ func (p *queryLogTest) run(t *testing.T) {
ql = readQueryLog(t, newFile.Name())
if p.exactQueryCount() {
- require.Equal(t, qc, len(ql))
+ require.Len(t, ql, qc)
} else {
require.Greater(t, len(ql), qc, "no queries logged")
}
diff --git a/cmd/promtool/backfill_test.go b/cmd/promtool/backfill_test.go
index f373ebd6e4..7d29690e47 100644
--- a/cmd/promtool/backfill_test.go
+++ b/cmd/promtool/backfill_test.go
@@ -61,7 +61,7 @@ func queryAllSeries(t testing.TB, q storage.Querier, expectedMinTime, expectedMa
func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime, expectedBlockDuration int64, expectedSamples []backfillSample, expectedNumBlocks int) {
blocks := db.Blocks()
- require.Equal(t, expectedNumBlocks, len(blocks), "did not create correct number of blocks")
+ require.Len(t, blocks, expectedNumBlocks, "did not create correct number of blocks")
for i, block := range blocks {
require.Equal(t, block.MinTime()/expectedBlockDuration, (block.MaxTime()-1)/expectedBlockDuration, "block %d contains data outside of one aligned block duration", i)
diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go
index 973795a86e..508b681b88 100644
--- a/cmd/promtool/main.go
+++ b/cmd/promtool/main.go
@@ -36,6 +36,7 @@ import (
"github.com/google/pprof/profile"
"github.com/prometheus/client_golang/api"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/client_golang/prometheus/testutil/promlint"
config_util "github.com/prometheus/common/config"
@@ -198,6 +199,7 @@ func main() {
testCmd := app.Command("test", "Unit testing.")
testRulesCmd := testCmd.Command("rules", "Unit tests for rules.")
+ testRulesRun := testRulesCmd.Flag("run", "If set, will only run test groups whose names match the regular expression. Can be specified multiple times.").Strings()
testRulesFiles := testRulesCmd.Arg(
"test-rule-file",
"The unit test file.",
@@ -316,7 +318,7 @@ func main() {
switch parsedCmd {
case sdCheckCmd.FullCommand():
- os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort))
+ os.Exit(CheckSD(*sdConfigFile, *sdJobName, *sdTimeout, noDefaultScrapePort, prometheus.DefaultRegisterer))
case checkConfigCmd.FullCommand():
os.Exit(CheckConfig(*agentMode, *checkConfigSyntaxOnly, newLintConfig(*checkConfigLint, *checkConfigLintFatal), *configFiles...))
@@ -366,6 +368,7 @@ func main() {
EnableAtModifier: true,
EnableNegativeOffset: true,
},
+ *testRulesRun,
*testRulesFiles...),
)
@@ -411,7 +414,7 @@ func checkExperimental(f bool) {
}
}
-var lintError = fmt.Errorf("lint error")
+var errLint = fmt.Errorf("lint error")
type lintConfig struct {
all bool
@@ -763,7 +766,7 @@ func checkRulesFromStdin(ls lintConfig) (bool, bool) {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
- hasErrors = hasErrors || !errors.Is(e, lintError)
+ hasErrors = hasErrors || !errors.Is(e, errLint)
}
if hasErrors {
return failed, hasErrors
@@ -776,7 +779,7 @@ func checkRulesFromStdin(ls lintConfig) (bool, bool) {
}
failed = true
for _, err := range errs {
- hasErrors = hasErrors || !errors.Is(err, lintError)
+ hasErrors = hasErrors || !errors.Is(err, errLint)
}
} else {
fmt.Printf(" SUCCESS: %d rules found\n", n)
@@ -797,7 +800,7 @@ func checkRules(files []string, ls lintConfig) (bool, bool) {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
- hasErrors = hasErrors || !errors.Is(e, lintError)
+ hasErrors = hasErrors || !errors.Is(e, errLint)
}
if hasErrors {
continue
@@ -810,7 +813,7 @@ func checkRules(files []string, ls lintConfig) (bool, bool) {
}
failed = true
for _, err := range errs {
- hasErrors = hasErrors || !errors.Is(err, lintError)
+ hasErrors = hasErrors || !errors.Is(err, errLint)
}
} else {
fmt.Printf(" SUCCESS: %d rules found\n", n)
@@ -837,7 +840,7 @@ func checkRuleGroups(rgs *rulefmt.RuleGroups, lintSettings lintConfig) (int, []e
})
}
errMessage += "Might cause inconsistency while recording expressions"
- return 0, []error{fmt.Errorf("%w %s", lintError, errMessage)}
+ return 0, []error{fmt.Errorf("%w %s", errLint, errMessage)}
}
}
diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go
index 09c91f92a5..7306a3e648 100644
--- a/cmd/promtool/main_test.go
+++ b/cmd/promtool/main_test.go
@@ -56,7 +56,7 @@ func TestQueryRange(t *testing.T) {
defer s.Close()
urlObject, err := url.Parse(s.URL)
- require.Equal(t, nil, err)
+ require.NoError(t, err)
p := &promqlPrinter{}
exitCode := QueryRange(urlObject, http.DefaultTransport, map[string]string{}, "up", "0", "300", 0, p)
@@ -79,7 +79,7 @@ func TestQueryInstant(t *testing.T) {
defer s.Close()
urlObject, err := url.Parse(s.URL)
- require.Equal(t, nil, err)
+ require.NoError(t, err)
p := &promqlPrinter{}
exitCode := QueryInstant(urlObject, http.DefaultTransport, "up", "300", p)
diff --git a/cmd/promtool/rules_test.go b/cmd/promtool/rules_test.go
index 1c06982880..75aad67864 100644
--- a/cmd/promtool/rules_test.go
+++ b/cmd/promtool/rules_test.go
@@ -91,13 +91,13 @@ func TestBackfillRuleIntegration(t *testing.T) {
for _, err := range errs {
require.NoError(t, err)
}
- require.Equal(t, 3, len(ruleImporter.groups))
+ require.Len(t, ruleImporter.groups, 3)
group1 := ruleImporter.groups[path1+";group0"]
require.NotNil(t, group1)
const defaultInterval = 60
require.Equal(t, defaultInterval*time.Second, group1.Interval())
gRules := group1.Rules()
- require.Equal(t, 1, len(gRules))
+ require.Len(t, gRules, 1)
require.Equal(t, "rule1", gRules[0].Name())
require.Equal(t, "ruleExpr", gRules[0].Query().String())
require.Equal(t, 1, gRules[0].Labels().Len())
@@ -106,7 +106,7 @@ func TestBackfillRuleIntegration(t *testing.T) {
require.NotNil(t, group2)
require.Equal(t, defaultInterval*time.Second, group2.Interval())
g2Rules := group2.Rules()
- require.Equal(t, 2, len(g2Rules))
+ require.Len(t, g2Rules, 2)
require.Equal(t, "grp2_rule1", g2Rules[0].Name())
require.Equal(t, "grp2_rule1_expr", g2Rules[0].Query().String())
require.Equal(t, 0, g2Rules[0].Labels().Len())
@@ -122,7 +122,7 @@ func TestBackfillRuleIntegration(t *testing.T) {
require.NoError(t, err)
blocks := db.Blocks()
- require.Equal(t, (i+1)*tt.expectedBlockCount, len(blocks))
+ require.Len(t, blocks, (i+1)*tt.expectedBlockCount)
q, err := db.Querier(math.MinInt64, math.MaxInt64)
require.NoError(t, err)
diff --git a/cmd/promtool/sd.go b/cmd/promtool/sd.go
index 7c5ae70365..155152e1ac 100644
--- a/cmd/promtool/sd.go
+++ b/cmd/promtool/sd.go
@@ -22,6 +22,7 @@ import (
"time"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
@@ -37,7 +38,7 @@ type sdCheckResult struct {
}
// CheckSD performs service discovery for the given job name and reports the results.
-func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool) int {
+func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefaultScrapePort bool, registerer prometheus.Registerer) int {
logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))
cfg, err := config.LoadFile(sdConfigFiles, false, false, logger)
@@ -77,7 +78,7 @@ func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration, noDefault
defer cancel()
for _, cfg := range scrapeConfig.ServiceDiscoveryConfigs {
- d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{Logger: logger})
+ d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{Logger: logger, Registerer: registerer})
if err != nil {
fmt.Fprintln(os.Stderr, "Could not create new discoverer", err)
return failureExitCode
diff --git a/cmd/promtool/sd_test.go b/cmd/promtool/sd_test.go
index 82bc9947f7..2f4d3aba7d 100644
--- a/cmd/promtool/sd_test.go
+++ b/cmd/promtool/sd_test.go
@@ -35,7 +35,7 @@ func TestSDCheckResult(t *testing.T) {
}}
reg, err := relabel.NewRegexp("(.*)")
- require.Nil(t, err)
+ require.NoError(t, err)
scrapeConfig := &config.ScrapeConfig{
ScrapeInterval: model.Duration(1 * time.Minute),
diff --git a/cmd/promtool/testdata/rules_run.yml b/cmd/promtool/testdata/rules_run.yml
new file mode 100644
index 0000000000..1905d89fa4
--- /dev/null
+++ b/cmd/promtool/testdata/rules_run.yml
@@ -0,0 +1,30 @@
+rule_files:
+ - rules.yml
+
+evaluation_interval: 1m
+
+# Minimal test cases to check focusing on a single test group.
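+# Focus on one group with, e.g.:
+#   promtool test rules --run correct rules_run.yml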
+tests:
+ - name: correct test
+ input_series:
+ - series: test
+ values: 1
+
+ promql_expr_test:
+ - expr: test
+ eval_time: 0
+ exp_samples:
+ - value: 1
+ labels: test
+
+ - name: wrong test
+ input_series:
+ - series: test
+ values: 0
+
+ promql_expr_test:
+ - expr: test
+ eval_time: 0
+ exp_samples:
+ - value: 1
+ labels: test
diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go
index d37e03e52b..a25a8596d4 100644
--- a/cmd/promtool/unittest.go
+++ b/cmd/promtool/unittest.go
@@ -26,6 +26,7 @@ import (
"time"
"github.com/go-kit/log"
+ "github.com/grafana/regexp"
"github.com/prometheus/common/model"
"gopkg.in/yaml.v2"
@@ -39,11 +40,16 @@ import (
// RulesUnitTest does unit testing of rules based on the unit testing files provided.
// More info about the file format can be found in the docs.
-func RulesUnitTest(queryOpts promql.LazyLoaderOpts, files ...string) int {
+func RulesUnitTest(queryOpts promql.LazyLoaderOpts, runStrings []string, files ...string) int {
failed := false
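+ // Join the --run patterns into a single alternation: a test group is run
+ // when its name matches any of the given patterns.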
+ var run *regexp.Regexp
+ if runStrings != nil {
+ run = regexp.MustCompile(strings.Join(runStrings, "|"))
+ }
+
for _, f := range files {
- if errs := ruleUnitTest(f, queryOpts); errs != nil {
+ if errs := ruleUnitTest(f, queryOpts, run); errs != nil {
fmt.Fprintln(os.Stderr, " FAILED:")
for _, e := range errs {
fmt.Fprintln(os.Stderr, e.Error())
@@ -61,7 +67,7 @@ func RulesUnitTest(queryOpts promql.LazyLoaderOpts, files ...string) int {
return successExitCode
}
-func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts) []error {
+func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts, run *regexp.Regexp) []error {
fmt.Println("Unit Testing: ", filename)
b, err := os.ReadFile(filename)
@@ -96,6 +102,10 @@ func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts) []error {
// Testing.
var errs []error
for _, t := range unitTestInp.Tests {
+ if !matchesRun(t.TestGroupName, run) {
+ continue
+ }
+
if t.Interval == 0 {
t.Interval = unitTestInp.EvaluationInterval
}
@@ -111,6 +121,14 @@ func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts) []error {
return nil
}
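+// matchesRun reports whether a test group name matches the --run filter.
+// A nil filter matches every group.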
+func matchesRun(name string, run *regexp.Regexp) bool {
+ if run == nil {
+ return true
+ }
+
+ return run.MatchString(name)
+}
+
// unitTestFile holds the contents of a single unit test file.
type unitTestFile struct {
RuleFiles []string `yaml:"rule_files"`
diff --git a/cmd/promtool/unittest_test.go b/cmd/promtool/unittest_test.go
index c96883113a..fb4012e3c1 100644
--- a/cmd/promtool/unittest_test.go
+++ b/cmd/promtool/unittest_test.go
@@ -125,7 +125,60 @@ func TestRulesUnitTest(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- if got := RulesUnitTest(tt.queryOpts, tt.args.files...); got != tt.want {
+ if got := RulesUnitTest(tt.queryOpts, nil, tt.args.files...); got != tt.want {
+ t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestRulesUnitTestRun(t *testing.T) {
+ type args struct {
+ run []string
+ files []string
+ }
+ tests := []struct {
+ name string
+ args args
+ queryOpts promql.LazyLoaderOpts
+ want int
+ }{
+ {
+ name: "Test all without run arg",
+ args: args{
+ run: nil,
+ files: []string{"./testdata/rules_run.yml"},
+ },
+ want: 1,
+ },
+ {
+ name: "Test all with run arg",
+ args: args{
+ run: []string{"correct", "wrong"},
+ files: []string{"./testdata/rules_run.yml"},
+ },
+ want: 1,
+ },
+ {
+ name: "Test correct",
+ args: args{
+ run: []string{"correct"},
+ files: []string{"./testdata/rules_run.yml"},
+ },
+ want: 0,
+ },
+ {
+ name: "Test wrong",
+ args: args{
+ run: []string{"wrong"},
+ files: []string{"./testdata/rules_run.yml"},
+ },
+ want: 1,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if got := RulesUnitTest(tt.queryOpts, tt.args.run, tt.args.files...); got != tt.want {
t.Errorf("RulesUnitTest() = %v, want %v", got, tt.want)
}
})
diff --git a/config/config.go b/config/config.go
index b832ac9a17..ddcca84dc7 100644
--- a/config/config.go
+++ b/config/config.go
@@ -454,12 +454,19 @@ var (
OpenMetricsText1_0_0: "application/openmetrics-text;version=1.0.0",
}
+ // DefaultScrapeProtocols is the set of scrape protocols that will be proposed
+ // to a scrape target, ordered by priority.
DefaultScrapeProtocols = []ScrapeProtocol{
OpenMetricsText1_0_0,
OpenMetricsText0_0_1,
PrometheusText0_0_4,
}
- DefaultNativeHistogramScrapeProtocols = []ScrapeProtocol{
+
+ // DefaultProtoFirstScrapeProtocols is like DefaultScrapeProtocols, but it
+ // favors the protobuf Prometheus exposition format.
+ // It is used by default with certain feature flags like
+ // "native-histograms" and "created-timestamp-zero-ingestion".
+ DefaultProtoFirstScrapeProtocols = []ScrapeProtocol{
PrometheusProto,
OpenMetricsText1_0_0,
OpenMetricsText0_0_1,
diff --git a/config/config_test.go b/config/config_test.go
index 408622cd5a..e614a44637 100644
--- a/config/config_test.go
+++ b/config/config_test.go
@@ -568,6 +568,7 @@ var expectedConf = &Config{
ServiceDiscoveryConfigs: discovery.Configs{
&xds.KumaSDConfig{
Server: "http://kuma-control-plane.kuma-system.svc:5676",
+ ClientID: "main-prometheus",
HTTPClientConfig: config.DefaultHTTPClientConfig,
RefreshInterval: model.Duration(15 * time.Second),
FetchTimeout: model.Duration(2 * time.Minute),
@@ -1457,8 +1458,8 @@ func TestRemoteWriteRetryOnRateLimit(t *testing.T) {
got := &Config{}
require.NoError(t, yaml.UnmarshalStrict(out, got))
- require.Equal(t, true, got.RemoteWriteConfigs[0].QueueConfig.RetryOnRateLimit)
- require.Equal(t, false, got.RemoteWriteConfigs[1].QueueConfig.RetryOnRateLimit)
+ require.True(t, got.RemoteWriteConfigs[0].QueueConfig.RetryOnRateLimit)
+ require.False(t, got.RemoteWriteConfigs[1].QueueConfig.RetryOnRateLimit)
}
func TestLoadConfig(t *testing.T) {
@@ -1475,9 +1476,9 @@ func TestLoadConfig(t *testing.T) {
func TestScrapeIntervalLarger(t *testing.T) {
c, err := LoadFile("testdata/scrape_interval_larger.good.yml", false, false, log.NewNopLogger())
require.NoError(t, err)
- require.Equal(t, 1, len(c.ScrapeConfigs))
+ require.Len(t, c.ScrapeConfigs, 1)
for _, sc := range c.ScrapeConfigs {
- require.Equal(t, true, sc.ScrapeInterval >= sc.ScrapeTimeout)
+ require.GreaterOrEqual(t, sc.ScrapeInterval, sc.ScrapeTimeout)
}
}
@@ -1493,7 +1494,7 @@ func TestElideSecrets(t *testing.T) {
yamlConfig := string(config)
matches := secretRe.FindAllStringIndex(yamlConfig, -1)
- require.Equal(t, 22, len(matches), "wrong number of secret matches found")
+ require.Len(t, matches, 22, "wrong number of secret matches found")
require.NotContains(t, yamlConfig, "mysecret",
"yaml marshal reveals authentication credentials.")
}
@@ -2063,7 +2064,7 @@ func TestAgentMode(t *testing.T) {
c, err := LoadFile("testdata/agent_mode.without_remote_writes.yml", true, false, log.NewNopLogger())
require.NoError(t, err)
- require.Len(t, c.RemoteWriteConfigs, 0)
+ require.Empty(t, c.RemoteWriteConfigs)
c, err = LoadFile("testdata/agent_mode.good.yml", true, false, log.NewNopLogger())
require.NoError(t, err)
@@ -2257,5 +2258,5 @@ func TestScrapeConfigDisableCompression(t *testing.T) {
got := &Config{}
require.NoError(t, yaml.UnmarshalStrict(out, got))
- require.Equal(t, false, got.ScrapeConfigs[0].EnableCompression)
+ require.False(t, got.ScrapeConfigs[0].EnableCompression)
}
diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml
index e034eff431..b584301649 100644
--- a/config/testdata/conf.good.yml
+++ b/config/testdata/conf.good.yml
@@ -221,6 +221,7 @@ scrape_configs:
kuma_sd_configs:
- server: http://kuma-control-plane.kuma-system.svc:5676
+ client_id: main-prometheus
- job_name: service-marathon
marathon_sd_configs:
diff --git a/config/testdata/roundtrip.good.yml b/config/testdata/roundtrip.good.yml
index f2634d257a..24ab7d2592 100644
--- a/config/testdata/roundtrip.good.yml
+++ b/config/testdata/roundtrip.good.yml
@@ -108,6 +108,7 @@ scrape_configs:
kuma_sd_configs:
- server: http://kuma-control-plane.kuma-system.svc:5676
+ client_id: main-prometheus
marathon_sd_configs:
- servers:
diff --git a/discovery/README.md b/discovery/README.md
index 19b579b399..4c06608625 100644
--- a/discovery/README.md
+++ b/discovery/README.md
@@ -234,6 +234,11 @@ type Config interface {
type DiscovererOptions struct {
Logger log.Logger
+
+ // A registerer for the Discoverer's metrics.
+ Registerer prometheus.Registerer
+
+ HTTPClientOptions []config.HTTPClientOption
}
```
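+
+A discoverer can register its own metrics through the provided registerer. A
+minimal sketch (the `myDiscoverer` type and its counter are illustrative, not
+part of the real API):
+
+```go
+func (c *Config) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
+	refreshes := prometheus.NewCounter(prometheus.CounterOpts{
+		Name: "my_sd_refreshes_total",
+		Help: "Number of refreshes performed by the discoverer.",
+	})
+	if err := opts.Registerer.Register(refreshes); err != nil {
+		return nil, err
+	}
+	return &myDiscoverer{logger: opts.Logger, refreshes: refreshes}, nil
+}
+```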
diff --git a/discovery/aws/ec2.go b/discovery/aws/ec2.go
index 64c8fdce63..40e6e7cb79 100644
--- a/discovery/aws/ec2.go
+++ b/discovery/aws/ec2.go
@@ -30,6 +30,7 @@ import (
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -101,7 +102,7 @@ func (*EC2SDConfig) Name() string { return "ec2" }
// NewDiscoverer returns a Discoverer for the EC2 Config.
func (c *EC2SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewEC2Discovery(c, opts.Logger), nil
+ return NewEC2Discovery(c, opts.Logger, opts.Registerer), nil
}
// UnmarshalYAML implements the yaml.Unmarshaler interface for the EC2 Config.
@@ -147,7 +148,7 @@ type EC2Discovery struct {
}
// NewEC2Discovery returns a new EC2Discovery which periodically refreshes its targets.
-func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger) *EC2Discovery {
+func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger, reg prometheus.Registerer) *EC2Discovery {
if logger == nil {
logger = log.NewNopLogger()
}
@@ -156,10 +157,13 @@ func NewEC2Discovery(conf *EC2SDConfig, logger log.Logger) *EC2Discovery {
cfg: conf,
}
d.Discovery = refresh.NewDiscovery(
- logger,
- "ec2",
- time.Duration(d.cfg.RefreshInterval),
- d.refresh,
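+ // refresh.Options carries the registerer used for this discoverer's metrics.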
+ refresh.Options{
+ Logger: logger,
+ Mech: "ec2",
+ Interval: time.Duration(d.cfg.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ },
)
return d
}
diff --git a/discovery/aws/lightsail.go b/discovery/aws/lightsail.go
index c0198d6a77..5382ea0159 100644
--- a/discovery/aws/lightsail.go
+++ b/discovery/aws/lightsail.go
@@ -29,6 +29,7 @@ import (
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/lightsail"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -84,7 +85,7 @@ func (*LightsailSDConfig) Name() string { return "lightsail" }
// NewDiscoverer returns a Discoverer for the Lightsail Config.
func (c *LightsailSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewLightsailDiscovery(c, opts.Logger), nil
+ return NewLightsailDiscovery(c, opts.Logger, opts.Registerer), nil
}
// UnmarshalYAML implements the yaml.Unmarshaler interface for the Lightsail Config.
@@ -121,7 +122,7 @@ type LightsailDiscovery struct {
}
// NewLightsailDiscovery returns a new LightsailDiscovery which periodically refreshes its targets.
-func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger) *LightsailDiscovery {
+func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger, reg prometheus.Registerer) *LightsailDiscovery {
if logger == nil {
logger = log.NewNopLogger()
}
@@ -129,10 +130,13 @@ func NewLightsailDiscovery(conf *LightsailSDConfig, logger log.Logger) *Lightsai
cfg: conf,
}
d.Discovery = refresh.NewDiscovery(
- logger,
- "lightsail",
- time.Duration(d.cfg.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "lightsail",
+ Interval: time.Duration(d.cfg.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ },
)
return d
}
diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go
index faccadcf85..fa198c320f 100644
--- a/discovery/azure/azure.go
+++ b/discovery/azure/azure.go
@@ -28,9 +28,10 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
+ "github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
- "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4"
- "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v2"
+ "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
+ "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4"
cache "github.com/Code-Hex/go-generics-cache"
"github.com/Code-Hex/go-generics-cache/policy/lru"
"github.com/go-kit/log"
@@ -78,17 +79,6 @@ var (
AuthenticationMethod: authMethodOAuth,
HTTPClientConfig: config_util.DefaultHTTPClientConfig,
}
-
- failuresCount = prometheus.NewCounter(
- prometheus.CounterOpts{
- Name: "prometheus_sd_azure_failures_total",
- Help: "Number of Azure service discovery refresh failures.",
- })
- cacheHitCount = prometheus.NewCounter(
- prometheus.CounterOpts{
- Name: "prometheus_sd_azure_cache_hit_total",
- Help: "Number of cache hit during refresh.",
- })
)
var environments = map[string]cloud.Configuration{
@@ -105,7 +95,7 @@ func CloudConfigurationFromName(name string) (cloud.Configuration, error) {
name = strings.ToUpper(name)
env, ok := environments[name]
if !ok {
- return env, fmt.Errorf("There is no cloud configuration matching the name %q", name)
+ return env, fmt.Errorf("there is no cloud configuration matching the name %q", name)
}
return env, nil
@@ -113,8 +103,6 @@ func CloudConfigurationFromName(name string) (cloud.Configuration, error) {
func init() {
discovery.RegisterConfig(&SDConfig{})
- prometheus.MustRegister(failuresCount)
- prometheus.MustRegister(cacheHitCount)
}
// SDConfig is the configuration for Azure based service discovery.
@@ -137,7 +125,7 @@ func (*SDConfig) Name() string { return "azure" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger), nil
+ return NewDiscovery(c, opts.Logger, opts.Registerer)
}
func validateAuthParam(param, name string) error {
@@ -180,14 +168,16 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
type Discovery struct {
*refresh.Discovery
- logger log.Logger
- cfg *SDConfig
- port int
- cache *cache.Cache[string, *armnetwork.Interface]
+ logger log.Logger
+ cfg *SDConfig
+ port int
+ cache *cache.Cache[string, *armnetwork.Interface]
+ failuresCount prometheus.Counter
+ cacheHitCount prometheus.Counter
}
// NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets.
-func NewDiscovery(cfg *SDConfig, logger log.Logger) *Discovery {
+func NewDiscovery(cfg *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@@ -197,16 +187,30 @@ func NewDiscovery(cfg *SDConfig, logger log.Logger) *Discovery {
port: cfg.Port,
logger: logger,
cache: l,
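+ // Per-instance counters replace the former package-level collectors, so
+ // multiple Discovery instances can register their metrics independently.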
+ failuresCount: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Name: "prometheus_sd_azure_failures_total",
+ Help: "Number of Azure service discovery refresh failures.",
+ }),
+ cacheHitCount: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Name: "prometheus_sd_azure_cache_hit_total",
+ Help: "Number of cache hit during refresh.",
+ }),
}
d.Discovery = refresh.NewDiscovery(
- logger,
- "azure",
- time.Duration(cfg.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "azure",
+ Interval: time.Duration(cfg.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ Metrics: []prometheus.Collector{d.failuresCount, d.cacheHitCount},
+ },
)
- return d
+ return d, nil
}
// azureClient represents multiple Azure Resource Manager providers.
@@ -304,6 +308,7 @@ type virtualMachine struct {
Location string
OsType string
ScaleSet string
+ InstanceID string
Tags map[string]*string
NetworkInterfaces []string
Size string
@@ -328,14 +333,14 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
client, err := createAzureClient(*d.cfg)
if err != nil {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, fmt.Errorf("could not create Azure client: %w", err)
}
client.logger = d.logger
machines, err := client.getVMs(ctx, d.cfg.ResourceGroup)
if err != nil {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, fmt.Errorf("could not get virtual machines: %w", err)
}
@@ -344,14 +349,14 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
// Load the vms managed by scale sets.
scaleSets, err := client.getScaleSets(ctx, d.cfg.ResourceGroup)
if err != nil {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, fmt.Errorf("could not get virtual machine scale sets: %w", err)
}
for _, scaleSet := range scaleSets {
scaleSetVms, err := client.getScaleSetVMs(ctx, scaleSet)
if err != nil {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, fmt.Errorf("could not get virtual machine scale set vms: %w", err)
}
machines = append(machines, scaleSetVms...)
@@ -402,9 +407,13 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
var networkInterface *armnetwork.Interface
if v, ok := d.getFromCache(nicID); ok {
networkInterface = v
- cacheHitCount.Add(1)
+ d.cacheHitCount.Add(1)
} else {
- networkInterface, err = client.getNetworkInterfaceByID(ctx, nicID)
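+ // Scale set VMs expose their NICs through a dedicated API, so the lookup
+ // depends on whether the VM belongs to a scale set.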
+ if vm.ScaleSet == "" {
+ networkInterface, err = client.getVMNetworkInterfaceByID(ctx, nicID)
+ } else {
+ networkInterface, err = client.getVMScaleSetVMNetworkInterfaceByID(ctx, nicID, vm.ScaleSet, vm.InstanceID)
+ }
if err != nil {
if errors.Is(err, errorNotFound) {
level.Warn(d.logger).Log("msg", "Network interface does not exist", "name", nicID, "err", err)
@@ -461,7 +470,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
var tg targetgroup.Group
for tgt := range ch {
if tgt.err != nil {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, fmt.Errorf("unable to complete Azure service discovery: %w", tgt.err)
}
if tgt.labelSet != nil {
@@ -622,6 +631,7 @@ func mapFromVMScaleSetVM(vm armcompute.VirtualMachineScaleSetVM, scaleSetName st
Location: *(vm.Location),
OsType: osType,
ScaleSet: scaleSetName,
+ InstanceID: *(vm.InstanceID),
Tags: tags,
NetworkInterfaces: networkInterfaces,
Size: size,
@@ -630,21 +640,41 @@ func mapFromVMScaleSetVM(vm armcompute.VirtualMachineScaleSetVM, scaleSetName st
var errorNotFound = errors.New("network interface does not exist")
-// getNetworkInterfaceByID gets the network interface.
+// getVMNetworkInterfaceByID gets the network interface of a standalone virtual machine.
// If a 404 is returned from the Azure API, `errorNotFound` is returned.
-func (client *azureClient) getNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) {
+func (client *azureClient) getVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*armnetwork.Interface, error) {
r, err := newAzureResourceFromID(networkInterfaceID, client.logger)
if err != nil {
return nil, fmt.Errorf("could not parse network interface ID: %w", err)
}
- resp, err := client.nic.Get(ctx, r.ResourceGroupName, r.Name, nil)
+ resp, err := client.nic.Get(ctx, r.ResourceGroupName, r.Name, &armnetwork.InterfacesClientGetOptions{Expand: to.Ptr("IPConfigurations/PublicIPAddress")})
if err != nil {
var responseError *azcore.ResponseError
if errors.As(err, &responseError) && responseError.StatusCode == http.StatusNotFound {
return nil, errorNotFound
}
- return nil, fmt.Errorf("Failed to retrieve Interface %v with error: %w", networkInterfaceID, err)
+ return nil, fmt.Errorf("failed to retrieve Interface %v with error: %w", networkInterfaceID, err)
+ }
+
+ return &resp.Interface, nil
+}
+
+// getVMScaleSetVMNetworkInterfaceByID gets the network interface of a scale set VM.
+// If a 404 is returned from the Azure API, `errorNotFound` is returned.
+func (client *azureClient) getVMScaleSetVMNetworkInterfaceByID(ctx context.Context, networkInterfaceID, scaleSetName, instanceID string) (*armnetwork.Interface, error) {
+ r, err := newAzureResourceFromID(networkInterfaceID, client.logger)
+ if err != nil {
+ return nil, fmt.Errorf("could not parse network interface ID: %w", err)
+ }
+
+ resp, err := client.nic.GetVirtualMachineScaleSetNetworkInterface(ctx, r.ResourceGroupName, scaleSetName, instanceID, r.Name, &armnetwork.InterfacesClientGetVirtualMachineScaleSetNetworkInterfaceOptions{Expand: to.Ptr("IPConfigurations/PublicIPAddress")})
+ if err != nil {
+ var responseError *azcore.ResponseError
+ if errors.As(err, &responseError) && responseError.StatusCode == http.StatusNotFound {
+ return nil, errorNotFound
+ }
+ return nil, fmt.Errorf("failed to retrieve Interface %v with error: %w", networkInterfaceID, err)
}
return &resp.Interface, nil
diff --git a/discovery/azure/azure_test.go b/discovery/azure/azure_test.go
index 6c3ec236b3..4ff937e0bc 100644
--- a/discovery/azure/azure_test.go
+++ b/discovery/azure/azure_test.go
@@ -17,7 +17,7 @@ import (
"testing"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
- "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v4"
+ "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
)
@@ -142,6 +142,7 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) {
vmSize := armcompute.VirtualMachineSizeTypes(size)
osType := armcompute.OperatingSystemTypesLinux
vmType := "type"
+ instanceID := "123"
location := "westeurope"
computerName := "computer_name"
networkProfile := armcompute.NetworkProfile{
@@ -166,6 +167,7 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) {
ID: &id,
Name: &name,
Type: &vmType,
+ InstanceID: &instanceID,
Location: &location,
Tags: nil,
Properties: properties,
@@ -182,6 +184,7 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) {
Tags: map[string]*string{},
NetworkInterfaces: []string{},
ScaleSet: scaleSet,
+ InstanceID: instanceID,
Size: size,
}
@@ -197,6 +200,7 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
vmSize := armcompute.VirtualMachineSizeTypes(size)
osType := armcompute.OperatingSystemTypesLinux
vmType := "type"
+ instanceID := "123"
location := "westeurope"
computerName := "computer_name"
tags := map[string]*string{
@@ -224,6 +228,7 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
ID: &id,
Name: &name,
Type: &vmType,
+ InstanceID: &instanceID,
Location: &location,
Tags: tags,
Properties: properties,
@@ -240,6 +245,7 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
Tags: tags,
NetworkInterfaces: []string{},
ScaleSet: scaleSet,
+ InstanceID: instanceID,
Size: size,
}
@@ -269,7 +275,7 @@ func TestNewAzureResourceFromID(t *testing.T) {
},
} {
actual, err := newAzureResourceFromID(tc.id, nil)
- require.Nil(t, err)
+ require.NoError(t, err)
require.Equal(t, tc.expected.Name, actual.Name)
require.Equal(t, tc.expected.ResourceGroupName, actual.ResourceGroupName)
}
diff --git a/discovery/consul/consul.go b/discovery/consul/consul.go
index b4cb152297..50f171a78a 100644
--- a/discovery/consul/consul.go
+++ b/discovery/consul/consul.go
@@ -71,41 +71,18 @@ const (
namespace = "prometheus"
)
-var (
- rpcFailuresCount = prometheus.NewCounter(
- prometheus.CounterOpts{
- Namespace: namespace,
- Name: "sd_consul_rpc_failures_total",
- Help: "The number of Consul RPC call failures.",
- })
- rpcDuration = prometheus.NewSummaryVec(
- prometheus.SummaryOpts{
- Namespace: namespace,
- Name: "sd_consul_rpc_duration_seconds",
- Help: "The duration of a Consul RPC call in seconds.",
- Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
- },
- []string{"endpoint", "call"},
- )
-
- // Initialize metric vectors.
- servicesRPCDuration = rpcDuration.WithLabelValues("catalog", "services")
- serviceRPCDuration = rpcDuration.WithLabelValues("catalog", "service")
-
- // DefaultSDConfig is the default Consul SD configuration.
- DefaultSDConfig = SDConfig{
- TagSeparator: ",",
- Scheme: "http",
- Server: "localhost:8500",
- AllowStale: true,
- RefreshInterval: model.Duration(30 * time.Second),
- HTTPClientConfig: config.DefaultHTTPClientConfig,
- }
-)
+// DefaultSDConfig is the default Consul SD configuration.
+var DefaultSDConfig = SDConfig{
+ TagSeparator: ",",
+ Scheme: "http",
+ Server: "localhost:8500",
+ AllowStale: true,
+ RefreshInterval: model.Duration(30 * time.Second),
+ HTTPClientConfig: config.DefaultHTTPClientConfig,
+}
func init() {
discovery.RegisterConfig(&SDConfig{})
- prometheus.MustRegister(rpcFailuresCount, rpcDuration)
}
// SDConfig is the configuration for Consul service discovery.
@@ -147,7 +124,7 @@ func (*SDConfig) Name() string { return "consul" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger)
+ return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -184,22 +161,27 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
// Discovery retrieves target information from a Consul server
// and updates them via watches.
type Discovery struct {
- client *consul.Client
- clientDatacenter string
- clientNamespace string
- clientPartition string
- tagSeparator string
- watchedServices []string // Set of services which will be discovered.
- watchedTags []string // Tags used to filter instances of a service.
- watchedNodeMeta map[string]string
- allowStale bool
- refreshInterval time.Duration
- finalizer func()
- logger log.Logger
+ client *consul.Client
+ clientDatacenter string
+ clientNamespace string
+ clientPartition string
+ tagSeparator string
+ watchedServices []string // Set of services which will be discovered.
+ watchedTags []string // Tags used to filter instances of a service.
+ watchedNodeMeta map[string]string
+ allowStale bool
+ refreshInterval time.Duration
+ finalizer func()
+ logger log.Logger
+ rpcFailuresCount prometheus.Counter
+ rpcDuration *prometheus.SummaryVec
+ servicesRPCDuration prometheus.Observer
+ serviceRPCDuration prometheus.Observer
+ metricRegisterer discovery.MetricRegisterer
}
// NewDiscovery returns a new Discovery for the given config.
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@@ -237,7 +219,35 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
clientPartition: conf.Partition,
finalizer: wrapper.CloseIdleConnections,
logger: logger,
+ rpcFailuresCount: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Namespace: namespace,
+ Name: "sd_consul_rpc_failures_total",
+ Help: "The number of Consul RPC call failures.",
+ }),
+ rpcDuration: prometheus.NewSummaryVec(
+ prometheus.SummaryOpts{
+ Namespace: namespace,
+ Name: "sd_consul_rpc_duration_seconds",
+ Help: "The duration of a Consul RPC call in seconds.",
+ Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
+ },
+ []string{"endpoint", "call"},
+ ),
}
+
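+ // Registration is deferred to Run (with a matching unregister on exit), so
+ // each Discovery instance owns its collectors for its own lifetime.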
+ cd.metricRegisterer = discovery.NewMetricRegisterer(
+ reg,
+ []prometheus.Collector{
+ cd.rpcFailuresCount,
+ cd.rpcDuration,
+ },
+ )
+
+ // Initialize metric vectors.
+ cd.servicesRPCDuration = cd.rpcDuration.WithLabelValues("catalog", "services")
+ cd.serviceRPCDuration = cd.rpcDuration.WithLabelValues("catalog", "service")
+
return cd, nil
}
@@ -293,7 +303,7 @@ func (d *Discovery) getDatacenter() error {
info, err := d.client.Agent().Self()
if err != nil {
level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err)
- rpcFailuresCount.Inc()
+ d.rpcFailuresCount.Inc()
return err
}
@@ -334,6 +344,13 @@ func (d *Discovery) initialize(ctx context.Context) {
// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
+ err := d.metricRegisterer.RegisterMetrics()
+ if err != nil {
+ level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
+ return
+ }
+ defer d.metricRegisterer.UnregisterMetrics()
+
if d.finalizer != nil {
defer d.finalizer()
}
@@ -382,7 +399,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
t0 := time.Now()
srvs, meta, err := catalog.Services(opts.WithContext(ctx))
elapsed := time.Since(t0)
- servicesRPCDuration.Observe(elapsed.Seconds())
+ d.servicesRPCDuration.Observe(elapsed.Seconds())
// Check the context before in order to exit early.
select {
@@ -393,7 +410,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
if err != nil {
level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err)
- rpcFailuresCount.Inc()
+ d.rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return
}
@@ -449,13 +466,15 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
// consulService contains data belonging to the same service.
type consulService struct {
- name string
- tags []string
- labels model.LabelSet
- discovery *Discovery
- client *consul.Client
- tagSeparator string
- logger log.Logger
+ name string
+ tags []string
+ labels model.LabelSet
+ discovery *Discovery
+ client *consul.Client
+ tagSeparator string
+ logger log.Logger
+ rpcFailuresCount prometheus.Counter
+ serviceRPCDuration prometheus.Observer
}
// Start watching a service.
@@ -469,8 +488,10 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G
serviceLabel: model.LabelValue(name),
datacenterLabel: model.LabelValue(d.clientDatacenter),
},
- tagSeparator: d.tagSeparator,
- logger: d.logger,
+ tagSeparator: d.tagSeparator,
+ logger: d.logger,
+ rpcFailuresCount: d.rpcFailuresCount,
+ serviceRPCDuration: d.serviceRPCDuration,
}
go func() {
@@ -508,7 +529,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr
t0 := time.Now()
serviceNodes, meta, err := health.ServiceMultipleTags(srv.name, srv.tags, false, opts.WithContext(ctx))
elapsed := time.Since(t0)
- serviceRPCDuration.Observe(elapsed.Seconds())
+ srv.serviceRPCDuration.Observe(elapsed.Seconds())
// Check the context before in order to exit early.
select {
@@ -520,7 +541,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr
if err != nil {
level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err)
- rpcFailuresCount.Inc()
+ srv.rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return
}
diff --git a/discovery/consul/consul_test.go b/discovery/consul/consul_test.go
index c929601638..97cb8fbc9d 100644
--- a/discovery/consul/consul_test.go
+++ b/discovery/consul/consul_test.go
@@ -22,6 +22,7 @@ import (
"time"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@@ -39,7 +40,7 @@ func TestConfiguredService(t *testing.T) {
conf := &SDConfig{
Services: []string{"configuredServiceName"},
}
- consulDiscovery, err := NewDiscovery(conf, nil)
+ consulDiscovery, err := NewDiscovery(conf, nil, prometheus.NewRegistry())
if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
@@ -56,7 +57,7 @@ func TestConfiguredServiceWithTag(t *testing.T) {
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http"},
}
- consulDiscovery, err := NewDiscovery(conf, nil)
+ consulDiscovery, err := NewDiscovery(conf, nil, prometheus.NewRegistry())
if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
@@ -151,7 +152,7 @@ func TestConfiguredServiceWithTags(t *testing.T) {
}
for _, tc := range cases {
- consulDiscovery, err := NewDiscovery(tc.conf, nil)
+ consulDiscovery, err := NewDiscovery(tc.conf, nil, prometheus.NewRegistry())
if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
@@ -165,7 +166,7 @@ func TestConfiguredServiceWithTags(t *testing.T) {
func TestNonConfiguredService(t *testing.T) {
conf := &SDConfig{}
- consulDiscovery, err := NewDiscovery(conf, nil)
+ consulDiscovery, err := NewDiscovery(conf, nil, prometheus.NewRegistry())
if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
@@ -262,19 +263,19 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) {
func newDiscovery(t *testing.T, config *SDConfig) *Discovery {
logger := log.NewNopLogger()
- d, err := NewDiscovery(config, logger)
+ d, err := NewDiscovery(config, logger, prometheus.NewRegistry())
require.NoError(t, err)
return d
}
func checkOneTarget(t *testing.T, tg []*targetgroup.Group) {
- require.Equal(t, 1, len(tg))
+ require.Len(t, tg, 1)
target := tg[0]
require.Equal(t, "test-dc", string(target.Labels["__meta_consul_dc"]))
require.Equal(t, target.Source, string(target.Labels["__meta_consul_service"]))
if target.Source == "test" {
// test service should have one node.
- require.Greater(t, len(target.Targets), 0, "Test service should have one node")
+ require.NotEmpty(t, target.Targets, "Test service should have one node")
}
}
@@ -313,7 +314,7 @@ func TestNoTargets(t *testing.T) {
}()
targets := (<-ch)[0].Targets
- require.Equal(t, 0, len(targets))
+ require.Empty(t, targets)
cancel()
<-ch
}
@@ -484,7 +485,7 @@ oauth2:
return
}
- require.Equal(t, config, test.expected)
+ require.Equal(t, test.expected, config)
})
}
}
diff --git a/discovery/digitalocean/digitalocean.go b/discovery/digitalocean/digitalocean.go
index e207388b3d..970258de04 100644
--- a/discovery/digitalocean/digitalocean.go
+++ b/discovery/digitalocean/digitalocean.go
@@ -24,6 +24,7 @@ import (
"github.com/digitalocean/godo"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@@ -75,7 +76,7 @@ func (*SDConfig) Name() string { return "digitalocean" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger)
+ return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -103,7 +104,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
d := &Discovery{
port: conf.Port,
}
@@ -125,10 +126,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
}
d.Discovery = refresh.NewDiscovery(
- logger,
- "digitalocean",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "digitalocean",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ },
)
return d, nil
}
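The same positional-to-options migration repeats for every refresh-based SD in this patch (dns, eureka, gce, hetzner, http and ionos below); the general shape, with the field meanings spelled out, is:

    d.Discovery = refresh.NewDiscovery(
        refresh.Options{
            Logger:   logger,                              // component logger
            Mech:     "digitalocean",                      // mechanism name used in the refresh metrics' labels
            Interval: time.Duration(conf.RefreshInterval), // how often RefreshF is invoked
            RefreshF: d.refresh,                           // func(context.Context) ([]*targetgroup.Group, error)
            Registry: reg,                                 // registerer threaded down from DiscovererOptions
        },
    )

Discoverers that own additional collectors hand them over via the optional Metrics field, as the dns and http changes below show.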
diff --git a/discovery/digitalocean/digitalocean_test.go b/discovery/digitalocean/digitalocean_test.go
index a5da4b26e3..a959b312c1 100644
--- a/discovery/digitalocean/digitalocean_test.go
+++ b/discovery/digitalocean/digitalocean_test.go
@@ -20,6 +20,7 @@ import (
"testing"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
)
@@ -46,7 +47,7 @@ func TestDigitalOceanSDRefresh(t *testing.T) {
cfg := DefaultSDConfig
cfg.HTTPClientConfig.BearerToken = tokenID
- d, err := NewDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
endpoint, err := url.Parse(sdmock.Mock.Endpoint())
require.NoError(t, err)
@@ -56,12 +57,12 @@ func TestDigitalOceanSDRefresh(t *testing.T) {
tgs, err := d.refresh(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Equal(t, 4, len(tg.Targets))
+ require.Len(t, tg.Targets, 4)
for i, lbls := range []model.LabelSet{
{
diff --git a/discovery/discovery.go b/discovery/discovery.go
index 9dc010a09a..acc4c1efe9 100644
--- a/discovery/discovery.go
+++ b/discovery/discovery.go
@@ -18,6 +18,7 @@ import (
"reflect"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/prometheus/discovery/targetgroup"
@@ -42,6 +43,15 @@ type Discoverer interface {
type DiscovererOptions struct {
Logger log.Logger
+ // A registerer for the Discoverer's metrics.
+ // Some Discoverers may ignore this registerer and use the global one instead.
+ // That works for now, because the Prometheus `main` function still uses the global registry.
+ // Once `main` stops doing so, a discoverer that wants its metrics exposed on the Prometheus
+ // executable's `/metrics` endpoint must use this explicit registerer.
+ // TODO(ptodev): Update this comment once the Prometheus `main` function does not use the global registry.
+ Registerer prometheus.Registerer
+
// Extra HTTP client options to expose to Discoverers. This field may be
// ignored; Discoverer implementations must opt-in to reading it.
HTTPClientOptions []config.HTTPClientOption
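A discoverer that opts in threads the field through its constructor and registers its collectors there; a hypothetical minimal example (only DiscovererOptions.Registerer comes from this patch, all other names are illustrative):

    type exampleDiscovery struct {
        refreshes prometheus.Counter
    }

    func newExampleDiscovery(opts discovery.DiscovererOptions) (*exampleDiscovery, error) {
        d := &exampleDiscovery{
            refreshes: prometheus.NewCounter(prometheus.CounterOpts{
                Name: "example_sd_refreshes_total",
                Help: "Refreshes performed by the example SD.",
            }),
        }
        // Register on the per-discoverer registerer rather than the global one.
        if err := opts.Registerer.Register(d.refreshes); err != nil {
            return nil, err
        }
        return d, nil
    }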
diff --git a/discovery/dns/dns.go b/discovery/dns/dns.go
index 96e07254f0..9b6bd6741e 100644
--- a/discovery/dns/dns.go
+++ b/discovery/dns/dns.go
@@ -42,35 +42,21 @@ const (
dnsSrvRecordPortLabel = dnsSrvRecordPrefix + "port"
dnsMxRecordPrefix = model.MetaLabelPrefix + "dns_mx_record_"
dnsMxRecordTargetLabel = dnsMxRecordPrefix + "target"
+ dnsNsRecordPrefix = model.MetaLabelPrefix + "dns_ns_record_"
+ dnsNsRecordTargetLabel = dnsNsRecordPrefix + "target"
// Constants for instrumentation.
namespace = "prometheus"
)
-var (
- dnsSDLookupsCount = prometheus.NewCounter(
- prometheus.CounterOpts{
- Namespace: namespace,
- Name: "sd_dns_lookups_total",
- Help: "The number of DNS-SD lookups.",
- })
- dnsSDLookupFailuresCount = prometheus.NewCounter(
- prometheus.CounterOpts{
- Namespace: namespace,
- Name: "sd_dns_lookup_failures_total",
- Help: "The number of DNS-SD lookup failures.",
- })
-
- // DefaultSDConfig is the default DNS SD configuration.
- DefaultSDConfig = SDConfig{
- RefreshInterval: model.Duration(30 * time.Second),
- Type: "SRV",
- }
-)
+// DefaultSDConfig is the default DNS SD configuration.
+var DefaultSDConfig = SDConfig{
+ RefreshInterval: model.Duration(30 * time.Second),
+ Type: "SRV",
+}
func init() {
discovery.RegisterConfig(&SDConfig{})
- prometheus.MustRegister(dnsSDLookupFailuresCount, dnsSDLookupsCount)
}
// SDConfig is the configuration for DNS based service discovery.
@@ -86,7 +72,7 @@ func (*SDConfig) Name() string { return "dns" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(*c, opts.Logger), nil
+ return NewDiscovery(*c, opts.Logger, opts.Registerer)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@@ -102,7 +88,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
switch strings.ToUpper(c.Type) {
case "SRV":
- case "A", "AAAA", "MX":
+ case "A", "AAAA", "MX", "NS":
if c.Port == 0 {
return errors.New("a port is required in DNS-SD configs for all record types except SRV")
}
@@ -116,16 +102,18 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
// the Discoverer interface.
type Discovery struct {
*refresh.Discovery
- names []string
- port int
- qtype uint16
- logger log.Logger
+ names []string
+ port int
+ qtype uint16
+ logger log.Logger
+ dnsSDLookupsCount prometheus.Counter
+ dnsSDLookupFailuresCount prometheus.Counter
lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error)
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery {
+func NewDiscovery(conf SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@@ -140,6 +128,8 @@ func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery {
qtype = dns.TypeSRV
case "MX":
qtype = dns.TypeMX
+ case "NS":
+ qtype = dns.TypeNS
}
d := &Discovery{
names: conf.Names,
@@ -147,14 +137,32 @@ func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery {
port: conf.Port,
logger: logger,
lookupFn: lookupWithSearchPath,
+ dnsSDLookupsCount: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Namespace: namespace,
+ Name: "sd_dns_lookups_total",
+ Help: "The number of DNS-SD lookups.",
+ }),
+ dnsSDLookupFailuresCount: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Namespace: namespace,
+ Name: "sd_dns_lookup_failures_total",
+ Help: "The number of DNS-SD lookup failures.",
+ }),
}
+
d.Discovery = refresh.NewDiscovery(
- logger,
- "dns",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "dns",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ Metrics: []prometheus.Collector{d.dnsSDLookupsCount, d.dnsSDLookupFailuresCount},
+ },
)
- return d
+
+ return d, nil
}
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
@@ -187,9 +195,9 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targetgroup.Group) error {
response, err := d.lookupFn(name, d.qtype, d.logger)
- dnsSDLookupsCount.Inc()
+ d.dnsSDLookupsCount.Inc()
if err != nil {
- dnsSDLookupFailuresCount.Inc()
+ d.dnsSDLookupFailuresCount.Inc()
return err
}
@@ -199,7 +207,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ
}
for _, record := range response.Answer {
- var target, dnsSrvRecordTarget, dnsSrvRecordPort, dnsMxRecordTarget model.LabelValue
+ var target, dnsSrvRecordTarget, dnsSrvRecordPort, dnsMxRecordTarget, dnsNsRecordTarget model.LabelValue
switch addr := record.(type) {
case *dns.SRV:
@@ -217,6 +225,13 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ
addr.Mx = strings.TrimRight(addr.Mx, ".")
target = hostPort(addr.Mx, d.port)
+ case *dns.NS:
+ dnsNsRecordTarget = model.LabelValue(addr.Ns)
+
+ // Remove the final dot from rooted DNS names so they look like regular hostnames.
+ addr.Ns = strings.TrimRight(addr.Ns, ".")
+
+ target = hostPort(addr.Ns, d.port)
case *dns.A:
target = hostPort(addr.A.String(), d.port)
case *dns.AAAA:
@@ -234,6 +249,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ
dnsSrvRecordTargetLabel: dnsSrvRecordTarget,
dnsSrvRecordPortLabel: dnsSrvRecordPort,
dnsMxRecordTargetLabel: dnsMxRecordTarget,
+ dnsNsRecordTargetLabel: dnsNsRecordTarget,
})
}
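With the NS branch above, a DNS-SD config of type NS resolves each name to its authoritative nameservers and surfaces them via __meta_dns_ns_record_target; a sketch of the programmatic equivalent (values are illustrative):

    sd, err := NewDiscovery(SDConfig{
        Names:           []string{"example.com"},
        Type:            "NS", // record type added in this patch
        Port:            9100, // a port is required for every type except SRV
        RefreshInterval: model.Duration(30 * time.Second),
    }, log.NewNopLogger(), prometheus.NewRegistry())
    if err != nil {
        panic(err)
    }
    _ = sd // sd.refresh maps each NS answer to a target with __meta_dns_ns_record_target set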
diff --git a/discovery/dns/dns_test.go b/discovery/dns/dns_test.go
index 50b2860496..b8dd2efaac 100644
--- a/discovery/dns/dns_test.go
+++ b/discovery/dns/dns_test.go
@@ -22,6 +22,7 @@ import (
"github.com/go-kit/log"
"github.com/miekg/dns"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
@@ -81,6 +82,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "",
"__meta_dns_srv_record_port": "",
"__meta_dns_mx_record_target": "",
+ "__meta_dns_ns_record_target": "",
},
},
},
@@ -112,6 +114,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "",
"__meta_dns_srv_record_port": "",
"__meta_dns_mx_record_target": "",
+ "__meta_dns_ns_record_target": "",
},
},
},
@@ -143,6 +146,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "db1.example.com.",
"__meta_dns_srv_record_port": "3306",
"__meta_dns_mx_record_target": "",
+ "__meta_dns_ns_record_target": "",
},
{
"__address__": "db2.example.com:3306",
@@ -150,6 +154,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "db2.example.com.",
"__meta_dns_srv_record_port": "3306",
"__meta_dns_mx_record_target": "",
+ "__meta_dns_ns_record_target": "",
},
},
},
@@ -180,6 +185,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "db1.example.com.",
"__meta_dns_srv_record_port": "3306",
"__meta_dns_mx_record_target": "",
+ "__meta_dns_ns_record_target": "",
},
},
},
@@ -227,6 +233,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "",
"__meta_dns_srv_record_port": "",
"__meta_dns_mx_record_target": "smtp1.example.com.",
+ "__meta_dns_ns_record_target": "",
},
{
"__address__": "smtp2.example.com:25",
@@ -234,6 +241,7 @@ func TestDNS(t *testing.T) {
"__meta_dns_srv_record_target": "",
"__meta_dns_srv_record_port": "",
"__meta_dns_mx_record_target": "smtp2.example.com.",
+ "__meta_dns_ns_record_target": "",
},
},
},
@@ -245,7 +253,8 @@ func TestDNS(t *testing.T) {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
- sd := NewDiscovery(tc.config, nil)
+ sd, err := NewDiscovery(tc.config, nil, prometheus.NewRegistry())
+ require.NoError(t, err)
sd.lookupFn = tc.lookup
tgs, err := sd.refresh(context.Background())
diff --git a/discovery/eureka/client_test.go b/discovery/eureka/client_test.go
index f1451c3a9e..83f6fd5ff1 100644
--- a/discovery/eureka/client_test.go
+++ b/discovery/eureka/client_test.go
@@ -184,17 +184,17 @@ func TestFetchApps(t *testing.T) {
apps, err := fetchApps(context.TODO(), ts.URL, &http.Client{})
require.NoError(t, err)
- require.Equal(t, len(apps.Applications), 2)
- require.Equal(t, apps.Applications[0].Name, "CONFIG-SERVICE")
- require.Equal(t, apps.Applications[1].Name, "META-SERVICE")
+ require.Len(t, apps.Applications, 2)
+ require.Equal(t, "CONFIG-SERVICE", apps.Applications[0].Name)
+ require.Equal(t, "META-SERVICE", apps.Applications[1].Name)
- require.Equal(t, len(apps.Applications[1].Instances), 2)
- require.Equal(t, apps.Applications[1].Instances[0].InstanceID, "meta-service002.test.com:meta-service:8080")
- require.Equal(t, apps.Applications[1].Instances[0].Metadata.Items[0].XMLName.Local, "project")
- require.Equal(t, apps.Applications[1].Instances[0].Metadata.Items[0].Content, "meta-service")
- require.Equal(t, apps.Applications[1].Instances[0].Metadata.Items[1].XMLName.Local, "management.port")
- require.Equal(t, apps.Applications[1].Instances[0].Metadata.Items[1].Content, "8090")
- require.Equal(t, apps.Applications[1].Instances[1].InstanceID, "meta-service001.test.com:meta-service:8080")
+ require.Len(t, apps.Applications[1].Instances, 2)
+ require.Equal(t, "meta-service002.test.com:meta-service:8080", apps.Applications[1].Instances[0].InstanceID)
+ require.Equal(t, "project", apps.Applications[1].Instances[0].Metadata.Items[0].XMLName.Local)
+ require.Equal(t, "meta-service", apps.Applications[1].Instances[0].Metadata.Items[0].Content)
+ require.Equal(t, "management.port", apps.Applications[1].Instances[0].Metadata.Items[1].XMLName.Local)
+ require.Equal(t, "8090", apps.Applications[1].Instances[0].Metadata.Items[1].Content)
+ require.Equal(t, "meta-service001.test.com:meta-service:8080", apps.Applications[1].Instances[1].InstanceID)
}
func Test500ErrorHttpResponse(t *testing.T) {
diff --git a/discovery/eureka/eureka.go b/discovery/eureka/eureka.go
index 5d9d8d552d..d3e4084e56 100644
--- a/discovery/eureka/eureka.go
+++ b/discovery/eureka/eureka.go
@@ -23,6 +23,7 @@ import (
"time"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -80,7 +81,7 @@ func (*SDConfig) Name() string { return "eureka" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger)
+ return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -117,7 +118,7 @@ type Discovery struct {
}
// NewDiscovery creates a new Eureka discovery for the given role.
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "eureka_sd")
if err != nil {
return nil, err
@@ -128,10 +129,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
server: conf.Server,
}
d.Discovery = refresh.NewDiscovery(
- logger,
- "eureka",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "eureka",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ },
)
return d, nil
}
diff --git a/discovery/eureka/eureka_test.go b/discovery/eureka/eureka_test.go
index 0641aa7bf2..1fe3c710e1 100644
--- a/discovery/eureka/eureka_test.go
+++ b/discovery/eureka/eureka_test.go
@@ -20,6 +20,7 @@ import (
"net/http/httptest"
"testing"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@@ -35,7 +36,7 @@ func testUpdateServices(respHandler http.HandlerFunc) ([]*targetgroup.Group, err
Server: ts.URL,
}
- md, err := NewDiscovery(&conf, nil)
+ md, err := NewDiscovery(&conf, nil, prometheus.NewRegistry())
if err != nil {
return nil, err
}
@@ -55,7 +56,7 @@ func TestEurekaSDHandleError(t *testing.T) {
tgs, err := testUpdateServices(respHandler)
require.EqualError(t, err, errTesting)
- require.Equal(t, len(tgs), 0)
+ require.Empty(t, tgs)
}
func TestEurekaSDEmptyList(t *testing.T) {
@@ -72,7 +73,7 @@ func TestEurekaSDEmptyList(t *testing.T) {
)
tgs, err := testUpdateServices(respHandler)
require.NoError(t, err)
- require.Equal(t, len(tgs), 1)
+ require.Len(t, tgs, 1)
}
func TestEurekaSDSendGroup(t *testing.T) {
@@ -232,11 +233,11 @@ func TestEurekaSDSendGroup(t *testing.T) {
tgs, err := testUpdateServices(respHandler)
require.NoError(t, err)
- require.Equal(t, len(tgs), 1)
+ require.Len(t, tgs, 1)
tg := tgs[0]
- require.Equal(t, tg.Source, "eureka")
- require.Equal(t, len(tg.Targets), 4)
+ require.Equal(t, "eureka", tg.Source)
+ require.Len(t, tg.Targets, 4)
tgt := tg.Targets[0]
require.Equal(t, tgt[model.AddressLabel], model.LabelValue("config-service001.test.com:8080"))
diff --git a/discovery/file/file.go b/discovery/file/file.go
index 60b63350f5..ef6ed1f5ee 100644
--- a/discovery/file/file.go
+++ b/discovery/file/file.go
@@ -39,24 +39,6 @@ import (
)
var (
- fileSDReadErrorsCount = prometheus.NewCounter(
- prometheus.CounterOpts{
- Name: "prometheus_sd_file_read_errors_total",
- Help: "The number of File-SD read errors.",
- })
- fileSDScanDuration = prometheus.NewSummary(
- prometheus.SummaryOpts{
- Name: "prometheus_sd_file_scan_duration_seconds",
- Help: "The duration of the File-SD scan in seconds.",
- Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
- })
- fileSDTimeStamp = NewTimestampCollector()
- fileWatcherErrorsCount = prometheus.NewCounter(
- prometheus.CounterOpts{
- Name: "prometheus_sd_file_watcher_errors_total",
- Help: "The number of File-SD errors caused by filesystem watch failures.",
- })
-
patFileSDName = regexp.MustCompile(`^[^*]*(\*[^/]*)?\.(json|yml|yaml|JSON|YML|YAML)$`)
// DefaultSDConfig is the default file SD configuration.
@@ -67,7 +49,6 @@ var (
func init() {
discovery.RegisterConfig(&SDConfig{})
- prometheus.MustRegister(fileSDReadErrorsCount, fileSDScanDuration, fileSDTimeStamp, fileWatcherErrorsCount)
}
// SDConfig is the configuration for file based discovery.
@@ -81,7 +62,7 @@ func (*SDConfig) Name() string { return "file" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger), nil
+ return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -187,10 +168,17 @@ type Discovery struct {
// This is used to detect deleted target groups.
lastRefresh map[string]int
logger log.Logger
+
+ fileSDReadErrorsCount prometheus.Counter
+ fileSDScanDuration prometheus.Summary
+ fileWatcherErrorsCount prometheus.Counter
+ fileSDTimeStamp *TimestampCollector
+
+ metricRegisterer discovery.MetricRegisterer
}
// NewDiscovery returns a new file discovery for the given paths.
-func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery {
+func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@@ -200,9 +188,35 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery {
interval: time.Duration(conf.RefreshInterval),
timestamps: make(map[string]float64),
logger: logger,
+ fileSDReadErrorsCount: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Name: "prometheus_sd_file_read_errors_total",
+ Help: "The number of File-SD read errors.",
+ }),
+ fileSDScanDuration: prometheus.NewSummary(
+ prometheus.SummaryOpts{
+ Name: "prometheus_sd_file_scan_duration_seconds",
+ Help: "The duration of the File-SD scan in seconds.",
+ Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
+ }),
+ fileWatcherErrorsCount: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Name: "prometheus_sd_file_watcher_errors_total",
+ Help: "The number of File-SD errors caused by filesystem watch failures.",
+ }),
+ fileSDTimeStamp: NewTimestampCollector(),
}
- fileSDTimeStamp.addDiscoverer(disc)
- return disc
+
+ disc.fileSDTimeStamp.addDiscoverer(disc)
+
+ disc.metricRegisterer = discovery.NewMetricRegisterer(reg, []prometheus.Collector{
+ disc.fileSDReadErrorsCount,
+ disc.fileSDScanDuration,
+ disc.fileWatcherErrorsCount,
+ disc.fileSDTimeStamp,
+ })
+
+ return disc, nil
}
// listFiles returns a list of all files that match the configured patterns.
@@ -239,10 +253,17 @@ func (d *Discovery) watchFiles() {
// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
+ err := d.metricRegisterer.RegisterMetrics()
+ if err != nil {
+ level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
+ return
+ }
+ defer d.metricRegisterer.UnregisterMetrics()
+
watcher, err := fsnotify.NewWatcher()
if err != nil {
level.Error(d.logger).Log("msg", "Error adding file watcher", "err", err)
- fileWatcherErrorsCount.Inc()
+ d.fileWatcherErrorsCount.Inc()
return
}
d.watcher = watcher
@@ -306,7 +327,7 @@ func (d *Discovery) stop() {
done := make(chan struct{})
defer close(done)
- fileSDTimeStamp.removeDiscoverer(d)
+ d.fileSDTimeStamp.removeDiscoverer(d)
// Closing the watcher will deadlock unless all events and errors are drained.
go func() {
@@ -332,13 +353,13 @@ func (d *Discovery) stop() {
func (d *Discovery) refresh(ctx context.Context, ch chan<- []*targetgroup.Group) {
t0 := time.Now()
defer func() {
- fileSDScanDuration.Observe(time.Since(t0).Seconds())
+ d.fileSDScanDuration.Observe(time.Since(t0).Seconds())
}()
ref := map[string]int{}
for _, p := range d.listFiles() {
tgroups, err := d.readFile(p)
if err != nil {
- fileSDReadErrorsCount.Inc()
+ d.fileSDReadErrorsCount.Inc()
level.Error(d.logger).Log("msg", "Error reading file", "path", p, "err", err)
// Prevent deletion down below.
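The register-in-Run pattern above means a file-SD instance claims its metric names only while it is actually running and releases them on shutdown, so two sequential instances can share one registry; condensed, the lifecycle is:

    func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
        // Claim the metric names for the lifetime of this run only.
        if err := d.metricRegisterer.RegisterMetrics(); err != nil {
            level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
            return
        }
        defer d.metricRegisterer.UnregisterMetrics()
        // ... watch files and push target groups to ch ...
    }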
diff --git a/discovery/file/file_test.go b/discovery/file/file_test.go
index 76e1cebed9..c138fc8a95 100644
--- a/discovery/file/file_test.go
+++ b/discovery/file/file_test.go
@@ -24,6 +24,7 @@ import (
"testing"
"time"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
@@ -143,7 +144,7 @@ func (t *testRunner) run(files ...string) {
ctx, cancel := context.WithCancel(context.Background())
t.cancelSD = cancel
go func() {
- NewDiscovery(
+ d, err := NewDiscovery(
&SDConfig{
Files: files,
// Setting a high refresh interval to make sure that the tests only
@@ -151,7 +152,11 @@ func (t *testRunner) run(files ...string) {
RefreshInterval: model.Duration(1 * time.Hour),
},
nil,
- ).Run(ctx, t.ch)
+ prometheus.NewRegistry(),
+ )
+ require.NoError(t, err)
+
+ d.Run(ctx, t.ch)
}()
}
diff --git a/discovery/gce/gce.go b/discovery/gce/gce.go
index fa05fbbf38..21a95ee39e 100644
--- a/discovery/gce/gce.go
+++ b/discovery/gce/gce.go
@@ -23,6 +23,7 @@ import (
"time"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"golang.org/x/oauth2/google"
"google.golang.org/api/compute/v1"
@@ -86,7 +87,7 @@ func (*SDConfig) Name() string { return "gce" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(*c, opts.Logger)
+ return NewDiscovery(*c, opts.Logger, opts.Registerer)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
@@ -121,7 +122,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
+func NewDiscovery(conf SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
d := &Discovery{
project: conf.Project,
zone: conf.Zone,
@@ -141,10 +142,13 @@ func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
d.isvc = compute.NewInstancesService(d.svc)
d.Discovery = refresh.NewDiscovery(
- logger,
- "gce",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "gce",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ },
)
return d, nil
}
diff --git a/discovery/hetzner/hcloud_test.go b/discovery/hetzner/hcloud_test.go
index a4f19cfddd..10b799037a 100644
--- a/discovery/hetzner/hcloud_test.go
+++ b/discovery/hetzner/hcloud_test.go
@@ -48,12 +48,12 @@ func TestHCloudSDRefresh(t *testing.T) {
targetGroups, err := d.refresh(context.Background())
require.NoError(t, err)
- require.Equal(t, 1, len(targetGroups))
+ require.Len(t, targetGroups, 1)
targetGroup := targetGroups[0]
require.NotNil(t, targetGroup, "targetGroup should not be nil")
require.NotNil(t, targetGroup.Targets, "targetGroup.targets should not be nil")
- require.Equal(t, 3, len(targetGroup.Targets))
+ require.Len(t, targetGroup.Targets, 3)
for i, labelSet := range []model.LabelSet{
{
diff --git a/discovery/hetzner/hetzner.go b/discovery/hetzner/hetzner.go
index c3f7ec39c3..9d3e6aa65d 100644
--- a/discovery/hetzner/hetzner.go
+++ b/discovery/hetzner/hetzner.go
@@ -21,6 +21,7 @@ import (
"github.com/go-kit/log"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -67,7 +68,7 @@ func (*SDConfig) Name() string { return "hetzner" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger)
+ return NewDiscovery(c, opts.Logger, opts.Registerer)
}
type refresher interface {
@@ -127,17 +128,20 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) {
r, err := newRefresher(conf, logger)
if err != nil {
return nil, err
}
return refresh.NewDiscovery(
- logger,
- "hetzner",
- time.Duration(conf.RefreshInterval),
- r.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "hetzner",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: r.refresh,
+ Registry: reg,
+ },
), nil
}
diff --git a/discovery/hetzner/robot_test.go b/discovery/hetzner/robot_test.go
index f78a0bbda1..abee5fea90 100644
--- a/discovery/hetzner/robot_test.go
+++ b/discovery/hetzner/robot_test.go
@@ -47,12 +47,12 @@ func TestRobotSDRefresh(t *testing.T) {
targetGroups, err := d.refresh(context.Background())
require.NoError(t, err)
- require.Equal(t, 1, len(targetGroups))
+ require.Len(t, targetGroups, 1)
targetGroup := targetGroups[0]
require.NotNil(t, targetGroup, "targetGroup should not be nil")
require.NotNil(t, targetGroup.Targets, "targetGroup.targets should not be nil")
- require.Equal(t, 2, len(targetGroup.Targets))
+ require.Len(t, targetGroup.Targets, 2)
for i, labelSet := range []model.LabelSet{
{
@@ -98,5 +98,5 @@ func TestRobotSDRefreshHandleError(t *testing.T) {
require.Error(t, err)
require.Equal(t, "non 2xx status '401' response during hetzner service discovery with role robot", err.Error())
- require.Equal(t, 0, len(targetGroups))
+ require.Empty(t, targetGroups)
}
diff --git a/discovery/http/http.go b/discovery/http/http.go
index 2980d7efda..c12fdb26d2 100644
--- a/discovery/http/http.go
+++ b/discovery/http/http.go
@@ -45,17 +45,10 @@ var (
}
userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`)
-
- failuresCount = prometheus.NewCounter(
- prometheus.CounterOpts{
- Name: "prometheus_sd_http_failures_total",
- Help: "Number of HTTP service discovery refresh failures.",
- })
)
func init() {
discovery.RegisterConfig(&SDConfig{})
- prometheus.MustRegister(failuresCount)
}
// SDConfig is the configuration for HTTP based discovery.
@@ -70,7 +63,7 @@ func (*SDConfig) Name() string { return "http" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger, opts.HTTPClientOptions)
+ return NewDiscovery(c, opts.Logger, opts.HTTPClientOptions, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -112,10 +105,11 @@ type Discovery struct {
client *http.Client
refreshInterval time.Duration
tgLastLength int
+ failuresCount prometheus.Counter
}
// NewDiscovery returns a new HTTP discovery for the given config.
-func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPClientOption) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPClientOption, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@@ -130,13 +124,22 @@ func NewDiscovery(conf *SDConfig, logger log.Logger, clientOpts []config.HTTPCli
url: conf.URL,
client: client,
refreshInterval: time.Duration(conf.RefreshInterval), // Stored to be sent as headers.
+ failuresCount: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Name: "prometheus_sd_http_failures_total",
+ Help: "Number of HTTP service discovery refresh failures.",
+ }),
}
d.Discovery = refresh.NewDiscovery(
- logger,
- "http",
- time.Duration(conf.RefreshInterval),
- d.Refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "http",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.Refresh,
+ Registry: reg,
+ Metrics: []prometheus.Collector{d.failuresCount},
+ },
)
return d, nil
}
@@ -152,7 +155,7 @@ func (d *Discovery) Refresh(ctx context.Context) ([]*targetgroup.Group, error) {
resp, err := d.client.Do(req.WithContext(ctx))
if err != nil {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, err
}
defer func() {
@@ -161,31 +164,31 @@ func (d *Discovery) Refresh(ctx context.Context) ([]*targetgroup.Group, error) {
}()
if resp.StatusCode != http.StatusOK {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, fmt.Errorf("server returned HTTP status %s", resp.Status)
}
if !matchContentType.MatchString(strings.TrimSpace(resp.Header.Get("Content-Type"))) {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, fmt.Errorf("unsupported content type %q", resp.Header.Get("Content-Type"))
}
b, err := io.ReadAll(resp.Body)
if err != nil {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, err
}
var targetGroups []*targetgroup.Group
if err := json.Unmarshal(b, &targetGroups); err != nil {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, err
}
for i, tg := range targetGroups {
if tg == nil {
- failuresCount.Inc()
+ d.failuresCount.Inc()
err = errors.New("nil target group item found")
return nil, err
}
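Because failuresCount is now owned by each Discovery, the test helper below stops compensating for state leaked by earlier tests; under the same assumption, client_golang's testutil package would shorten the assertion further (an alternative, not what this patch uses):

    import "github.com/prometheus/client_golang/prometheus/testutil"

    // testutil.ToFloat64 collects the single metric and returns its current value.
    require.Equal(t, 1.0, testutil.ToFloat64(d.failuresCount))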
diff --git a/discovery/http/http_test.go b/discovery/http/http_test.go
index a284e7f361..164719e900 100644
--- a/discovery/http/http_test.go
+++ b/discovery/http/http_test.go
@@ -41,7 +41,7 @@ func TestHTTPValidRefresh(t *testing.T) {
RefreshInterval: model.Duration(30 * time.Second),
}
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil)
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
@@ -62,8 +62,8 @@ func TestHTTPValidRefresh(t *testing.T) {
Source: urlSource(ts.URL+"/http_sd.good.json", 0),
},
}
- require.Equal(t, tgs, expectedTargets)
- require.Equal(t, 0.0, getFailureCount())
+ require.Equal(t, expectedTargets, tgs)
+ require.Equal(t, 0.0, getFailureCount(d.failuresCount))
}
func TestHTTPInvalidCode(t *testing.T) {
@@ -79,13 +79,13 @@ func TestHTTPInvalidCode(t *testing.T) {
RefreshInterval: model.Duration(30 * time.Second),
}
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil)
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
_, err = d.Refresh(ctx)
require.EqualError(t, err, "server returned HTTP status 400 Bad Request")
- require.Equal(t, 1.0, getFailureCount())
+ require.Equal(t, 1.0, getFailureCount(d.failuresCount))
}
func TestHTTPInvalidFormat(t *testing.T) {
@@ -101,18 +101,16 @@ func TestHTTPInvalidFormat(t *testing.T) {
RefreshInterval: model.Duration(30 * time.Second),
}
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil)
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
_, err = d.Refresh(ctx)
require.EqualError(t, err, `unsupported content type "text/plain; charset=utf-8"`)
- require.Equal(t, 1.0, getFailureCount())
+ require.Equal(t, 1.0, getFailureCount(d.failuresCount))
}
-var lastFailureCount float64
-
-func getFailureCount() float64 {
+func getFailureCount(failuresCount prometheus.Counter) float64 {
failureChan := make(chan prometheus.Metric)
go func() {
@@ -129,10 +127,7 @@ func getFailureCount() float64 {
metric.Write(&counter)
}
- // account for failures in prior tests
- count := *counter.Counter.Value - lastFailureCount
- lastFailureCount = *counter.Counter.Value
- return count
+ return *counter.Counter.Value
}
func TestContentTypeRegex(t *testing.T) {
@@ -417,7 +412,7 @@ func TestSourceDisappeared(t *testing.T) {
URL: ts.URL,
RefreshInterval: model.Duration(1 * time.Second),
}
- d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil)
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), nil, prometheus.NewRegistry())
require.NoError(t, err)
for _, test := range cases {
ctx := context.Background()
diff --git a/discovery/ionos/ionos.go b/discovery/ionos/ionos.go
index 3afed8d799..36623745ab 100644
--- a/discovery/ionos/ionos.go
+++ b/discovery/ionos/ionos.go
@@ -23,6 +23,8 @@ import (
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
+
+ "github.com/prometheus/client_golang/prometheus"
)
const (
@@ -41,7 +43,7 @@ func init() {
type Discovery struct{}
// NewDiscovery returns a new refresh.Discovery for IONOS Cloud.
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) {
if conf.ionosEndpoint == "" {
conf.ionosEndpoint = "https://api.ionos.com"
}
@@ -52,10 +54,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error)
}
return refresh.NewDiscovery(
- logger,
- "ionos",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "ionos",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ },
), nil
}
@@ -86,7 +91,7 @@ func (c SDConfig) Name() string {
// NewDiscoverer returns a new discovery.Discoverer for IONOS Cloud.
func (c SDConfig) NewDiscoverer(options discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(&c, options.Logger)
+ return NewDiscovery(&c, options.Logger, options.Registerer)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
diff --git a/discovery/ionos/server_test.go b/discovery/ionos/server_test.go
index 92f2a96f9d..30f358e325 100644
--- a/discovery/ionos/server_test.go
+++ b/discovery/ionos/server_test.go
@@ -48,12 +48,12 @@ func TestIONOSServerRefresh(t *testing.T) {
tgs, err := d.refresh(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Equal(t, 2, len(tg.Targets))
+ require.Len(t, tg.Targets, 2)
for i, lbls := range []model.LabelSet{
{
diff --git a/discovery/kubernetes/endpoints.go b/discovery/kubernetes/endpoints.go
index 708e229a2f..512d775523 100644
--- a/discovery/kubernetes/endpoints.go
+++ b/discovery/kubernetes/endpoints.go
@@ -22,6 +22,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
@@ -30,12 +31,6 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
)
-var (
- epAddCount = eventCount.WithLabelValues("endpoints", "add")
- epUpdateCount = eventCount.WithLabelValues("endpoints", "update")
- epDeleteCount = eventCount.WithLabelValues("endpoints", "delete")
-)
-
// Endpoints discovers new endpoint targets.
type Endpoints struct {
logger log.Logger
@@ -54,10 +49,19 @@ type Endpoints struct {
}
// NewEndpoints returns a new endpoints discovery.
-func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer) *Endpoints {
+func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *Endpoints {
if l == nil {
l = log.NewNopLogger()
}
+
+ epAddCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleAdd)
+ epUpdateCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleUpdate)
+ epDeleteCount := eventCount.WithLabelValues(RoleEndpoint.String(), MetricLabelRoleDelete)
+
+ svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd)
+ svcUpdateCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleUpdate)
+ svcDeleteCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleDelete)
+
e := &Endpoints{
logger: l,
endpointsInf: eps,
@@ -68,7 +72,7 @@ func NewEndpoints(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node ca
podStore: pod.GetStore(),
nodeInf: node,
withNodeMetadata: node != nil,
- queue: workqueue.NewNamed("endpoints"),
+ queue: workqueue.NewNamed(RoleEndpoint.String()),
}
_, err := e.endpointsInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
diff --git a/discovery/kubernetes/endpointslice.go b/discovery/kubernetes/endpointslice.go
index a168623804..21095df4af 100644
--- a/discovery/kubernetes/endpointslice.go
+++ b/discovery/kubernetes/endpointslice.go
@@ -22,6 +22,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
v1 "k8s.io/api/discovery/v1"
@@ -33,12 +34,6 @@ import (
"github.com/prometheus/prometheus/util/strutil"
)
-var (
- epslAddCount = eventCount.WithLabelValues("endpointslice", "add")
- epslUpdateCount = eventCount.WithLabelValues("endpointslice", "update")
- epslDeleteCount = eventCount.WithLabelValues("endpointslice", "delete")
-)
-
// EndpointSlice discovers new endpoint targets.
type EndpointSlice struct {
logger log.Logger
@@ -57,10 +52,19 @@ type EndpointSlice struct {
}
// NewEndpointSlice returns a new endpointslice discovery.
-func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer) *EndpointSlice {
+func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, node cache.SharedInformer, eventCount *prometheus.CounterVec) *EndpointSlice {
if l == nil {
l = log.NewNopLogger()
}
+
+ epslAddCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleAdd)
+ epslUpdateCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleUpdate)
+ epslDeleteCount := eventCount.WithLabelValues(RoleEndpointSlice.String(), MetricLabelRoleDelete)
+
+ svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd)
+ svcUpdateCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleUpdate)
+ svcDeleteCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleDelete)
+
e := &EndpointSlice{
logger: l,
endpointSliceInf: eps,
@@ -71,7 +75,7 @@ func NewEndpointSlice(l log.Logger, eps cache.SharedIndexInformer, svc, pod, nod
podStore: pod.GetStore(),
nodeInf: node,
withNodeMetadata: node != nil,
- queue: workqueue.NewNamed("endpointSlice"),
+ queue: workqueue.NewNamed(RoleEndpointSlice.String()),
}
_, err := e.endpointSliceInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
diff --git a/discovery/kubernetes/endpointslice_adaptor_test.go b/discovery/kubernetes/endpointslice_adaptor_test.go
index e564910936..1ee3337193 100644
--- a/discovery/kubernetes/endpointslice_adaptor_test.go
+++ b/discovery/kubernetes/endpointslice_adaptor_test.go
@@ -29,7 +29,7 @@ func Test_EndpointSliceAdaptor_v1(t *testing.T) {
require.Equal(t, endpointSlice.ObjectMeta.Namespace, adaptor.namespace())
require.Equal(t, endpointSlice.AddressType, v1.AddressType(adaptor.addressType()))
require.Equal(t, endpointSlice.Labels, adaptor.labels())
- require.Equal(t, endpointSlice.Labels[v1.LabelServiceName], "testendpoints")
+ require.Equal(t, "testendpoints", endpointSlice.Labels[v1.LabelServiceName])
for i, endpointAdaptor := range adaptor.endpoints() {
require.Equal(t, endpointSlice.Endpoints[i].Addresses, endpointAdaptor.addresses())
@@ -57,7 +57,7 @@ func Test_EndpointSliceAdaptor_v1beta1(t *testing.T) {
require.Equal(t, endpointSlice.ObjectMeta.Namespace, adaptor.namespace())
require.Equal(t, endpointSlice.AddressType, v1beta1.AddressType(adaptor.addressType()))
require.Equal(t, endpointSlice.Labels, adaptor.labels())
- require.Equal(t, endpointSlice.Labels[v1beta1.LabelServiceName], "testendpoints")
+ require.Equal(t, "testendpoints", endpointSlice.Labels[v1beta1.LabelServiceName])
for i, endpointAdaptor := range adaptor.endpoints() {
require.Equal(t, endpointSlice.Endpoints[i].Addresses, endpointAdaptor.addresses())
diff --git a/discovery/kubernetes/ingress.go b/discovery/kubernetes/ingress.go
index fee4cc7207..7b6366b257 100644
--- a/discovery/kubernetes/ingress.go
+++ b/discovery/kubernetes/ingress.go
@@ -21,6 +21,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
v1 "k8s.io/api/networking/v1"
"k8s.io/api/networking/v1beta1"
@@ -30,12 +31,6 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
)
-var (
- ingressAddCount = eventCount.WithLabelValues("ingress", "add")
- ingressUpdateCount = eventCount.WithLabelValues("ingress", "update")
- ingressDeleteCount = eventCount.WithLabelValues("ingress", "delete")
-)
-
// Ingress implements discovery of Kubernetes ingress.
type Ingress struct {
logger log.Logger
@@ -45,8 +40,18 @@ type Ingress struct {
}
// NewIngress returns a new ingress discovery.
-func NewIngress(l log.Logger, inf cache.SharedInformer) *Ingress {
- s := &Ingress{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("ingress")}
+func NewIngress(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Ingress {
+ ingressAddCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleAdd)
+ ingressUpdateCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleUpdate)
+ ingressDeleteCount := eventCount.WithLabelValues(RoleIngress.String(), MetricLabelRoleDelete)
+
+ s := &Ingress{
+ logger: l,
+ informer: inf,
+ store: inf.GetStore(),
+ queue: workqueue.NewNamed(RoleIngress.String()),
+ }
+
_, err := s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
ingressAddCount.Inc()
diff --git a/discovery/kubernetes/kubernetes.go b/discovery/kubernetes/kubernetes.go
index 7bd96652f9..5c5f3dfb62 100644
--- a/discovery/kubernetes/kubernetes.go
+++ b/discovery/kubernetes/kubernetes.go
@@ -58,24 +58,14 @@ import (
const (
- metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_"
- namespaceLabel = metaLabelPrefix + "namespace"
- metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_"
- namespaceLabel = metaLabelPrefix + "namespace"
- metricsNamespace = "prometheus_sd_kubernetes"
- presentValue = model.LabelValue("true")
+ metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_"
+ namespaceLabel = metaLabelPrefix + "namespace"
+ presentValue = model.LabelValue("true")
)
var (
// HTTP header.
userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
- // Custom events metric.
- eventCount = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Namespace: metricsNamespace,
- Name: "events_total",
- Help: "The number of Kubernetes events handled.",
- },
- []string{"role", "event"},
- )
// DefaultSDConfig is the default Kubernetes SD configuration.
DefaultSDConfig = SDConfig{
HTTPClientConfig: config.DefaultHTTPClientConfig,
@@ -84,15 +74,6 @@ var (
func init() {
discovery.RegisterConfig(&SDConfig{})
- prometheus.MustRegister(eventCount)
- // Initialize metric vectors.
- for _, role := range []string{"endpointslice", "endpoints", "node", "pod", "service", "ingress"} {
- for _, evt := range []string{"add", "delete", "update"} {
- eventCount.WithLabelValues(role, evt)
- }
- }
- (&clientGoRequestMetricAdapter{}).Register(prometheus.DefaultRegisterer)
- (&clientGoWorkqueueMetricsProvider{}).Register(prometheus.DefaultRegisterer)
}
// Role is role of the service in Kubernetes.
@@ -121,6 +102,16 @@ func (c *Role) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
}
+func (c Role) String() string {
+ return string(c)
+}
+
+const (
+ MetricLabelRoleAdd = "add"
+ MetricLabelRoleDelete = "delete"
+ MetricLabelRoleUpdate = "update"
+)
+
// SDConfig is the configuration for Kubernetes service discovery.
type SDConfig struct {
APIServer config.URL `yaml:"api_server,omitempty"`
@@ -137,7 +128,7 @@ func (*SDConfig) Name() string { return "kubernetes" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return New(opts.Logger, c)
+ return New(opts.Logger, opts.Registerer, c)
}
// SetDirectory joins any relative file paths with dir.
@@ -274,6 +265,8 @@ type Discovery struct {
selectors roleSelector
ownNamespace string
attachMetadata AttachMetadataConfig
+ eventCount *prometheus.CounterVec
+ metricRegisterer discovery.MetricRegisterer
}
func (d *Discovery) getNamespaces() []string {
@@ -292,7 +285,7 @@ func (d *Discovery) getNamespaces() []string {
}
// New creates a new Kubernetes discovery for the given role.
-func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
+func New(l log.Logger, reg prometheus.Registerer, conf *SDConfig) (*Discovery, error) {
if l == nil {
l = log.NewNopLogger()
}
@@ -346,7 +339,7 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
return nil, err
}
- return &Discovery{
+ d := &Discovery{
client: c,
logger: l,
role: conf.Role,
@@ -355,7 +348,37 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
selectors: mapSelector(conf.Selectors),
ownNamespace: ownNamespace,
attachMetadata: conf.AttachMetadata,
- }, nil
+ eventCount: prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: discovery.KubernetesMetricsNamespace,
+ Name: "events_total",
+ Help: "The number of Kubernetes events handled.",
+ },
+ []string{"role", "event"},
+ ),
+ }
+
+ d.metricRegisterer = discovery.NewMetricRegisterer(reg, []prometheus.Collector{d.eventCount})
+
+ // Initialize metric vectors.
+ for _, role := range []string{
+ RoleEndpointSlice.String(),
+ RoleEndpoint.String(),
+ RoleNode.String(),
+ RolePod.String(),
+ RoleService.String(),
+ RoleIngress.String(),
+ } {
+ for _, evt := range []string{
+ MetricLabelRoleAdd,
+ MetricLabelRoleDelete,
+ MetricLabelRoleUpdate,
+ } {
+ d.eventCount.WithLabelValues(role, evt)
+ }
+ }
+
+ return d, nil
}
func mapSelector(rawSelector []SelectorConfig) roleSelector {
@@ -391,6 +414,14 @@ const resyncDisabled = 0
// Run implements the discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
d.Lock()
+
+ err := d.metricRegisterer.RegisterMetrics()
+ if err != nil {
+ level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
+ return
+ }
+ defer d.metricRegisterer.UnregisterMetrics()
+
namespaces := d.getNamespaces()
switch d.role {
@@ -482,6 +513,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
cache.NewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
cache.NewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled),
nodeInf,
+ d.eventCount,
)
d.discoverers = append(d.discoverers, eps)
go eps.endpointSliceInf.Run(ctx.Done())
@@ -541,6 +573,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
cache.NewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
cache.NewSharedInformer(plw, &apiv1.Pod{}, resyncDisabled),
nodeInf,
+ d.eventCount,
)
d.discoverers = append(d.discoverers, eps)
go eps.endpointsInf.Run(ctx.Done())
@@ -572,6 +605,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
log.With(d.logger, "role", "pod"),
d.newPodsByNodeInformer(plw),
nodeInformer,
+ d.eventCount,
)
d.discoverers = append(d.discoverers, pod)
go pod.podInf.Run(ctx.Done())
@@ -594,6 +628,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
svc := NewService(
log.With(d.logger, "role", "service"),
cache.NewSharedInformer(slw, &apiv1.Service{}, resyncDisabled),
+ d.eventCount,
)
d.discoverers = append(d.discoverers, svc)
go svc.informer.Run(ctx.Done())
@@ -651,13 +686,14 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
ingress := NewIngress(
log.With(d.logger, "role", "ingress"),
informer,
+ d.eventCount,
)
d.discoverers = append(d.discoverers, ingress)
go ingress.informer.Run(ctx.Done())
}
case RoleNode:
nodeInformer := d.newNodeInformer(ctx)
- node := NewNode(log.With(d.logger, "role", "node"), nodeInformer)
+ node := NewNode(log.With(d.logger, "role", "node"), nodeInformer, d.eventCount)
d.discoverers = append(d.discoverers, node)
go node.informer.Run(ctx.Done())
default:
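All six role components now derive their counters from the single events_total vector owned by the Discovery, so registration happens once; assuming discovery.KubernetesMetricsNamespace keeps the previous "prometheus_sd_kubernetes" value, an increment in a pod handler resolves as:

    podAdds := d.eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleAdd)
    podAdds.Inc() // prometheus_sd_kubernetes_events_total{role="pod",event="add"}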
diff --git a/discovery/kubernetes/kubernetes_test.go b/discovery/kubernetes/kubernetes_test.go
index d0ed4c6ca1..71c937e944 100644
--- a/discovery/kubernetes/kubernetes_test.go
+++ b/discovery/kubernetes/kubernetes_test.go
@@ -29,6 +29,8 @@ import (
"k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/tools/cache"
+ "github.com/prometheus/client_golang/prometheus"
+
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/testutil"
@@ -49,13 +51,25 @@ func makeDiscoveryWithVersion(role Role, nsDiscovery NamespaceDiscovery, k8sVer
fakeDiscovery, _ := clientset.Discovery().(*fakediscovery.FakeDiscovery)
fakeDiscovery.FakedServerVersion = &version.Info{GitVersion: k8sVer}
- return &Discovery{
+ d := &Discovery{
client: clientset,
logger: log.NewNopLogger(),
role: role,
namespaceDiscovery: &nsDiscovery,
ownNamespace: "own-ns",
- }, clientset
+ eventCount: prometheus.NewCounterVec(
+ prometheus.CounterOpts{
+ Namespace: discovery.KubernetesMetricsNamespace,
+ Name: "events_total",
+ Help: "The number of Kubernetes events handled.",
+ },
+ []string{"role", "event"},
+ ),
+ }
+
+ d.metricRegisterer = discovery.NewMetricRegisterer(prometheus.NewRegistry(), []prometheus.Collector{d.eventCount})
+
+ return d, clientset
}
// makeDiscoveryWithMetadata creates a kubernetes.Discovery instance with the specified metadata config.
diff --git a/discovery/kubernetes/node.go b/discovery/kubernetes/node.go
index b188a3ceb1..74d87e22c4 100644
--- a/discovery/kubernetes/node.go
+++ b/discovery/kubernetes/node.go
@@ -22,6 +22,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
@@ -35,12 +36,6 @@ const (
NodeLegacyHostIP = "LegacyHostIP"
)
-var (
- nodeAddCount = eventCount.WithLabelValues("node", "add")
- nodeUpdateCount = eventCount.WithLabelValues("node", "update")
- nodeDeleteCount = eventCount.WithLabelValues("node", "delete")
-)
-
// Node discovers Kubernetes nodes.
type Node struct {
logger log.Logger
@@ -50,11 +45,22 @@ type Node struct {
}
// NewNode returns a new node discovery.
-func NewNode(l log.Logger, inf cache.SharedInformer) *Node {
+func NewNode(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Node {
if l == nil {
l = log.NewNopLogger()
}
- n := &Node{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("node")}
+
+ nodeAddCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleAdd)
+ nodeUpdateCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleUpdate)
+ nodeDeleteCount := eventCount.WithLabelValues(RoleNode.String(), MetricLabelRoleDelete)
+
+ n := &Node{
+ logger: l,
+ informer: inf,
+ store: inf.GetStore(),
+ queue: workqueue.NewNamed(RoleNode.String()),
+ }
+
_, err := n.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
nodeAddCount.Inc()
diff --git a/discovery/kubernetes/pod.go b/discovery/kubernetes/pod.go
index 88da7bba69..615717c138 100644
--- a/discovery/kubernetes/pod.go
+++ b/discovery/kubernetes/pod.go
@@ -23,6 +23,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -34,12 +35,6 @@ import (
const nodeIndex = "node"
-var (
- podAddCount = eventCount.WithLabelValues("pod", "add")
- podUpdateCount = eventCount.WithLabelValues("pod", "update")
- podDeleteCount = eventCount.WithLabelValues("pod", "delete")
-)
-
// Pod discovers new pod targets.
type Pod struct {
podInf cache.SharedIndexInformer
@@ -51,18 +46,22 @@ type Pod struct {
}
// NewPod creates a new pod discovery.
-func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer) *Pod {
+func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer, eventCount *prometheus.CounterVec) *Pod {
if l == nil {
l = log.NewNopLogger()
}
+ podAddCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleAdd)
+ podDeleteCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleDelete)
+ podUpdateCount := eventCount.WithLabelValues(RolePod.String(), MetricLabelRoleUpdate)
+
p := &Pod{
podInf: pods,
nodeInf: nodes,
withNodeMetadata: nodes != nil,
store: pods.GetStore(),
logger: l,
- queue: workqueue.NewNamed("pod"),
+ queue: workqueue.NewNamed(RolePod.String()),
}
_, err := p.podInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
diff --git a/discovery/kubernetes/service.go b/discovery/kubernetes/service.go
index 9fcc6644c3..51204a5a1a 100644
--- a/discovery/kubernetes/service.go
+++ b/discovery/kubernetes/service.go
@@ -22,6 +22,7 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
@@ -30,12 +31,6 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
)
-var (
- svcAddCount = eventCount.WithLabelValues("service", "add")
- svcUpdateCount = eventCount.WithLabelValues("service", "update")
- svcDeleteCount = eventCount.WithLabelValues("service", "delete")
-)
-
// Service implements discovery of Kubernetes services.
type Service struct {
logger log.Logger
@@ -45,11 +40,22 @@ type Service struct {
}
// NewService returns a new service discovery.
-func NewService(l log.Logger, inf cache.SharedInformer) *Service {
+func NewService(l log.Logger, inf cache.SharedInformer, eventCount *prometheus.CounterVec) *Service {
if l == nil {
l = log.NewNopLogger()
}
- s := &Service{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("service")}
+
+ svcAddCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleAdd)
+ svcUpdateCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleUpdate)
+ svcDeleteCount := eventCount.WithLabelValues(RoleService.String(), MetricLabelRoleDelete)
+
+ s := &Service{
+ logger: l,
+ informer: inf,
+ store: inf.GetStore(),
+ queue: workqueue.NewNamed(RoleService.String()),
+ }
+
_, err := s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
svcAddCount.Inc()
diff --git a/discovery/legacymanager/manager.go b/discovery/legacymanager/manager.go
index 74c544e726..9c80f305a8 100644
--- a/discovery/legacymanager/manager.go
+++ b/discovery/legacymanager/manager.go
@@ -28,48 +28,6 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
)
-var (
- failedConfigs = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Name: "prometheus_sd_failed_configs",
- Help: "Current number of service discovery configurations that failed to load.",
- },
- []string{"name"},
- )
- discoveredTargets = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Name: "prometheus_sd_discovered_targets",
- Help: "Current number of discovered targets.",
- },
- []string{"name", "config"},
- )
- receivedUpdates = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Name: "prometheus_sd_received_updates_total",
- Help: "Total number of update events received from the SD providers.",
- },
- []string{"name"},
- )
- delayedUpdates = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Name: "prometheus_sd_updates_delayed_total",
- Help: "Total number of update events that couldn't be sent immediately.",
- },
- []string{"name"},
- )
- sentUpdates = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Name: "prometheus_sd_updates_total",
- Help: "Total number of update events sent to the SD consumers.",
- },
- []string{"name"},
- )
-)
-
-func RegisterMetrics() {
- prometheus.MustRegister(failedConfigs, discoveredTargets, receivedUpdates, delayedUpdates, sentUpdates)
-}
-
type poolKey struct {
setName string
provider string
@@ -84,7 +42,7 @@ type provider struct {
}
// NewManager is the Discovery Manager constructor.
-func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager)) *Manager {
+func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, options ...func(*Manager)) *Manager {
if logger == nil {
logger = log.NewNopLogger()
}
@@ -96,10 +54,21 @@ func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager
ctx: ctx,
updatert: 5 * time.Second,
triggerSend: make(chan struct{}, 1),
+ registerer: registerer,
}
for _, option := range options {
option(mgr)
}
+
+ // Register the metrics.
+ // We have to do this after setting all options, so that the name of the Manager is set.
+ if metrics, err := discovery.NewMetrics(registerer, mgr.name); err == nil {
+ mgr.metrics = metrics
+ } else {
+ level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err)
+ return nil
+ }
+
return mgr
}
@@ -135,6 +104,11 @@ type Manager struct {
// The triggerSend channel signals to the manager that new updates have been received from providers.
triggerSend chan struct{}
+
+ // A registerer for all service discovery metrics.
+ registerer prometheus.Registerer
+
+ metrics *discovery.Metrics
}
// Run starts the background processing.
@@ -157,7 +131,7 @@ func (m *Manager) ApplyConfig(cfg map[string]discovery.Configs) error {
for pk := range m.targets {
if _, ok := cfg[pk.setName]; !ok {
- discoveredTargets.DeleteLabelValues(m.name, pk.setName)
+ m.metrics.DiscoveredTargets.DeleteLabelValues(pk.setName)
}
}
m.cancelDiscoverers()
@@ -168,9 +142,9 @@ func (m *Manager) ApplyConfig(cfg map[string]discovery.Configs) error {
failedCount := 0
for name, scfg := range cfg {
failedCount += m.registerProviders(scfg, name)
- discoveredTargets.WithLabelValues(m.name, name).Set(0)
+ m.metrics.DiscoveredTargets.WithLabelValues(name).Set(0)
}
- failedConfigs.WithLabelValues(m.name).Set(float64(failedCount))
+ m.metrics.FailedConfigs.Set(float64(failedCount))
for _, prov := range m.providers {
m.startProvider(m.ctx, prov)
@@ -207,7 +181,7 @@ func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targ
case <-ctx.Done():
return
case tgs, ok := <-updates:
- receivedUpdates.WithLabelValues(m.name).Inc()
+ m.metrics.ReceivedUpdates.Inc()
if !ok {
level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name)
return
@@ -236,11 +210,11 @@ func (m *Manager) sender() {
case <-ticker.C: // Some discoverers send updates too often so we throttle these with the ticker.
select {
case <-m.triggerSend:
- sentUpdates.WithLabelValues(m.name).Inc()
+ m.metrics.SentUpdates.Inc()
select {
case m.syncCh <- m.allGroups():
default:
- delayedUpdates.WithLabelValues(m.name).Inc()
+ m.metrics.DelayedUpdates.Inc()
level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle")
select {
case m.triggerSend <- struct{}{}:
@@ -288,7 +262,7 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
}
}
for setName, v := range n {
- discoveredTargets.WithLabelValues(m.name, setName).Set(float64(v))
+ m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v))
}
return tSets
}
@@ -309,7 +283,8 @@ func (m *Manager) registerProviders(cfgs discovery.Configs, setName string) int
}
typ := cfg.Name()
d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{
- Logger: log.With(m.logger, "discovery", typ, "config", setName),
+ Logger: log.With(m.logger, "discovery", typ, "config", setName),
+ Registerer: m.registerer,
})
if err != nil {
level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName)
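A note on the NewManager hunks above: the metrics are created only after every functional option has run, because an option may set mgr.name, and that name becomes the ConstLabel on all of the manager's metrics. A minimal caller-side sketch of the new signature follows; only the constructor call and the nil check are taken from this diff, the rest is illustrative:

    package main

    import (
        "context"

        "github.com/go-kit/log"
        "github.com/prometheus/client_golang/prometheus"

        "github.com/prometheus/prometheus/discovery/legacymanager"
    )

    func main() {
        reg := prometheus.NewRegistry()
        // NewManager now takes a prometheus.Registerer and returns nil when
        // registering the manager metrics fails, so callers must check the result.
        mgr := legacymanager.NewManager(context.Background(), log.NewNopLogger(), reg)
        if mgr == nil {
            panic("discovery manager metrics could not be registered")
        }
        go mgr.Run()
    }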
diff --git a/discovery/legacymanager/manager_test.go b/discovery/legacymanager/manager_test.go
index 13b84e6e36..7a2e8feea4 100644
--- a/discovery/legacymanager/manager_test.go
+++ b/discovery/legacymanager/manager_test.go
@@ -22,6 +22,7 @@ import (
"time"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
client_testutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@@ -664,7 +665,8 @@ func TestTargetUpdatesOrder(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
var totalUpdatesCount int
@@ -746,7 +748,8 @@ func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Grou
func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -774,7 +777,8 @@ func TestTargetSetRecreatesTargetGroupsEveryRun(t *testing.T) {
func TestDiscovererConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -798,7 +802,8 @@ func TestDiscovererConfigs(t *testing.T) {
func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -837,7 +842,8 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
func TestIdenticalConfigurationsAreCoalesced(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, nil)
+ discoveryManager := NewManager(ctx, nil, prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -868,7 +874,8 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) {
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -893,7 +900,8 @@ func (e errorConfig) NewDiscoverer(discovery.DiscovererOptions) (discovery.Disco
func TestGaugeFailedConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -907,7 +915,7 @@ func TestGaugeFailedConfigs(t *testing.T) {
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
- failedCount := client_testutil.ToFloat64(failedConfigs)
+ failedCount := client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
if failedCount != 3 {
t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount)
}
@@ -918,7 +926,7 @@ func TestGaugeFailedConfigs(t *testing.T) {
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
- failedCount = client_testutil.ToFloat64(failedConfigs)
+ failedCount = client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
if failedCount != 0 {
t.Fatalf("Expected to get no failed config, got: %v", failedCount)
}
@@ -1049,7 +1057,8 @@ func TestCoordinationWithReceiver(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
- mgr := NewManager(ctx, nil)
+ mgr := NewManager(ctx, nil, prometheus.NewRegistry())
+ require.NotNil(t, mgr)
mgr.updatert = updateDelay
go mgr.Run()
diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go
index a5e047b948..38a5cdad4b 100644
--- a/discovery/linode/linode.go
+++ b/discovery/linode/linode.go
@@ -67,24 +67,15 @@ const (
)
// DefaultSDConfig is the default Linode SD configuration.
-var (
- DefaultSDConfig = SDConfig{
- TagSeparator: ",",
- Port: 80,
- RefreshInterval: model.Duration(60 * time.Second),
- HTTPClientConfig: config.DefaultHTTPClientConfig,
- }
-
- failuresCount = prometheus.NewCounter(
- prometheus.CounterOpts{
- Name: "prometheus_sd_linode_failures_total",
- Help: "Number of Linode service discovery refresh failures.",
- })
-)
+var DefaultSDConfig = SDConfig{
+ TagSeparator: ",",
+ Port: 80,
+ RefreshInterval: model.Duration(60 * time.Second),
+ HTTPClientConfig: config.DefaultHTTPClientConfig,
+}
func init() {
discovery.RegisterConfig(&SDConfig{})
- prometheus.MustRegister(failuresCount)
}
// SDConfig is the configuration for Linode based service discovery.
@@ -101,7 +92,7 @@ func (*SDConfig) Name() string { return "linode" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger)
+ return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -131,16 +122,22 @@ type Discovery struct {
pollCount int
lastResults []*targetgroup.Group
eventPollingEnabled bool
+ failuresCount prometheus.Counter
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
d := &Discovery{
port: conf.Port,
tagSeparator: conf.TagSeparator,
pollCount: 0,
lastRefreshTimestamp: time.Now().UTC(),
eventPollingEnabled: true,
+ failuresCount: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Name: "prometheus_sd_linode_failures_total",
+ Help: "Number of Linode service discovery refresh failures.",
+ }),
}
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "linode_sd")
@@ -158,10 +155,14 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
d.client = &client
d.Discovery = refresh.NewDiscovery(
- logger,
- "linode",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "linode",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ Metrics: []prometheus.Collector{d.failuresCount},
+ },
)
return d, nil
}
@@ -222,14 +223,14 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro
// Gather all linode instances.
instances, err := d.client.ListInstances(ctx, &linodego.ListOptions{PageSize: 500})
if err != nil {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, err
}
// Gather detailed IP address info for all IPs on all linode instances.
detailedIPs, err := d.client.ListIPAddresses(ctx, &linodego.ListOptions{PageSize: 500})
if err != nil {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, err
}
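The linode change above is the template this diff applies to every refresh-based SD: the failure counter moves from a package-level variable into the Discovery struct, and refresh.NewDiscovery now takes an Options struct carrying the registerer plus any per-discoverer collectors. A hedged sketch of that pattern for a hypothetical SD; the Options field names come from this diff, while everything named "example" is invented for illustration:

    package example

    import (
        "context"
        "time"

        "github.com/go-kit/log"
        "github.com/prometheus/client_golang/prometheus"

        "github.com/prometheus/prometheus/discovery/refresh"
        "github.com/prometheus/prometheus/discovery/targetgroup"
    )

    // Discovery is a hypothetical refresh-based SD that owns its failure counter.
    type Discovery struct {
        *refresh.Discovery
        failuresCount prometheus.Counter
    }

    func NewDiscovery(interval time.Duration, logger log.Logger, reg prometheus.Registerer) *Discovery {
        d := &Discovery{
            failuresCount: prometheus.NewCounter(prometheus.CounterOpts{
                Name: "prometheus_sd_example_failures_total", // hypothetical metric name
                Help: "Number of example service discovery refresh failures.",
            }),
        }
        d.Discovery = refresh.NewDiscovery(refresh.Options{
            Logger:   logger,
            Mech:     "example",
            Interval: interval,
            RefreshF: d.refresh,
            Registry: reg,
            // Collectors owned by this discoverer; the refresh layer is
            // expected to register them with Registry on our behalf.
            Metrics: []prometheus.Collector{d.failuresCount},
        })
        return d
    }

    func (d *Discovery) refresh(_ context.Context) ([]*targetgroup.Group, error) {
        // A real implementation would increment failuresCount on upstream errors.
        return []*targetgroup.Group{{Source: "example"}}, nil
    }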
diff --git a/discovery/linode/linode_test.go b/discovery/linode/linode_test.go
index 988313b702..536b12090c 100644
--- a/discovery/linode/linode_test.go
+++ b/discovery/linode/linode_test.go
@@ -20,6 +20,7 @@ import (
"testing"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@@ -52,7 +53,7 @@ func TestLinodeSDRefresh(t *testing.T) {
Credentials: tokenID,
Type: "Bearer",
}
- d, err := NewDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
endpoint, err := url.Parse(sdmock.Mock.Endpoint())
require.NoError(t, err)
@@ -61,12 +62,12 @@ func TestLinodeSDRefresh(t *testing.T) {
tgs, err := d.refresh(context.Background())
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Equal(t, 4, len(tg.Targets))
+ require.Len(t, tg.Targets, 4)
for i, lbls := range []model.LabelSet{
{
diff --git a/discovery/manager.go b/discovery/manager.go
index 86439d2c95..67e326c41a 100644
--- a/discovery/manager.go
+++ b/discovery/manager.go
@@ -28,48 +28,6 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
)
-var (
- failedConfigs = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Name: "prometheus_sd_failed_configs",
- Help: "Current number of service discovery configurations that failed to load.",
- },
- []string{"name"},
- )
- discoveredTargets = prometheus.NewGaugeVec(
- prometheus.GaugeOpts{
- Name: "prometheus_sd_discovered_targets",
- Help: "Current number of discovered targets.",
- },
- []string{"name", "config"},
- )
- receivedUpdates = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Name: "prometheus_sd_received_updates_total",
- Help: "Total number of update events received from the SD providers.",
- },
- []string{"name"},
- )
- delayedUpdates = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Name: "prometheus_sd_updates_delayed_total",
- Help: "Total number of update events that couldn't be sent immediately.",
- },
- []string{"name"},
- )
- sentUpdates = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Name: "prometheus_sd_updates_total",
- Help: "Total number of update events sent to the SD consumers.",
- },
- []string{"name"},
- )
-)
-
-func RegisterMetrics() {
- prometheus.MustRegister(failedConfigs, discoveredTargets, receivedUpdates, delayedUpdates, sentUpdates)
-}
-
type poolKey struct {
setName string
provider string
@@ -107,7 +65,7 @@ func (p *Provider) Config() interface{} {
}
// NewManager is the Discovery Manager constructor.
-func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager)) *Manager {
+func NewManager(ctx context.Context, logger log.Logger, registerer prometheus.Registerer, options ...func(*Manager)) *Manager {
if logger == nil {
logger = log.NewNopLogger()
}
@@ -118,10 +76,21 @@ func NewManager(ctx context.Context, logger log.Logger, options ...func(*Manager
ctx: ctx,
updatert: 5 * time.Second,
triggerSend: make(chan struct{}, 1),
+ registerer: registerer,
}
for _, option := range options {
option(mgr)
}
+
+ // Register the metrics.
+ // We have to do this after setting all options, so that the name of the Manager is set.
+ if metrics, err := NewMetrics(registerer, mgr.name); err == nil {
+ mgr.metrics = metrics
+ } else {
+ level.Error(logger).Log("msg", "Failed to create discovery manager metrics", "manager", mgr.name, "err", err)
+ return nil
+ }
+
return mgr
}
@@ -170,6 +139,11 @@ type Manager struct {
// lastProvider counts providers registered during Manager's lifetime.
lastProvider uint
+
+ // A registerer for all service discovery metrics.
+ registerer prometheus.Registerer
+
+ metrics *Metrics
}
// Providers returns the currently configured SD providers.
@@ -200,7 +174,7 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {
for name, scfg := range cfg {
failedCount += m.registerProviders(scfg, name)
}
- failedConfigs.WithLabelValues(m.name).Set(float64(failedCount))
+ m.metrics.FailedConfigs.Set(float64(failedCount))
var (
wg sync.WaitGroup
@@ -230,13 +204,13 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {
// Remove obsolete subs' targets.
if _, ok := prov.newSubs[s]; !ok {
delete(m.targets, poolKey{s, prov.name})
- discoveredTargets.DeleteLabelValues(m.name, s)
+ m.metrics.DiscoveredTargets.DeleteLabelValues(s)
}
}
// Set metrics and targets for new subs.
for s := range prov.newSubs {
if _, ok := prov.subs[s]; !ok {
- discoveredTargets.WithLabelValues(m.name, s).Set(0)
+ m.metrics.DiscoveredTargets.WithLabelValues(s).Set(0)
}
if l := len(refTargets); l > 0 {
m.targets[poolKey{s, prov.name}] = make(map[string]*targetgroup.Group, l)
@@ -316,7 +290,7 @@ func (m *Manager) updater(ctx context.Context, p *Provider, updates chan []*targ
case <-ctx.Done():
return
case tgs, ok := <-updates:
- receivedUpdates.WithLabelValues(m.name).Inc()
+ m.metrics.ReceivedUpdates.Inc()
if !ok {
level.Debug(m.logger).Log("msg", "Discoverer channel closed", "provider", p.name)
// Wait for provider cancellation to ensure targets are cleaned up when expected.
@@ -349,11 +323,11 @@ func (m *Manager) sender() {
case <-ticker.C: // Some discoverers send updates too often, so we throttle these with the ticker.
select {
case <-m.triggerSend:
- sentUpdates.WithLabelValues(m.name).Inc()
+ m.metrics.SentUpdates.Inc()
select {
case m.syncCh <- m.allGroups():
default:
- delayedUpdates.WithLabelValues(m.name).Inc()
+ m.metrics.DelayedUpdates.Inc()
level.Debug(m.logger).Log("msg", "Discovery receiver's channel was full so will retry the next cycle")
select {
case m.triggerSend <- struct{}{}:
@@ -405,7 +379,7 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
}
}
for setName, v := range n {
- discoveredTargets.WithLabelValues(m.name, setName).Set(float64(v))
+ m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v))
}
return tSets
}
@@ -428,6 +402,7 @@ func (m *Manager) registerProviders(cfgs Configs, setName string) int {
d, err := cfg.NewDiscoverer(DiscovererOptions{
Logger: log.With(m.logger, "discovery", typ, "config", setName),
HTTPClientOptions: m.httpOpts,
+ Registerer: m.registerer,
})
if err != nil {
level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", typ, "config", setName)
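Because the "name" label is now a ConstLabel stamped on each manager's metrics rather than a variable label, several managers can share one registry without their series colliding. A sketch, assuming the package's pre-existing Name option (not shown in this diff) is what populates mgr.name:

    package main

    import (
        "context"
        "os"

        "github.com/go-kit/log"
        "github.com/prometheus/client_golang/prometheus"

        "github.com/prometheus/prometheus/discovery"
    )

    func main() {
        logger := log.NewNopLogger()
        reg := prometheus.NewRegistry()
        // discovery.Name is assumed from the existing options API; it sets
        // mgr.name, which NewMetrics turns into the "name" ConstLabel.
        scrapeMgr := discovery.NewManager(context.Background(), logger, reg, discovery.Name("scrape"))
        notifyMgr := discovery.NewManager(context.Background(), logger, reg, discovery.Name("notify"))
        if scrapeMgr == nil || notifyMgr == nil {
            os.Exit(1) // NewManager returns nil when metric registration fails.
        }
        go scrapeMgr.Run()
        go notifyMgr.Run()
    }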
diff --git a/discovery/manager_test.go b/discovery/manager_test.go
index 5371608112..f22de75a46 100644
--- a/discovery/manager_test.go
+++ b/discovery/manager_test.go
@@ -23,6 +23,7 @@ import (
"time"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
client_testutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@@ -664,7 +665,8 @@ func TestTargetUpdatesOrder(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
var totalUpdatesCount int
@@ -778,7 +780,8 @@ func pk(provider, setName string, n int) poolKey {
func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -790,27 +793,28 @@ func TestTargetSetTargetGroupsPresentOnConfigReload(t *testing.T) {
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
- require.Equal(t, 1, len(syncedTargets))
+ require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets["prometheus"], 1)
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(discoveryManager.targets))
+ require.Len(t, discoveryManager.targets, 1)
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(discoveryManager.targets))
- require.Equal(t, 1, len(syncedTargets))
+ require.Len(t, discoveryManager.targets, 1)
+ require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets["prometheus"], 1)
}
func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -822,12 +826,12 @@ func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) {
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
- require.Equal(t, 1, len(syncedTargets))
+ require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets["prometheus"], 1)
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(discoveryManager.targets))
+ require.Len(t, discoveryManager.targets, 1)
c["prometheus2"] = c["prometheus"]
delete(c, "prometheus")
@@ -836,16 +840,17 @@ func TestTargetSetTargetGroupsPresentOnConfigRename(t *testing.T) {
syncedTargets = <-discoveryManager.SyncCh()
p = pk("static", "prometheus2", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(discoveryManager.targets))
- require.Equal(t, 1, len(syncedTargets))
+ require.Len(t, discoveryManager.targets, 1)
+ require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus2"]))
+ require.Len(t, syncedTargets["prometheus2"], 1)
}
func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -860,30 +865,31 @@ func TestTargetSetTargetGroupsPresentOnConfigDuplicateAndDeleteOriginal(t *testi
c["prometheus2"] = c["prometheus"]
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
- require.Equal(t, 2, len(syncedTargets))
+ require.Len(t, syncedTargets, 2)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets["prometheus"], 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus2"]))
+ require.Len(t, syncedTargets["prometheus2"], 1)
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 2, len(discoveryManager.targets))
+ require.Len(t, discoveryManager.targets, 2)
delete(c, "prometheus")
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
p = pk("static", "prometheus2", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(discoveryManager.targets))
- require.Equal(t, 1, len(syncedTargets))
+ require.Len(t, discoveryManager.targets, 1)
+ require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus2"]))
+ require.Len(t, syncedTargets["prometheus2"], 1)
}
func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -895,9 +901,9 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) {
discoveryManager.ApplyConfig(c)
syncedTargets := <-discoveryManager.SyncCh()
- require.Equal(t, 1, len(syncedTargets))
+ require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets["prometheus"], 1)
var mu sync.Mutex
c["prometheus2"] = Configs{
@@ -912,39 +918,40 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) {
// Original targets should be present as soon as possible.
syncedTargets = <-discoveryManager.SyncCh()
mu.Unlock()
- require.Equal(t, 1, len(syncedTargets))
+ require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets["prometheus"], 1)
// prometheus2 configs should be ready on second sync.
syncedTargets = <-discoveryManager.SyncCh()
- require.Equal(t, 2, len(syncedTargets))
+ require.Len(t, syncedTargets, 2)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets["prometheus"], 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"bar:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus2"]))
+ require.Len(t, syncedTargets["prometheus2"], 1)
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
p = pk("lockstatic", "prometheus2", 1)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"bar:9090\"}", true)
- require.Equal(t, 2, len(discoveryManager.targets))
+ require.Len(t, discoveryManager.targets, 2)
// Delete part of config and ensure only original targets exist.
delete(c, "prometheus2")
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
- require.Equal(t, 1, len(discoveryManager.targets))
+ require.Len(t, discoveryManager.targets, 1)
verifyPresence(t, discoveryManager.targets, pk("static", "prometheus", 0), "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets))
+ require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets["prometheus"], 1)
}
func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -959,31 +966,32 @@ func TestTargetSetRecreatesTargetGroupsOnConfigChange(t *testing.T) {
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"bar:9090\"}", true)
- require.Equal(t, 1, len(discoveryManager.targets))
- require.Equal(t, 1, len(syncedTargets))
+ require.Len(t, discoveryManager.targets, 1)
+ require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"bar:9090\"}", true)
- require.Equal(t, 2, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets["prometheus"], 2)
c["prometheus"] = Configs{
staticConfig("foo:9090"),
}
discoveryManager.ApplyConfig(c)
syncedTargets = <-discoveryManager.SyncCh()
- require.Equal(t, 1, len(discoveryManager.targets))
+ require.Len(t, discoveryManager.targets, 1)
p = pk("static", "prometheus", 1)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"bar:9090\"}", false)
- require.Equal(t, 1, len(discoveryManager.targets))
- require.Equal(t, 1, len(syncedTargets))
+ require.Len(t, discoveryManager.targets, 1)
+ require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets["prometheus"], 1)
}
func TestDiscovererConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -1001,12 +1009,12 @@ func TestDiscovererConfigs(t *testing.T) {
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"bar:9090\"}", true)
p = pk("static", "prometheus", 1)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"baz:9090\"}", true)
- require.Equal(t, 2, len(discoveryManager.targets))
- require.Equal(t, 1, len(syncedTargets))
+ require.Len(t, discoveryManager.targets, 2)
+ require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"bar:9090\"}", true)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"baz:9090\"}", true)
- require.Equal(t, 3, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets["prometheus"], 3)
}
// TestTargetSetRecreatesEmptyStaticConfigs ensures that reloading a config file after
@@ -1015,7 +1023,8 @@ func TestDiscovererConfigs(t *testing.T) {
func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -1029,9 +1038,9 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
syncedTargets := <-discoveryManager.SyncCh()
p := pk("static", "prometheus", 0)
verifyPresence(t, discoveryManager.targets, p, "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets))
+ require.Len(t, syncedTargets, 1)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets["prometheus"], 1)
c["prometheus"] = Configs{
StaticConfig{{}},
@@ -1052,8 +1061,8 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
if len(group.Targets) != 0 {
t.Fatalf("Invalid number of targets: expected 0, got %d", len(group.Targets))
}
- require.Equal(t, 1, len(syncedTargets))
- require.Equal(t, 1, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets, 1)
+ require.Len(t, syncedTargets["prometheus"], 1)
if lbls := syncedTargets["prometheus"][0].Labels; lbls != nil {
t.Fatalf("Unexpected Group: expected nil Labels, got %v", lbls)
}
@@ -1062,7 +1071,8 @@ func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
func TestIdenticalConfigurationsAreCoalesced(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, nil)
+ discoveryManager := NewManager(ctx, nil, prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -1082,11 +1092,11 @@ func TestIdenticalConfigurationsAreCoalesced(t *testing.T) {
if len(discoveryManager.providers) != 1 {
t.Fatalf("Invalid number of providers: expected 1, got %d", len(discoveryManager.providers))
}
- require.Equal(t, 2, len(syncedTargets))
+ require.Len(t, syncedTargets, 2)
verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus"]))
+ require.Len(t, syncedTargets["prometheus"], 1)
verifySyncedPresence(t, syncedTargets, "prometheus2", "{__address__=\"foo:9090\"}", true)
- require.Equal(t, 1, len(syncedTargets["prometheus2"]))
+ require.Len(t, syncedTargets["prometheus2"], 1)
}
func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) {
@@ -1098,7 +1108,8 @@ func TestApplyConfigDoesNotModifyStaticTargets(t *testing.T) {
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -1144,7 +1155,8 @@ func (s lockStaticDiscoverer) Run(ctx context.Context, up chan<- []*targetgroup.
func TestGaugeFailedConfigs(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
@@ -1158,7 +1170,7 @@ func TestGaugeFailedConfigs(t *testing.T) {
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
- failedCount := client_testutil.ToFloat64(failedConfigs)
+ failedCount := client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
if failedCount != 3 {
t.Fatalf("Expected to have 3 failed configs, got: %v", failedCount)
}
@@ -1169,7 +1181,7 @@ func TestGaugeFailedConfigs(t *testing.T) {
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
- failedCount = client_testutil.ToFloat64(failedConfigs)
+ failedCount = client_testutil.ToFloat64(discoveryManager.metrics.FailedConfigs)
if failedCount != 0 {
t.Fatalf("Expected to get no failed config, got: %v", failedCount)
}
@@ -1300,7 +1312,8 @@ func TestCoordinationWithReceiver(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
- mgr := NewManager(ctx, nil)
+ mgr := NewManager(ctx, nil, prometheus.NewRegistry())
+ require.NotNil(t, mgr)
mgr.updatert = updateDelay
go mgr.Run()
@@ -1392,10 +1405,11 @@ func (o onceProvider) Run(_ context.Context, ch chan<- []*targetgroup.Group) {
// TestTargetSetTargetGroupsUpdateDuringApplyConfig is used to detect races when
// ApplyConfig happens at the same time as targets update.
-func TestTargetSetTargetGroupsUpdateDuringApplyConfig(*testing.T) {
+func TestTargetSetTargetGroupsUpdateDuringApplyConfig(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
- discoveryManager := NewManager(ctx, log.NewNopLogger())
+ discoveryManager := NewManager(ctx, log.NewNopLogger(), prometheus.NewRegistry())
+ require.NotNil(t, discoveryManager)
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
diff --git a/discovery/marathon/marathon.go b/discovery/marathon/marathon.go
index 27947fa8a8..a6a6252fd0 100644
--- a/discovery/marathon/marathon.go
+++ b/discovery/marathon/marathon.go
@@ -28,6 +28,7 @@ import (
"time"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -83,7 +84,7 @@ func (*SDConfig) Name() string { return "marathon" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(*c, opts.Logger)
+ return NewDiscovery(*c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -132,7 +133,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Marathon Discovery.
-func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
+func NewDiscovery(conf SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "marathon_sd")
if err != nil {
return nil, err
@@ -154,10 +155,13 @@ func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
appsClient: fetchApps,
}
d.Discovery = refresh.NewDiscovery(
- logger,
- "marathon",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "marathon",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ },
)
return d, nil
}
diff --git a/discovery/marathon/marathon_test.go b/discovery/marathon/marathon_test.go
index 258e3c8ddf..a1ddce9309 100644
--- a/discovery/marathon/marathon_test.go
+++ b/discovery/marathon/marathon_test.go
@@ -21,6 +21,7 @@ import (
"net/http/httptest"
"testing"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
@@ -36,7 +37,7 @@ func testConfig() SDConfig {
}
func testUpdateServices(client appListClient) ([]*targetgroup.Group, error) {
- md, err := NewDiscovery(testConfig(), nil)
+ md, err := NewDiscovery(testConfig(), nil, prometheus.NewRegistry())
if err != nil {
return nil, err
}
@@ -129,7 +130,7 @@ func TestMarathonSDSendGroup(t *testing.T) {
}
func TestMarathonSDRemoveApp(t *testing.T) {
- md, err := NewDiscovery(testConfig(), nil)
+ md, err := NewDiscovery(testConfig(), nil, prometheus.NewRegistry())
if err != nil {
t.Fatalf("%s", err)
}
diff --git a/discovery/metrics.go b/discovery/metrics.go
new file mode 100644
index 0000000000..6a60603955
--- /dev/null
+++ b/discovery/metrics.go
@@ -0,0 +1,101 @@
+// Copyright 2016 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package discovery
+
+import (
+ "fmt"
+
+ "github.com/prometheus/client_golang/prometheus"
+)
+
+var (
+ clientGoRequestMetrics = &clientGoRequestMetricAdapter{}
+ clientGoWorkloadMetrics = &clientGoWorkqueueMetricsProvider{}
+)
+
+func init() {
+ clientGoRequestMetrics.RegisterWithK8sGoClient()
+ clientGoWorkloadMetrics.RegisterWithK8sGoClient()
+}
+
+// Metrics to be used with a discovery manager.
+type Metrics struct {
+ FailedConfigs prometheus.Gauge
+ DiscoveredTargets *prometheus.GaugeVec
+ ReceivedUpdates prometheus.Counter
+ DelayedUpdates prometheus.Counter
+ SentUpdates prometheus.Counter
+}
+
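+// NewMetrics creates the metrics for a service discovery manager named sdManagerName
+// and registers them with the given registerer.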
+func NewMetrics(registerer prometheus.Registerer, sdManagerName string) (*Metrics, error) {
+ m := &Metrics{}
+
+ m.FailedConfigs = prometheus.NewGauge(
+ prometheus.GaugeOpts{
+ Name: "prometheus_sd_failed_configs",
+ Help: "Current number of service discovery configurations that failed to load.",
+ ConstLabels: prometheus.Labels{"name": sdManagerName},
+ },
+ )
+
+ m.DiscoveredTargets = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Name: "prometheus_sd_discovered_targets",
+ Help: "Current number of discovered targets.",
+ ConstLabels: prometheus.Labels{"name": sdManagerName},
+ },
+ []string{"config"},
+ )
+
+ m.ReceivedUpdates = prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Name: "prometheus_sd_received_updates_total",
+ Help: "Total number of update events received from the SD providers.",
+ ConstLabels: prometheus.Labels{"name": sdManagerName},
+ },
+ )
+
+ m.DelayedUpdates = prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Name: "prometheus_sd_updates_delayed_total",
+ Help: "Total number of update events that couldn't be sent immediately.",
+ ConstLabels: prometheus.Labels{"name": sdManagerName},
+ },
+ )
+
+ m.SentUpdates = prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Name: "prometheus_sd_updates_total",
+ Help: "Total number of update events sent to the SD consumers.",
+ ConstLabels: prometheus.Labels{"name": sdManagerName},
+ },
+ )
+
+ metrics := []prometheus.Collector{
+ m.FailedConfigs,
+ m.DiscoveredTargets,
+ m.ReceivedUpdates,
+ m.DelayedUpdates,
+ m.SentUpdates,
+ }
+
+ for _, collector := range metrics {
+ err := registerer.Register(collector)
+ if err != nil {
+ return nil, fmt.Errorf("failed to register discovery manager metrics: %w", err)
+ }
+ }
+
+ return m, nil
+}
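The error return of NewMetrics matters in practice: every metric carries the manager name as a ConstLabel, so two Metrics sets can coexist on one registry only when their manager names differ. A small sketch of both outcomes, using only the API introduced above:

    package main

    import (
        "github.com/prometheus/client_golang/prometheus"

        "github.com/prometheus/prometheus/discovery"
    )

    func main() {
        reg := prometheus.NewRegistry()

        // Distinct manager names yield distinct ConstLabels, so both register.
        if _, err := discovery.NewMetrics(reg, "scrape"); err != nil {
            panic(err)
        }
        if _, err := discovery.NewMetrics(reg, "notify"); err != nil {
            panic(err)
        }

        // Reusing a manager name collides on the fully-qualified metric names
        // and identical ConstLabels, so the registerer rejects it.
        if _, err := discovery.NewMetrics(reg, "scrape"); err == nil {
            panic("expected registration to fail for a duplicate manager name")
        }
    }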
diff --git a/discovery/kubernetes/client_metrics.go b/discovery/metrics_k8s_client.go
similarity index 81%
rename from discovery/kubernetes/client_metrics.go
rename to discovery/metrics_k8s_client.go
index 7b097b14a3..f16245684b 100644
--- a/discovery/kubernetes/client_metrics.go
+++ b/discovery/metrics_k8s_client.go
@@ -11,10 +11,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-package kubernetes
+package discovery
import (
"context"
+ "fmt"
"net/url"
"time"
@@ -23,13 +24,22 @@ import (
"k8s.io/client-go/util/workqueue"
)
-const workqueueMetricsNamespace = metricsNamespace + "_workqueue"
+// This file registers metrics used by the Kubernetes Go client (k8s.io/client-go).
+// Unfortunately, k8s.io/client-go metrics are global.
+// If we instantiate multiple k8s SD instances, their k8s.io/client-go metrics will overlap.
+// To avoid exposing misleading metrics, we register the k8s.io/client-go metrics
+// outside of the Kubernetes SD.
+
+const (
+ KubernetesMetricsNamespace = "prometheus_sd_kubernetes"
+ workqueueMetricsNamespace = KubernetesMetricsNamespace + "_workqueue"
+)
var (
// Metrics for client-go's HTTP requests.
clientGoRequestResultMetricVec = prometheus.NewCounterVec(
prometheus.CounterOpts{
- Namespace: metricsNamespace,
+ Namespace: KubernetesMetricsNamespace,
Name: "http_request_total",
Help: "Total number of HTTP requests to the Kubernetes API by status code.",
},
@@ -37,7 +47,7 @@ var (
)
clientGoRequestLatencyMetricVec = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
- Namespace: metricsNamespace,
+ Namespace: KubernetesMetricsNamespace,
Name: "http_request_duration_seconds",
Help: "Summary of latencies for HTTP requests to the Kubernetes API by endpoint.",
Objectives: map[float64]float64{},
@@ -109,17 +119,38 @@ func (noopMetric) Set(float64) {}
// Definition of client-go metrics adapters for HTTP request observation.
type clientGoRequestMetricAdapter struct{}
-func (f *clientGoRequestMetricAdapter) Register(registerer prometheus.Registerer) {
+// clientGoMetrics returns all of the Prometheus metrics derived from k8s.io/client-go.
+// This may be used to register and unregister the metrics.
+func clientGoMetrics() []prometheus.Collector {
+ return []prometheus.Collector{
+ clientGoRequestResultMetricVec,
+ clientGoRequestLatencyMetricVec,
+ clientGoWorkqueueDepthMetricVec,
+ clientGoWorkqueueAddsMetricVec,
+ clientGoWorkqueueLatencyMetricVec,
+ clientGoWorkqueueUnfinishedWorkSecondsMetricVec,
+ clientGoWorkqueueLongestRunningProcessorMetricVec,
+ clientGoWorkqueueWorkDurationMetricVec,
+ }
+}
+
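+// RegisterK8sClientMetricsWithPrometheus exposes the k8s.io/client-go metrics on the given registerer.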
+func RegisterK8sClientMetricsWithPrometheus(registerer prometheus.Registerer) error {
+ for _, collector := range clientGoMetrics() {
+ err := registerer.Register(collector)
+ if err != nil {
+ return fmt.Errorf("failed to register Kubernetes Go Client metrics: %w", err)
+ }
+ }
+ return nil
+}
+
+func (f *clientGoRequestMetricAdapter) RegisterWithK8sGoClient() {
metrics.Register(
metrics.RegisterOpts{
RequestLatency: f,
RequestResult: f,
},
)
- registerer.MustRegister(
- clientGoRequestResultMetricVec,
- clientGoRequestLatencyMetricVec,
- )
}
func (clientGoRequestMetricAdapter) Increment(_ context.Context, code, _, _ string) {
@@ -133,16 +164,8 @@ func (clientGoRequestMetricAdapter) Observe(_ context.Context, _ string, u url.U
// Definition of client-go workqueue metrics provider.
type clientGoWorkqueueMetricsProvider struct{}
-func (f *clientGoWorkqueueMetricsProvider) Register(registerer prometheus.Registerer) {
+func (f *clientGoWorkqueueMetricsProvider) RegisterWithK8sGoClient() {
workqueue.SetProvider(f)
- registerer.MustRegister(
- clientGoWorkqueueDepthMetricVec,
- clientGoWorkqueueAddsMetricVec,
- clientGoWorkqueueLatencyMetricVec,
- clientGoWorkqueueWorkDurationMetricVec,
- clientGoWorkqueueUnfinishedWorkSecondsMetricVec,
- clientGoWorkqueueLongestRunningProcessorMetricVec,
- )
}
func (f *clientGoWorkqueueMetricsProvider) NewDepthMetric(name string) workqueue.GaugeMetric {
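The split above separates two concerns: RegisterWithK8sGoClient hooks the adapters into client-go's global metrics machinery (done once in this package's init), while exposing the collectors to Prometheus is left to an explicit call. One plausible call site for the Prometheus-side half, sketched under the assumption that it runs once at startup:

    package main

    import (
        "os"

        "github.com/go-kit/log"
        "github.com/go-kit/log/level"
        "github.com/prometheus/client_golang/prometheus"

        "github.com/prometheus/prometheus/discovery"
    )

    func main() {
        logger := log.NewNopLogger()
        // The client-go side hookup already happened in the discovery
        // package's init(); this call only exposes the collectors.
        if err := discovery.RegisterK8sClientMetricsWithPrometheus(prometheus.DefaultRegisterer); err != nil {
            level.Error(logger).Log("msg", "Unable to register Kubernetes client metrics", "err", err)
            os.Exit(1)
        }
    }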
diff --git a/discovery/moby/docker.go b/discovery/moby/docker.go
index 162833ece4..a13bb8704a 100644
--- a/discovery/moby/docker.go
+++ b/discovery/moby/docker.go
@@ -26,6 +26,7 @@ import (
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/client"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -80,7 +81,7 @@ func (*DockerSDConfig) Name() string { return "docker" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *DockerSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDockerDiscovery(c, opts.Logger)
+ return NewDockerDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -114,7 +115,7 @@ type DockerDiscovery struct {
}
// NewDockerDiscovery returns a new DockerDiscovery which periodically refreshes its targets.
-func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger) (*DockerDiscovery, error) {
+func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger, reg prometheus.Registerer) (*DockerDiscovery, error) {
var err error
d := &DockerDiscovery{
@@ -165,10 +166,13 @@ func NewDockerDiscovery(conf *DockerSDConfig, logger log.Logger) (*DockerDiscove
}
d.Discovery = refresh.NewDiscovery(
- logger,
- "docker",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "docker",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ },
)
return d, nil
}
diff --git a/discovery/moby/docker_test.go b/discovery/moby/docker_test.go
index bb84b1571a..1a87ad2a12 100644
--- a/discovery/moby/docker_test.go
+++ b/discovery/moby/docker_test.go
@@ -19,6 +19,7 @@ import (
"testing"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@@ -37,19 +38,19 @@ host: %s
var cfg DockerSDConfig
require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg))
- d, err := NewDockerDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDockerDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
tgs, err := d.refresh(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Equal(t, 3, len(tg.Targets))
+ require.Len(t, tg.Targets, 3)
for i, lbls := range []model.LabelSet{
{
diff --git a/discovery/moby/dockerswarm.go b/discovery/moby/dockerswarm.go
index 371f9d5ed1..bd87fea5a3 100644
--- a/discovery/moby/dockerswarm.go
+++ b/discovery/moby/dockerswarm.go
@@ -23,6 +23,7 @@ import (
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/client"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@@ -74,7 +75,7 @@ func (*DockerSwarmSDConfig) Name() string { return "dockerswarm" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *DockerSwarmSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger)
+ return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -117,7 +118,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger) (*Discovery, error) {
+func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
var err error
d := &Discovery{
@@ -168,10 +169,13 @@ func NewDiscovery(conf *DockerSwarmSDConfig, logger log.Logger) (*Discovery, err
}
d.Discovery = refresh.NewDiscovery(
- logger,
- "dockerswarm",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "dockerswarm",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ },
)
return d, nil
}
diff --git a/discovery/moby/nodes_test.go b/discovery/moby/nodes_test.go
index 1a53321378..512ff7049d 100644
--- a/discovery/moby/nodes_test.go
+++ b/discovery/moby/nodes_test.go
@@ -19,6 +19,7 @@ import (
"testing"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@@ -38,19 +39,19 @@ host: %s
var cfg DockerSwarmSDConfig
require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg))
- d, err := NewDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
tgs, err := d.refresh(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Equal(t, 5, len(tg.Targets))
+ require.Len(t, tg.Targets, 5)
for i, lbls := range []model.LabelSet{
{
diff --git a/discovery/moby/services_test.go b/discovery/moby/services_test.go
index 1bc9832c7a..816586dd7f 100644
--- a/discovery/moby/services_test.go
+++ b/discovery/moby/services_test.go
@@ -19,6 +19,7 @@ import (
"testing"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@@ -38,19 +39,19 @@ host: %s
var cfg DockerSwarmSDConfig
require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg))
- d, err := NewDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
tgs, err := d.refresh(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Equal(t, 15, len(tg.Targets))
+ require.Len(t, tg.Targets, 15)
for i, lbls := range []model.LabelSet{
{
@@ -332,19 +333,19 @@ filters:
var cfg DockerSwarmSDConfig
require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg))
- d, err := NewDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
tgs, err := d.refresh(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg, "tg should not be nil")
require.NotNil(t, tg.Targets, "tg.targets should not be nil")
- require.Equal(t, 4, len(tg.Targets))
+ require.Len(t, tg.Targets, 4)
for i, lbls := range []model.LabelSet{
{
diff --git a/discovery/moby/tasks_test.go b/discovery/moby/tasks_test.go
index 2cc9322f61..764fda3436 100644
--- a/discovery/moby/tasks_test.go
+++ b/discovery/moby/tasks_test.go
@@ -19,6 +19,7 @@ import (
"testing"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@@ -38,19 +39,19 @@ host: %s
var cfg DockerSwarmSDConfig
require.NoError(t, yaml.Unmarshal([]byte(cfgString), &cfg))
- d, err := NewDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
tgs, err := d.refresh(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Equal(t, 27, len(tg.Targets))
+ require.Len(t, tg.Targets, 27)
for i, lbls := range []model.LabelSet{
{
diff --git a/discovery/nomad/nomad.go b/discovery/nomad/nomad.go
index 7013f0737c..3fdcf714eb 100644
--- a/discovery/nomad/nomad.go
+++ b/discovery/nomad/nomad.go
@@ -49,27 +49,18 @@ const (
)
// DefaultSDConfig is the default nomad SD configuration.
-var (
- DefaultSDConfig = SDConfig{
- AllowStale: true,
- HTTPClientConfig: config.DefaultHTTPClientConfig,
- Namespace: "default",
- RefreshInterval: model.Duration(60 * time.Second),
- Region: "global",
- Server: "http://localhost:4646",
- TagSeparator: ",",
- }
-
- failuresCount = prometheus.NewCounter(
- prometheus.CounterOpts{
- Name: "prometheus_sd_nomad_failures_total",
- Help: "Number of nomad service discovery refresh failures.",
- })
-)
+var DefaultSDConfig = SDConfig{
+ AllowStale: true,
+ HTTPClientConfig: config.DefaultHTTPClientConfig,
+ Namespace: "default",
+ RefreshInterval: model.Duration(60 * time.Second),
+ Region: "global",
+ Server: "http://localhost:4646",
+ TagSeparator: ",",
+}
func init() {
discovery.RegisterConfig(&SDConfig{})
- prometheus.MustRegister(failuresCount)
}
// SDConfig is the configuration for nomad based service discovery.
@@ -88,7 +79,7 @@ func (*SDConfig) Name() string { return "nomad" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger)
+ return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -121,10 +112,11 @@ type Discovery struct {
region string
server string
tagSeparator string
+ failuresCount prometheus.Counter
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
d := &Discovery{
allowStale: conf.AllowStale,
namespace: conf.Namespace,
@@ -132,6 +124,11 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
region: conf.Region,
server: conf.Server,
tagSeparator: conf.TagSeparator,
+ failuresCount: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Name: "prometheus_sd_nomad_failures_total",
+ Help: "Number of nomad service discovery refresh failures.",
+ }),
}
HTTPClient, err := config.NewClientFromConfig(conf.HTTPClientConfig, "nomad_sd")
@@ -153,10 +150,14 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
d.client = client
d.Discovery = refresh.NewDiscovery(
- logger,
- "nomad",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "nomad",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ Metrics: []prometheus.Collector{d.failuresCount},
+ },
)
return d, nil
}
@@ -167,7 +168,7 @@ func (d *Discovery) refresh(context.Context) ([]*targetgroup.Group, error) {
}
stubs, _, err := d.client.Services().List(opts)
if err != nil {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, err
}
@@ -179,7 +180,7 @@ func (d *Discovery) refresh(context.Context) ([]*targetgroup.Group, error) {
for _, service := range stub.Services {
instances, _, err := d.client.Services().Get(service.ServiceName, opts)
if err != nil {
- failuresCount.Inc()
+ d.failuresCount.Inc()
return nil, fmt.Errorf("failed to fetch services: %w", err)
}
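The nomad change above is representative of the whole PR: metrics move from package-level variables registered in init() via prometheus.MustRegister, which panics on duplicate registration, to per-instance collectors registered against an injected prometheus.Registerer. A condensed sketch of the pattern with illustrative names; note that the actual change defers registration to Run() via refresh.Options rather than registering in the constructor:

```go
package example

import "github.com/prometheus/client_golang/prometheus"

// Discovery owns its counter instead of sharing a package-level one.
type Discovery struct {
	failuresCount prometheus.Counter
}

func NewDiscovery(reg prometheus.Registerer) (*Discovery, error) {
	d := &Discovery{
		failuresCount: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "example_sd_failures_total",
			Help: "Number of service discovery refresh failures.",
		}),
	}
	// Each instance registers with the registry it was given; tests can
	// pass a fresh prometheus.NewRegistry() and never collide.
	if err := reg.Register(d.failuresCount); err != nil {
		return nil, err
	}
	return d, nil
}
```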
diff --git a/discovery/nomad/nomad_test.go b/discovery/nomad/nomad_test.go
index d9aa54330d..ca67a877e1 100644
--- a/discovery/nomad/nomad_test.go
+++ b/discovery/nomad/nomad_test.go
@@ -22,6 +22,7 @@ import (
"testing"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
)
@@ -127,7 +128,7 @@ func TestConfiguredService(t *testing.T) {
conf := &SDConfig{
Server: "http://localhost:4646",
}
- _, err := NewDiscovery(conf, nil)
+ _, err := NewDiscovery(conf, nil, prometheus.NewRegistry())
require.NoError(t, err)
}
@@ -141,18 +142,18 @@ func TestNomadSDRefresh(t *testing.T) {
cfg := DefaultSDConfig
cfg.Server = endpoint.String()
- d, err := NewDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
tgs, err := d.refresh(context.Background())
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Equal(t, 1, len(tg.Targets))
+ require.Len(t, tg.Targets, 1)
lbls := model.LabelSet{
"__address__": model.LabelValue("127.0.0.1:30456"),
diff --git a/discovery/openstack/hypervisor_test.go b/discovery/openstack/hypervisor_test.go
index 396d5283dc..45684b4a2e 100644
--- a/discovery/openstack/hypervisor_test.go
+++ b/discovery/openstack/hypervisor_test.go
@@ -53,12 +53,12 @@ func TestOpenstackSDHypervisorRefresh(t *testing.T) {
hypervisor, _ := mock.openstackAuthSuccess()
ctx := context.Background()
tgs, err := hypervisor.refresh(ctx)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NoError(t, err)
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Equal(t, 2, len(tg.Targets))
+ require.Len(t, tg.Targets, 2)
for l, v := range map[string]string{
"__address__": "172.16.70.14:0",
diff --git a/discovery/openstack/instance.go b/discovery/openstack/instance.go
index b2fe1e7870..9b28c1d6e1 100644
--- a/discovery/openstack/instance.go
+++ b/discovery/openstack/instance.go
@@ -145,16 +145,16 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group,
openstackLabelUserID: model.LabelValue(s.UserID),
}
- flavorId, ok := s.Flavor["id"].(string)
+ flavorID, ok := s.Flavor["id"].(string)
if !ok {
level.Warn(i.logger).Log("msg", "Invalid type for flavor id, expected string")
continue
}
- labels[openstackLabelInstanceFlavor] = model.LabelValue(flavorId)
+ labels[openstackLabelInstanceFlavor] = model.LabelValue(flavorID)
- imageId, ok := s.Image["id"].(string)
+ imageID, ok := s.Image["id"].(string)
if ok {
- labels[openstackLabelInstanceImage] = model.LabelValue(imageId)
+ labels[openstackLabelInstanceImage] = model.LabelValue(imageID)
}
for k, v := range s.Metadata {
diff --git a/discovery/openstack/instance_test.go b/discovery/openstack/instance_test.go
index d2da5d9681..9e124b6053 100644
--- a/discovery/openstack/instance_test.go
+++ b/discovery/openstack/instance_test.go
@@ -61,12 +61,12 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
tgs, err := instance.refresh(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Equal(t, 4, len(tg.Targets))
+ require.Len(t, tg.Targets, 4)
for i, lbls := range []model.LabelSet{
{
diff --git a/discovery/openstack/openstack.go b/discovery/openstack/openstack.go
index 92c83a4cf4..9544a7c0f7 100644
--- a/discovery/openstack/openstack.go
+++ b/discovery/openstack/openstack.go
@@ -24,6 +24,7 @@ import (
"github.com/gophercloud/gophercloud"
"github.com/gophercloud/gophercloud/openstack"
"github.com/mwitkow/go-conntrack"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -70,7 +71,7 @@ func (*SDConfig) Name() string { return "openstack" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger)
+ return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -134,16 +135,19 @@ type refresher interface {
}
// NewDiscovery returns a new OpenStack Discoverer which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, l log.Logger) (*refresh.Discovery, error) {
+func NewDiscovery(conf *SDConfig, l log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) {
r, err := newRefresher(conf, l)
if err != nil {
return nil, err
}
return refresh.NewDiscovery(
- l,
- "openstack",
- time.Duration(conf.RefreshInterval),
- r.refresh,
+ refresh.Options{
+ Logger: l,
+ Mech: "openstack",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: r.refresh,
+ Registry: reg,
+ },
), nil
}
diff --git a/discovery/ovhcloud/dedicated_server_test.go b/discovery/ovhcloud/dedicated_server_test.go
index e8ffa4a283..52311bcc87 100644
--- a/discovery/ovhcloud/dedicated_server_test.go
+++ b/discovery/ovhcloud/dedicated_server_test.go
@@ -47,11 +47,11 @@ consumer_key: %s`, mock.URL, ovhcloudApplicationKeyTest, ovhcloudApplicationSecr
targetGroups, err := d.refresh(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(targetGroups))
+ require.Len(t, targetGroups, 1)
targetGroup := targetGroups[0]
require.NotNil(t, targetGroup)
require.NotNil(t, targetGroup.Targets)
- require.Equal(t, 1, len(targetGroup.Targets))
+ require.Len(t, targetGroup.Targets, 1)
for i, lbls := range []model.LabelSet{
{
diff --git a/discovery/ovhcloud/ovhcloud.go b/discovery/ovhcloud/ovhcloud.go
index 535ade4df5..eca284a85a 100644
--- a/discovery/ovhcloud/ovhcloud.go
+++ b/discovery/ovhcloud/ovhcloud.go
@@ -22,6 +22,7 @@ import (
"github.com/go-kit/log"
"github.com/ovh/go-ovh/ovh"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -93,7 +94,7 @@ func createClient(config *SDConfig) (*ovh.Client, error) {
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(options discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, options.Logger)
+ return NewDiscovery(c, options.Logger, options.Registerer)
}
func init() {
@@ -140,16 +141,19 @@ func newRefresher(conf *SDConfig, logger log.Logger) (refresher, error) {
}
// NewDiscovery returns a new OVHcloud Discoverer which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) {
r, err := newRefresher(conf, logger)
if err != nil {
return nil, err
}
return refresh.NewDiscovery(
- logger,
- "ovhcloud",
- time.Duration(conf.RefreshInterval),
- r.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "ovhcloud",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: r.refresh,
+ Registry: reg,
+ },
), nil
}
diff --git a/discovery/ovhcloud/ovhcloud_test.go b/discovery/ovhcloud/ovhcloud_test.go
index efcd95bb0d..9bd9ea9547 100644
--- a/discovery/ovhcloud/ovhcloud_test.go
+++ b/discovery/ovhcloud/ovhcloud_test.go
@@ -18,6 +18,7 @@ import (
"fmt"
"testing"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v2"
@@ -122,7 +123,8 @@ func TestDiscoverer(t *testing.T) {
conf, _ := getMockConf("vps")
logger := testutil.NewLogger(t)
_, err := conf.NewDiscoverer(discovery.DiscovererOptions{
- Logger: logger,
+ Logger: logger,
+ Registerer: prometheus.NewRegistry(),
})
require.NoError(t, err)
diff --git a/discovery/ovhcloud/vps_test.go b/discovery/ovhcloud/vps_test.go
index b1177f215e..2d2d6dcd21 100644
--- a/discovery/ovhcloud/vps_test.go
+++ b/discovery/ovhcloud/vps_test.go
@@ -49,11 +49,11 @@ consumer_key: %s`, mock.URL, ovhcloudApplicationKeyTest, ovhcloudApplicationSecr
targetGroups, err := d.refresh(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(targetGroups))
+ require.Len(t, targetGroups, 1)
targetGroup := targetGroups[0]
require.NotNil(t, targetGroup)
require.NotNil(t, targetGroup.Targets)
- require.Equal(t, 1, len(targetGroup.Targets))
+ require.Len(t, targetGroup.Targets, 1)
for i, lbls := range []model.LabelSet{
{
"__address__": "192.0.2.1",
diff --git a/discovery/puppetdb/puppetdb.go b/discovery/puppetdb/puppetdb.go
index 9484a0aa63..616f2c61e6 100644
--- a/discovery/puppetdb/puppetdb.go
+++ b/discovery/puppetdb/puppetdb.go
@@ -29,6 +29,7 @@ import (
"github.com/go-kit/log"
"github.com/grafana/regexp"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@@ -83,7 +84,7 @@ func (*SDConfig) Name() string { return "puppetdb" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger)
+ return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -130,7 +131,7 @@ type Discovery struct {
}
// NewDiscovery returns a new PuppetDB discovery for the given config.
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
@@ -156,10 +157,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
}
d.Discovery = refresh.NewDiscovery(
- logger,
- "http",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "http",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ },
)
return d, nil
}
diff --git a/discovery/puppetdb/puppetdb_test.go b/discovery/puppetdb/puppetdb_test.go
index 5514787d46..edd9b9d046 100644
--- a/discovery/puppetdb/puppetdb_test.go
+++ b/discovery/puppetdb/puppetdb_test.go
@@ -23,6 +23,7 @@ import (
"time"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@@ -62,7 +63,7 @@ func TestPuppetSlashInURL(t *testing.T) {
Port: 80,
RefreshInterval: model.Duration(30 * time.Second),
}
- d, err := NewDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
require.Equal(t, apiURL, d.url)
}
@@ -79,7 +80,7 @@ func TestPuppetDBRefresh(t *testing.T) {
RefreshInterval: model.Duration(30 * time.Second),
}
- d, err := NewDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
@@ -105,7 +106,7 @@ func TestPuppetDBRefresh(t *testing.T) {
Source: ts.URL + "/pdb/query/v4?query=vhosts",
},
}
- require.Equal(t, tgs, expectedTargets)
+ require.Equal(t, expectedTargets, tgs)
}
func TestPuppetDBRefreshWithParameters(t *testing.T) {
@@ -120,7 +121,7 @@ func TestPuppetDBRefreshWithParameters(t *testing.T) {
RefreshInterval: model.Duration(30 * time.Second),
}
- d, err := NewDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
@@ -156,7 +157,7 @@ func TestPuppetDBRefreshWithParameters(t *testing.T) {
Source: ts.URL + "/pdb/query/v4?query=vhosts",
},
}
- require.Equal(t, tgs, expectedTargets)
+ require.Equal(t, expectedTargets, tgs)
}
func TestPuppetDBInvalidCode(t *testing.T) {
@@ -172,7 +173,7 @@ func TestPuppetDBInvalidCode(t *testing.T) {
RefreshInterval: model.Duration(30 * time.Second),
}
- d, err := NewDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
@@ -193,7 +194,7 @@ func TestPuppetDBInvalidFormat(t *testing.T) {
RefreshInterval: model.Duration(30 * time.Second),
}
- d, err := NewDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
ctx := context.Background()
diff --git a/discovery/refresh/refresh.go b/discovery/refresh/refresh.go
index 919567a53b..0b0e5a921d 100644
--- a/discovery/refresh/refresh.go
+++ b/discovery/refresh/refresh.go
@@ -22,29 +22,17 @@ import (
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
-var (
- failuresCount = prometheus.NewCounterVec(
- prometheus.CounterOpts{
- Name: "prometheus_sd_refresh_failures_total",
- Help: "Number of refresh failures for the given SD mechanism.",
- },
- []string{"mechanism"},
- )
- duration = prometheus.NewSummaryVec(
- prometheus.SummaryOpts{
- Name: "prometheus_sd_refresh_duration_seconds",
- Help: "The duration of a refresh in seconds for the given SD mechanism.",
- Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
- },
- []string{"mechanism"},
- )
-)
-
-func init() {
- prometheus.MustRegister(duration, failuresCount)
+// Options configures a refresh.Discovery.
+type Options struct {
+ Logger log.Logger
+ Mech string
+ Interval time.Duration
+ RefreshF func(ctx context.Context) ([]*targetgroup.Group, error)
+ Registry prometheus.Registerer
+ Metrics []prometheus.Collector
}
// Discovery implements the Discoverer interface.
@@ -54,25 +42,62 @@ type Discovery struct {
refreshf func(ctx context.Context) ([]*targetgroup.Group, error)
failures prometheus.Counter
- duration prometheus.Observer
+ duration prometheus.Summary
+
+ metricRegisterer discovery.MetricRegisterer
}
// NewDiscovery returns a Discoverer function that calls a refresh() function at every interval.
-func NewDiscovery(l log.Logger, mech string, interval time.Duration, refreshf func(ctx context.Context) ([]*targetgroup.Group, error)) *Discovery {
- if l == nil {
- l = log.NewNopLogger()
+func NewDiscovery(opts Options) *Discovery {
+ logger := opts.Logger
+ if logger == nil {
+ logger = log.NewNopLogger()
+ }
- return &Discovery{
- logger: l,
- interval: interval,
- refreshf: refreshf,
- failures: failuresCount.WithLabelValues(mech),
- duration: duration.WithLabelValues(mech),
+
+ d := Discovery{
+ logger: logger,
+ interval: opts.Interval,
+ refreshf: opts.RefreshF,
+ failures: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Name: "prometheus_sd_refresh_failures_total",
+ Help: "Number of refresh failures for the given SD mechanism.",
+ ConstLabels: prometheus.Labels{
+ "mechanism": opts.Mech,
+ },
+ }),
+ duration: prometheus.NewSummary(
+ prometheus.SummaryOpts{
+ Name: "prometheus_sd_refresh_duration_seconds",
+ Help: "The duration of a refresh in seconds for the given SD mechanism.",
+ Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
+ ConstLabels: prometheus.Labels{
+ "mechanism": opts.Mech,
+ },
+ }),
}
+
+ metrics := []prometheus.Collector{d.failures, d.duration}
+ if opts.Metrics != nil {
+ metrics = append(metrics, opts.Metrics...)
+ }
+
+ d.metricRegisterer = discovery.NewMetricRegisterer(opts.Registry, metrics)
+
+ return &d
}
// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
+ err := d.metricRegisterer.RegisterMetrics()
+ if err != nil {
+ level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
+ return
+ }
+ defer d.metricRegisterer.UnregisterMetrics()
+
// Get an initial set right away.
tgs, err := d.refresh(ctx)
if err != nil {
diff --git a/discovery/refresh/refresh_test.go b/discovery/refresh/refresh_test.go
index 6decef19fc..12e7ab3be0 100644
--- a/discovery/refresh/refresh_test.go
+++ b/discovery/refresh/refresh_test.go
@@ -19,6 +19,7 @@ import (
"testing"
"time"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
@@ -65,7 +66,15 @@ func TestRefresh(t *testing.T) {
return nil, fmt.Errorf("some error")
}
interval := time.Millisecond
- d := NewDiscovery(nil, "test", interval, refresh)
+ d := NewDiscovery(
+ Options{
+ Logger: nil,
+ Mech: "test",
+ Interval: interval,
+ RefreshF: refresh,
+ Registry: prometheus.NewRegistry(),
+ },
+ )
ch := make(chan []*targetgroup.Group)
ctx, cancel := context.WithCancel(context.Background())
diff --git a/discovery/scaleway/instance_test.go b/discovery/scaleway/instance_test.go
index e7a32dd924..d2449d00c9 100644
--- a/discovery/scaleway/instance_test.go
+++ b/discovery/scaleway/instance_test.go
@@ -55,12 +55,12 @@ api_url: %s
tgs, err := d.refresh(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Equal(t, 2, len(tg.Targets))
+ require.Len(t, tg.Targets, 2)
for i, lbls := range []model.LabelSet{
{
@@ -161,5 +161,5 @@ api_url: %s
tgs, err := d.refresh(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
}
diff --git a/discovery/scaleway/scaleway.go b/discovery/scaleway/scaleway.go
index 90091b3172..86527b34e6 100644
--- a/discovery/scaleway/scaleway.go
+++ b/discovery/scaleway/scaleway.go
@@ -23,6 +23,7 @@ import (
"time"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/scaleway/scaleway-sdk-go/scw"
@@ -160,7 +161,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
func (c SDConfig) NewDiscoverer(options discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(&c, options.Logger)
+ return NewDiscovery(&c, options.Logger, options.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -177,17 +178,20 @@ func init() {
// the Discoverer interface.
type Discovery struct{}
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*refresh.Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*refresh.Discovery, error) {
r, err := newRefresher(conf)
if err != nil {
return nil, err
}
return refresh.NewDiscovery(
- logger,
- "scaleway",
- time.Duration(conf.RefreshInterval),
- r.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "scaleway",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: r.refresh,
+ Registry: reg,
+ },
), nil
}
diff --git a/discovery/triton/triton.go b/discovery/triton/triton.go
index c83f3b34ab..4839827ada 100644
--- a/discovery/triton/triton.go
+++ b/discovery/triton/triton.go
@@ -26,6 +26,7 @@ import (
"github.com/go-kit/log"
"github.com/mwitkow/go-conntrack"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -74,7 +75,7 @@ func (*SDConfig) Name() string { return "triton" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return New(opts.Logger, c)
+ return New(opts.Logger, c, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -138,7 +139,7 @@ type Discovery struct {
}
// New returns a new Discovery which periodically refreshes its targets.
-func New(logger log.Logger, conf *SDConfig) (*Discovery, error) {
+func New(logger log.Logger, conf *SDConfig, reg prometheus.Registerer) (*Discovery, error) {
tls, err := config.NewTLSConfig(&conf.TLSConfig)
if err != nil {
return nil, err
@@ -159,10 +160,13 @@ func New(logger log.Logger, conf *SDConfig) (*Discovery, error) {
sdConfig: conf,
}
d.Discovery = refresh.NewDiscovery(
- logger,
- "triton",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "triton",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ },
)
return d, nil
}
diff --git a/discovery/triton/triton_test.go b/discovery/triton/triton_test.go
index ca38965322..fa51a2e472 100644
--- a/discovery/triton/triton_test.go
+++ b/discovery/triton/triton_test.go
@@ -24,6 +24,7 @@ import (
"strings"
"testing"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@@ -79,7 +80,7 @@ var (
)
func newTritonDiscovery(c SDConfig) (*Discovery, error) {
- return New(nil, &c)
+ return New(nil, &c, prometheus.NewRegistry())
}
func TestTritonSDNew(t *testing.T) {
@@ -155,7 +156,7 @@ func TestTritonSDRefreshMultipleTargets(t *testing.T) {
tgts := testTritonSDRefresh(t, conf, dstr)
require.NotNil(t, tgts)
- require.Equal(t, 2, len(tgts))
+ require.Len(t, tgts, 2)
}
func TestTritonSDRefreshNoServer(t *testing.T) {
@@ -163,7 +164,7 @@ func TestTritonSDRefreshNoServer(t *testing.T) {
_, err := td.refresh(context.Background())
require.Error(t, err)
- require.Equal(t, strings.Contains(err.Error(), "an error occurred when requesting targets from the discovery endpoint"), true)
+ require.True(t, strings.Contains(err.Error(), "an error occurred when requesting targets from the discovery endpoint"))
}
func TestTritonSDRefreshCancelled(t *testing.T) {
@@ -173,7 +174,7 @@ func TestTritonSDRefreshCancelled(t *testing.T) {
cancel()
_, err := td.refresh(ctx)
require.Error(t, err)
- require.Equal(t, strings.Contains(err.Error(), context.Canceled.Error()), true)
+ require.True(t, strings.Contains(err.Error(), context.Canceled.Error()))
}
func TestTritonSDRefreshCNsUUIDOnly(t *testing.T) {
@@ -188,7 +189,7 @@ func TestTritonSDRefreshCNsUUIDOnly(t *testing.T) {
tgts := testTritonSDRefresh(t, cnconf, dstr)
require.NotNil(t, tgts)
- require.Equal(t, 2, len(tgts))
+ require.Len(t, tgts, 2)
}
func TestTritonSDRefreshCNsWithHostname(t *testing.T) {
@@ -205,7 +206,7 @@ func TestTritonSDRefreshCNsWithHostname(t *testing.T) {
tgts := testTritonSDRefresh(t, cnconf, dstr)
require.NotNil(t, tgts)
- require.Equal(t, 2, len(tgts))
+ require.Len(t, tgts, 2)
}
func testTritonSDRefresh(t *testing.T, c SDConfig, dstr string) []model.LabelSet {
@@ -235,7 +236,7 @@ func testTritonSDRefresh(t *testing.T, c SDConfig, dstr string) []model.LabelSet
tgs, err := td.refresh(context.Background())
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
diff --git a/discovery/util.go b/discovery/util.go
new file mode 100644
index 0000000000..83cc640dd9
--- /dev/null
+++ b/discovery/util.go
@@ -0,0 +1,72 @@
+// Copyright 2020 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package discovery
+
+import (
+ "fmt"
+
+ "github.com/prometheus/client_golang/prometheus"
+)
+
+// MetricRegisterer is a utility for implementations of discovery.Discoverer
+// that need to manage the lifetime of their metrics.
+type MetricRegisterer interface {
+ RegisterMetrics() error
+ UnregisterMetrics()
+}
+
+// metricRegistererImpl is an implementation of MetricRegisterer.
+type metricRegistererImpl struct {
+ reg prometheus.Registerer
+ metrics []prometheus.Collector
+}
+
+var _ MetricRegisterer = &metricRegistererImpl{}
+
+// NewMetricRegisterer creates an instance of a MetricRegisterer.
+// It is typically called inside the implementation of the NewDiscoverer() method.
+func NewMetricRegisterer(reg prometheus.Registerer, metrics []prometheus.Collector) MetricRegisterer {
+ return &metricRegistererImpl{
+ reg: reg,
+ metrics: metrics,
+ }
+}
+
+// RegisterMetrics registers the metrics with a Prometheus registerer.
+// If any metric fails to register, it will unregister all metrics that
+// were registered so far, and return an error.
+// Typically called at the start of the SD's Run() method.
+func (rh *metricRegistererImpl) RegisterMetrics() error {
+ for _, collector := range rh.metrics {
+ err := rh.reg.Register(collector)
+ if err != nil {
+ // Unregister all metrics that were registered so far.
+ // This is so that if RegisterMetrics() gets called again,
+ // there will not be an error due to a duplicate registration.
+ rh.UnregisterMetrics()
+
+ return fmt.Errorf("failed to register metric: %w", err)
+ }
+ }
+ return nil
+}
+
+// UnregisterMetrics unregisters the metrics from the same Prometheus
+// registerer which was used to register them.
+// Typically called at the end of the SD's Run() method by a defer statement.
+func (rh *metricRegistererImpl) UnregisterMetrics() {
+ for _, collector := range rh.metrics {
+ rh.reg.Unregister(collector)
+ }
+}
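For discoverers that do not embed refresh.Discovery, the intended lifecycle of the new helper looks roughly like this; a sketch, with the SD type and metric name being illustrative:

```go
package example

import (
	"context"

	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

type exampleSD struct {
	refreshes        prometheus.Counter
	metricRegisterer discovery.MetricRegisterer
}

func newExampleSD(reg prometheus.Registerer) *exampleSD {
	d := &exampleSD{
		refreshes: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "example_sd_refreshes_total",
			Help: "Number of refreshes.",
		}),
	}
	// Created in the constructor, used in Run().
	d.metricRegisterer = discovery.NewMetricRegisterer(
		reg, []prometheus.Collector{d.refreshes})
	return d
}

func (d *exampleSD) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
	// Register at the start of Run(); bail out if registration fails.
	if err := d.metricRegisterer.RegisterMetrics(); err != nil {
		return
	}
	// Unregister when Run() exits, so a restarted SD can re-register.
	defer d.metricRegisterer.UnregisterMetrics()

	<-ctx.Done()
}
```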
diff --git a/discovery/uyuni/uyuni.go b/discovery/uyuni/uyuni.go
index bc33d28cba..744f3f96cf 100644
--- a/discovery/uyuni/uyuni.go
+++ b/discovery/uyuni/uyuni.go
@@ -25,6 +25,7 @@ import (
"github.com/go-kit/log"
"github.com/kolo/xmlrpc"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@@ -115,7 +116,7 @@ func (*SDConfig) Name() string { return "uyuni" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger)
+ return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -203,7 +204,7 @@ func getEndpointInfoForSystems(
}
// NewDiscovery returns a uyuni discovery for the given configuration.
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
apiURL, err := url.Parse(conf.Server)
if err != nil {
return nil, err
@@ -227,10 +228,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
}
d.Discovery = refresh.NewDiscovery(
- logger,
- "uyuni",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "uyuni",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ },
)
return d, nil
}
diff --git a/discovery/uyuni/uyuni_test.go b/discovery/uyuni/uyuni_test.go
index d045cde6d7..fd03c88f1b 100644
--- a/discovery/uyuni/uyuni_test.go
+++ b/discovery/uyuni/uyuni_test.go
@@ -23,6 +23,8 @@ import (
"github.com/stretchr/testify/require"
+ "github.com/prometheus/client_golang/prometheus"
+
"github.com/prometheus/prometheus/discovery/targetgroup"
)
@@ -35,7 +37,7 @@ func testUpdateServices(respHandler http.HandlerFunc) ([]*targetgroup.Group, err
Server: ts.URL,
}
- md, err := NewDiscovery(&conf, nil)
+ md, err := NewDiscovery(&conf, nil, prometheus.NewRegistry())
if err != nil {
return nil, err
}
@@ -55,7 +57,7 @@ func TestUyuniSDHandleError(t *testing.T) {
tgs, err := testUpdateServices(respHandler)
require.EqualError(t, err, errTesting)
- require.Equal(t, len(tgs), 0)
+ require.Empty(t, tgs)
}
func TestUyuniSDLogin(t *testing.T) {
@@ -87,7 +89,7 @@ func TestUyuniSDLogin(t *testing.T) {
tgs, err := testUpdateServices(respHandler)
require.EqualError(t, err, errTesting)
- require.Equal(t, len(tgs), 0)
+ require.Empty(t, tgs)
}
func TestUyuniSDSkipLogin(t *testing.T) {
@@ -108,7 +110,7 @@ func TestUyuniSDSkipLogin(t *testing.T) {
Server: ts.URL,
}
- md, err := NewDiscovery(&conf, nil)
+ md, err := NewDiscovery(&conf, nil, prometheus.NewRegistry())
if err != nil {
t.Error(err)
}
@@ -119,5 +121,5 @@ func TestUyuniSDSkipLogin(t *testing.T) {
tgs, err := md.refresh(context.Background())
require.EqualError(t, err, errTesting)
- require.Equal(t, len(tgs), 0)
+ require.Empty(t, tgs)
}
diff --git a/discovery/vultr/vultr.go b/discovery/vultr/vultr.go
index 42881d3c19..129800048a 100644
--- a/discovery/vultr/vultr.go
+++ b/discovery/vultr/vultr.go
@@ -23,6 +23,7 @@ import (
"time"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@@ -78,7 +79,7 @@ func (*SDConfig) Name() string { return "vultr" }
// NewDiscoverer returns a Discoverer for the Config.
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
- return NewDiscovery(c, opts.Logger)
+ return NewDiscovery(c, opts.Logger, opts.Registerer)
}
// SetDirectory joins any relative file paths with dir.
@@ -106,7 +107,7 @@ type Discovery struct {
}
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
+func NewDiscovery(conf *SDConfig, logger log.Logger, reg prometheus.Registerer) (*Discovery, error) {
d := &Discovery{
port: conf.Port,
}
@@ -128,10 +129,13 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
}
d.Discovery = refresh.NewDiscovery(
- logger,
- "vultr",
- time.Duration(conf.RefreshInterval),
- d.refresh,
+ refresh.Options{
+ Logger: logger,
+ Mech: "vultr",
+ Interval: time.Duration(conf.RefreshInterval),
+ RefreshF: d.refresh,
+ Registry: reg,
+ },
)
return d, nil
}
diff --git a/discovery/vultr/vultr_test.go b/discovery/vultr/vultr_test.go
index b729541531..c50b11d2da 100644
--- a/discovery/vultr/vultr_test.go
+++ b/discovery/vultr/vultr_test.go
@@ -20,6 +20,7 @@ import (
"testing"
"github.com/go-kit/log"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
)
@@ -46,7 +47,7 @@ func TestVultrSDRefresh(t *testing.T) {
cfg := DefaultSDConfig
cfg.HTTPClientConfig.BearerToken = APIKey
- d, err := NewDiscovery(&cfg, log.NewNopLogger())
+ d, err := NewDiscovery(&cfg, log.NewNopLogger(), prometheus.NewRegistry())
require.NoError(t, err)
endpoint, err := url.Parse(sdMock.Mock.Endpoint())
require.NoError(t, err)
@@ -56,12 +57,12 @@ func TestVultrSDRefresh(t *testing.T) {
tgs, err := d.refresh(ctx)
require.NoError(t, err)
- require.Equal(t, 1, len(tgs))
+ require.Len(t, tgs, 1)
tg := tgs[0]
require.NotNil(t, tg)
require.NotNil(t, tg.Targets)
- require.Equal(t, 3, len(tg.Targets))
+ require.Len(t, tg.Targets, 3)
for i, k := range []model.LabelSet{
{
diff --git a/discovery/xds/client_test.go b/discovery/xds/client_test.go
index ff5217359c..b699995fb7 100644
--- a/discovery/xds/client_test.go
+++ b/discovery/xds/client_test.go
@@ -53,14 +53,14 @@ func TestMakeXDSResourceHttpEndpointEmptyServerURLScheme(t *testing.T) {
require.Empty(t, endpointURL)
require.Error(t, err)
- require.Equal(t, err.Error(), "invalid xDS server URL")
+ require.Equal(t, "invalid xDS server URL", err.Error())
}
func TestMakeXDSResourceHttpEndpointEmptyServerURLHost(t *testing.T) {
endpointURL, err := makeXDSResourceHTTPEndpointURL(ProtocolV3, urlMustParse("grpc://127.0.0.1"), "monitoring")
require.Empty(t, endpointURL)
- require.NotNil(t, err)
+ require.Error(t, err)
require.Contains(t, err.Error(), "must be either 'http' or 'https'")
}
@@ -68,7 +68,7 @@ func TestMakeXDSResourceHttpEndpoint(t *testing.T) {
endpointURL, err := makeXDSResourceHTTPEndpointURL(ProtocolV3, urlMustParse("http://127.0.0.1:5000"), "monitoring")
require.NoError(t, err)
- require.Equal(t, endpointURL.String(), "http://127.0.0.1:5000/v3/discovery:monitoring")
+ require.Equal(t, "http://127.0.0.1:5000/v3/discovery:monitoring", endpointURL.String())
}
func TestCreateNewHTTPResourceClient(t *testing.T) {
@@ -89,8 +89,8 @@ func TestCreateNewHTTPResourceClient(t *testing.T) {
require.NoError(t, err)
- require.Equal(t, client.endpoint, "http://127.0.0.1:5000/v3/discovery:monitoring?param1=v1")
- require.Equal(t, client.client.Timeout, 1*time.Minute)
+ require.Equal(t, "http://127.0.0.1:5000/v3/discovery:monitoring?param1=v1", client.endpoint)
+ require.Equal(t, 1*time.Minute, client.client.Timeout)
}
func createTestHTTPResourceClient(t *testing.T, conf *HTTPResourceClientConfig, protocolVersion ProtocolVersion, responder discoveryResponder) (*HTTPResourceClient, func()) {
@@ -138,7 +138,7 @@ func TestHTTPResourceClientFetchFullResponse(t *testing.T) {
require.NotNil(t, res)
require.Equal(t, client.ResourceTypeURL(), res.TypeUrl)
- require.Len(t, res.Resources, 0)
+ require.Empty(t, res.Resources)
require.Equal(t, "abc", client.latestNonce, "Nonce not cached")
require.Equal(t, "1", client.latestVersion, "Version not cached")
diff --git a/discovery/xds/kuma.go b/discovery/xds/kuma.go
index bc88ba5540..c74bc552c7 100644
--- a/discovery/xds/kuma.go
+++ b/discovery/xds/kuma.go
@@ -30,35 +30,12 @@ import (
"github.com/prometheus/prometheus/util/strutil"
)
-var (
- // DefaultKumaSDConfig is the default Kuma MADS SD configuration.
- DefaultKumaSDConfig = KumaSDConfig{
- HTTPClientConfig: config.DefaultHTTPClientConfig,
- RefreshInterval: model.Duration(15 * time.Second),
- FetchTimeout: model.Duration(2 * time.Minute),
- }
-
- kumaFetchFailuresCount = prometheus.NewCounter(
- prometheus.CounterOpts{
- Namespace: namespace,
- Name: "sd_kuma_fetch_failures_total",
- Help: "The number of Kuma MADS fetch call failures.",
- })
- kumaFetchSkipUpdateCount = prometheus.NewCounter(
- prometheus.CounterOpts{
- Namespace: namespace,
- Name: "sd_kuma_fetch_skipped_updates_total",
- Help: "The number of Kuma MADS fetch calls that result in no updates to the targets.",
- })
- kumaFetchDuration = prometheus.NewSummary(
- prometheus.SummaryOpts{
- Namespace: namespace,
- Name: "sd_kuma_fetch_duration_seconds",
- Help: "The duration of a Kuma MADS fetch call.",
- Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
- },
- )
-)
+// DefaultKumaSDConfig is the default Kuma MADS SD configuration.
+var DefaultKumaSDConfig = KumaSDConfig{
+ HTTPClientConfig: config.DefaultHTTPClientConfig,
+ RefreshInterval: model.Duration(15 * time.Second),
+ FetchTimeout: model.Duration(2 * time.Minute),
+}
const (
// kumaMetaLabelPrefix is the meta prefix used for all kuma meta labels.
@@ -120,7 +97,7 @@ func (c *KumaSDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discover
logger = log.NewNopLogger()
}
- return NewKumaHTTPDiscovery(c, logger)
+ return NewKumaHTTPDiscovery(c, logger, opts.Registerer)
}
func convertKumaV1MonitoringAssignment(assignment *MonitoringAssignment) []model.LabelSet {
@@ -176,12 +153,16 @@ func kumaMadsV1ResourceParser(resources []*anypb.Any, typeURL string) ([]model.L
return targets, nil
}
-func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger) (discovery.Discoverer, error) {
+func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger, reg prometheus.Registerer) (discovery.Discoverer, error) {
// Default to "prometheus" if hostname is unavailable.
- clientID, err := osutil.GetFQDN()
- if err != nil {
- level.Debug(logger).Log("msg", "error getting FQDN", "err", err)
- clientID = "prometheus"
+ clientID := conf.ClientID
+ if clientID == "" {
+ var err error
+ clientID, err = osutil.GetFQDN()
+ if err != nil {
+ level.Debug(logger).Log("msg", "error getting FQDN", "err", err)
+ clientID = "prometheus"
+ }
}
clientConfig := &HTTPResourceClientConfig{
@@ -203,15 +184,41 @@ func NewKumaHTTPDiscovery(conf *KumaSDConfig, logger log.Logger) (discovery.Disc
}
d := &fetchDiscovery{
- client: client,
- logger: logger,
- refreshInterval: time.Duration(conf.RefreshInterval),
- source: "kuma",
- parseResources: kumaMadsV1ResourceParser,
- fetchFailuresCount: kumaFetchFailuresCount,
- fetchSkipUpdateCount: kumaFetchSkipUpdateCount,
- fetchDuration: kumaFetchDuration,
+ client: client,
+ logger: logger,
+ refreshInterval: time.Duration(conf.RefreshInterval),
+ source: "kuma",
+ parseResources: kumaMadsV1ResourceParser,
+ fetchFailuresCount: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Namespace: namespace,
+ Name: "sd_kuma_fetch_failures_total",
+ Help: "The number of Kuma MADS fetch call failures.",
+ }),
+ fetchSkipUpdateCount: prometheus.NewCounter(
+ prometheus.CounterOpts{
+ Namespace: namespace,
+ Name: "sd_kuma_fetch_skipped_updates_total",
+ Help: "The number of Kuma MADS fetch calls that result in no updates to the targets.",
+ }),
+ fetchDuration: prometheus.NewSummary(
+ prometheus.SummaryOpts{
+ Namespace: namespace,
+ Name: "sd_kuma_fetch_duration_seconds",
+ Help: "The duration of a Kuma MADS fetch call.",
+ Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
+ },
+ ),
}
+ d.metricRegisterer = discovery.NewMetricRegisterer(
+ reg,
+ []prometheus.Collector{
+ d.fetchFailuresCount,
+ d.fetchSkipUpdateCount,
+ d.fetchDuration,
+ },
+ )
+
return d, nil
}
diff --git a/discovery/xds/kuma_test.go b/discovery/xds/kuma_test.go
index 1db0a0831d..5e2417c96f 100644
--- a/discovery/xds/kuma_test.go
+++ b/discovery/xds/kuma_test.go
@@ -21,6 +21,7 @@ import (
"time"
v3 "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"google.golang.org/protobuf/proto"
@@ -107,7 +108,7 @@ func getKumaMadsV1DiscoveryResponse(resources ...*MonitoringAssignment) (*v3.Dis
}
func newKumaTestHTTPDiscovery(c KumaSDConfig) (*fetchDiscovery, error) {
- kd, err := NewKumaHTTPDiscovery(&c, nopLogger)
+ kd, err := NewKumaHTTPDiscovery(&c, nopLogger, prometheus.NewRegistry())
if err != nil {
return nil, err
}
@@ -129,7 +130,7 @@ func TestKumaMadsV1ResourceParserInvalidTypeURL(t *testing.T) {
func TestKumaMadsV1ResourceParserEmptySlice(t *testing.T) {
resources := make([]*anypb.Any, 0)
groups, err := kumaMadsV1ResourceParser(resources, KumaMadsV1ResourceTypeURL)
- require.Len(t, groups, 0)
+ require.Empty(t, groups)
require.NoError(t, err)
}
@@ -204,7 +205,7 @@ func TestNewKumaHTTPDiscovery(t *testing.T) {
require.True(t, ok)
require.Equal(t, kumaConf.Server, resClient.Server())
require.Equal(t, KumaMadsV1ResourceTypeURL, resClient.ResourceTypeURL())
- require.NotEmpty(t, resClient.ID())
+ require.Equal(t, kumaConf.ClientID, resClient.ID())
require.Equal(t, KumaMadsV1ResourceType, resClient.config.ResourceType)
}
diff --git a/discovery/xds/xds.go b/discovery/xds/xds.go
index 48bdbab02b..8b6cb7e65e 100644
--- a/discovery/xds/xds.go
+++ b/discovery/xds/xds.go
@@ -55,6 +55,7 @@ type SDConfig struct {
RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
FetchTimeout model.Duration `yaml:"fetch_timeout,omitempty"`
Server string `yaml:"server,omitempty"`
+ ClientID string `yaml:"client_id,omitempty"`
}
// mustRegisterMessage registers the provided message type in the typeRegistry, and panics
@@ -69,9 +70,6 @@ func init() {
// Register top-level SD Configs.
discovery.RegisterConfig(&KumaSDConfig{})
- // Register metrics.
- prometheus.MustRegister(kumaFetchDuration, kumaFetchSkipUpdateCount, kumaFetchFailuresCount)
-
// Register protobuf types that need to be marshalled/ unmarshalled.
mustRegisterMessage(protoTypes, (&v3.DiscoveryRequest{}).ProtoReflect().Type())
mustRegisterMessage(protoTypes, (&v3.DiscoveryResponse{}).ProtoReflect().Type())
@@ -109,12 +107,20 @@ type fetchDiscovery struct {
parseResources resourceParser
logger log.Logger
- fetchDuration prometheus.Observer
+ fetchDuration prometheus.Summary
fetchSkipUpdateCount prometheus.Counter
fetchFailuresCount prometheus.Counter
+
+ metricRegisterer discovery.MetricRegisterer
}
func (d *fetchDiscovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
+ err := d.metricRegisterer.RegisterMetrics()
+ if err != nil {
+ level.Error(d.logger).Log("msg", "Unable to register metrics", "err", err.Error())
+ return
+ }
+ defer d.metricRegisterer.UnregisterMetrics()
defer d.client.Close()
ticker := time.NewTicker(d.refreshInterval)
diff --git a/discovery/xds/xds_test.go b/discovery/xds/xds_test.go
index 974a47342f..f57fff9968 100644
--- a/discovery/xds/xds_test.go
+++ b/discovery/xds/xds_test.go
@@ -36,6 +36,7 @@ var (
sdConf = SDConfig{
Server: "http://127.0.0.1",
RefreshInterval: model.Duration(10 * time.Second),
+ ClientID: "test-id",
}
testFetchFailuresCount = prometheus.NewCounter(
diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md
index a960350ec0..9b0fc7cc60 100644
--- a/docs/command-line/promtool.md
+++ b/docs/command-line/promtool.md
@@ -429,6 +429,15 @@ Unit tests for rules.
+###### Flags
+
+| Flag | Description |
+| --- | --- |
+| --run | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. |
+
+
+
+
###### Arguments
| Argument | Description | Required |
diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md
index c129c7e66c..5e2f31c1c6 100644
--- a/docs/configuration/configuration.md
+++ b/docs/configuration/configuration.md
@@ -1126,7 +1126,7 @@ A DNS-based service discovery configuration allows specifying a set of DNS
domain names which are periodically queried to discover a list of targets. The
DNS servers to be contacted are read from `/etc/resolv.conf`.
-This service discovery method only supports basic DNS A, AAAA, MX and SRV
+This service discovery method only supports basic DNS A, AAAA, MX, NS and SRV
record queries, but not the advanced DNS-SD approach specified in
[RFC6763](https://tools.ietf.org/html/rfc6763).
@@ -1136,13 +1136,14 @@ The following meta labels are available on targets during [relabeling](#relabel_
* `__meta_dns_srv_record_target`: the target field of the SRV record
* `__meta_dns_srv_record_port`: the port field of the SRV record
* `__meta_dns_mx_record_target`: the target field of the MX record
+* `__meta_dns_ns_record_target`: the target field of the NS record
```yaml
# A list of DNS domain names to be queried.
names:
[ - <string> ]
-# The type of DNS query to perform. One of SRV, A, AAAA or MX.
+# The type of DNS query to perform. One of SRV, A, AAAA, MX or NS.
[ type: <string> | default = 'SRV' ]
# The port number used if the query type is not SRV.
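Putting the new record type to use, a dns_sd_config that discovers a zone's name servers might look like this; a sketch with illustrative values:

```yaml
scrape_configs:
  - job_name: dns-ns
    dns_sd_configs:
      - names: ["example.com"]
        type: NS
        port: 9153
```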
@@ -2229,6 +2230,11 @@ See below for the configuration options for Kuma MonitoringAssignment discovery:
# Address of the Kuma Control Plane's MADS xDS server.
server: <string>
+# The client ID is used by the Kuma Control Plane to compute the Monitoring
+# Assignment for a specific Prometheus backend. This is useful when migrating
+# between multiple Prometheus backends, or when running a separate backend for
+# each Mesh. When not specified, the system hostname/FQDN will be used if
+# available; otherwise, `prometheus` will be used.
+[ client_id: <string> ]
+
# The time to wait between polling update requests.
[ refresh_interval: <duration> | default = 30s ]
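Putting the new option together, a kuma_sd_config stanza pinning the client ID might look like this; the server address and ID are illustrative:

```yaml
scrape_configs:
  - job_name: kuma-dataplanes
    kuma_sd_configs:
      - server: http://kuma-control-plane.kuma-system.svc:5676
        client_id: prometheus-main
        refresh_interval: 60s
```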
diff --git a/docs/feature_flags.md b/docs/feature_flags.md
index d57763af0b..bcf8309b5c 100644
--- a/docs/feature_flags.md
+++ b/docs/feature_flags.md
@@ -119,20 +119,19 @@ also experimental) protobuf parser, through which _all_ metrics are ingested
(i.e. not only native histograms). Prometheus will try to negotiate the
protobuf format first. The instrumented target needs to support the protobuf
format, too, _and_ it needs to expose native histograms. The protobuf format
-allows to expose conventional and native histograms side by side. With this
-feature flag disabled, Prometheus will continue to parse the conventional
-histogram (albeit via the text format). With this flag enabled, Prometheus will
-still ingest those conventional histograms that do not come with a
-corresponding native histogram. However, if a native histogram is present,
-Prometheus will ignore the corresponding conventional histogram, with the
-notable exception of exemplars, which are always ingested. To keep the
-conventional histograms as well, enable `scrape_classic_histograms` in the
-scrape job.
+allows exposing classic and native histograms side by side. With this feature
+flag disabled, Prometheus will continue to parse the classic histogram (albeit
+via the text format). With this flag enabled, Prometheus will still ingest
+those classic histograms that do not come with a corresponding native
+histogram. However, if a native histogram is present, Prometheus will ignore
+the corresponding classic histogram, with the notable exception of exemplars,
+which are always ingested. To keep the classic histograms as well, enable
+`scrape_classic_histograms` in the scrape job.
_Note about the format of `le` and `quantile` label values:_
In certain situations, the protobuf parsing changes the number formatting of
-the `le` labels of conventional histograms and the `quantile` labels of
+the `le` labels of classic histograms and the `quantile` labels of
summaries. Typically, this happens if the scraped target is instrumented with
[client_golang](https://github.com/prometheus/client_golang) provided that
[promhttp.HandlerOpts.EnableOpenMetrics](https://pkg.go.dev/github.com/prometheus/client_golang/prometheus/promhttp#HandlerOpts)
@@ -195,3 +194,13 @@ won't work when you push OTLP metrics.
Enables PromQL functions that are considered experimental and whose name or
semantics could change.
+
+## Created Timestamps Zero Injection
+
+`--enable-feature=created-timestamp-zero-ingestion`
+
+Enables ingestion of created timestamps. Created timestamps are injected as 0-valued samples when appropriate. See the [PromCon talk](https://youtu.be/nWf0BfQ5EEA) for details.
+
+Currently, Prometheus supports created timestamps only on the traditional Prometheus protobuf scrape protocol (support for other protocols is a work in progress). As a result, when this feature is enabled, the Prometheus protobuf scrape protocol will be prioritized (see the `scrape_config.scrape_protocols` setting for more details).
+
+Besides enabling this feature in Prometheus, created timestamps need to be exposed by the application being scraped.
diff --git a/docs/querying/functions.md b/docs/querying/functions.md
index 00afa1d223..dda88fccd1 100644
--- a/docs/querying/functions.md
+++ b/docs/querying/functions.md
@@ -238,23 +238,23 @@ boundaries are inclusive or exclusive.
## `histogram_quantile()`
`histogram_quantile(φ scalar, b instant-vector)` calculates the φ-quantile (0 ≤
-φ ≤ 1) from a [conventional
+φ ≤ 1) from a [classic
histogram](https://prometheus.io/docs/concepts/metric_types/#histogram) or from
a native histogram. (See [histograms and
summaries](https://prometheus.io/docs/practices/histograms) for a detailed
-explanation of φ-quantiles and the usage of the (conventional) histogram metric
+explanation of φ-quantiles and the usage of the (classic) histogram metric
type in general.)
_Note that native histograms are an experimental feature. The behavior of this
function when dealing with native histograms may change in future versions of
Prometheus._
-The conventional float samples in `b` are considered the counts of observations
-in each bucket of one or more conventional histograms. Each float sample must
-have a label `le` where the label value denotes the inclusive upper bound of
-the bucket. (Float samples without such a label are silently ignored.) The
-other labels and the metric name are used to identify the buckets belonging to
-each conventional histogram. The [histogram metric
+The float samples in `b` are considered the counts of observations in each
+bucket of one or more classic histograms. Each float sample must have a label
+`le` where the label value denotes the inclusive upper bound of the bucket.
+(Float samples without such a label are silently ignored.) The other labels and
+the metric name are used to identify the buckets belonging to each classic
+histogram. The [histogram metric
type](https://prometheus.io/docs/concepts/metric_types/#histogram)
automatically provides time series with the `_bucket` suffix and the
appropriate labels.
@@ -262,17 +262,17 @@ appropriate labels.
The native histogram samples in `b` are treated each individually as a separate
histogram to calculate the quantile from.
-As long as no naming collisions arise, `b` may contain a mix of conventional
+As long as no naming collisions arise, `b` may contain a mix of classic
and native histograms.
Use the `rate()` function to specify the time window for the quantile
calculation.
Example: A histogram metric is called `http_request_duration_seconds` (and
-therefore the metric name for the buckets of a conventional histogram is
+therefore the metric name for the buckets of a classic histogram is
`http_request_duration_seconds_bucket`). To calculate the 90th percentile of request
durations over the last 10m, use the following expression in case
-`http_request_duration_seconds` is a conventional histogram:
+`http_request_duration_seconds` is a classic histogram:
histogram_quantile(0.9, rate(http_request_duration_seconds_bucket[10m]))
@@ -283,9 +283,9 @@ For a native histogram, use the following expression instead:
The quantile is calculated for each label combination in
`http_request_duration_seconds`. To aggregate, use the `sum()` aggregator
around the `rate()` function. Since the `le` label is required by
-`histogram_quantile()` to deal with conventional histograms, it has to be
+`histogram_quantile()` to deal with classic histograms, it has to be
included in the `by` clause. The following expression aggregates the 90th
-percentile by `job` for conventional histograms:
+percentile by `job` for classic histograms:
histogram_quantile(0.9, sum by (job, le) (rate(http_request_duration_seconds_bucket[10m])))
@@ -293,7 +293,7 @@ When aggregating native histograms, the expression simplifies to:
histogram_quantile(0.9, sum by (job) (rate(http_request_duration_seconds[10m])))
-To aggregate all conventional histograms, specify only the `le` label:
+To aggregate all classic histograms, specify only the `le` label:
histogram_quantile(0.9, sum by (le) (rate(http_request_duration_seconds_bucket[10m])))
@@ -307,7 +307,7 @@ assuming a linear distribution within a bucket.
If `b` has 0 observations, `NaN` is returned. For φ < 0, `-Inf` is
returned. For φ > 1, `+Inf` is returned. For φ = `NaN`, `NaN` is returned.
-The following is only relevant for conventional histograms: If `b` contains
+The following is only relevant for classic histograms: If `b` contains
fewer than two buckets, `NaN` is returned. The highest bucket must have an
upper bound of `+Inf`. (Otherwise, `NaN` is returned.) If a quantile is located
in the highest bucket, the upper bound of the second highest bucket is
@@ -586,6 +586,22 @@ in ascending order. Native histograms are sorted by their sum of observations.
Same as `sort`, but sorts in descending order.
+## `sort_by_label()`
+
+**This function has to be enabled via the [feature flag](../feature_flags/) `--enable-feature=promql-experimental-functions`.**
+
+`sort_by_label(v instant-vector, label string, ...)` returns vector elements sorted in ascending order by the values of the given labels, falling back to the sample value when all label values are equal.
+
+Please note that the sort by label functions only affect the results of instant queries, as range query results always have a fixed output ordering.
+
+## `sort_by_label_desc()`
+
+**This function has to be enabled via the [feature flag](../feature_flags/) `--enable-feature=promql-experimental-functions`.**
+
+Same as `sort_by_label`, but sorts in descending order.
+
+Please note that the sort by label functions only affect the results of instant queries, as range query results always have a fixed output ordering.
+
## `sqrt()`
`sqrt(v instant-vector)` calculates the square root of all elements in `v`.
@@ -624,6 +640,7 @@ over time and return an instant vector with per-series aggregation results:
* `quantile_over_time(scalar, range-vector)`: the φ-quantile (0 ≤ φ ≤ 1) of the values in the specified interval.
* `stddev_over_time(range-vector)`: the population standard deviation of the values in the specified interval.
* `stdvar_over_time(range-vector)`: the population standard variance of the values in the specified interval.
+* `mad_over_time(range-vector)`: the median absolute deviation of all points in the specified interval.
* `last_over_time(range-vector)`: the most recent point value in the specified interval.
* `present_over_time(range-vector)`: the value 1 for any series in the specified interval.
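As a usage sketch for the sort_by_label functions added above (they require `--enable-feature=promql-experimental-functions`; the metric and label names are illustrative):

    # Ascending by job, then instance; ties broken by sample value.
    sort_by_label(up, "job", "instance")

    # Same keys, descending.
    sort_by_label_desc(up, "job", "instance")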
diff --git a/docs/storage.md b/docs/storage.md
index bcb8f7853e..b4c5b6adad 100644
--- a/docs/storage.md
+++ b/docs/storage.md
@@ -17,9 +17,9 @@ Ingested samples are grouped into blocks of two hours. Each two-hour block consi
of a directory containing a chunks subdirectory containing all the time series samples
for that window of time, a metadata file, and an index file (which indexes metric names
and labels to time series in the chunks directory). The samples in the chunks directory
-are grouped together into one or more segment files of up to 512MB each by default. When series are
-deleted via the API, deletion records are stored in separate tombstone files (instead
-of deleting the data immediately from the chunk segments).
+are grouped together into one or more segment files of up to 512MB each by default. When
+series are deleted via the API, deletion records are stored in separate tombstone files
+(instead of deleting the data immediately from the chunk segments).
The current block for incoming samples is kept in memory and is not fully
persisted. It is secured against crashes by a write-ahead log (WAL) that can be
@@ -58,15 +58,17 @@ A Prometheus server's data directory looks something like this:
  └── 00000000
```
-
Note that a limitation of local storage is that it is not clustered or
replicated. Thus, it is not arbitrarily scalable or durable in the face of
drive or node outages and should be managed like any other single node
-database. The use of RAID is suggested for storage availability, and [snapshots](querying/api.md#snapshot)
-are recommended for backups. With proper
+database. The use of RAID is suggested for storage availability, and
+[snapshots](querying/api.md#snapshot) are recommended for backups. With proper
architecture, it is possible to retain years of data in local storage.
-Alternatively, external storage may be used via the [remote read/write APIs](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage). Careful evaluation is required for these systems as they vary greatly in durability, performance, and efficiency.
+Alternatively, external storage may be used via the
+[remote read/write APIs](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage).
+Careful evaluation is required for these systems as they vary greatly in durability,
+performance, and efficiency.
For further details on file format, see [TSDB format](/tsdb/docs/format/README.md).
@@ -74,40 +76,61 @@ For further details on file format, see [TSDB format](/tsdb/docs/format/README.m
The initial two-hour blocks are eventually compacted into longer blocks in the background.
-Compaction will create larger blocks containing data spanning up to 10% of the retention time, or 31 days, whichever is smaller.
+Compaction will create larger blocks containing data spanning up to 10% of the retention time,
+or 31 days, whichever is smaller.
## Operational aspects
Prometheus has several flags that configure local storage. The most important are:
-* `--storage.tsdb.path`: Where Prometheus writes its database. Defaults to `data/`.
-* `--storage.tsdb.retention.time`: When to remove old data. Defaults to `15d`. Overrides `storage.tsdb.retention` if this flag is set to anything other than default.
-* `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain. The oldest data will be removed first. Defaults to `0` or disabled. Units supported: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Only the persistent blocks are deleted to honor this retention although WAL and m-mapped chunks are counted in the total size. So the minimum requirement for the disk is the peak space taken by the `wal` (the WAL and Checkpoint) and `chunks_head` (m-mapped Head chunks) directory combined (peaks every 2 hours).
-* `--storage.tsdb.retention`: Deprecated in favor of `storage.tsdb.retention.time`.
-* `--storage.tsdb.wal-compression`: Enables compression of the write-ahead log (WAL). Depending on your data, you can expect the WAL size to be halved with little extra cpu load. This flag was introduced in 2.11.0 and enabled by default in 2.20.0. Note that once enabled, downgrading Prometheus to a version below 2.11.0 will require deleting the WAL.
+- `--storage.tsdb.path`: Where Prometheus writes its database. Defaults to `data/`.
+- `--storage.tsdb.retention.time`: When to remove old data. Defaults to `15d`.
+ Overrides `storage.tsdb.retention` if this flag is set to anything other than default.
+- `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain.
+ The oldest data will be removed first. Defaults to `0`, meaning disabled. Units supported:
+ B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Only
+ the persistent blocks are deleted to honor this retention, although the WAL and m-mapped
+ chunks are counted in the total size. So the minimum disk requirement is the
+ peak space taken by the `wal` (the WAL and Checkpoint) and `chunks_head`
+ (m-mapped Head chunks) directories combined (peaks every 2 hours).
+- `--storage.tsdb.retention`: Deprecated in favor of `storage.tsdb.retention.time`.
+- `--storage.tsdb.wal-compression`: Enables compression of the write-ahead log (WAL).
+ Depending on your data, you can expect the WAL size to be halved with little extra
+ CPU load. This flag was introduced in 2.11.0 and enabled by default in 2.20.0.
+ Note that once enabled, downgrading Prometheus to a version below 2.11.0 will
+ require deleting the WAL.
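+
+As a rough sketch, these flags might be combined as follows (the path and the
+retention values here are only example choices):
+
+```
+prometheus \
+  --storage.tsdb.path=/var/lib/prometheus/data \
+  --storage.tsdb.retention.time=30d \
+  --storage.tsdb.retention.size=100GB \
+  --storage.tsdb.wal-compression
+```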
-Prometheus stores an average of only 1-2 bytes per sample. Thus, to plan the capacity of a Prometheus server, you can use the rough formula:
+Prometheus stores an average of only 1-2 bytes per sample. Thus, to plan the
+capacity of a Prometheus server, you can use the rough formula:
```
needed_disk_space = retention_time_seconds * ingested_samples_per_second * bytes_per_sample
```
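+
+As a rough illustration, assuming the default 15d retention (1,296,000 seconds),
+an ingestion rate of 100,000 samples per second, and 2 bytes per sample:
+
+```
+needed_disk_space = 1296000 * 100000 * 2 = ~259 GB
+```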
-To lower the rate of ingested samples, you can either reduce the number of time series you scrape (fewer targets or fewer series per target), or you can increase the scrape interval. However, reducing the number of series is likely more effective, due to compression of samples within a series.
+To lower the rate of ingested samples, you can either reduce the number of
+time series you scrape (fewer targets or fewer series per target), or you
+can increase the scrape interval. However, reducing the number of series is
+likely more effective, due to compression of samples within a series.
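+
+For instance, raising the global scrape interval from 15s to 60s (the values
+here are only illustrative) would cut the ingested sample rate to a quarter:
+
+```
+global:
+  scrape_interval: 60s
+```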
If your local storage becomes corrupted for whatever reason, the best
strategy to address the problem is to shut down Prometheus then remove the
entire storage directory. You can also try removing individual block directories,
-or the WAL directory to resolve the problem. Note that this means losing
+or the WAL directory to resolve the problem. Note that this means losing
approximately two hours of data per block directory. Again, Prometheus's local
storage is not intended to be durable long-term storage; external solutions
offer extended retention and data durability.
-CAUTION: Non-POSIX compliant filesystems are not supported for Prometheus' local storage as unrecoverable corruptions may happen. NFS filesystems (including AWS's EFS) are not supported. NFS could be POSIX-compliant, but most implementations are not. It is strongly recommended to use a local filesystem for reliability.
+CAUTION: Non-POSIX-compliant filesystems are not supported for Prometheus's
+local storage, as unrecoverable corruptions may happen. NFS filesystems
+(including AWS's EFS) are not supported. NFS could be POSIX-compliant,
+but most implementations are not. It is strongly recommended to use a
+local filesystem for reliability.
If both time and size retention policies are specified, whichever triggers first
will be used.
-Expired block cleanup happens in the background. It may take up to two hours to remove expired blocks. Blocks must be fully expired before they are removed.
+Expired block cleanup happens in the background. It may take up to two hours
+to remove expired blocks. Blocks must be fully expired before they are removed.
## Remote storage integrations
@@ -119,59 +142,101 @@ a set of interfaces that allow integrating with remote storage systems.
Prometheus integrates with remote storage systems in three ways:
-* Prometheus can write samples that it ingests to a remote URL in a standardized format.
-* Prometheus can receive samples from other Prometheus servers in a standardized format.
-* Prometheus can read (back) sample data from a remote URL in a standardized format.
+- Prometheus can write samples that it ingests to a remote URL in a standardized format.
+- Prometheus can receive samples from other Prometheus servers in a standardized format.
+- Prometheus can read (back) sample data from a remote URL in a standardized format.

-The read and write protocols both use a snappy-compressed protocol buffer encoding over HTTP. The protocols are not considered as stable APIs yet and may change to use gRPC over HTTP/2 in the future, when all hops between Prometheus and the remote storage can safely be assumed to support HTTP/2.
+The read and write protocols both use a snappy-compressed protocol buffer encoding over
+HTTP. The protocols are not yet considered stable APIs and may change to use gRPC
+over HTTP/2 in the future, when all hops between Prometheus and the remote storage can
+safely be assumed to support HTTP/2.
-For details on configuring remote storage integrations in Prometheus, see the [remote write](configuration/configuration.md#remote_write) and [remote read](configuration/configuration.md#remote_read) sections of the Prometheus configuration documentation.
+For details on configuring remote storage integrations in Prometheus, see the
+[remote write](configuration/configuration.md#remote_write) and
+[remote read](configuration/configuration.md#remote_read) sections of the Prometheus
+configuration documentation.
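+
+A minimal configuration sketch (the URL is only a placeholder for a real
+remote endpoint):
+
+```
+remote_write:
+  - url: "https://remote-storage.example.org/api/v1/write"
+
+remote_read:
+  - url: "https://remote-storage.example.org/api/v1/read"
+```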
-The built-in remote write receiver can be enabled by setting the `--web.enable-remote-write-receiver` command line flag. When enabled, the remote write receiver endpoint is `/api/v1/write`.
+The built-in remote write receiver can be enabled by setting the
+`--web.enable-remote-write-receiver` command line flag. When enabled,
+the remote write receiver endpoint is `/api/v1/write`.
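+
+For example (a sketch; another Prometheus server or a compatible agent could
+then send remote-write requests to `http://<host>:9090/api/v1/write`):
+
+```
+prometheus --web.enable-remote-write-receiver
+```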
-For details on the request and response messages, see the [remote storage protocol buffer definitions](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto).
+For details on the request and response messages, see the
+[remote storage protocol buffer definitions](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto).
-Note that on the read path, Prometheus only fetches raw series data for a set of label selectors and time ranges from the remote end. All PromQL evaluation on the raw data still happens in Prometheus itself. This means that remote read queries have some scalability limit, since all necessary data needs to be loaded into the querying Prometheus server first and then processed there. However, supporting fully distributed evaluation of PromQL was deemed infeasible for the time being.
+Note that on the read path, Prometheus only fetches raw series data for a set of
+label selectors and time ranges from the remote end. All PromQL evaluation on the
+raw data still happens in Prometheus itself. This means that remote read queries
+have some scalability limit, since all necessary data needs to be loaded into the
+querying Prometheus server first and then processed there. However, supporting
+fully distributed evaluation of PromQL was deemed infeasible for the time being.
### Existing integrations
-To learn more about existing integrations with remote storage systems, see the [Integrations documentation](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage).
+To learn more about existing integrations with remote storage systems, see the
+[Integrations documentation](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage).
## Backfilling from OpenMetrics format
### Overview
-If a user wants to create blocks into the TSDB from data that is in [OpenMetrics](https://openmetrics.io/) format, they can do so using backfilling. However, they should be careful and note that it is not safe to backfill data from the last 3 hours (the current head block) as this time range may overlap with the current head block Prometheus is still mutating. Backfilling will create new TSDB blocks, each containing two hours of metrics data. This limits the memory requirements of block creation. Compacting the two hour blocks into larger blocks is later done by the Prometheus server itself.
+If a user wants to create blocks in the TSDB from data that is in
+[OpenMetrics](https://openmetrics.io/) format, they can do so using backfilling.
+However, note that it is not safe to backfill data from the last 3 hours
+(the current head block), as this time range may overlap with the head block
+that Prometheus is still mutating. Backfilling will create new TSDB blocks,
+each containing two hours of metrics data. This limits the memory requirements
+of block creation. Compacting the two-hour blocks into larger blocks is later
+done by the Prometheus server itself.
-A typical use case is to migrate metrics data from a different monitoring system or time-series database to Prometheus. To do so, the user must first convert the source data into [OpenMetrics](https://openmetrics.io/) format, which is the input format for the backfilling as described below.
+A typical use case is to migrate metrics data from a different monitoring system
+or time-series database to Prometheus. To do so, the user must first convert the
+source data into [OpenMetrics](https://openmetrics.io/) format, which is the
+input format for the backfilling as described below.
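+
+As an illustrative sketch, the converted OpenMetrics input might look like this
+(the metric, value, and timestamp are assumptions; timestamps are in seconds,
+and the `# EOF` terminator is required by the format):
+
+```
+# HELP http_requests The total number of HTTP requests.
+# TYPE http_requests counter
+http_requests_total{job="app"} 1027 1700000000
+# EOF
+```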
### Usage
-Backfilling can be used via the Promtool command line. Promtool will write the blocks to a directory. By default this output directory is ./data/, you can change it by using the name of the desired output directory as an optional argument in the sub-command.
+Backfilling can be used via the Promtool command line. Promtool will write the blocks
+to a directory. By default, this output directory is `./data/`; you can change it by
+passing the name of the desired output directory as an optional argument to the sub-command.
```
promtool tsdb create-blocks-from openmetrics [