Merge branch 'main' into sparsehistogram

Commit 4210aac74a by beorn7, 2022-03-22 14:47:42 +01:00
246 changed files with 32930 additions and 39947 deletions


@ -2,7 +2,7 @@
version: 2.1
orbs:
prometheus: prometheus/prometheus@0.15.0
prometheus: prometheus/prometheus@0.16.0
go: circleci/go@1.7.0
win: circleci/windows@2.3.0
@ -12,9 +12,9 @@ executors:
golang:
docker:
- image: quay.io/prometheus/golang-builder:1.17-base
golang_115:
golang_oldest:
docker:
- image: quay.io/prometheus/golang-builder:1.15-base
- image: quay.io/prometheus/golang-builder:1.16-base
jobs:
test_go:
@ -37,6 +37,7 @@ jobs:
GOMAXPROCS: "2"
GO111MODULE: "on"
- run: go test ./tsdb/ -test.tsdb-isolation=false
- run: make -C documentation/examples/remote_storage
- prometheus/check_proto:
version: "3.15.8"
- prometheus/store_artifact:
@ -81,18 +82,17 @@ jobs:
command: refreshenv
- run:
command: |
$env:GOARCH=""; $env:GOOS=""; cd web/ui; go generate
cd ../..
$TestTargets = go list ./... | Where-Object { $_ -NotMatch "(github.com/prometheus/prometheus/discovery.*|github.com/prometheus/prometheus/config|github.com/prometheus/prometheus/web)"}
go test $TestTargets -vet=off -v
environment:
GOGC: "20"
GOOPTS: "-p 2"
test_tsdb_go115:
executor: golang_115
test_golang_oldest:
executor: golang_oldest
steps:
- checkout
- run: make build
- run: go test ./tsdb/...
- run: go test ./tsdb/ -test.tsdb-isolation=false
@ -114,7 +114,6 @@ jobs:
steps:
- checkout
- run: ./scripts/sync_repo_files.sh
- run: ./scripts/sync_codemirror.sh
workflows:
version: 2
@ -128,7 +127,7 @@ workflows:
filters:
tags:
only: /.*/
- test_tsdb_go115:
- test_golang_oldest:
filters:
tags:
only: /.*/
@ -179,10 +178,10 @@ workflows:
branches:
ignore: /.*/
image: circleci/golang:1-node
nightly:
daily:
triggers:
- schedule:
cron: "0 0 * * *"
cron: "49 19 * * *"
filters:
branches:
only:


@ -3,17 +3,16 @@ updates:
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "weekly"
interval: "monthly"
- package-ecosystem: "npm"
directory: "/web/ui"
open-pull-requests-limit: 0
schedule:
interval: "weekly"
interval: "monthly"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
interval: "monthly"
- package-ecosystem: "docker"
directory: "/"
schedule:
interval: "weekly"
interval: "monthly"

.github/workflows/buf-lint.yml (new file, 20 lines)

@ -0,0 +1,20 @@
name: buf.build
on:
pull_request:
paths:
- ".github/workflows/buf-lint.yml"
- "**.proto"
jobs:
buf:
name: lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: bufbuild/buf-setup-action@v1.1.0
- uses: bufbuild/buf-lint-action@v1
with:
input: 'prompb'
- uses: bufbuild/buf-breaking-action@v1
with:
input: 'prompb'
against: 'https://github.com/prometheus/prometheus.git#branch=main,ref=HEAD,subdir=prompb'

.github/workflows/buf.yml (new file, 23 lines)

@ -0,0 +1,23 @@
name: buf.build
on:
push:
branches:
- main
jobs:
buf:
name: lint and publish
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: bufbuild/buf-setup-action@v1.1.0
- uses: bufbuild/buf-lint-action@v1
with:
input: 'prompb'
- uses: bufbuild/buf-breaking-action@v1
with:
input: 'prompb'
against: 'https://github.com/prometheus/prometheus.git#branch=main,ref=HEAD~1,subdir=prompb'
- uses: bufbuild/buf-push-action@v1
with:
input: 'prompb'
buf_token: ${{ secrets.BUF_TOKEN }}


@ -35,7 +35,7 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v3
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL


@ -1,10 +1,6 @@
name: CIFuzz
on:
pull_request:
paths:
- "go.sum"
- "go.mod"
- "**.go"
jobs:
Fuzzing:
runs-on: ubuntu-latest
@ -22,7 +18,7 @@ jobs:
fuzz-seconds: 600
dry-run: false
- name: Upload Crash
uses: actions/upload-artifact@v2.3.0
uses: actions/upload-artifact@v2.3.1
if: failure() && steps.build.outcome == 'success'
with:
name: artifacts


@ -9,13 +9,6 @@ on:
- ".github/workflows/golangci-lint.yml"
- ".golangci.yml"
pull_request:
paths:
- "go.sum"
- "go.mod"
- "**.go"
- "scripts/errcheck_excludes.txt"
- ".github/workflows/golangci-lint.yml"
- ".golangci.yml"
jobs:
golangci:
@ -23,9 +16,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Lint
uses: golangci/golangci-lint-action@v2
uses: actions/checkout@v3
- name: install Go
uses: actions/setup-go@v2
with:
version: v1.42.0
go-version: 1.17.x
- name: Lint
uses: golangci/golangci-lint-action@v3.1.0
with:
version: v1.44.2

.gitignore (1 line changed)

@ -23,7 +23,6 @@ benchmark.txt
npm_licenses.tar.bz2
/web/ui/static/react
/web/ui/assets_vfsdata.go
/vendor
/.build


@ -13,6 +13,7 @@ linters:
- gofumpt
- goimports
- revive
- misspell
issues:
exclude-rules:
@ -24,13 +25,11 @@ linters-settings:
depguard:
list-type: blacklist
include-go-root: true
packages:
- sync/atomic
- github.com/stretchr/testify/assert
packages-with-error-message:
- sync/atomic: "Use go.uber.org/atomic instead of sync/atomic"
- github.com/stretchr/testify/assert: "Use github.com/stretchr/testify/require instead of github.com/stretchr/testify/assert"
- github.com/go-kit/kit/log: "Use github.com/go-kit/log instead of github.com/go-kit/kit/log"
- regexp: "Use github.com/grafana/regexp instead of regexp"
errcheck:
exclude: scripts/errcheck_excludes.txt
goimports:


@ -1,3 +1,75 @@
## 2.34.0 / 2022-03-15
* [CHANGE] UI: Classic UI removed. #10208
* [CHANGE] Tracing: Migrate from Jaeger to OpenTelemetry based tracing. #9724, #10203, #10276
* [ENHANCEMENT] TSDB: Disable the chunk write queue by default and allow configuration with the experimental flag `--storage.tsdb.head-chunks-write-queue-size`. #10425
* [ENHANCEMENT] HTTP SD: Add a failure counter. #10372
* [ENHANCEMENT] Azure SD: Set Prometheus User-Agent on requests. #10209
* [ENHANCEMENT] Uyuni SD: Reduce the number of logins to Uyuni. #10072
* [ENHANCEMENT] Scrape: Log when an invalid media type is encountered during a scrape. #10186
* [ENHANCEMENT] Scrape: Accept application/openmetrics-text;version=1.0.0 in addition to version=0.0.1. #9431
* [ENHANCEMENT] Remote-read: Add an option to not use external labels as selectors for remote read. #10254
* [ENHANCEMENT] UI: Optimize the alerts page and add a search bar. #10142
* [ENHANCEMENT] UI: Improve graph colors that were hard to see. #10179
* [ENHANCEMENT] Config: Allow escaping of `$` with `$$` when using environment variables with external labels. #10129
* [BUGFIX] PromQL: Properly return an error from histogram_quantile when metrics have the same labelset. #10140
* [BUGFIX] UI: Fix bug that sets the range input to the resolution. #10227
* [BUGFIX] TSDB: Fix a query panic when `memory-snapshot-on-shutdown` is enabled. #10348
* [BUGFIX] Parser: Specify type in metadata parser errors. #10269
* [BUGFIX] Scrape: Fix label limit changes not applying. #10370
## 2.33.5 / 2022-03-08
The binaries published with this release are built with Go1.17.8 to avoid [CVE-2022-24921](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-24921).
* [BUGFIX] Remote-write: Fix deadlock between adding to queue and getting batch. #10395
## 2.33.4 / 2022-02-22
* [BUGFIX] TSDB: Fix panic when m-mapping head chunks onto the disk. #10316
## 2.33.3 / 2022-02-11
* [BUGFIX] Azure SD: Fix a regression when public IP Address isn't set. #10289
## 2.33.2 / 2022-02-11
* [BUGFIX] Azure SD: Fix panic when public IP Address isn't set. #10280
* [BUGFIX] Remote-write: Fix deadlock when stopping a shard. #10279
## 2.33.1 / 2022-02-02
* [BUGFIX] SD: Fix _no such file or directory_ in K8s SD when not running inside K8s. #10235
## 2.33.0 / 2022-01-29
* [CHANGE] PromQL: Promote negative offset and `@` modifier to stable features. #10121
* [CHANGE] Web: Promote remote-write-receiver to stable. #10119
* [FEATURE] Config: Add `stripPort` template function. #10002
* [FEATURE] Promtool: Add cardinality analysis to `check metrics`, enabled by flag `--extended`. #10045
* [FEATURE] SD: Enable target discovery in own K8s namespace. #9881
* [FEATURE] SD: Add provider ID label in K8s SD. #9603
* [FEATURE] Web: Add limit field to the rules API. #10152
* [ENHANCEMENT] Remote-write: Avoid allocations by buffering concrete structs instead of interfaces. #9934
* [ENHANCEMENT] Remote-write: Log time series details for out-of-order samples in remote write receiver. #9894
* [ENHANCEMENT] Remote-write: Shard up more when backlogged. #9274
* [ENHANCEMENT] TSDB: Use simpler map key to improve exemplar ingest performance. #10111
* [ENHANCEMENT] TSDB: Avoid allocations when popping from the intersected postings heap. #10092
* [ENHANCEMENT] TSDB: Make chunk writing non-blocking, avoiding latency spikes in remote-write. #10051
* [ENHANCEMENT] TSDB: Improve label matching performance. #9907
* [ENHANCEMENT] UI: Optimize the service discovery page and add a search bar. #10131
* [ENHANCEMENT] UI: Optimize the target page and add a search bar. #10103
* [BUGFIX] Promtool: Make exit codes more consistent. #9861
* [BUGFIX] Promtool: Fix flakiness of rule testing. #8818
* [BUGFIX] Remote-write: Update `prometheus_remote_storage_queue_highest_sent_timestamp_seconds` metric when write irrecoverably fails. #10102
* [BUGFIX] Storage: Avoid panic in `BufferedSeriesIterator`. #9945
* [BUGFIX] TSDB: CompactBlockMetas should produce correct mint/maxt for overlapping blocks. #10108
* [BUGFIX] TSDB: Fix logging of exemplar storage size. #9938
* [BUGFIX] UI: Fix overlapping click targets for the alert state checkboxes. #10136
* [BUGFIX] UI: Fix _Unhealthy_ filter on target page to actually display only _Unhealthy_ targets. #10103
* [BUGFIX] UI: Fix autocompletion when expression is empty. #10053
* [BUGFIX] TSDB: Fix deadlock from simultaneous GC and write. #10166
## 2.32.1 / 2021-12-17
* [BUGFIX] Scrape: Fix reporting metrics when sample limit is reached during the report. #9996
@ -13,7 +85,7 @@ Enable with `--enable-feature=agent`.
Learn more about the Prometheus Agent in our [blog post](https://prometheus.io/blog/2021/11/16/agent/).
* [CHANGE] remote-write: Change default max retry time from 100ms to 5 seconds. #9634
* [CHANGE] Remote-write: Change default max retry time from 100ms to 5 seconds. #9634
* [FEATURE] Agent: New mode of operation optimized for remote-write only scenarios, without local storage. Enable with `--enable-feature=agent`. #8785 #9851 #9664 #9939 #9941 #9943
* [FEATURE] Promtool: Add `promtool check service-discovery` command. #8970
* [FEATURE] UI: Add search in metrics dropdown. #9629
@ -36,6 +108,10 @@ Learn more about the Prometheus Agent in our [blog post](https://prometheus.io/b
* [BUGFIX] Uyuni SD: Fix null pointer exception during initialization. #9924 #9950
* [BUGFIX] TSDB: Fix queries after a failed snapshot replay. #9980
## 2.31.2 / 2021-12-09
* [BUGFIX] TSDB: Fix queries after a failed snapshot replay. #9980
## 2.31.1 / 2021-11-05
* [BUGFIX] SD: Fix a panic when the experimental discovery manager receives
@ -66,6 +142,10 @@ Learn more about the Prometheus Agent in our [blog post](https://prometheus.io/b
* [BUGFIX] TSDB: Fix memory leak in samples deletion. #9151
* [BUGFIX] UI: Use consistent margin-bottom for all alert kinds. #9318
## 2.30.4 / 2021-12-09
* [BUGFIX] TSDB: Fix queries after a failed snapshot replay. #9980
## 2.30.3 / 2021-10-05
* [BUGFIX] TSDB: Fix panic on failed snapshot replay. #9438


@ -43,7 +43,7 @@ ui-build-module:
.PHONY: ui-test
ui-test:
cd $(UI_PATH) && npm run test:coverage
cd $(UI_PATH) && CI=true npm run test:coverage
.PHONY: ui-lint
ui-lint:
@ -51,12 +51,11 @@ ui-lint:
.PHONY: assets
assets: ui-install ui-build
@echo ">> writing assets"
# Un-setting GOOS and GOARCH here because the generated Go code is always the same,
# but the cached object code is incompatible between architectures and OSes (which
# breaks cross-building for different combinations on CI in the same container).
cd $(UI_PATH) && GO111MODULE=$(GO111MODULE) GOOS= GOARCH= $(GO) generate -x -v $(GOOPTS)
@$(GOFMT) -w ./$(UI_PATH)
.PHONY: assets-compress
assets-compress:
@echo '>> compressing assets'
scripts/compress_assets.sh
.PHONY: test
# If we only want to only test go code we have to change the test target
@ -80,7 +79,7 @@ tarball: npm_licenses common-tarball
docker: npm_licenses common-docker
.PHONY: build
build: assets common-build
build: assets assets-compress common-build
.PHONY: bench_tsdb
bench_tsdb: $(PROMU)


@ -83,7 +83,7 @@ PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_
GOLANGCI_LINT :=
GOLANGCI_LINT_OPTS ?=
GOLANGCI_LINT_VERSION ?= v1.42.0
GOLANGCI_LINT_VERSION ?= v1.44.2
# golangci-lint only supports linux, darwin and windows platforms on i386/amd64.
# windows isn't included here because of the path separator being different.
ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))


@ -56,7 +56,7 @@ Prometheus will now be reachable at http://localhost:9090/.
### Building from source
To build Prometheus from source code, you need:
* Go [version 1.14 or greater](https://golang.org/doc/install).
* Go [version 1.16 or greater](https://golang.org/doc/install).
* NodeJS [version 16 or greater](https://nodejs.org/).
* npm [version 7 or greater](https://www.npmjs.com/).


@ -37,7 +37,9 @@ Release cadence of first pre-releases being cut is 6 weeks.
| v2.30 | 2021-09-08 | Ganesh Vernekar (GitHub: @codesome) |
| v2.31 | 2021-10-20 | Julien Pivotto (GitHub: @roidelapluie) |
| v2.32 | 2021-12-01 | Julius Volz (GitHub: @juliusv) |
| v2.33 | 2022-01-12 | **searching for volunteer** |
| v2.33 | 2022-01-12 | Björn Rabenstein (GitHub: @beorn7) |
| v2.34 | 2022-02-23 | Chris Marchbanks (GitHub: @csmarchbanks) |
| v2.35 | 2022-04-06 | **searching for volunteer** |
If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.
@ -70,7 +72,7 @@ If a bug fix got accidentally merged into main after non-bug-fix changes in main
Maintaining the release branches for older minor releases happens on a best effort basis.
### 0. Updating dependencies
### 0. Updating dependencies and promoting/demoting experimental features
A few days before a major or minor release, consider updating the dependencies.
@ -85,6 +87,10 @@ you can skip the dependency update or only update select dependencies. In such a
case, you have to create an issue or pull request in the GitHub project for
later follow-up.
This is also a good time to consider any experimental features and feature
flags for promotion to stable or for deprecation or ultimately removal. Do any
of these in pull requests, one per feature.
#### Updating Go dependencies
```
@ -155,3 +161,5 @@ For release candidate versions (`v2.16.0-rc.0`), run the benchmark for 3 days us
If the release has happened in the latest release branch, merge the changes into main.
Once the binaries have been uploaded, announce the release on `prometheus-announce@googlegroups.com`. (Please do not use `prometheus-users@googlegroups.com` for announcements anymore.) Check out previous announcement mails for inspiration.
Finally, in case there is no release shepherd listed for the next release yet, find a volunteer.


@ -1 +1 @@
2.32.1
2.34.0


@ -17,7 +17,6 @@ package main
import (
"context"
"fmt"
"io"
"math"
"math/bits"
"net"
@ -27,7 +26,6 @@ import (
"os"
"os/signal"
"path/filepath"
"regexp"
"runtime"
"strings"
"sync"
@ -37,9 +35,9 @@ import (
"github.com/alecthomas/units"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/regexp"
conntrack "github.com/mwitkow/go-conntrack"
"github.com/oklog/run"
"github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
@ -48,8 +46,6 @@ import (
"github.com/prometheus/common/version"
toolkit_web "github.com/prometheus/exporter-toolkit/web"
toolkit_webflag "github.com/prometheus/exporter-toolkit/web/kingpinflag"
jcfg "github.com/uber/jaeger-client-go/config"
jprom "github.com/uber/jaeger-lib/metrics/prometheus"
"go.uber.org/atomic"
kingpin "gopkg.in/alecthomas/kingpin.v2"
klog "k8s.io/klog"
@ -70,6 +66,7 @@ import (
"github.com/prometheus/prometheus/scrape"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/storage/remote"
"github.com/prometheus/prometheus/tracing"
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/tsdb/agent"
"github.com/prometheus/prometheus/util/logging"
@ -150,10 +147,9 @@ type flagConfig struct {
featureList []string
// These options are extracted from featureList
// for ease of use.
enablePromQLAtModifier bool
enablePromQLNegativeOffset bool
enableExpandExternalLabels bool
enableNewSDManager bool
enablePerStepStats bool
prometheusURL string
corsRegexString string
@ -167,15 +163,9 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
opts := strings.Split(f, ",")
for _, o := range opts {
switch o {
case "promql-at-modifier":
c.enablePromQLAtModifier = true
level.Info(logger).Log("msg", "Experimental promql-at-modifier enabled")
case "promql-negative-offset":
c.enablePromQLNegativeOffset = true
level.Info(logger).Log("msg", "Experimental promql-negative-offset enabled")
case "remote-write-receiver":
c.web.RemoteWriteReceiver = true
level.Info(logger).Log("msg", "Experimental remote-write-receiver enabled")
c.web.EnableRemoteWriteReceiver = true
level.Warn(logger).Log("msg", "Remote write receiver enabled via feature flag remote-write-receiver. This is DEPRECATED. Use --web.enable-remote-write-receiver.")
case "expand-external-labels":
c.enableExpandExternalLabels = true
level.Info(logger).Log("msg", "Experimental expand-external-labels enabled")
@ -194,8 +184,13 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
case "agent":
agentMode = true
level.Info(logger).Log("msg", "Experimental agent mode enabled.")
case "promql-per-step-stats":
c.enablePerStepStats = true
level.Info(logger).Log("msg", "Experimental per-step statistics reporting")
case "":
continue
case "promql-at-modifier", "promql-negative-offset":
level.Warn(logger).Log("msg", "This option for --enable-feature is now permanently enabled and therefore a no-op.", "option", o)
default:
level.Warn(logger).Log("msg", "Unknown option for --enable-feature", "option", o)
}
@ -264,6 +259,9 @@ func main() {
a.Flag("web.enable-admin-api", "Enable API endpoints for admin control actions.").
Default("false").BoolVar(&cfg.web.EnableAdminAPI)
a.Flag("web.enable-remote-write-receiver", "Enable API endpoint accepting remote write requests.").
Default("false").BoolVar(&cfg.web.EnableRemoteWriteReceiver)
a.Flag("web.console.templates", "Path to the console template directory, available at /consoles.").
Default("consoles").StringVar(&cfg.web.ConsoleTemplatesPath)
@ -300,7 +298,7 @@ func main() {
serverOnlyFlag(a, "storage.tsdb.retention.time", "How long to retain samples in storage. When this flag is set it overrides \"storage.tsdb.retention\". If neither this flag nor \"storage.tsdb.retention\" nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+". Units Supported: y, w, d, h, m, s, ms.").
SetValue(&newFlagRetentionDuration)
serverOnlyFlag(a, "storage.tsdb.retention.size", "Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: \"512MB\".").
serverOnlyFlag(a, "storage.tsdb.retention.size", "Maximum number of bytes that can be stored for blocks. A unit is required, supported units: B, KB, MB, GB, TB, PB, EB. Ex: \"512MB\". Based on powers-of-2, so 1KB is 1024B.").
BytesVar(&cfg.tsdb.MaxBytes)
serverOnlyFlag(a, "storage.tsdb.no-lockfile", "Do not create lockfile in data directory.").
@ -312,6 +310,9 @@ func main() {
serverOnlyFlag(a, "storage.tsdb.wal-compression", "Compress the tsdb WAL.").
Hidden().Default("true").BoolVar(&cfg.tsdb.WALCompression)
serverOnlyFlag(a, "storage.tsdb.head-chunks-write-queue-size", "Size of the queue through which head chunks are written to the disk to be m-mapped, 0 disables the queue completely. Experimental.").
Default("0").IntVar(&cfg.tsdb.HeadChunksWriteQueueSize)
agentOnlyFlag(a, "storage.agent.path", "Base path for metrics storage.").
Default("data-agent/").StringVar(&cfg.agentStoragePath)
@ -382,7 +383,7 @@ func main() {
serverOnlyFlag(a, "query.max-samples", "Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return.").
Default("50000000").IntVar(&cfg.queryMaxSamples)
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-at-modifier, promql-negative-offset, remote-write-receiver, extra-scrape-metrics, new-service-discovery-manager. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-at-modifier, promql-negative-offset, promql-per-step-stats, remote-write-receiver (DEPRECATED), extra-scrape-metrics, new-service-discovery-manager. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
Default("").StringsVar(&cfg.featureList)
promlogflag.AddFlags(a, &cfg.promlogConfig)
@ -435,7 +436,11 @@ func main() {
// Throw error for invalid config before starting other components.
var cfgFile *config.Config
if cfgFile, err = config.LoadFile(cfg.configFile, agentMode, false, log.NewNopLogger()); err != nil {
level.Error(logger).Log("msg", fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "err", err)
absPath, pathErr := filepath.Abs(cfg.configFile)
if pathErr != nil {
absPath = cfg.configFile
}
level.Error(logger).Log("msg", fmt.Sprintf("Error loading config (--config.file=%s)", cfg.configFile), "file", absPath, "err", err)
os.Exit(2)
}
if cfg.tsdb.EnableExemplarStorage {
@ -554,6 +559,7 @@ func main() {
var (
scrapeManager = scrape.NewManager(&cfg.scrape, log.With(logger, "component", "scrape manager"), fanoutStorage)
tracingManager = tracing.NewManager(logger)
queryEngine *promql.Engine
ruleManager *rules.Manager
@ -568,8 +574,11 @@ func main() {
ActiveQueryTracker: promql.NewActiveQueryTracker(localStoragePath, cfg.queryConcurrency, log.With(logger, "component", "activeQueryTracker")),
LookbackDelta: time.Duration(cfg.lookbackDelta),
NoStepSubqueryIntervalFn: noStepSubqueryInterval.Get,
EnableAtModifier: cfg.enablePromQLAtModifier,
EnableNegativeOffset: cfg.enablePromQLNegativeOffset,
// EnableAtModifier and EnableNegativeOffset have to be
// always on for regular PromQL as of Prometheus v2.33.
EnableAtModifier: true,
EnableNegativeOffset: true,
EnablePerStepStats: cfg.enablePerStepStats,
}
queryEngine = promql.NewEngine(opts)
@ -718,6 +727,9 @@ func main() {
externalURL,
)
},
}, {
name: "tracing",
reloader: tracingManager.ApplyConfig,
},
}
@ -744,13 +756,6 @@ func main() {
})
}
closer, err := initTracing(logger)
if err != nil {
level.Error(logger).Log("msg", "Unable to init tracing", "err", err)
os.Exit(2)
}
defer closer.Close()
listener, err := webHandler.Listener()
if err != nil {
level.Error(logger).Log("msg", "Unable to start web listener", "err", err)
@ -838,6 +843,19 @@ func main() {
},
)
}
{
// Tracing manager.
g.Add(
func() error {
<-reloadReady.C
tracingManager.Run()
return nil
},
func(err error) {
tracingManager.Stop()
},
)
}
{
// Reload handler.
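The tracing manager joins the same github.com/oklog/run actor group as the other components above. A minimal sketch of that pattern, with illustrative names (not the repository's): each actor contributes an execute and an interrupt function, and the group shuts everything down as soon as any one actor returns.

```go
package main

import (
	"errors"
	"fmt"

	"github.com/oklog/run"
)

func main() {
	var g run.Group
	stop := make(chan struct{})
	// Long-running actor, analogous to tracingManager.Run()/Stop() above.
	g.Add(func() error {
		<-stop // execute: block until interrupted
		return nil
	}, func(error) {
		close(stop) // interrupt: unblock execute
	})
	// An actor that returns immediately, triggering interrupts for all others.
	g.Add(func() error {
		return errors.New("trigger shutdown")
	}, func(error) {})
	fmt.Println(g.Run()) // prints the first error: "trigger shutdown"
}
```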
@ -1479,6 +1497,7 @@ type tsdbOptions struct {
NoLockfile bool
AllowOverlappingBlocks bool
WALCompression bool
HeadChunksWriteQueueSize int
StripeSize int
MinBlockDuration model.Duration
MaxBlockDuration model.Duration
@ -1496,6 +1515,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options {
NoLockfile: opts.NoLockfile,
AllowOverlappingBlocks: opts.AllowOverlappingBlocks,
WALCompression: opts.WALCompression,
HeadChunksWriteQueueSize: opts.HeadChunksWriteQueueSize,
StripeSize: opts.StripeSize,
MinBlockDuration: int64(time.Duration(opts.MinBlockDuration) / time.Millisecond),
MaxBlockDuration: int64(time.Duration(opts.MaxBlockDuration) / time.Millisecond),
@ -1528,47 +1548,6 @@ func (opts agentOptions) ToAgentOptions() agent.Options {
}
}
func initTracing(logger log.Logger) (io.Closer, error) {
// Set tracing configuration defaults.
cfg := &jcfg.Configuration{
ServiceName: "prometheus",
Disabled: true,
}
// Available options can be seen here:
// https://github.com/jaegertracing/jaeger-client-go#environment-variables
cfg, err := cfg.FromEnv()
if err != nil {
return nil, errors.Wrap(err, "unable to get tracing config from environment")
}
jLogger := jaegerLogger{logger: log.With(logger, "component", "tracing")}
tracer, closer, err := cfg.NewTracer(
jcfg.Logger(jLogger),
jcfg.Metrics(jprom.New()),
)
if err != nil {
return nil, errors.Wrap(err, "unable to init tracing")
}
opentracing.SetGlobalTracer(tracer)
return closer, nil
}
type jaegerLogger struct {
logger log.Logger
}
func (l jaegerLogger) Error(msg string) {
level.Error(l.logger).Log("msg", msg)
}
func (l jaegerLogger) Infof(msg string, args ...interface{}) {
keyvals := []interface{}{"msg", fmt.Sprintf(msg, args...)}
level.Info(l.logger).Log(keyvals...)
}
// discoveryManager interfaces the discovery manager. This is used to keep using
// the manager that restarts SD's on reload for a few releases until we feel
// the new manager can be enabled for all users.
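With the Jaeger-specific initTracing removed, tracing is configured at runtime through the new tracing.Manager and the `tracing` section of the config file. The manager itself is added elsewhere in this merge; below is only a hedged sketch of the kind of OpenTelemetry wiring involved, using the upstream otel SDK and an illustrative setupTracing function, not the repository's implementation.

```go
package main

import (
	"context"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
	sdktrace "go.opentelemetry.io/otel/sdk/trace"
)

// setupTracing builds an OTLP gRPC exporter for the configured endpoint,
// installs a global tracer provider, and returns a shutdown hook.
func setupTracing(ctx context.Context, endpoint string, insecure bool) (func(context.Context) error, error) {
	opts := []otlptracegrpc.Option{otlptracegrpc.WithEndpoint(endpoint)}
	if insecure {
		opts = append(opts, otlptracegrpc.WithInsecure())
	}
	exp, err := otlptracegrpc.New(ctx, opts...)
	if err != nil {
		return nil, err
	}
	tp := sdktrace.NewTracerProvider(sdktrace.WithBatcher(exp))
	otel.SetTracerProvider(tp)
	return tp.Shutdown, nil
}

func main() {
	ctx := context.Background()
	shutdown, err := setupTracing(ctx, "localhost:4317", true)
	if err != nil {
		panic(err)
	}
	defer shutdown(ctx)
}
```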


@ -18,6 +18,7 @@ import (
"context"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"math"
"net/http"
@ -27,6 +28,7 @@ import (
"sort"
"strconv"
"strings"
"text/tabwriter"
"time"
"github.com/go-kit/log"
@ -43,6 +45,9 @@ import (
"gopkg.in/alecthomas/kingpin.v2"
yaml "gopkg.in/yaml.v2"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/file"
@ -56,6 +61,13 @@ import (
"github.com/prometheus/prometheus/scrape"
)
const (
successExitCode = 0
failureExitCode = 1
// Exit code 3 is used for "one or more lint issues detected".
lintErrExitCode = 3
)
func main() {
app := kingpin.New(filepath.Base(os.Args[0]), "Tooling for the Prometheus monitoring system.").UsageWriter(os.Stdout)
app.Version(version.Print("promtool"))
@ -88,6 +100,7 @@ func main() {
).Required().ExistingFiles()
checkMetricsCmd := checkCmd.Command("metrics", checkMetricsUsage)
checkMetricsExtended := checkCmd.Flag("extended", "Print extended information related to the cardinality of the metrics.").Bool()
agentMode := checkConfigCmd.Flag("agent", "Check config file for Prometheus in Agent mode.").Bool()
queryCmd := app.Command("query", "Run query against a Prometheus server.")
@ -190,17 +203,14 @@ func main() {
p = &promqlPrinter{}
}
var queryOpts promql.LazyLoaderOpts
for _, f := range *featureList {
opts := strings.Split(f, ",")
for _, o := range opts {
switch o {
case "promql-at-modifier":
queryOpts.EnableAtModifier = true
case "promql-negative-offset":
queryOpts.EnableNegativeOffset = true
case "":
continue
case "promql-at-modifier", "promql-negative-offset":
fmt.Printf(" WARNING: Option for --enable-feature is a no-op after promotion to a stable feature: %q\n", o)
default:
fmt.Printf(" WARNING: Unknown option for --enable-feature: %q\n", o)
}
@ -221,7 +231,7 @@ func main() {
os.Exit(CheckRules(*ruleFiles...))
case checkMetricsCmd.FullCommand():
os.Exit(CheckMetrics())
os.Exit(CheckMetrics(*checkMetricsExtended))
case queryInstantCmd.FullCommand():
os.Exit(QueryInstant(*queryInstantServer, *queryInstantExpr, *queryInstantTime, p))
@ -245,7 +255,13 @@ func main() {
os.Exit(QueryLabels(*queryLabelsServer, *queryLabelsName, *queryLabelsBegin, *queryLabelsEnd, p))
case testRulesCmd.FullCommand():
os.Exit(RulesUnitTest(queryOpts, *testRulesFiles...))
os.Exit(RulesUnitTest(
promql.LazyLoaderOpts{
EnableAtModifier: true,
EnableNegativeOffset: true,
},
*testRulesFiles...),
)
case tsdbBenchWriteCmd.FullCommand():
os.Exit(checkErr(benchmarkWrite(*benchWriteOutPath, *benchSamplesFile, *benchWriteNumMetrics, *benchWriteNumScrapes)))
@ -298,9 +314,9 @@ func CheckConfig(agentMode, checkSyntaxOnly bool, files ...string) int {
}
}
if failed {
return 1
return failureExitCode
}
return 0
return successExitCode
}
// CheckWebConfig validates web configuration files.
@ -316,9 +332,9 @@ func CheckWebConfig(files ...string) int {
fmt.Fprintln(os.Stderr, f, "SUCCESS")
}
if failed {
return 1
return failureExitCode
}
return 0
return successExitCode
}
func checkFileExists(fn string) error {
@ -521,9 +537,9 @@ func CheckRules(files ...string) int {
fmt.Println()
}
if failed {
return 1
return failureExitCode
}
return 0
return successExitCode
}
func checkRules(filename string) (int, []error) {
@ -531,7 +547,7 @@ func checkRules(filename string) (int, []error) {
rgs, errs := rulefmt.ParseFile(filename)
if errs != nil {
return 0, errs
return successExitCode, errs
}
numRules := 0
@ -622,12 +638,14 @@ $ curl -s http://localhost:9090/metrics | promtool check metrics
`)
// CheckMetrics performs a linting pass on input metrics.
func CheckMetrics() int {
l := promlint.New(os.Stdin)
func CheckMetrics(extended bool) int {
var buf bytes.Buffer
tee := io.TeeReader(os.Stdin, &buf)
l := promlint.New(tee)
problems, err := l.Lint()
if err != nil {
fmt.Fprintln(os.Stderr, "error while linting:", err)
return 1
return failureExitCode
}
for _, p := range problems {
@ -635,10 +653,71 @@ func CheckMetrics() int {
}
if len(problems) > 0 {
return 3
return lintErrExitCode
}
return 0
if extended {
stats, total, err := checkMetricsExtended(&buf)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return failureExitCode
}
w := tabwriter.NewWriter(os.Stdout, 4, 4, 4, ' ', tabwriter.TabIndent)
fmt.Fprintf(w, "Metric\tCardinality\tPercentage\t\n")
for _, stat := range stats {
fmt.Fprintf(w, "%s\t%d\t%.2f%%\t\n", stat.name, stat.cardinality, stat.percentage*100)
}
fmt.Fprintf(w, "Total\t%d\t%.f%%\t\n", total, 100.)
w.Flush()
}
return successExitCode
}
type metricStat struct {
name string
cardinality int
percentage float64
}
func checkMetricsExtended(r io.Reader) ([]metricStat, int, error) {
p := expfmt.TextParser{}
metricFamilies, err := p.TextToMetricFamilies(r)
if err != nil {
return nil, 0, fmt.Errorf("error while parsing text to metric families: %w", err)
}
var total int
stats := make([]metricStat, 0, len(metricFamilies))
for _, mf := range metricFamilies {
var cardinality int
switch mf.GetType() {
case dto.MetricType_COUNTER, dto.MetricType_GAUGE, dto.MetricType_UNTYPED:
cardinality = len(mf.Metric)
case dto.MetricType_HISTOGRAM:
// Histogram metrics include sum, count, and buckets.
buckets := len(mf.Metric[0].Histogram.Bucket)
cardinality = len(mf.Metric) * (2 + buckets)
case dto.MetricType_SUMMARY:
// Summary metrics include sum, count, and quantiles.
quantiles := len(mf.Metric[0].Summary.Quantile)
cardinality = len(mf.Metric) * (2 + quantiles)
default:
cardinality = len(mf.Metric)
}
stats = append(stats, metricStat{name: mf.GetName(), cardinality: cardinality})
total += cardinality
}
for i := range stats {
stats[i].percentage = float64(stats[i].cardinality) / float64(total)
}
sort.SliceStable(stats, func(i, j int) bool {
return stats[i].cardinality > stats[j].cardinality
})
return stats, total, nil
}
// QueryInstant performs an instant query against a Prometheus server.
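CheckMetrics above now reads its input twice: io.TeeReader copies everything the linter consumes from stdin into a buffer, and the --extended pass re-reads that buffer. A self-contained sketch of the trick, with an illustrative input string:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"
)

func main() {
	var buf bytes.Buffer
	// Everything read through tee is also written into buf.
	tee := io.TeeReader(strings.NewReader("metric_a 1\n"), &buf)
	first, _ := io.ReadAll(tee)   // first pass (the linter) consumes the stream
	second, _ := io.ReadAll(&buf) // second pass (--extended) reads the copy
	fmt.Printf("%q %q\n", first, second) // "metric_a 1\n" "metric_a 1\n"
}
```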
@ -654,7 +733,7 @@ func QueryInstant(url *url.URL, query, evalTime string, p printer) int {
c, err := api.NewClient(config)
if err != nil {
fmt.Fprintln(os.Stderr, "error creating API client:", err)
return 1
return failureExitCode
}
eTime := time.Now()
@ -662,7 +741,7 @@ func QueryInstant(url *url.URL, query, evalTime string, p printer) int {
eTime, err = parseTime(evalTime)
if err != nil {
fmt.Fprintln(os.Stderr, "error parsing evaluation time:", err)
return 1
return failureExitCode
}
}
@ -678,7 +757,7 @@ func QueryInstant(url *url.URL, query, evalTime string, p printer) int {
p.printValue(val)
return 0
return successExitCode
}
// QueryRange performs a range query against a Prometheus server.
@ -703,7 +782,7 @@ func QueryRange(url *url.URL, headers map[string]string, query, start, end strin
c, err := api.NewClient(config)
if err != nil {
fmt.Fprintln(os.Stderr, "error creating API client:", err)
return 1
return failureExitCode
}
var stime, etime time.Time
@ -714,7 +793,7 @@ func QueryRange(url *url.URL, headers map[string]string, query, start, end strin
etime, err = parseTime(end)
if err != nil {
fmt.Fprintln(os.Stderr, "error parsing end time:", err)
return 1
return failureExitCode
}
}
@ -724,13 +803,13 @@ func QueryRange(url *url.URL, headers map[string]string, query, start, end strin
stime, err = parseTime(start)
if err != nil {
fmt.Fprintln(os.Stderr, "error parsing start time:", err)
return 1
return failureExitCode
}
}
if !stime.Before(etime) {
fmt.Fprintln(os.Stderr, "start time is not before end time")
return 1
return failureExitCode
}
if step == 0 {
@ -751,7 +830,7 @@ func QueryRange(url *url.URL, headers map[string]string, query, start, end strin
}
p.printValue(val)
return 0
return successExitCode
}
// QuerySeries queries for a series against a Prometheus server.
@ -767,13 +846,13 @@ func QuerySeries(url *url.URL, matchers []string, start, end string, p printer)
c, err := api.NewClient(config)
if err != nil {
fmt.Fprintln(os.Stderr, "error creating API client:", err)
return 1
return failureExitCode
}
stime, etime, err := parseStartTimeAndEndTime(start, end)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return 1
return failureExitCode
}
// Run query against client.
@ -787,7 +866,7 @@ func QuerySeries(url *url.URL, matchers []string, start, end string, p printer)
}
p.printSeries(val)
return 0
return successExitCode
}
// QueryLabels queries for label values against a Prometheus server.
@ -803,13 +882,13 @@ func QueryLabels(url *url.URL, name, start, end string, p printer) int {
c, err := api.NewClient(config)
if err != nil {
fmt.Fprintln(os.Stderr, "error creating API client:", err)
return 1
return failureExitCode
}
stime, etime, err := parseStartTimeAndEndTime(start, end)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return 1
return failureExitCode
}
// Run query against client.
@ -826,7 +905,7 @@ func QueryLabels(url *url.URL, name, start, end string, p printer) int {
}
p.printLabelValues(val)
return 0
return successExitCode
}
func handleAPIError(err error) int {
@ -837,7 +916,7 @@ func handleAPIError(err error) int {
fmt.Fprintln(os.Stderr, "query error:", err)
}
return 1
return failureExitCode
}
func parseStartTimeAndEndTime(start, end string) (time.Time, time.Time, error) {
@ -929,9 +1008,9 @@ func debugPprof(url string) int {
endPointGroups: pprofEndpoints,
}); err != nil {
fmt.Fprintln(os.Stderr, "error completing debug command:", err)
return 1
return failureExitCode
}
return 0
return successExitCode
}
func debugMetrics(url string) int {
@ -941,9 +1020,9 @@ func debugMetrics(url string) int {
endPointGroups: metricsEndpoints,
}); err != nil {
fmt.Fprintln(os.Stderr, "error completing debug command:", err)
return 1
return failureExitCode
}
return 0
return successExitCode
}
func debugAll(url string) int {
@ -953,9 +1032,9 @@ func debugAll(url string) int {
endPointGroups: allEndpoints,
}); err != nil {
fmt.Fprintln(os.Stderr, "error completing debug command:", err)
return 1
return failureExitCode
}
return 0
return successExitCode
}
type printer interface {


@ -18,6 +18,7 @@ import (
"net/http"
"net/http/httptest"
"net/url"
"os"
"runtime"
"strings"
"testing"
@ -322,3 +323,39 @@ func TestAuthorizationConfig(t *testing.T) {
})
}
}
func TestCheckMetricsExtended(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("Skipping on windows")
}
f, err := os.Open("testdata/metrics-test.prom")
require.NoError(t, err)
defer f.Close()
stats, total, err := checkMetricsExtended(f)
require.NoError(t, err)
require.Equal(t, 27, total)
require.Equal(t, []metricStat{
{
name: "prometheus_tsdb_compaction_chunk_size_bytes",
cardinality: 15,
percentage: float64(15) / float64(27),
},
{
name: "go_gc_duration_seconds",
cardinality: 7,
percentage: float64(7) / float64(27),
},
{
name: "net_conntrack_dialer_conn_attempted_total",
cardinality: 4,
percentage: float64(4) / float64(27),
},
{
name: "go_info",
cardinality: 1,
percentage: float64(1) / float64(27),
},
}, stats)
}


@ -95,7 +95,8 @@ func (importer *ruleImporter) importAll(ctx context.Context) (errs []error) {
// importRule queries a prometheus API to evaluate rules at times in the past.
func (importer *ruleImporter) importRule(ctx context.Context, ruleExpr, ruleName string, ruleLabels labels.Labels, start, end time.Time,
maxBlockDuration int64, grp *rules.Group) (err error) {
maxBlockDuration int64, grp *rules.Group,
) (err error) {
blockDuration := getCompatibleBlockDuration(maxBlockDuration)
startInMs := start.Unix() * int64(time.Second/time.Millisecond)
endInMs := end.Unix() * int64(time.Second/time.Millisecond)


@ -43,7 +43,7 @@ func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration) int {
cfg, err := config.LoadFile(sdConfigFiles, false, false, logger)
if err != nil {
fmt.Fprintln(os.Stderr, "Cannot load config", err)
return 2
return failureExitCode
}
var scrapeConfig *config.ScrapeConfig
@ -63,7 +63,7 @@ func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration) int {
for _, job := range jobs {
fmt.Fprintf(os.Stderr, "\t%s\n", job)
}
return 1
return failureExitCode
}
targetGroupChan := make(chan []*targetgroup.Group)
@ -74,7 +74,7 @@ func CheckSD(sdConfigFiles, sdJobName string, sdTimeout time.Duration) int {
d, err := cfg.NewDiscoverer(discovery.DiscovererOptions{Logger: logger})
if err != nil {
fmt.Fprintln(os.Stderr, "Could not create new discoverer", err)
return 2
return failureExitCode
}
go d.Run(ctx, targetGroupChan)
}
@ -100,11 +100,11 @@ outerLoop:
res, err := json.MarshalIndent(results, "", " ")
if err != nil {
fmt.Fprintf(os.Stderr, "Could not marshal result json: %s", err)
return 2
return failureExitCode
}
fmt.Printf("%s", res)
return 0
return successExitCode
}
func getSDCheckResult(targetGroups []*targetgroup.Group, scrapeConfig *config.ScrapeConfig) []sdCheckResult {

cmd/promtool/testdata/long-period.yml (new file, 34 lines)

@ -0,0 +1,34 @@
# Evaluate once every 100d to avoid this taking too long.
evaluation_interval: 100d
rule_files:
- rules.yml
tests:
- interval: 100d
input_series:
- series: test
# Max time in time.Duration is 106751d from 1970 (2^63/10^9), i.e. 2262.
# We use the nearest 100 days to that to ensure the unit tests can fully
# cover the expected range.
values: '0+1x1067'
promql_expr_test:
- expr: timestamp(test)
eval_time: 0m
exp_samples:
- value: 0
- expr: test
eval_time: 100d # one evaluation_interval.
exp_samples:
- labels: test
value: 1
- expr: timestamp(test)
eval_time: 106700d
exp_samples:
- value: 9218880000 # 106700d -> seconds.
- expr: fixed_data
eval_time: 106700d
exp_samples:
- labels: fixed_data
value: 1
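The bound in the comment above is quick to verify: time.Duration counts nanoseconds in an int64, so the largest representable span is 2^63-1 ns, roughly 106751 days, and 106700d is 106700 * 86400 = 9218880000 seconds, the value asserted in the timestamp() test. A small check:

```go
package main

import (
	"fmt"
	"math"
	"time"
)

func main() {
	// Maximum whole days representable by time.Duration (int64 nanoseconds).
	maxDays := int64(time.Duration(math.MaxInt64) / (24 * time.Hour))
	var secs int64 = 106700 * 24 * 60 * 60 // 106700d in seconds
	fmt.Println(maxDays, secs)             // 106751 9218880000
}
```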

cmd/promtool/testdata/metrics-test.prom (new file, 35 lines)

@ -0,0 +1,35 @@
# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} 2.391e-05
go_gc_duration_seconds{quantile="0.25"} 9.4402e-05
go_gc_duration_seconds{quantile="0.5"} 0.000118953
go_gc_duration_seconds{quantile="0.75"} 0.000145884
go_gc_duration_seconds{quantile="1"} 0.005201208
go_gc_duration_seconds_sum 0.036134048
go_gc_duration_seconds_count 232
# HELP prometheus_tsdb_compaction_chunk_size_bytes Final size of chunks on their first compaction
# TYPE prometheus_tsdb_compaction_chunk_size_bytes histogram
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="32"} 662
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="48"} 1460
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="72"} 2266
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="108"} 3958
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="162"} 4861
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="243"} 5721
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="364.5"} 10493
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="546.75"} 12464
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="820.125"} 13254
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1230.1875"} 13699
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="1845.28125"} 13806
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="2767.921875"} 13852
prometheus_tsdb_compaction_chunk_size_bytes_bucket{le="+Inf"} 13867
prometheus_tsdb_compaction_chunk_size_bytes_sum 3.886707e+06
prometheus_tsdb_compaction_chunk_size_bytes_count 13867
# HELP net_conntrack_dialer_conn_attempted_total Total number of connections attempted by the dialer with the given name.
# TYPE net_conntrack_dialer_conn_attempted_total counter
net_conntrack_dialer_conn_attempted_total{dialer_name="blackbox"} 5210
net_conntrack_dialer_conn_attempted_total{dialer_name="default"} 0
net_conntrack_dialer_conn_attempted_total{dialer_name="node"} 21
net_conntrack_dialer_conn_attempted_total{dialer_name="prometheus"} 21
# HELP go_info Information about the Go environment.
# TYPE go_info gauge
go_info{version="go1.17"} 1
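This fixture is what TestCheckMetricsExtended parses, and the expected cardinalities follow directly from the rules in checkMetricsExtended: the histogram above has 13 buckets (including +Inf) and the summary 5 quantiles. A worked check of the arithmetic:

```go
package main

import "fmt"

func main() {
	histogram := 1 * (2 + 13) // _sum, _count, 13 buckets -> 15
	summary := 1 * (2 + 5)    // _sum, _count, 5 quantiles -> 7
	counter := 4              // one series per dialer_name
	gauge := 1                // go_info
	fmt.Println(histogram + summary + counter + gauge) // 27, the asserted total
}
```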


@ -18,7 +18,6 @@ import (
"context"
"fmt"
"io"
"io/ioutil"
"math"
"os"
"path/filepath"
@ -71,7 +70,7 @@ func benchmarkWrite(outPath, samplesFile string, numMetrics, numScrapes int) err
logger: log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr)),
}
if b.outPath == "" {
dir, err := ioutil.TempDir("", "tsdb_bench")
dir, err := os.MkdirTemp("", "tsdb_bench")
if err != nil {
return err
}


@ -56,9 +56,9 @@ func RulesUnitTest(queryOpts promql.LazyLoaderOpts, files ...string) int {
fmt.Println()
}
if failed {
return 1
return failureExitCode
}
return 0
return successExitCode
}
func ruleUnitTest(filename string, queryOpts promql.LazyLoaderOpts) []error {
@ -435,7 +435,7 @@ func (tg *testGroup) maxEvalTime() time.Duration {
}
func query(ctx context.Context, qs string, t time.Time, engine *promql.Engine, qu storage.Queryable) (promql.Vector, error) {
q, err := engine.NewInstantQuery(qu, qs, t)
q, err := engine.NewInstantQuery(qu, nil, qs, t)
if err != nil {
return nil, err
}


@ -36,6 +36,13 @@ func TestRulesUnitTest(t *testing.T) {
},
want: 0,
},
{
name: "Long evaluation interval",
args: args{
files: []string{"./testdata/long-period.yml"},
},
want: 0,
},
{
name: "Bad input series",
args: args{


@ -19,13 +19,13 @@ import (
"net/url"
"os"
"path/filepath"
"regexp"
"strings"
"time"
"github.com/alecthomas/units"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/regexp"
"github.com/pkg/errors"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
@ -83,6 +83,9 @@ func Load(s string, expandExternalLabels bool, logger log.Logger) (*Config, erro
for i, v := range cfg.GlobalConfig.ExternalLabels {
newV := os.Expand(v.Value, func(s string) string {
if s == "$" {
return "$"
}
if v := os.Getenv(s); v != "" {
return v
}
@ -202,6 +205,7 @@ var (
DefaultRemoteReadConfig = RemoteReadConfig{
RemoteTimeout: model.Duration(1 * time.Minute),
HTTPClientConfig: config.DefaultHTTPClientConfig,
FilterExternalLabels: true,
}
// DefaultStorageConfig is the default TSDB/Exemplar storage configuration.
@ -221,6 +225,7 @@ type Config struct {
RuleFiles []string `yaml:"rule_files,omitempty"`
ScrapeConfigs []*ScrapeConfig `yaml:"scrape_configs,omitempty"`
StorageConfig StorageConfig `yaml:"storage,omitempty"`
TracingConfig TracingConfig `yaml:"tracing,omitempty"`
RemoteWriteConfigs []*RemoteWriteConfig `yaml:"remote_write,omitempty"`
RemoteReadConfigs []*RemoteReadConfig `yaml:"remote_read,omitempty"`
@ -230,6 +235,7 @@ type Config struct {
func (c *Config) SetDirectory(dir string) {
c.GlobalConfig.SetDirectory(dir)
c.AlertingConfig.SetDirectory(dir)
c.TracingConfig.SetDirectory(dir)
for i, file := range c.RuleFiles {
c.RuleFiles[i] = config.JoinDir(dir, file)
}
@ -499,6 +505,75 @@ type StorageConfig struct {
ExemplarsConfig *ExemplarsConfig `yaml:"exemplars,omitempty"`
}
type TracingClientType string
const (
TracingClientHTTP TracingClientType = "http"
TracingClientGRPC TracingClientType = "grpc"
GzipCompression = "gzip"
)
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (t *TracingClientType) UnmarshalYAML(unmarshal func(interface{}) error) error {
*t = TracingClientType("")
type plain TracingClientType
if err := unmarshal((*plain)(t)); err != nil {
return err
}
if *t != TracingClientHTTP && *t != TracingClientGRPC {
return fmt.Errorf("expected tracing client type to be to be %s or %s, but got %s",
TracingClientHTTP, TracingClientGRPC, *t,
)
}
return nil
}
// TracingConfig configures the tracing options.
type TracingConfig struct {
ClientType TracingClientType `yaml:"client_type,omitempty"`
Endpoint string `yaml:"endpoint,omitempty"`
SamplingFraction float64 `yaml:"sampling_fraction,omitempty"`
Insecure bool `yaml:"insecure,omitempty"`
TLSConfig config.TLSConfig `yaml:"tls_config,omitempty"`
Headers map[string]string `yaml:"headers,omitempty"`
Compression string `yaml:"compression,omitempty"`
Timeout model.Duration `yaml:"timeout,omitempty"`
}
// SetDirectory joins any relative file paths with dir.
func (t *TracingConfig) SetDirectory(dir string) {
t.TLSConfig.SetDirectory(dir)
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (t *TracingConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
*t = TracingConfig{
ClientType: TracingClientGRPC,
}
type plain TracingConfig
if err := unmarshal((*plain)(t)); err != nil {
return err
}
if err := validateHeadersForTracing(t.Headers); err != nil {
return err
}
if t.Endpoint == "" {
return errors.New("tracing endpoint must be set")
}
if t.Compression != "" && t.Compression != GzipCompression {
return fmt.Errorf("invalid compression type %s provided, valid options: %s",
t.Compression, GzipCompression)
}
return nil
}
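Both UnmarshalYAML methods above use the common `type plain` idiom: the local alias drops the type's method set, so decoding into (*plain)(t) runs the default unmarshaller instead of recursing into UnmarshalYAML, while defaults are set before the call and validation after it. A minimal sketch with an illustrative Config type:

```go
package main

import (
	"errors"
	"fmt"

	"gopkg.in/yaml.v2"
)

type Config struct {
	Endpoint string `yaml:"endpoint"`
}

func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
	*c = Config{Endpoint: "localhost:4317"} // defaults first
	type plain Config                       // alias: same fields, no methods
	if err := unmarshal((*plain)(c)); err != nil {
		return err
	}
	if c.Endpoint == "" { // validation last
		return errors.New("endpoint must be set")
	}
	return nil
}

func main() {
	var c Config
	if err := yaml.Unmarshal([]byte("{}"), &c); err != nil {
		panic(err)
	}
	fmt.Println(c.Endpoint) // localhost:4317 (the default survived)
}
```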
// ExemplarsConfig configures runtime reloadable configuration options.
type ExemplarsConfig struct {
// MaxExemplars sets the size, in # of exemplars stored, of the single circular buffer used to store exemplars in memory.
@ -730,6 +805,18 @@ func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
return nil
}
func validateHeadersForTracing(headers map[string]string) error {
for header := range headers {
if strings.ToLower(header) == "authorization" {
return errors.New("custom authorization header configuration is not yet supported")
}
if _, ok := reservedHeaders[strings.ToLower(header)]; ok {
return errors.Errorf("%s is a reserved header. It must not be changed", header)
}
}
return nil
}
func validateHeaders(headers map[string]string) error {
for header := range headers {
if strings.ToLower(header) == "authorization" {
@ -793,6 +880,9 @@ type RemoteReadConfig struct {
// RequiredMatchers is an optional list of equality matchers which have to
// be present in a selector to query the remote read endpoint.
RequiredMatchers model.LabelSet `yaml:"required_matchers,omitempty"`
// Whether to use the external labels as selectors for the remote read endpoint.
FilterExternalLabels bool `yaml:"filter_external_labels,omitempty"`
}
// SetDirectory joins any relative file paths with dir.


@ -19,12 +19,12 @@ import (
"net/url"
"os"
"path/filepath"
"regexp"
"testing"
"time"
"github.com/alecthomas/units"
"github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@ -135,6 +135,7 @@ var expectedConf = &Config{
ReadRecent: true,
Name: "default",
HTTPClientConfig: config.DefaultHTTPClientConfig,
FilterExternalLabels: true,
},
{
URL: mustParseURL("http://remote3/read"),
@ -149,6 +150,7 @@ var expectedConf = &Config{
},
FollowRedirects: true,
},
FilterExternalLabels: true,
},
},
@ -985,6 +987,19 @@ var expectedConf = &Config{
},
},
},
TracingConfig: TracingConfig{
Endpoint: "localhost:4317",
ClientType: TracingClientGRPC,
Insecure: false,
Compression: "gzip",
Timeout: model.Duration(5 * time.Second),
Headers: map[string]string{"foo": "bar"},
TLSConfig: config.TLSConfig{
CertFile: "testdata/valid_cert_file",
KeyFile: "testdata/valid_key_file",
InsecureSkipVerify: true,
},
},
}
func TestYAMLRoundtrip(t *testing.T) {
@ -1181,6 +1196,14 @@ var expectedErrors = []struct {
filename: "kubernetes_http_config_without_api_server.bad.yml",
errMsg: "to use custom HTTP client configuration please provide the 'api_server' URL explicitly",
},
{
filename: "kubernetes_kubeconfig_with_own_namespace.bad.yml",
errMsg: "cannot use 'kubeconfig_file' and 'namespaces.own_namespace' simultaneously",
},
{
filename: "kubernetes_api_server_with_own_namespace.bad.yml",
errMsg: "cannot use 'api_server' and 'namespaces.own_namespace' simultaneously",
},
{
filename: "kubernetes_kubeconfig_with_apiserver.bad.yml",
errMsg: "cannot use 'kubeconfig_file' and 'api_server' simultaneously",
@ -1433,6 +1456,22 @@ var expectedErrors = []struct {
filename: "empty_scrape_config_action.bad.yml",
errMsg: "relabel action cannot be empty",
},
{
filename: "tracing_missing_endpoint.bad.yml",
errMsg: "tracing endpoint must be set",
},
{
filename: "tracing_invalid_header.bad.yml",
errMsg: "x-prometheus-remote-write-version is a reserved header. It must not be changed",
},
{
filename: "tracing_invalid_authorization_header.bad.yml",
errMsg: "authorization header configuration is not yet supported",
},
{
filename: "tracing_invalid_compression.bad.yml",
errMsg: "invalid compression type foo provided, valid options: gzip",
},
{
filename: "uyuni_no_server.bad.yml",
errMsg: "Uyuni SD configuration requires server host",
@ -1480,12 +1519,16 @@ func TestExpandExternalLabels(t *testing.T) {
require.Equal(t, labels.Label{Name: "bar", Value: "foo"}, c.GlobalConfig.ExternalLabels[0])
require.Equal(t, labels.Label{Name: "baz", Value: "foo${TEST}bar"}, c.GlobalConfig.ExternalLabels[1])
require.Equal(t, labels.Label{Name: "foo", Value: "${TEST}"}, c.GlobalConfig.ExternalLabels[2])
require.Equal(t, labels.Label{Name: "qux", Value: "foo$${TEST}"}, c.GlobalConfig.ExternalLabels[3])
require.Equal(t, labels.Label{Name: "xyz", Value: "foo$$bar"}, c.GlobalConfig.ExternalLabels[4])
c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger())
require.NoError(t, err)
require.Equal(t, labels.Label{Name: "bar", Value: "foo"}, c.GlobalConfig.ExternalLabels[0])
require.Equal(t, labels.Label{Name: "baz", Value: "foobar"}, c.GlobalConfig.ExternalLabels[1])
require.Equal(t, labels.Label{Name: "foo", Value: ""}, c.GlobalConfig.ExternalLabels[2])
require.Equal(t, labels.Label{Name: "qux", Value: "foo${TEST}"}, c.GlobalConfig.ExternalLabels[3])
require.Equal(t, labels.Label{Name: "xyz", Value: "foo$bar"}, c.GlobalConfig.ExternalLabels[4])
os.Setenv("TEST", "TestValue")
c, err = LoadFile("testdata/external_labels.good.yml", false, true, log.NewNopLogger())
@ -1493,6 +1536,8 @@ func TestExpandExternalLabels(t *testing.T) {
require.Equal(t, labels.Label{Name: "bar", Value: "foo"}, c.GlobalConfig.ExternalLabels[0])
require.Equal(t, labels.Label{Name: "baz", Value: "fooTestValuebar"}, c.GlobalConfig.ExternalLabels[1])
require.Equal(t, labels.Label{Name: "foo", Value: "TestValue"}, c.GlobalConfig.ExternalLabels[2])
require.Equal(t, labels.Label{Name: "qux", Value: "foo${TEST}"}, c.GlobalConfig.ExternalLabels[3])
require.Equal(t, labels.Label{Name: "xyz", Value: "foo$bar"}, c.GlobalConfig.ExternalLabels[4])
}
func TestEmptyGlobalBlock(t *testing.T) {


@ -366,3 +366,15 @@ alerting:
- "1.2.3.4:9093"
- "1.2.3.5:9093"
- "1.2.3.6:9093"
tracing:
endpoint: "localhost:4317"
client_type: "grpc"
headers:
foo: "bar"
timeout: 5s
compression: "gzip"
tls_config:
cert_file: valid_cert_file
key_file: valid_key_file
insecure_skip_verify: true


@ -3,3 +3,5 @@ global:
bar: foo
foo: ${TEST}
baz: foo${TEST}bar
qux: foo$${TEST}
xyz: foo$$bar
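These two labels exercise the `$$` escaping added in config.Load: with expand-external-labels enabled, `$$` collapses to a literal `$` and unset variables expand to the empty string; without the flag the values pass through unchanged. A minimal sketch mirroring the os.Expand hook from the config.go hunk above:

```go
package main

import (
	"fmt"
	"os"
)

// expand mirrors the hook above: os.Expand hands "$" to the mapping function
// for the sequence "$$", which is mapped back to a literal "$".
func expand(v string) string {
	return os.Expand(v, func(s string) string {
		if s == "$" {
			return "$"
		}
		return os.Getenv(s)
	})
}

func main() {
	os.Unsetenv("TEST")
	fmt.Println(expand("foo$${TEST}")) // foo${TEST}
	fmt.Println(expand("foo$$bar"))    // foo$bar
}
```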


@ -0,0 +1,7 @@
scrape_configs:
- job_name: prometheus
kubernetes_sd_configs:
- role: endpoints
api_server: 'https://localhost:1234'
namespaces:
own_namespace: true


@ -0,0 +1,7 @@
scrape_configs:
- job_name: prometheus
kubernetes_sd_configs:
- role: endpoints
kubeconfig_file: /home/User1/.kubeconfig
namespaces:
own_namespace: true


@ -0,0 +1,5 @@
tracing:
sampling_fraction: 1
endpoint: "localhost:4317"
headers:
"authorization": foo


@ -0,0 +1,4 @@
tracing:
sampling_fraction: 1
endpoint: "localhost:4317"
compression: foo


@ -0,0 +1,5 @@
tracing:
sampling_fraction: 1
endpoint: "localhost:4317"
headers:
"x-prometheus-remote-write-version": foo


@ -0,0 +1,2 @@
tracing:
sampling_fraction: 1


@ -32,6 +32,7 @@ import (
"github.com/pkg/errors"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
"github.com/prometheus/prometheus/discovery"
"github.com/prometheus/prometheus/discovery/refresh"
@ -58,14 +59,18 @@ const (
authMethodManagedIdentity = "ManagedIdentity"
)
// DefaultSDConfig is the default Azure SD configuration.
var DefaultSDConfig = SDConfig{
var (
userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
// DefaultSDConfig is the default Azure SD configuration.
DefaultSDConfig = SDConfig{
Port: 80,
RefreshInterval: model.Duration(5 * time.Minute),
Environment: azure.PublicCloud.Name,
AuthenticationMethod: authMethodOAuth,
HTTPClientConfig: config_util.DefaultHTTPClientConfig,
}
}
)
func init() {
discovery.RegisterConfig(&SDConfig{})
@ -208,24 +213,29 @@ func createAzureClient(cfg SDConfig) (azureClient, error) {
return azureClient{}, err
}
sender := autorest.DecorateSender(client)
preparer := autorest.WithUserAgent(userAgent)
bearerAuthorizer := autorest.NewBearerAuthorizer(spt)
c.vm = compute.NewVirtualMachinesClientWithBaseURI(resourceManagerEndpoint, cfg.SubscriptionID)
c.vm.Authorizer = bearerAuthorizer
c.vm.Sender = sender
c.vm.RequestInspector = preparer
c.nic = network.NewInterfacesClientWithBaseURI(resourceManagerEndpoint, cfg.SubscriptionID)
c.nic.Authorizer = bearerAuthorizer
c.nic.Sender = sender
c.nic.RequestInspector = preparer
c.vmss = compute.NewVirtualMachineScaleSetsClientWithBaseURI(resourceManagerEndpoint, cfg.SubscriptionID)
c.vmss.Authorizer = bearerAuthorizer
c.vm.Sender = sender
c.vmss.Sender = sender
c.vmss.RequestInspector = preparer
c.vmssvm = compute.NewVirtualMachineScaleSetVMsClientWithBaseURI(resourceManagerEndpoint, cfg.SubscriptionID)
c.vmssvm.Authorizer = bearerAuthorizer
c.vmssvm.Sender = sender
c.vmssvm.RequestInspector = preparer
return c, nil
}
@ -361,7 +371,9 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
if *networkInterface.Primary {
for _, ip := range *networkInterface.IPConfigurations {
if ip.PublicIPAddress != nil && ip.PublicIPAddress.PublicIPAddressPropertiesFormat != nil {
// IPAddress is a field defined in PublicIPAddressPropertiesFormat,
// therefore we need to validate that both are not nil.
if ip.PublicIPAddress != nil && ip.PublicIPAddress.PublicIPAddressPropertiesFormat != nil && ip.PublicIPAddress.IPAddress != nil {
labels[azureLabelMachinePublicIP] = model.LabelValue(*ip.PublicIPAddress.IPAddress)
}
if ip.PrivateIPAddress != nil {
@ -537,7 +549,8 @@ func (client *azureClient) getNetworkInterfaceByID(ctx context.Context, networkI
autorest.AsGet(),
autorest.WithBaseURL(client.nic.BaseURI),
autorest.WithPath(networkInterfaceID),
autorest.WithQueryParameters(queryParameters))
autorest.WithQueryParameters(queryParameters),
autorest.WithUserAgent(userAgent))
req, err := preparer.Prepare((&http.Request{}).WithContext(ctx))
if err != nil {
return nil, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", nil, "Failure preparing request")


@ -138,7 +138,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
Source: "DigitalOcean",
}
droplets, err := d.listDroplets()
droplets, err := d.listDroplets(ctx)
if err != nil {
return nil, err
}
@ -196,13 +196,13 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
return []*targetgroup.Group{tg}, nil
}
func (d *Discovery) listDroplets() ([]godo.Droplet, error) {
func (d *Discovery) listDroplets(ctx context.Context) ([]godo.Droplet, error) {
var (
droplets []godo.Droplet
opts = &godo.ListOptions{}
)
for {
paginatedDroplets, resp, err := d.client.Droplets.List(context.Background(), opts)
paginatedDroplets, resp, err := d.client.Droplets.List(ctx, opts)
if err != nil {
return nil, fmt.Errorf("error while listing droplets page %d: %w", opts.Page, err)
}
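Threading the refresh `ctx` into `godo` means cancelling a discovery refresh now aborts in-flight droplet listings instead of letting `context.Background()` run them to completion. A generic sketch of the pattern, with a hypothetical `fetch` callback standing in for `d.client.Droplets.List(ctx, opts)`:

```go
package main

import (
	"context"
	"fmt"
)

// listAll drains a paginated API, honouring ctx cancellation between pages.
// fetch is an illustrative stand-in for a paginated client call.
func listAll(ctx context.Context, fetch func(ctx context.Context, page int) (items []string, last bool, err error)) ([]string, error) {
	var all []string
	for page := 1; ; page++ {
		if err := ctx.Err(); err != nil {
			return nil, err // stop promptly once the caller cancels
		}
		items, last, err := fetch(ctx, page)
		if err != nil {
			return nil, fmt.Errorf("error while listing page %d: %w", page, err)
		}
		all = append(all, items...)
		if last {
			return all, nil
		}
	}
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	cancel() // simulate a cancelled refresh
	_, err := listAll(ctx, func(context.Context, int) ([]string, bool, error) {
		return []string{"droplet"}, true, nil
	})
	fmt.Println(err) // context canceled
}
```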

View file

@ -20,7 +20,6 @@ import (
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strings"
"sync"
"time"
@ -28,6 +27,7 @@ import (
"github.com/fsnotify/fsnotify"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/grafana/regexp"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"

View file

@ -53,12 +53,9 @@ type testRunner struct {
func newTestRunner(t *testing.T) *testRunner {
t.Helper()
tmpDir, err := ioutil.TempDir("", "prometheus-file-sd")
require.NoError(t, err)
return &testRunner{
T: t,
dir: tmpDir,
dir: t.TempDir(),
ch: make(chan []*targetgroup.Group),
done: make(chan struct{}),
stopped: make(chan struct{}),

View file

@ -21,13 +21,14 @@ import (
"io/ioutil"
"net/http"
"net/url"
"regexp"
"strconv"
"strings"
"time"
"github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/common/version"
@ -45,10 +46,17 @@ var (
}
userAgent = fmt.Sprintf("Prometheus/%s", version.Version)
matchContentType = regexp.MustCompile(`^(?i:application\/json(;\s*charset=("utf-8"|utf-8))?)$`)
failuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_http_failures_total",
Help: "Number of HTTP service discovery refresh failures.",
})
)
func init() {
discovery.RegisterConfig(&SDConfig{})
prometheus.MustRegister(failuresCount)
}
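For reference, a quick sketch of what `matchContentType` accepts. It uses the standard library's `regexp`, which is API-compatible with the `github.com/grafana/regexp` fork imported above:

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Case-insensitive JSON content type, optionally with a utf-8 charset.
	re := regexp.MustCompile(`^(?i:application/json(;\s*charset=("utf-8"|utf-8))?)$`)
	for _, ct := range []string{
		"application/json",
		"Application/JSON; charset=UTF-8",
		"text/plain; charset=utf-8",
	} {
		fmt.Printf("%-35s %v\n", ct, re.MatchString(ct))
	}
	// application/json                    true
	// Application/JSON; charset=UTF-8     true
	// text/plain; charset=utf-8           false
}
```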
// SDConfig is the configuration for HTTP based discovery.
@ -145,6 +153,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
resp, err := d.client.Do(req.WithContext(ctx))
if err != nil {
failuresCount.Inc()
return nil, err
}
defer func() {
@ -153,26 +162,31 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
}()
if resp.StatusCode != http.StatusOK {
failuresCount.Inc()
return nil, errors.Errorf("server returned HTTP status %s", resp.Status)
}
if !matchContentType.MatchString(strings.TrimSpace(resp.Header.Get("Content-Type"))) {
failuresCount.Inc()
return nil, errors.Errorf("unsupported content type %q", resp.Header.Get("Content-Type"))
}
b, err := ioutil.ReadAll(resp.Body)
if err != nil {
failuresCount.Inc()
return nil, err
}
var targetGroups []*targetgroup.Group
if err := json.Unmarshal(b, &targetGroups); err != nil {
failuresCount.Inc()
return nil, err
}
for i, tg := range targetGroups {
if tg == nil {
failuresCount.Inc()
err = errors.New("nil target group item found")
return nil, err
}

View file

@ -22,6 +22,8 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@ -61,6 +63,7 @@ func TestHTTPValidRefresh(t *testing.T) {
},
}
require.Equal(t, tgs, expectedTargets)
require.Equal(t, 0.0, getFailureCount())
}
func TestHTTPInvalidCode(t *testing.T) {
@ -82,6 +85,7 @@ func TestHTTPInvalidCode(t *testing.T) {
ctx := context.Background()
_, err = d.refresh(ctx)
require.EqualError(t, err, "server returned HTTP status 400 Bad Request")
require.Equal(t, 1.0, getFailureCount())
}
func TestHTTPInvalidFormat(t *testing.T) {
@ -103,6 +107,32 @@ func TestHTTPInvalidFormat(t *testing.T) {
ctx := context.Background()
_, err = d.refresh(ctx)
require.EqualError(t, err, `unsupported content type "text/plain; charset=utf-8"`)
require.Equal(t, 1.0, getFailureCount())
}
var lastFailureCount float64
func getFailureCount() float64 {
failureChan := make(chan prometheus.Metric)
go func() {
failuresCount.Collect(failureChan)
close(failureChan)
}()
var counter dto.Metric
for metric := range failureChan {
metric.Write(&counter)
}
// account for failures in prior tests
count := *counter.Counter.Value - lastFailureCount
lastFailureCount = *counter.Counter.Value
return count
}
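The `Collect`-over-a-channel approach above works for any collector; for a single counter, calling `Write` directly is a simpler way to read the value back. A sketch (metric name illustrative):

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
)

func main() {
	c := prometheus.NewCounter(prometheus.CounterOpts{
		Name: "example_failures_total",
		Help: "Example failure counter.",
	})
	c.Inc()

	// A Counter is itself a Metric, so it can be written straight into a
	// dto.Metric without going through Collect.
	var m dto.Metric
	if err := c.Write(&m); err != nil {
		panic(err)
	}
	fmt.Println(m.GetCounter().GetValue()) // 1
}
```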
func TestContentTypeRegex(t *testing.T) {

View file

@ -122,6 +122,7 @@ type SDConfig struct {
HTTPClientConfig config.HTTPClientConfig `yaml:",inline"`
NamespaceDiscovery NamespaceDiscovery `yaml:"namespaces,omitempty"`
Selectors []SelectorConfig `yaml:"selectors,omitempty"`
AttachMetadata AttachMetadataConfig `yaml:"attach_metadata,omitempty"`
}
// Name returns the name of the Config.
@ -158,6 +159,12 @@ type resourceSelector struct {
field string
}
// AttachMetadataConfig is the configuration for attaching additional metadata
// coming from nodes on which the targets are scheduled.
type AttachMetadataConfig struct {
Node bool `yaml:"node"`
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
*c = DefaultSDConfig
@ -184,6 +191,12 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
if c.APIServer.URL == nil && !reflect.DeepEqual(c.HTTPClientConfig, config.DefaultHTTPClientConfig) {
return errors.Errorf("to use custom HTTP client configuration please provide the 'api_server' URL explicitly")
}
if c.APIServer.URL != nil && c.NamespaceDiscovery.IncludeOwnNamespace {
return errors.Errorf("cannot use 'api_server' and 'namespaces.own_namespace' simultaneously")
}
if c.KubeConfig != "" && c.NamespaceDiscovery.IncludeOwnNamespace {
return errors.Errorf("cannot use 'kubeconfig_file' and 'namespaces.own_namespace' simultaneously")
}
foundSelectorRoles := make(map[Role]struct{})
allowedSelectors := map[Role][]string{
@ -253,6 +266,7 @@ type Discovery struct {
discoverers []discovery.Discoverer
selectors roleSelector
ownNamespace string
attachMetadata AttachMetadataConfig
}
func (d *Discovery) getNamespaces() []string {
@ -263,7 +277,7 @@ func (d *Discovery) getNamespaces() []string {
return []string{apiv1.NamespaceAll}
}
if includeOwnNamespace {
if includeOwnNamespace && d.ownNamespace != "" {
return append(namespaces, d.ownNamespace)
}
@ -278,6 +292,7 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
var (
kcfg *rest.Config
err error
ownNamespace string
)
if conf.KubeConfig != "" {
kcfg, err = clientcmd.BuildConfigFromFlags("", conf.KubeConfig)
@ -291,6 +306,18 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
if err != nil {
return nil, err
}
if conf.NamespaceDiscovery.IncludeOwnNamespace {
ownNamespaceContents, err := ioutil.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace")
if err != nil {
return nil, fmt.Errorf("could not determine the pod's namespace: %w", err)
}
if len(ownNamespaceContents) == 0 {
return nil, errors.New("could not read own namespace name (empty file)")
}
ownNamespace = string(ownNamespaceContents)
}
level.Info(l).Log("msg", "Using pod service account via in-cluster config")
} else {
rt, err := config.NewRoundTripperFromConfig(conf.HTTPClientConfig, "kubernetes_sd")
@ -310,11 +337,6 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
return nil, err
}
ownNamespace, err := ioutil.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace")
if err != nil {
return nil, fmt.Errorf("could not determine the pod's namespace: %w", err)
}
return &Discovery{
client: c,
logger: l,
@ -322,7 +344,8 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
namespaceDiscovery: &conf.NamespaceDiscovery,
discoverers: make([]discovery.Discoverer, 0),
selectors: mapSelector(conf.Selectors),
ownNamespace: string(ownNamespace),
ownNamespace: ownNamespace,
attachMetadata: conf.AttachMetadata,
}, nil
}
@ -466,6 +489,12 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
go eps.podInf.Run(ctx.Done())
}
case RolePod:
var nodeInformer cache.SharedInformer
if d.attachMetadata.Node {
nodeInformer = d.newNodeInformer(ctx)
go nodeInformer.Run(ctx.Done())
}
for _, namespace := range namespaces {
p := d.client.CoreV1().Pods(namespace)
plw := &cache.ListWatch{
@ -482,10 +511,11 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
pod := NewPod(
log.With(d.logger, "role", "pod"),
cache.NewSharedInformer(plw, &apiv1.Pod{}, resyncPeriod),
d.newPodsByNodeInformer(plw),
nodeInformer,
)
d.discoverers = append(d.discoverers, pod)
go pod.informer.Run(ctx.Done())
go pod.podInf.Run(ctx.Done())
}
case RoleService:
for _, namespace := range namespaces {
@ -511,7 +541,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
case RoleIngress:
// Check "networking.k8s.io/v1" availability with retries.
// If "v1" is not avaiable, use "networking.k8s.io/v1beta1" for backward compatibility
// If "v1" is not available, use "networking.k8s.io/v1beta1" for backward compatibility
var v1Supported bool
if retryOnError(ctx, 10*time.Second,
func() (err error) {
@ -567,22 +597,8 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
go ingress.informer.Run(ctx.Done())
}
case RoleNode:
nlw := &cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
options.FieldSelector = d.selectors.node.field
options.LabelSelector = d.selectors.node.label
return d.client.CoreV1().Nodes().List(ctx, options)
},
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
options.FieldSelector = d.selectors.node.field
options.LabelSelector = d.selectors.node.label
return d.client.CoreV1().Nodes().Watch(ctx, options)
},
}
node := NewNode(
log.With(d.logger, "role", "node"),
cache.NewSharedInformer(nlw, &apiv1.Node{}, resyncPeriod),
)
nodeInformer := d.newNodeInformer(ctx)
node := NewNode(log.With(d.logger, "role", "node"), nodeInformer)
d.discoverers = append(d.discoverers, node)
go node.informer.Run(ctx.Done())
default:
@ -647,3 +663,34 @@ func checkNetworkingV1Supported(client kubernetes.Interface) (bool, error) {
// https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.19.md
return semVer.Major() >= 1 && semVer.Minor() >= 19, nil
}
func (d *Discovery) newNodeInformer(ctx context.Context) cache.SharedInformer {
nlw := &cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
options.FieldSelector = d.selectors.node.field
options.LabelSelector = d.selectors.node.label
return d.client.CoreV1().Nodes().List(ctx, options)
},
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
options.FieldSelector = d.selectors.node.field
options.LabelSelector = d.selectors.node.label
return d.client.CoreV1().Nodes().Watch(ctx, options)
},
}
return cache.NewSharedInformer(nlw, &apiv1.Node{}, resyncPeriod)
}
func (d *Discovery) newPodsByNodeInformer(plw *cache.ListWatch) cache.SharedIndexInformer {
indexers := make(map[string]cache.IndexFunc)
if d.attachMetadata.Node {
indexers[nodeIndex] = func(obj interface{}) ([]string, error) {
pod, ok := obj.(*apiv1.Pod)
if !ok {
return nil, fmt.Errorf("object is not a pod")
}
return []string{pod.Spec.NodeName}, nil
}
}
return cache.NewSharedIndexInformer(plw, &apiv1.Pod{}, resyncPeriod, indexers)
}
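A standalone sketch of what the `nodeIndex` entry buys: client-go maintains the index incrementally, and `ByIndex` later returns exactly the pods whose `IndexFunc` emitted a given node name, which is what `enqueuePodsForNode` in pod.go relies on. The names mirror the code above, but the program itself is illustrative:

```go
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/cache"
)

func main() {
	// Index pods by the node they are scheduled on.
	indexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{
		"node": func(obj interface{}) ([]string, error) {
			pod, ok := obj.(*apiv1.Pod)
			if !ok {
				return nil, fmt.Errorf("object is not a pod")
			}
			return []string{pod.Spec.NodeName}, nil
		},
	})

	pod := &apiv1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "p1", Namespace: "default"},
		Spec:       apiv1.PodSpec{NodeName: "testnode"},
	}
	if err := indexer.Add(pod); err != nil {
		panic(err)
	}

	// ByIndex returns every pod whose IndexFunc emitted "testnode".
	pods, err := indexer.ByIndex("node", "testnode")
	if err != nil {
		panic(err)
	}
	fmt.Println(len(pods)) // 1
}
```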

View file

@ -58,6 +58,13 @@ func makeDiscoveryWithVersion(role Role, nsDiscovery NamespaceDiscovery, k8sVer
}, clientset
}
// makeDiscoveryWithMetadata creates a kubernetes.Discovery instance with the specified metadata config.
func makeDiscoveryWithMetadata(role Role, nsDiscovery NamespaceDiscovery, attachMetadata AttachMetadataConfig, objects ...runtime.Object) (*Discovery, kubernetes.Interface) {
d, k8s := makeDiscovery(role, nsDiscovery, objects...)
d.attachMetadata = attachMetadata
return d, k8s
}
type k8sDiscoveryTest struct {
// discovery is instance of discovery.Discoverer
discovery discovery.Discoverer
@ -215,7 +222,7 @@ func (i *Ingress) hasSynced() bool {
}
func (p *Pod) hasSynced() bool {
return p.informer.HasSynced()
return p.podInf.HasSynced()
}
func (s *Service) hasSynced() bool {

View file

@ -32,6 +32,8 @@ import (
"github.com/prometheus/prometheus/util/strutil"
)
const nodeIndex = "node"
var (
podAddCount = eventCount.WithLabelValues("pod", "add")
podUpdateCount = eventCount.WithLabelValues("pod", "update")
@ -40,24 +42,29 @@ var (
// Pod discovers new pod targets.
type Pod struct {
informer cache.SharedInformer
podInf cache.SharedIndexInformer
nodeInf cache.SharedInformer
withNodeMetadata bool
store cache.Store
logger log.Logger
queue *workqueue.Type
}
// NewPod creates a new pod discovery.
func NewPod(l log.Logger, pods cache.SharedInformer) *Pod {
func NewPod(l log.Logger, pods cache.SharedIndexInformer, nodes cache.SharedInformer) *Pod {
if l == nil {
l = log.NewNopLogger()
}
p := &Pod{
informer: pods,
podInf: pods,
nodeInf: nodes,
withNodeMetadata: nodes != nil,
store: pods.GetStore(),
logger: l,
queue: workqueue.NewNamed("pod"),
}
p.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
p.podInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
podAddCount.Inc()
p.enqueue(o)
@ -71,6 +78,24 @@ func NewPod(l log.Logger, pods cache.SharedInformer) *Pod {
p.enqueue(o)
},
})
if p.withNodeMetadata {
p.nodeInf.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
node := o.(*apiv1.Node)
p.enqueuePodsForNode(node.Name)
},
UpdateFunc: func(_, o interface{}) {
node := o.(*apiv1.Node)
p.enqueuePodsForNode(node.Name)
},
DeleteFunc: func(o interface{}) {
node := o.(*apiv1.Node)
p.enqueuePodsForNode(node.Name)
},
})
}
return p
}
@ -87,7 +112,12 @@ func (p *Pod) enqueue(obj interface{}) {
func (p *Pod) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
defer p.queue.ShutDown()
if !cache.WaitForCacheSync(ctx.Done(), p.informer.HasSynced) {
cacheSyncs := []cache.InformerSynced{p.podInf.HasSynced}
if p.withNodeMetadata {
cacheSyncs = append(cacheSyncs, p.nodeInf.HasSynced)
}
if !cache.WaitForCacheSync(ctx.Done(), cacheSyncs...) {
if ctx.Err() != context.Canceled {
level.Error(p.logger).Log("msg", "pod informer unable to sync cache")
}
@ -221,6 +251,9 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group {
tg.Labels = podLabels(pod)
tg.Labels[namespaceLabel] = lv(pod.Namespace)
if p.withNodeMetadata {
p.attachNodeMetadata(tg, pod)
}
containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
for i, c := range containers {
@ -257,6 +290,39 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group {
return tg
}
func (p *Pod) attachNodeMetadata(tg *targetgroup.Group, pod *apiv1.Pod) {
tg.Labels[nodeNameLabel] = lv(pod.Spec.NodeName)
obj, exists, err := p.nodeInf.GetStore().GetByKey(pod.Spec.NodeName)
if err != nil {
level.Error(p.logger).Log("msg", "Error getting node", "node", pod.Spec.NodeName, "err", err)
return
}
if !exists {
return
}
node := obj.(*apiv1.Node)
for k, v := range node.GetLabels() {
ln := strutil.SanitizeLabelName(k)
tg.Labels[model.LabelName(nodeLabelPrefix+ln)] = lv(v)
tg.Labels[model.LabelName(nodeLabelPresentPrefix+ln)] = presentValue
}
}
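Node label keys are valid Kubernetes names but not necessarily valid Prometheus label names, hence the `SanitizeLabelName` call above. A quick sketch of the transformation:

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/util/strutil"
)

func main() {
	// Characters outside [a-zA-Z0-9_] are rewritten to underscores before
	// the __meta_kubernetes_node_label_* labels are attached.
	fmt.Println(strutil.SanitizeLabelName("kubernetes.io/arch"))
	// Output: kubernetes_io_arch
}
```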
func (p *Pod) enqueuePodsForNode(nodeName string) {
pods, err := p.podInf.GetIndexer().ByIndex(nodeIndex, nodeName)
if err != nil {
level.Error(p.logger).Log("msg", "Error getting pods for node", "node", nodeName, "err", err)
return
}
for _, pod := range pods {
p.enqueue(pod.(*apiv1.Pod))
}
}
func podSource(pod *apiv1.Pod) string {
return podSourceFromNamespaceAndName(pod.Namespace, pod.Name)
}

View file

@ -190,6 +190,19 @@ func expectedPodTargetGroups(ns string) map[string]*targetgroup.Group {
}
}
func expectedPodTargetGroupsWithNodeMeta(ns, nodeName string, nodeLabels map[string]string) map[string]*targetgroup.Group {
result := expectedPodTargetGroups(ns)
for _, tg := range result {
tg.Labels["__meta_kubernetes_node_name"] = lv(nodeName)
for k, v := range nodeLabels {
tg.Labels[model.LabelName("__meta_kubernetes_node_label_"+k)] = lv(v)
tg.Labels[model.LabelName("__meta_kubernetes_node_labelpresent_"+k)] = lv("true")
}
}
return result
}
func TestPodDiscoveryBeforeRun(t *testing.T) {
n, c := makeDiscovery(RolePod, NamespaceDiscovery{})
@ -407,3 +420,46 @@ func TestPodDiscoveryOwnNamespace(t *testing.T) {
expectedRes: expected,
}.Run(t)
}
func TestPodDiscoveryWithNodeMetadata(t *testing.T) {
attachMetadata := AttachMetadataConfig{Node: true}
n, c := makeDiscoveryWithMetadata(RolePod, NamespaceDiscovery{}, attachMetadata)
nodeLbls := map[string]string{"l1": "v1"}
k8sDiscoveryTest{
discovery: n,
afterStart: func() {
nodes := makeNode("testnode", "", "", nodeLbls, nil)
c.CoreV1().Nodes().Create(context.Background(), nodes, metav1.CreateOptions{})
pods := makePods()
c.CoreV1().Pods(pods.Namespace).Create(context.Background(), pods, metav1.CreateOptions{})
},
expectedMaxItems: 2,
expectedRes: expectedPodTargetGroupsWithNodeMeta("default", "testnode", nodeLbls),
}.Run(t)
}
func TestPodDiscoveryWithNodeMetadataUpdateNode(t *testing.T) {
nodeLbls := map[string]string{"l2": "v2"}
attachMetadata := AttachMetadataConfig{Node: true}
n, c := makeDiscoveryWithMetadata(RolePod, NamespaceDiscovery{}, attachMetadata)
k8sDiscoveryTest{
discovery: n,
beforeRun: func() {
oldNodeLbls := map[string]string{"l1": "v1"}
nodes := makeNode("testnode", "", "", oldNodeLbls, nil)
c.CoreV1().Nodes().Create(context.Background(), nodes, metav1.CreateOptions{})
},
afterStart: func() {
pods := makePods()
c.CoreV1().Pods(pods.Namespace).Create(context.Background(), pods, metav1.CreateOptions{})
nodes := makeNode("testnode", "", "", nodeLbls, nil)
c.CoreV1().Nodes().Update(context.Background(), nodes, metav1.UpdateOptions{})
},
expectedMaxItems: 2,
expectedRes: expectedPodTargetGroupsWithNodeMeta("default", "testnode", nodeLbls),
}.Run(t)
}

View file

@ -668,13 +668,16 @@ func TestTargetUpdatesOrder(t *testing.T) {
discoveryManager.updatert = 100 * time.Millisecond
var totalUpdatesCount int
provUpdates := make(chan []*targetgroup.Group)
for _, up := range tc.updates {
go newMockDiscoveryProvider(up...).Run(ctx, provUpdates)
if len(up) > 0 {
totalUpdatesCount += len(up)
}
}
provUpdates := make(chan []*targetgroup.Group, totalUpdatesCount)
for _, up := range tc.updates {
go newMockDiscoveryProvider(up...).Run(ctx, provUpdates)
}
for x := 0; x < totalUpdatesCount; x++ {
select {

View file

@ -668,13 +668,16 @@ func TestTargetUpdatesOrder(t *testing.T) {
discoveryManager.updatert = 100 * time.Millisecond
var totalUpdatesCount int
provUpdates := make(chan []*targetgroup.Group)
for _, up := range tc.updates {
go newMockDiscoveryProvider(up...).Run(ctx, provUpdates)
if len(up) > 0 {
totalUpdatesCount += len(up)
}
}
provUpdates := make(chan []*targetgroup.Group, totalUpdatesCount)
for _, up := range tc.updates {
go newMockDiscoveryProvider(up...).Run(ctx, provUpdates)
}
for x := 0; x < totalUpdatesCount; x++ {
select {
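Both copies of this test fix size the channel up front, so every mock provider goroutine can finish its sends even while the receiving loop is still draining. The idea in miniature:

```go
package main

import "fmt"

func main() {
	const totalUpdatesCount = 3
	// Capacity equal to the total number of sends means no producer
	// goroutine ever blocks waiting for the consumer.
	ch := make(chan int, totalUpdatesCount)
	for i := 0; i < totalUpdatesCount; i++ {
		go func(i int) { ch <- i }(i)
	}
	for i := 0; i < totalUpdatesCount; i++ {
		fmt.Println(<-ch)
	}
}
```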

View file

@ -30,12 +30,12 @@ import (
const (
swarmLabelTaskPrefix = swarmLabel + "task_"
swarmLabelTaskID = swarmLabelTaskPrefix + "id"
swarmLabelTaskLabelPrefix = swarmLabelTaskPrefix + "label_"
swarmLabelTaskDesiredState = swarmLabelTaskPrefix + "desired_state"
swarmLabelTaskStatus = swarmLabelTaskPrefix + "state"
swarmLabelTaskContainerID = swarmLabelTaskPrefix + "container_id"
swarmLabelTaskSlot = swarmLabelTaskPrefix + "slot"
swarmLabelTaskPortMode = swarmLabelTaskPrefix + "port_publish_mode"
swarmLabelContainerLabelPrefix = swarmLabel + "container_label_"
)
func (d *Discovery) refreshTasks(ctx context.Context) ([]*targetgroup.Group, error) {
@ -75,9 +75,11 @@ func (d *Discovery) refreshTasks(ctx context.Context) ([]*targetgroup.Group, err
commonLabels[swarmLabelTaskContainerID] = s.Status.ContainerStatus.ContainerID
}
for k, v := range s.Labels {
if s.Spec.ContainerSpec != nil {
for k, v := range s.Spec.ContainerSpec.Labels {
ln := strutil.SanitizeLabelName(k)
commonLabels[swarmLabelTaskLabelPrefix+ln] = v
commonLabels[swarmLabelContainerLabelPrefix+ln] = v
}
}
for k, v := range serviceLabels[s.ServiceID] {

View file

@ -55,6 +55,7 @@ host: %s
for i, lbls := range []model.LabelSet{
{
"__address__": model.LabelValue("10.0.0.8:9100"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("qvwhwd6p61k4o0ulsknqb066z"),
"__meta_dockerswarm_network_ingress": model.LabelValue("true"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -82,6 +83,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.3:9100"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -110,6 +112,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.88:80"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -137,6 +140,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.0.12:9100"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("qvwhwd6p61k4o0ulsknqb066z"),
"__meta_dockerswarm_network_ingress": model.LabelValue("true"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -164,6 +168,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.7:9100"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -192,6 +197,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.0.11:9100"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("qvwhwd6p61k4o0ulsknqb066z"),
"__meta_dockerswarm_network_ingress": model.LabelValue("true"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -219,6 +225,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.6:9100"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -247,6 +254,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.0.10:9100"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("qvwhwd6p61k4o0ulsknqb066z"),
"__meta_dockerswarm_network_ingress": model.LabelValue("true"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -274,6 +282,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.5:9100"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -302,6 +311,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.35:80"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -329,6 +339,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.14:80"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -356,6 +367,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.20:80"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -383,6 +395,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.19:80"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -410,6 +423,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.18:80"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -437,6 +451,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.75:80"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -464,6 +479,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.0.19:9090"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("qvwhwd6p61k4o0ulsknqb066z"),
"__meta_dockerswarm_network_ingress": model.LabelValue("true"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -491,6 +507,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.0.19:9093"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("qvwhwd6p61k4o0ulsknqb066z"),
"__meta_dockerswarm_network_ingress": model.LabelValue("true"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -518,6 +535,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.0.19:9094"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("qvwhwd6p61k4o0ulsknqb066z"),
"__meta_dockerswarm_network_ingress": model.LabelValue("true"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -545,6 +563,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.81:9090"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -573,6 +592,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.81:9093"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -601,6 +621,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.81:9094"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -629,6 +650,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.24:80"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -656,6 +678,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.32:80"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -683,6 +706,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.0.16:3000"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("qvwhwd6p61k4o0ulsknqb066z"),
"__meta_dockerswarm_network_ingress": model.LabelValue("true"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -710,6 +734,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.30:3000"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),
@ -738,6 +763,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.232.3:82"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_node_address": model.LabelValue("10.0.232.3"),
"__meta_dockerswarm_node_availability": model.LabelValue("active"),
"__meta_dockerswarm_node_hostname": model.LabelValue("master-3"),
@ -760,6 +786,7 @@ host: %s
},
{
"__address__": model.LabelValue("10.0.1.22:80"),
"__meta_dockerswarm_container_label_com_docker_stack_namespace": model.LabelValue("mon"),
"__meta_dockerswarm_network_id": model.LabelValue("npq2closzy836m07eaq1425k3"),
"__meta_dockerswarm_network_ingress": model.LabelValue("false"),
"__meta_dockerswarm_network_internal": model.LabelValue("false"),

View file

@ -50,7 +50,8 @@ type HypervisorDiscovery struct {
// newHypervisorDiscovery returns a new hypervisor discovery.
func newHypervisorDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions,
port int, region string, availability gophercloud.Availability, l log.Logger) *HypervisorDiscovery {
port int, region string, availability gophercloud.Availability, l log.Logger,
) *HypervisorDiscovery {
return &HypervisorDiscovery{
provider: provider, authOpts: opts,
region: region, port: port, availability: availability, logger: l,

View file

@ -59,7 +59,8 @@ type InstanceDiscovery struct {
// NewInstanceDiscovery returns a new instance discovery.
func newInstanceDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions,
port int, region string, allTenants bool, availability gophercloud.Availability, l log.Logger) *InstanceDiscovery {
port int, region string, allTenants bool, availability gophercloud.Availability, l log.Logger,
) *InstanceDiscovery {
if l == nil {
l = log.NewNopLogger()
}

View file

@ -24,12 +24,12 @@ import (
"net/http"
"net/url"
"path"
"regexp"
"strconv"
"strings"
"time"
"github.com/go-kit/log"
"github.com/grafana/regexp"
"github.com/pkg/errors"
"github.com/prometheus/common/config"
"github.com/prometheus/common/model"

View file

@ -122,7 +122,7 @@ func mockScalewayInstance(w http.ResponseWriter, r *http.Request) {
http.Error(w, "bad token id", http.StatusUnauthorized)
return
}
if r.RequestURI != "/instance/v1/zones/fr-par-1/servers?page=1" {
if r.URL.Path != "/instance/v1/zones/fr-par-1/servers" {
http.Error(w, "bad url", http.StatusNotFound)
return
}
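The switch from `RequestURI` to `URL.Path` matters because `RequestURI` keeps the raw query string, so the old check only matched page 1 of the paginated mock. A sketch of the difference:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
	"net/http/httptest"
)

func main() {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// RequestURI keeps the query string; URL.Path does not.
		fmt.Fprintf(w, "%s | %s", r.RequestURI, r.URL.Path)
	}))
	defer srv.Close()

	resp, err := http.Get(srv.URL + "/servers?page=2")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	b, _ := io.ReadAll(resp.Body)
	fmt.Println(string(b)) // /servers?page=2 | /servers
}
```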

View file

@ -91,3 +91,18 @@ func (tg *Group) UnmarshalJSON(b []byte) error {
tg.Labels = g.Labels
return nil
}
// MarshalJSON implements the json.Marshaler interface.
func (tg Group) MarshalJSON() ([]byte, error) {
g := &struct {
Targets []string `json:"targets"`
Labels model.LabelSet `json:"labels,omitempty"`
}{
Targets: make([]string, 0, len(tg.Targets)),
Labels: tg.Labels,
}
for _, t := range tg.Targets {
g.Targets = append(g.Targets, string(t[model.AddressLabel]))
}
return json.Marshal(g)
}
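A quick usage sketch of the new marshaller (the import path is assumed from this repository's layout). Because the method is declared on the value receiver, `json.Marshal` picks it up for both `Group` and `*Group`:

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

func main() {
	tg := targetgroup.Group{
		Targets: []model.LabelSet{{model.AddressLabel: "localhost:9090"}},
		Labels:  model.LabelSet{"env": "demo"},
	}
	b, err := json.Marshal(tg)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b))
	// {"targets":["localhost:9090"],"labels":{"env":"demo"}}
}
```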

View file

@ -22,7 +22,7 @@ import (
"gopkg.in/yaml.v2"
)
func TestTargetGroupStrictJsonUnmarshal(t *testing.T) {
func TestTargetGroupStrictJSONUnmarshal(t *testing.T) {
tests := []struct {
json string
expectedReply error
@ -59,6 +59,39 @@ func TestTargetGroupStrictJsonUnmarshal(t *testing.T) {
}
}
func TestTargetGroupJSONMarshal(t *testing.T) {
tests := []struct {
expectedJSON string
expectedErr error
group Group
}{
{
// labels should be omitted if empty.
group: Group{},
expectedJSON: `{"targets": []}`,
expectedErr: nil,
},
{
// targets only exposes addresses.
group: Group{
Targets: []model.LabelSet{
{"__address__": "localhost:9090"},
{"__address__": "localhost:9091"},
},
Labels: model.LabelSet{"foo": "bar", "bar": "baz"},
},
expectedJSON: `{"targets": ["localhost:9090", "localhost:9091"], "labels": {"bar": "baz", "foo": "bar"}}`,
expectedErr: nil,
},
}
for _, test := range tests {
actual, err := test.group.MarshalJSON()
require.Equal(t, test.expectedErr, err)
require.JSONEq(t, test.expectedJSON, string(actual))
}
}
func TestTargetGroupYamlMarshal(t *testing.T) {
marshal := func(g interface{}) []byte {
d, err := yaml.Marshal(g)
@ -143,8 +176,7 @@ func TestTargetGroupYamlUnmarshal(t *testing.T) {
func TestString(t *testing.T) {
// String() should return only the source, regardless of other attributes.
group1 :=
Group{
group1 := Group{
Targets: []model.LabelSet{
{"__address__": "localhost:9090"},
{"__address__": "localhost:9091"},
@ -152,8 +184,7 @@ func TestString(t *testing.T) {
Source: "<source>",
Labels: model.LabelSet{"foo": "bar", "bar": "baz"},
}
group2 :=
Group{
group2 := Group{
Targets: []model.LabelSet{},
Source: "<source>",
Labels: model.LabelSet{},

View file

@ -23,7 +23,6 @@ import (
"time"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/kolo/xmlrpc"
"github.com/pkg/errors"
"github.com/prometheus/common/config"
@ -47,6 +46,8 @@ const (
uyuniLabelProxyModule = uyuniMetaLabelPrefix + "proxy_module"
uyuniLabelMetricsPath = uyuniMetaLabelPrefix + "metrics_path"
uyuniLabelScheme = uyuniMetaLabelPrefix + "scheme"
tokenDuration = 10 * time.Minute
)
// DefaultSDConfig is the default Uyuni SD configuration.
@ -100,6 +101,8 @@ type Discovery struct {
roundTripper http.RoundTripper
username string
password string
token string
tokenExpiration time.Time
entitlement string
separator string
interval time.Duration
@ -140,16 +143,12 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return nil
}
func login(rpcclient *xmlrpc.Client, user, pass string) (string, error) {
func login(rpcclient *xmlrpc.Client, user, pass string, duration int) (string, error) {
var result string
err := rpcclient.Call("auth.login", []interface{}{user, pass}, &result)
err := rpcclient.Call("auth.login", []interface{}{user, pass, duration}, &result)
return result, err
}
func logout(rpcclient *xmlrpc.Client, token string) error {
return rpcclient.Call("auth.logout", token, nil)
}
func getSystemGroupsInfoOfMonitoredClients(rpcclient *xmlrpc.Client, token, entitlement string) (map[int][]systemGroupID, error) {
var systemGroupsInfos []struct {
SystemID int `xmlrpc:"id"`
@ -271,12 +270,11 @@ func getSystemGroupNames(systemGroupsIDs []systemGroupID) []string {
func (d *Discovery) getTargetsForSystems(
rpcClient *xmlrpc.Client,
token string,
entitlement string,
) ([]model.LabelSet, error) {
result := make([]model.LabelSet, 0)
systemGroupIDsBySystemID, err := getSystemGroupsInfoOfMonitoredClients(rpcClient, token, entitlement)
systemGroupIDsBySystemID, err := getSystemGroupsInfoOfMonitoredClients(rpcClient, d.token, entitlement)
if err != nil {
return nil, errors.Wrap(err, "unable to get the managed system groups information of monitored clients")
}
@ -286,12 +284,12 @@ func (d *Discovery) getTargetsForSystems(
systemIDs = append(systemIDs, systemID)
}
endpointInfos, err := getEndpointInfoForSystems(rpcClient, token, systemIDs)
endpointInfos, err := getEndpointInfoForSystems(rpcClient, d.token, systemIDs)
if err != nil {
return nil, errors.Wrap(err, "unable to get endpoints information")
}
networkInfoBySystemID, err := getNetworkInformationForSystems(rpcClient, token, systemIDs)
networkInfoBySystemID, err := getNetworkInformationForSystems(rpcClient, d.token, systemIDs)
if err != nil {
return nil, errors.Wrap(err, "unable to get the systems network information")
}
@ -308,25 +306,27 @@ func (d *Discovery) getTargetsForSystems(
return result, nil
}
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
func (d *Discovery) refresh(_ context.Context) ([]*targetgroup.Group, error) {
rpcClient, err := xmlrpc.NewClient(d.apiURL.String(), d.roundTripper)
if err != nil {
return nil, err
}
defer rpcClient.Close()
token, err := login(rpcClient, d.username, d.password)
if time.Now().After(d.tokenExpiration) {
// Uyuni API takes duration in seconds.
d.token, err = login(rpcClient, d.username, d.password, int(tokenDuration.Seconds()))
if err != nil {
return nil, errors.Wrap(err, "unable to login to Uyuni API")
}
defer func() {
if err := logout(rpcClient, token); err != nil {
level.Debug(d.logger).Log("msg", "Failed to log out from Uyuni API", "err", err)
// Login again at half the token lifetime.
d.tokenExpiration = time.Now().Add(tokenDuration / 2)
}
}()
targetsForSystems, err := d.getTargetsForSystems(rpcClient, token, d.entitlement)
targetsForSystems, err := d.getTargetsForSystems(rpcClient, d.entitlement)
if err != nil {
// Force the renewal of the token on next refresh.
d.tokenExpiration = time.Now()
return nil, err
}
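The refresh above replaces per-refresh login/logout with a cached token that is renewed at half its nominal lifetime and invalidated after a failed call. The policy in miniature, with all names illustrative rather than taken from the Uyuni SD code:

```go
package main

import (
	"fmt"
	"time"
)

// tokenCache sketches the renewal policy used above.
type tokenCache struct {
	token      string
	expiration time.Time
	lifetime   time.Duration
	login      func() (string, error)
}

func (c *tokenCache) get() (string, error) {
	if time.Now().After(c.expiration) {
		t, err := c.login()
		if err != nil {
			return "", err
		}
		c.token = t
		// Renew at half the nominal lifetime, well before the server
		// invalidates the token.
		c.expiration = time.Now().Add(c.lifetime / 2)
	}
	return c.token, nil
}

// invalidate forces a fresh login on the next get, mirroring how refresh
// resets tokenExpiration after a failed API call.
func (c *tokenCache) invalidate() { c.expiration = time.Now() }

func main() {
	calls := 0
	c := &tokenCache{
		lifetime: 10 * time.Minute,
		login: func() (string, error) {
			calls++
			return fmt.Sprintf("token-%d", calls), nil
		},
	}
	t1, _ := c.get()
	t2, _ := c.get() // still within the renewal window: cached
	c.invalidate()
	t3, _ := c.get() // logs in again
	fmt.Println(t1, t2, t3, calls) // token-1 token-1 token-2 2
}
```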

View file

@ -19,6 +19,7 @@ import (
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/stretchr/testify/require"
@ -56,3 +57,67 @@ func TestUyuniSDHandleError(t *testing.T) {
require.EqualError(t, err, errTesting)
require.Equal(t, len(tgs), 0)
}
func TestUyuniSDLogin(t *testing.T) {
var (
errTesting = "unable to get the managed system groups information of monitored clients: request error: bad status code - 500"
call = 0
respHandler = func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/xml")
switch call {
case 0:
w.WriteHeader(http.StatusOK)
io.WriteString(w, `<?xml version="1.0"?>
<methodResponse>
<params>
<param>
<value>
a token
</value>
</param>
</params>
</methodResponse>`)
case 1:
w.WriteHeader(http.StatusInternalServerError)
io.WriteString(w, ``)
}
call++
}
)
tgs, err := testUpdateServices(respHandler)
require.EqualError(t, err, errTesting)
require.Equal(t, len(tgs), 0)
}
func TestUyuniSDSkipLogin(t *testing.T) {
var (
errTesting = "unable to get the managed system groups information of monitored clients: request error: bad status code - 500"
respHandler = func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/xml")
w.WriteHeader(http.StatusInternalServerError)
io.WriteString(w, ``)
}
)
// Create a test server with mock HTTP handler.
ts := httptest.NewServer(http.HandlerFunc(respHandler))
defer ts.Close()
conf := SDConfig{
Server: ts.URL,
}
md, err := NewDiscovery(&conf, nil)
if err != nil {
t.Error(err)
}
// simulate a cached token
md.token = `a token`
md.tokenExpiration = time.Now().Add(time.Minute)
tgs, err := md.refresh(context.Background())
require.EqualError(t, err, errTesting)
require.Equal(t, len(tgs), 0)
}

View file

@ -159,7 +159,7 @@ func convertKumaUserLabels(labels map[string]string) model.LabelSet {
// kumaMadsV1ResourceParser is an xds.resourceParser.
func kumaMadsV1ResourceParser(resources []*anypb.Any, typeURL string) ([]model.LabelSet, error) {
if typeURL != KumaMadsV1ResourceTypeURL {
return nil, errors.Errorf("recieved invalid typeURL for Kuma MADS v1 Resource: %s", typeURL)
return nil, errors.Errorf("received invalid typeURL for Kuma MADS v1 Resource: %s", typeURL)
}
var targets []model.LabelSet

View file

@ -34,6 +34,7 @@ Generic placeholders are defined as follows:
* `<boolean>`: a boolean that can take the values `true` or `false`
* `<duration>`: a duration matching the regular expression `((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0)`, e.g. `1d`, `1h30m`, `5m`, `10s`
* `<filename>`: a valid path in the current working directory
* `<float>`: a floating-point number
* `<host>`: a valid string consisting of a hostname or IP followed by an optional port number
* `<int>`: an integer value
* `<labelname>`: a string matching the regular expression `[a-zA-Z_][a-zA-Z0-9_]*`
@ -99,6 +100,10 @@ remote_read:
# Storage related settings that are runtime reloadable.
storage:
[ - <exemplars> ... ]
# Configures exporting traces.
tracing:
[ <tracing_config> ]
```
### `<scrape_config>`
@ -191,7 +196,7 @@ oauth2:
[ <oauth2> ]
# Configure whether scrape requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# Configures the scrape request's TLS settings.
tls_config:
@ -438,7 +443,7 @@ subscription_id: <string>
# instead be specified in the relabeling rule.
[ port: <int> | default = 80 ]
# Authentication information used to authenticate to the consul server.
# Authentication information used to authenticate to the Azure API.
# Note that `basic_auth`, `authorization` and `oauth2` options are
# mutually exclusive.
# `password` and `password_file` are mutually exclusive.
@ -468,7 +473,7 @@ oauth2:
[ proxy_url: <string> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# TLS configuration.
tls_config:
@ -564,7 +569,7 @@ oauth2:
[ proxy_url: <string> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# TLS configuration.
tls_config:
@ -639,7 +644,7 @@ oauth2:
[ proxy_url: <string> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# TLS configuration.
tls_config:
@ -698,9 +703,7 @@ tls_config:
# Optional filters to limit the discovery process to a subset of available
# resources.
# The available filters are listed in the upstream documentation:
# Services: https://docs.docker.com/engine/api/v1.40/#operation/ServiceList
# Tasks: https://docs.docker.com/engine/api/v1.40/#operation/TaskList
# Nodes: https://docs.docker.com/engine/api/v1.40/#operation/NodeList
# https://docs.docker.com/engine/api/v1.40/#operation/ContainerList
[ filters:
[ - name: <string>
values: <string>, [...] ]
@ -736,7 +739,7 @@ oauth2:
[ <oauth2> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
```
@ -789,10 +792,10 @@ created using the `port` parameter defined in the SD configuration.
Available meta labels:
* `__meta_dockerswarm_container_label_<labelname>`: each label of the container
* `__meta_dockerswarm_task_id`: the id of the task
* `__meta_dockerswarm_task_container_id`: the container id of the task
* `__meta_dockerswarm_task_desired_state`: the desired state of the task
* `__meta_dockerswarm_task_label_<labelname>`: each label of the task
* `__meta_dockerswarm_task_slot`: the slot of the task
* `__meta_dockerswarm_task_state`: the state of the task
* `__meta_dockerswarm_task_port_publish_mode`: the publish mode of the task port
@ -863,7 +866,9 @@ role: <string>
# Optional filters to limit the discovery process to a subset of available
# resources.
# The available filters are listed in the upstream documentation:
# https://docs.docker.com/engine/api/v1.40/#operation/ContainerList
# Services: https://docs.docker.com/engine/api/v1.40/#operation/ServiceList
# Tasks: https://docs.docker.com/engine/api/v1.40/#operation/TaskList
# Nodes: https://docs.docker.com/engine/api/v1.40/#operation/NodeList
[ filters:
[ - name: <string>
values: <string>, [...] ]
@ -899,7 +904,7 @@ oauth2:
[ <oauth2> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
```
@ -1202,7 +1207,7 @@ oauth2:
[ proxy_url: <string> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
```
See [this example Prometheus configuration file](/documentation/examples/prometheus-puppetdb.yml)
@ -1406,7 +1411,7 @@ oauth2:
[ proxy_url: <string> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# TLS configuration.
tls_config:
@ -1443,8 +1448,9 @@ Example response body:
]
```
The endpoint is queried periodically at the specified
refresh interval.
The endpoint is queried periodically at the specified refresh interval.
The `prometheus_sd_http_failures_total` counter metric tracks the number of
refresh failures.
Each target has a meta label `__meta_url` during the
[relabeling phase](#relabel_config). Its value is set to the
@ -1487,7 +1493,7 @@ oauth2:
[ proxy_url: <string> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# TLS configuration.
tls_config:
@ -1649,7 +1655,7 @@ See below for the configuration options for Kubernetes discovery:
[ api_server: <host> ]
# The Kubernetes role of entities that should be discovered.
# One of endpoints, service, pod, node, or ingress.
# One of endpoints, endpointslice, service, pod, node, or ingress.
role: <string>
# Optional path to a kubeconfig file.
@ -1686,7 +1692,7 @@ oauth2:
[ proxy_url: <string> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# TLS configuration.
tls_config:
@ -1694,15 +1700,16 @@ tls_config:
# Optional namespace discovery. If omitted, all namespaces are used.
namespaces:
own_namespace: <bool>
own_namespace: <boolean>
names:
[ - <string> ]
# Optional label and field selectors to limit the discovery process to a subset of available resources.
# See https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
# and https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ to learn more about the possible
# filters that can be used. Endpoints role supports pod, service and endpoints selectors, other roles
# only support selectors matching the role itself (e.g. node role can only contain node selectors).
# filters that can be used. The endpoints role supports pod, service and endpoints selectors.
# The pod role supports node selectors when configured with `attach_metadata: {node: true}`.
# Other roles only support selectors matching the role itself (e.g. node role can only contain node selectors).
# Note: When making decision about using field/label selector make sure that this
# is the best approach - it will prevent Prometheus from reusing single list/watch
@ -1714,6 +1721,12 @@ namespaces:
[ - role: <string>
[ label: <string> ]
[ field: <string> ] ]]
# Optional metadata to attach to discovered targets. If omitted, no additional metadata is attached.
attach_metadata:
# Attaches node metadata to discovered targets. Only valid for role: pod.
# When set to true, Prometheus must have permissions to get Nodes.
[ node: <boolean> | default = false ]
```
See [this example Prometheus configuration file](/documentation/examples/prometheus-kubernetes.yml)
@ -1784,7 +1797,7 @@ oauth2:
[ <oauth2> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
```
The [relabeling phase](#relabel_config) is the preferred and more powerful way
@ -1900,7 +1913,7 @@ oauth2:
[ proxy_url: <string> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# TLS configuration.
tls_config:
@ -1983,7 +1996,7 @@ oauth2:
[ <oauth2> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# TLS configuration for connecting to marathon servers
tls_config:
@ -2193,7 +2206,7 @@ tls_config:
[ proxy_url: <string> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# Refresh interval to re-read the app instance list.
[ refresh_interval: <duration> | default = 30s ]
@ -2297,7 +2310,7 @@ tags_filter:
[ refresh_interval: <duration> | default = 60s ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# Optional proxy URL.
[ proxy_url: <string> ]
@ -2370,7 +2383,7 @@ oauth2:
[ proxy_url: <string> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# TLS configuration.
tls_config:
@ -2552,7 +2565,7 @@ tls_config:
[ proxy_url: <string> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# List of Azure service discovery configurations.
azure_sd_configs:
@ -2655,6 +2668,38 @@ relabel_configs:
[ - <relabel_config> ... ]
```
### `<tracing_config>`
`tracing_config` configures exporting traces from Prometheus to a tracing backend via the OTLP protocol. Tracing is currently an **experimental** feature and could change in the future.
```yaml
# Client used to export the traces. Options are 'http' or 'grpc'.
[ client_type: <string> | default = grpc ]
# Endpoint to send the traces to. Should be provided in the format <host>:<port>.
[ endpoint: <string> ]
# Sets the probability a given trace will be sampled. Must be a float from 0 through 1.
[ sampling_fraction: <float> | default = 0 ]
# If set to true, the exporter connects without TLS; if false (the default), the client uses a secure connection.
[ insecure: <boolean> | default = false ]
# Key-value pairs to be used as headers associated with gRPC or HTTP requests.
headers:
[ <string>: <string> ... ]
# Compression to use for exported data. The only supported compression is gzip.
[ compression: <string> ]
# Maximum time the exporter will wait for each batch export.
[ timeout: <duration> | default = 10s ]
# TLS configuration.
tls_config:
[ <tls_config> ]
```
### `<remote_write>`
`write_relabel_configs` is relabeling applied to samples before sending them
@ -2739,7 +2784,7 @@ tls_config:
[ proxy_url: <string> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# Configures the queue used to write to remote storage.
queue_config:
@ -2840,7 +2885,10 @@ tls_config:
[ proxy_url: <string> ]
# Configure whether HTTP requests follow HTTP 3xx redirects.
[ follow_redirects: <bool> | default = true ]
[ follow_redirects: <boolean> | default = true ]
# Whether to use the external labels as selectors for the remote read endpoint.
[ filter_external_labels: <boolean> | default = true ]
```
There is a list of

View file

@ -84,6 +84,7 @@ versions.
| args | []interface{} | map[string]interface{} | This converts a list of objects to a map with keys arg0, arg1 etc. This is intended to allow multiple arguments to be passed to templates. |
| tmpl | string, []interface{} | nothing | Like the built-in `template`, but allows non-literals as the template name. Note that the result is assumed to be safe, and will not be auto-escaped. Only available in consoles. |
| safeHtml | string | string | Marks string as HTML not requiring auto-escaping. |
| pathPrefix | _none_ | string | The external URL [path](https://pkg.go.dev/net/url#URL) for use in console templates. |
## Template type differences

View file

@ -11,13 +11,6 @@ Their behaviour can change in future releases which will be communicated via the
You can enable them using the `--enable-feature` flag with a comma separated list of features.
They may be enabled by default in future versions.
## `@` Modifier in PromQL
`--enable-feature=promql-at-modifier`
The `@` modifier lets you specify the evaluation time for instant vector selectors,
range vector selectors, and subqueries. More details can be found [here](querying/basics.md#modifier).
## Expand environment variables in external labels
`--enable-feature=expand-external-labels`
@ -25,20 +18,7 @@ range vector selectors, and subqueries. More details can be found [here](queryin
Replace `${var}` or `$var` in the [`external_labels`](configuration/configuration.md#configuration-file)
values according to the values of the current environment variables. References
to undefined variables are replaced by the empty string.
## Negative offset in PromQL
This negative offset is disabled by default since it breaks the invariant
that PromQL does not look ahead of the evaluation time for samples.
`--enable-feature=promql-negative-offset`
In contrast to the positive offset modifier, the negative offset modifier lets
one shift a vector selector into the future. An example in which one may want
to use a negative offset is reviewing past data and making temporal comparisons
with more recent data.
More details can be found [here](querying/basics.md#offset-modifier).
The `$` character can be escaped by using `$$`.
## Remote Write Receiver
@ -46,6 +26,8 @@ More details can be found [here](querying/basics.md#offset-modifier).
The remote write receiver allows Prometheus to accept remote write requests from other Prometheus servers. More details can be found [here](storage.md#overview).
Activating the remote write receiver via a feature flag is deprecated. Use `--web.enable-remote-write-receiver` instead. This feature flag will be ignored in future versions of Prometheus.
## Exemplars storage
`--enable-feature=exemplar-storage`
@ -96,3 +78,13 @@ discovery, scrape and remote write.
This is useful when you do not need to query the Prometheus data locally, but
only from a central [remote endpoint](https://prometheus.io/docs/operating/integrations/#remote-endpoints-and-storage).
## Per-step stats
`--enable-feature=promql-per-step-stats`
When enabled, passing `stats=all` in a query request returns per-step
statistics. Currently this is limited to totalQueryableSamples.
When disabled in either the engine or the query, per-step statistics are not
computed at all.

View file

@ -81,7 +81,7 @@ navigating to its metrics endpoint:
Let us explore data that Prometheus has collected about itself. To
use Prometheus's built-in expression browser, navigate to
http://localhost:9090/graph and choose the "Console" view within the "Graph" tab.
http://localhost:9090/graph and choose the "Table" view within the "Graph" tab.
As you can gather from [localhost:9090/metrics](http://localhost:9090/metrics),
one metric that Prometheus exports about itself is named

View file

@ -40,7 +40,8 @@ an empty list `[]`. Target lists are unordered.
Prometheus caches target lists. If an error occurs while fetching an updated
targets list, Prometheus keeps using the current targets list. The targets list
is not saved across restart.
is not saved across restart. The `prometheus_sd_http_failures_total` counter
metric tracks the number of refresh failures.
The whole list of targets must be returned on every scrape. There is no support
for incremental updates. A Prometheus instance does not send its hostname and it

View file

@ -620,6 +620,7 @@ $ curl http://localhost:9090/api/v1/rules
],
"file": "/rules.yaml",
"interval": 60,
"limit": 0,
"name": "example"
}
]
@ -1145,3 +1146,17 @@ $ curl -XPOST http://localhost:9090/api/v1/admin/tsdb/clean_tombstones
```
*New in v2.1 and supports PUT from v2.9*
## Remote Write Receiver
Prometheus can be configured as a receiver for the Prometheus remote write
protocol. This is not considered an efficient way of ingesting samples. Use it
with caution for specific low-volume use cases. It is not suitable for
replacing the ingestion via scraping and turning Prometheus into a push-based
metrics collection system.
Enable the remote write receiver by setting
`--web.enable-remote-write-receiver`. When enabled, the remote write receiver
endpoint is `/api/v1/write`. Find more details [here](../storage.md#overview).
*New in v2.33*

View file

@ -104,14 +104,15 @@ against regular expressions. The following label matching operators exist:
* `=~`: Select labels that regex-match the provided string.
* `!~`: Select labels that do not regex-match the provided string.
Regex matches are fully anchored. A match of `env=~"foo"` is treated as `env=~"^foo$"`.
For example, this selects all `http_requests_total` time series for `staging`,
`testing`, and `development` environments and HTTP methods other than `GET`.
http_requests_total{environment=~"staging|testing|development",method!="GET"}
Label matchers that match empty label values also select all time series that
do not have the specific label set at all. Regex-matches are fully anchored. It
is possible to have multiple matchers for the same label name.
do not have the specific label set at all. It is possible to have multiple matchers for the same label name.
Vector selectors must either specify a name or at least one label matcher
that does not match the empty string. The following expression is illegal:
@@ -209,9 +210,7 @@ can be specified:
rate(http_requests_total[5m] offset -1w)
This feature is enabled by setting `--enable-feature=promql-negative-offset`
flag. See [feature flags](../feature_flags.md) for more details about
this flag.
Note that this allows a query to look ahead of its evaluation time.
### @ modifier
@@ -249,10 +248,6 @@ These 2 queries will produce the same result.
# offset before @
http_requests_total offset 5m @ 1609746000
This modifier is disabled by default since it breaks the invariant that PromQL
does not look ahead of the evaluation time for samples. It can be enabled by setting
`--enable-feature=promql-at-modifier` flag. See [feature flags](../feature_flags.md) for more details about this flag.
Additionally, `start()` and `end()` can also be used as values for the `@` modifier as special values.
For a range query, they resolve to the start and end of the range query respectively and remain the same for all steps.
@@ -262,6 +257,8 @@ For an instant query, `start()` and `end()` both resolve to the evaluation time.
http_requests_total @ start()
rate(http_requests_total[5m] @ end())
Note that the `@` modifier allows a query to look ahead of its evaluation time.
## Subquery
Subquery allows you to run an instant query for a given range and resolution. The result of a subquery is a range vector.

View file

@@ -192,7 +192,7 @@ bucket. Otherwise, the upper bound of the lowest bucket is returned
for quantiles located in the lowest bucket.
If `b` has 0 observations, `NaN` is returned. If `b` contains fewer than two buckets,
`NaN` is returned. For φ < 0, `-Inf` is returned. For φ > 1, `+Inf` is returned.
`NaN` is returned. For φ < 0, `-Inf` is returned. For φ > 1, `+Inf` is returned. For φ = `NaN`, `NaN` is returned.
## `holt_winters()`

View file

@@ -241,7 +241,7 @@ vector. `by` and `without` are only used to bucket the input vector.
`quantile` calculates the φ-quantile, the value that ranks at number φ*N among
the N metric values of the dimensions aggregated over. φ is provided as the
aggregation parameter. For example, `quantile(0.5, ...)` calculates the median,
`quantile(0.95, ...)` the 95th percentile.
`quantile(0.95, ...)` the 95th percentile. For φ = `NaN`, `NaN` is returned. For φ < 0, `-Inf` is returned. For φ > 1, `+Inf` is returned.
Example:

View file

@@ -82,7 +82,7 @@ Prometheus has several flags that configure local storage. The most important ar
* `--storage.tsdb.path`: Where Prometheus writes its database. Defaults to `data/`.
* `--storage.tsdb.retention.time`: When to remove old data. Defaults to `15d`. Overrides `storage.tsdb.retention` if this flag is set to anything other than default.
* `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain. The oldest data will be removed first. Defaults to `0` or disabled. Units supported: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Only the persistent blocks are deleted to honor this retention although WAL and m-mapped chunks are counted in the total size. So the minimum requirement for the disk is the peak space taken by the `wal` (the WAL and Checkpoint) and `chunks_head` (m-mapped Head chunks) directory combined (peaks every 2 hours).
* `--storage.tsdb.retention.size`: The maximum number of bytes of storage blocks to retain. The oldest data will be removed first. Defaults to `0` or disabled. Units supported: B, KB, MB, GB, TB, PB, EB. Ex: "512MB". Based on powers-of-2, so 1KB is 1024B. Only the persistent blocks are deleted to honor this retention although WAL and m-mapped chunks are counted in the total size. So the minimum requirement for the disk is the peak space taken by the `wal` (the WAL and Checkpoint) and `chunks_head` (m-mapped Head chunks) directory combined (peaks every 2 hours).
* `--storage.tsdb.retention`: Deprecated in favor of `storage.tsdb.retention.time`.
* `--storage.tsdb.wal-compression`: Enables compression of the write-ahead log (WAL). Depending on your data, you can expect the WAL size to be halved with little extra cpu load. This flag was introduced in 2.11.0 and enabled by default in 2.20.0. Note that once enabled, downgrading Prometheus to a version below 2.11.0 will require deleting the WAL.
@@ -129,7 +129,7 @@ The read and write protocols both use a snappy-compressed protocol buffer encodi
For details on configuring remote storage integrations in Prometheus, see the [remote write](configuration/configuration.md#remote_write) and [remote read](configuration/configuration.md#remote_read) sections of the Prometheus configuration documentation.
The built-in remote write receiver can be enabled by setting the `--enable-feature=remote-write-receiver` command line flag. When enabled, the remote write receiver endpoint is `/api/v1/write`.
The built-in remote write receiver can be enabled by setting the `--web.enable-remote-write-receiver` command line flag. When enabled, the remote write receiver endpoint is `/api/v1/write`.
For details on the request and response messages, see the [remote storage protocol buffer definitions](https://github.com/prometheus/prometheus/blob/main/prompb/remote.proto).

View file

@@ -84,7 +84,6 @@ func generateTargetGroups(allTargetGroups map[string][]*targetgroup.Group) map[s
}
sdGroup := customSD{
Targets: newTargets,
Labels: newLabels,
}

View file

@@ -0,0 +1,21 @@
# Copyright 2022 The Prometheus Authors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
include ../../../Makefile.common
build:
@echo ">> building example_write_adapter"
@$(GO) build -o example_write_adapter/example_write_adapter ./example_write_adapter
@echo ">> building remote_storage_adapter"
@$(GO) build -o remote_storage_adapter/remote_storage_adapter ./remote_storage_adapter

View file

@@ -0,0 +1,53 @@
module github.com/prometheus/prometheus/documentation/examples/remote_storage
go 1.17
require (
github.com/go-kit/log v0.2.0
github.com/gogo/protobuf v1.3.2
github.com/golang/snappy v0.0.4
github.com/influxdata/influxdb v1.9.5
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.12.1
github.com/prometheus/common v0.32.1
github.com/prometheus/prometheus v1.8.2-0.20220202104425-d819219dd438
github.com/stretchr/testify v1.7.0
gopkg.in/alecthomas/kingpin.v2 v2.2.6
)
require (
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137 // indirect
github.com/aws/aws-sdk-go v1.42.31 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dennwc/varint v1.0.0 // indirect
github.com/felixge/httpsnoop v1.0.2 // indirect
github.com/go-logfmt/logfmt v0.5.1 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common/sigv4 v0.1.0 // indirect
github.com/prometheus/procfs v0.7.3 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.26.1 // indirect
go.opentelemetry.io/otel v1.2.0 // indirect
go.opentelemetry.io/otel/internal/metric v0.24.0 // indirect
go.opentelemetry.io/otel/metric v0.24.0 // indirect
go.opentelemetry.io/otel/trace v1.2.0 // indirect
go.uber.org/atomic v1.9.0 // indirect
go.uber.org/goleak v1.1.12 // indirect
golang.org/x/net v0.0.0-20220105145211-5b0dc2dfae98 // indirect
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 // indirect
golang.org/x/text v0.3.7 // indirect
golang.org/x/time v0.0.0-20211116232009-f0f3c7e86c11 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.27.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
)

File diff suppressed because it is too large

78
go.mod
View file

@@ -1,81 +1,89 @@
module github.com/prometheus/prometheus
go 1.14
go 1.16
require (
github.com/Azure/azure-sdk-for-go v60.1.0+incompatible
github.com/Azure/go-autorest/autorest v0.11.23
github.com/Azure/go-autorest/autorest/adal v0.9.17
github.com/Azure/azure-sdk-for-go v62.0.0+incompatible
github.com/Azure/go-autorest/autorest v0.11.24
github.com/Azure/go-autorest/autorest/adal v0.9.18
github.com/Azure/go-autorest/autorest/to v0.4.0 // indirect
github.com/Azure/go-autorest/autorest/validation v0.3.1 // indirect
github.com/alecthomas/units v0.0.0-20210927113745-59d0afb8317a
github.com/aws/aws-sdk-go v1.42.23
github.com/alecthomas/units v0.0.0-20211218093645-b94a6e3cc137
github.com/armon/go-metrics v0.3.3 // indirect
github.com/aws/aws-sdk-go v1.43.10
github.com/cespare/xxhash/v2 v2.1.2
github.com/containerd/containerd v1.5.7 // indirect
github.com/containerd/containerd v1.6.1 // indirect
github.com/dennwc/varint v1.0.0
github.com/dgryski/go-sip13 v0.0.0-20200911182023-62edffca9245
github.com/digitalocean/godo v1.73.0
github.com/digitalocean/godo v1.75.0
github.com/docker/docker v20.10.12+incompatible
github.com/docker/go-connections v0.4.0 // indirect
github.com/edsrzf/mmap-go v1.0.0
github.com/edsrzf/mmap-go v1.1.0
github.com/envoyproxy/go-control-plane v0.10.1
github.com/envoyproxy/protoc-gen-validate v0.6.2
github.com/envoyproxy/protoc-gen-validate v0.6.6
github.com/fsnotify/fsnotify v1.5.1
github.com/go-kit/log v0.2.0
github.com/go-logfmt/logfmt v0.5.1
github.com/go-openapi/strfmt v0.21.1
github.com/go-openapi/strfmt v0.21.2
github.com/go-zookeeper/zk v1.0.2
github.com/gogo/protobuf v1.3.2
github.com/golang/snappy v0.0.4
github.com/google/pprof v0.0.0-20211122183932-1daafda22083
github.com/google/pprof v0.0.0-20220218203455-0368bd9e19a7
github.com/gophercloud/gophercloud v0.24.0
github.com/grafana/regexp v0.0.0-20220304095617-2e8d9baf4ac2
github.com/grpc-ecosystem/grpc-gateway v1.16.0
github.com/hashicorp/consul/api v1.11.0
github.com/hashicorp/consul/api v1.12.0
github.com/hashicorp/go-hclog v0.12.2 // indirect
github.com/hashicorp/go-immutable-radix v1.2.0 // indirect
github.com/hashicorp/golang-lru v0.5.4 // indirect
github.com/hetznercloud/hcloud-go v1.33.1
github.com/influxdata/influxdb v1.9.5
github.com/json-iterator/go v1.1.12
github.com/kolo/xmlrpc v0.0.0-20201022064351-38db28db192b
github.com/linode/linodego v1.2.1
github.com/miekg/dns v1.1.43
github.com/linode/linodego v1.3.0
github.com/mattn/go-colorable v0.1.8 // indirect
github.com/miekg/dns v1.1.46
github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6 // indirect
github.com/morikuni/aec v1.0.0 // indirect
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f
github.com/oklog/run v1.1.0
github.com/oklog/ulid v1.3.1
github.com/opentracing-contrib/go-stdlib v1.0.0
github.com/opentracing/opentracing-go v1.2.0
github.com/opencontainers/image-spec v1.0.2 // indirect
github.com/pkg/errors v0.9.1
github.com/prometheus/alertmanager v0.23.0
github.com/prometheus/client_golang v1.11.0
github.com/prometheus/client_golang v1.12.1
github.com/prometheus/client_model v0.2.0
github.com/prometheus/common v0.32.1
github.com/prometheus/common/assets v0.1.0
github.com/prometheus/common/sigv4 v0.1.0
github.com/prometheus/exporter-toolkit v0.7.1
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.7.0.20210223165440-c65ae3540d44
github.com/scaleway/scaleway-sdk-go v1.0.0-beta.9
github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749
github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546
github.com/stretchr/testify v1.7.0
github.com/uber/jaeger-client-go v2.29.1+incompatible
github.com/uber/jaeger-lib v2.4.1+incompatible
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.29.0
go.opentelemetry.io/otel v1.4.1
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.4.1
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.4.1
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.4.1
go.opentelemetry.io/otel/sdk v1.4.1
go.opentelemetry.io/otel/trace v1.4.1
go.uber.org/atomic v1.9.0
go.uber.org/goleak v1.1.12
golang.org/x/net v0.0.0-20211209124913-491a49abca63
golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
golang.org/x/sys v0.0.0-20211210111614-af8b64212486
golang.org/x/time v0.0.0-20211116232009-f0f3c7e86c11
golang.org/x/tools v0.1.8
google.golang.org/api v0.63.0
google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa
golang.org/x/sys v0.0.0-20220222172238-00053529121e
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8
golang.org/x/tools v0.1.9
google.golang.org/api v0.70.0
google.golang.org/genproto v0.0.0-20220222154240-daf995802d7b
google.golang.org/grpc v1.44.0
google.golang.org/protobuf v1.27.1
gopkg.in/alecthomas/kingpin.v2 v2.2.6
gopkg.in/yaml.v2 v2.4.0
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
k8s.io/api v0.23.0
k8s.io/apimachinery v0.23.0
k8s.io/client-go v0.23.0
k8s.io/api v0.22.7
k8s.io/apimachinery v0.22.7
k8s.io/client-go v0.22.7
k8s.io/klog v1.0.0
k8s.io/klog/v2 v2.30.0
k8s.io/klog/v2 v2.40.1
)
replace (

649
go.sum

File diff suppressed because it is too large

View file

@@ -307,7 +307,7 @@ func Equal(ls, o Labels) bool {
return false
}
for i, l := range ls {
if l.Name != o[i].Name || l.Value != o[i].Value {
if l != o[i] {
return false
}
}
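The one-line comparison works because `Label` is a struct of two comparable string fields, so Go's `!=` checks `Name` and `Value` in a single operation. A tiny standalone sketch of that language rule (the `Label` here is a hypothetical mirror of the package's type):

```go
package main

import "fmt"

// Label mirrors the shape of the package's Label type: two string fields.
type Label struct {
	Name, Value string
}

func main() {
	a := Label{Name: "env", Value: "prod"}
	b := Label{Name: "env", Value: "prod"}
	c := Label{Name: "env", Value: "dev"}

	// Structs whose fields are all comparable support == and !=,
	// which compare every field at once.
	fmt.Println(a == b) // true
	fmt.Println(a != c) // true
}
```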

View file

@@ -548,7 +548,7 @@ func TestLabels_Get(t *testing.T) {
// BenchmarkLabels_Get was written to check whether a binary search can improve the performance vs the linear search implementation
// The results have shown that binary search would only be better when searching last labels in scenarios with more than 10 labels.
// In the following list, `old` is the linear search while `new` is the binary search implementaiton (without calling sort.Search, which performs even worse here)
// In the following list, `old` is the linear search while `new` is the binary search implementation (without calling sort.Search, which performs even worse here)
// name old time/op new time/op delta
// Labels_Get/with_5_labels/get_first_label 5.12ns ± 0% 14.24ns ± 0% ~ (p=1.000 n=1+1)
// Labels_Get/with_5_labels/get_middle_label 13.5ns ± 0% 18.5ns ± 0% ~ (p=1.000 n=1+1)
@@ -586,6 +586,36 @@ func BenchmarkLabels_Get(b *testing.B) {
}
}
func BenchmarkLabels_Equals(b *testing.B) {
for _, scenario := range []struct {
desc string
base, other Labels
}{
{
"equal",
Labels{{"a_label_name", "a_label_value"}, {"another_label_name", "another_label_value"}},
Labels{{"a_label_name", "a_label_value"}, {"another_label_name", "another_label_value"}},
},
{
"not equal",
Labels{{"a_label_name", "a_label_value"}, {"another_label_name", "another_label_value"}},
Labels{{"a_label_name", "a_label_value"}, {"another_label_name", "a_different_label_value"}},
},
{
"different sizes",
Labels{{"a_label_name", "a_label_value"}, {"another_label_name", "another_label_value"}},
Labels{{"a_label_name", "a_label_value"}},
},
} {
b.Run(scenario.desc, func(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = Equal(scenario.base, scenario.other)
}
})
}
}
func TestLabels_Copy(t *testing.T) {
require.Equal(t, Labels{{"aaa", "111"}, {"bbb", "222"}}, Labels{{"aaa", "111"}, {"bbb", "222"}}.Copy())
}

View file

@@ -14,9 +14,10 @@
package labels
import (
"regexp"
"regexp/syntax"
"strings"
"github.com/grafana/regexp"
"github.com/grafana/regexp/syntax"
)
type FastRegexMatcher struct {

View file

@@ -14,9 +14,10 @@
package labels
import (
"regexp/syntax"
"strings"
"testing"
"github.com/grafana/regexp/syntax"
"github.com/stretchr/testify/require"
)
@@ -96,3 +97,42 @@ func TestOptimizeConcatRegex(t *testing.T) {
require.Equal(t, c.contains, contains)
}
}
func BenchmarkFastRegexMatcher(b *testing.B) {
var (
x = strings.Repeat("x", 50)
y = "foo" + x
z = x + "foo"
)
regexes := []string{
"foo",
"^foo",
"(foo|bar)",
"foo.*",
".*foo",
"^.*foo$",
"^.+foo$",
".*",
".+",
"foo.+",
".+foo",
".*foo.*",
"(?i:foo)",
"(prometheus|api_prom)_api_v1_.+",
"((fo(bar))|.+foo)",
}
for _, r := range regexes {
r := r
b.Run(r, func(b *testing.B) {
m, err := NewFastRegexMatcher(r)
require.NoError(b, err)
b.ResetTimer()
for i := 0; i < b.N; i++ {
_ = m.MatchString(x)
_ = m.MatchString(y)
_ = m.MatchString(z)
}
})
}
}
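For context, a short usage sketch of the matcher being benchmarked, based only on the API visible in this diff (`NewFastRegexMatcher` returning a matcher and an error, and `MatchString`):

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/model/labels"
)

func main() {
	// NewFastRegexMatcher fully anchors the pattern and pre-computes cheap
	// prefix/suffix/contains checks where the expression allows it.
	m, err := labels.NewFastRegexMatcher("(prometheus|api_prom)_api_v1_.+")
	if err != nil {
		panic(err)
	}
	fmt.Println(m.MatchString("prometheus_api_v1_query")) // true
	fmt.Println(m.MatchString("prometheus_api_v2_query")) // false
}
```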

View file

@@ -16,9 +16,9 @@ package relabel
import (
"crypto/md5"
"fmt"
"regexp"
"strings"
"github.com/grafana/regexp"
"github.com/pkg/errors"
"github.com/prometheus/common/model"

View file

@@ -66,18 +66,25 @@ type Parser interface {
}
// New returns a new parser of the byte slice.
func New(b []byte, contentType string) Parser {
//
// This function always returns a valid parser, but might additionally
// return an error if the content type cannot be parsed.
func New(b []byte, contentType string) (Parser, error) {
if contentType == "" {
return NewPromParser(b), nil
}
mediaType, _, err := mime.ParseMediaType(contentType)
if err != nil {
return NewPromParser(b)
return NewPromParser(b), err
}
switch mediaType {
case "application/openmetrics-text":
return NewOpenMetricsParser(b)
return NewOpenMetricsParser(b), nil
case "application/vnd.google.protobuf":
return NewProtobufParser(b)
return NewProtobufParser(b), nil
default:
return NewPromParser(b)
return NewPromParser(b), nil
}
}
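A sketch of how a caller can use the new signature: because a fallback `PromParser` is returned even when the content type is malformed, the caller may log the error and keep parsing. The `parseScrape` helper and its inputs are hypothetical:

```go
package main

import (
	"io"
	"log"

	"github.com/prometheus/prometheus/model/textparse"
)

// parseScrape sketches a caller reacting to the new (Parser, error)
// signature; payload and contentType would come from a scrape response.
func parseScrape(payload []byte, contentType string) error {
	p, err := textparse.New(payload, contentType)
	if err != nil {
		// The content type was unparsable; p is still a usable fallback
		// PromParser, so warn and continue instead of failing the scrape.
		log.Printf("invalid Content-Type %q, falling back to prometheus format: %v", contentType, err)
	}
	for {
		entry, err := p.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		_ = entry // dispatch on EntrySeries, EntryHelp, EntryType, ...
	}
}

func main() {
	_ = parseScrape([]byte("metric_a 1\n"), "text/plain; version=0.0.4")
}
```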

View file

@@ -0,0 +1,104 @@
// Copyright 2022 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package textparse
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestNewParser(t *testing.T) {
t.Parallel()
requirePromParser := func(t *testing.T, p Parser) {
require.NotNil(t, p)
_, ok := p.(*PromParser)
require.True(t, ok)
}
requireOpenMetricsParser := func(t *testing.T, p Parser) {
require.NotNil(t, p)
_, ok := p.(*OpenMetricsParser)
require.True(t, ok)
}
for name, tt := range map[string]*struct {
contentType string
validateParser func(*testing.T, Parser)
err string
}{
"empty-string": {
validateParser: requirePromParser,
},
"invalid-content-type-1": {
contentType: "invalid/",
validateParser: requirePromParser,
err: "expected token after slash",
},
"invalid-content-type-2": {
contentType: "invalid/invalid/invalid",
validateParser: requirePromParser,
err: "unexpected content after media subtype",
},
"invalid-content-type-3": {
contentType: "/",
validateParser: requirePromParser,
err: "no media type",
},
"invalid-content-type-4": {
contentType: "application/openmetrics-text; charset=UTF-8; charset=utf-8",
validateParser: requirePromParser,
err: "duplicate parameter name",
},
"openmetrics": {
contentType: "application/openmetrics-text",
validateParser: requireOpenMetricsParser,
},
"openmetrics-with-charset": {
contentType: "application/openmetrics-text; charset=utf-8",
validateParser: requireOpenMetricsParser,
},
"openmetrics-with-charset-and-version": {
contentType: "application/openmetrics-text; version=1.0.0; charset=utf-8",
validateParser: requireOpenMetricsParser,
},
"plain-text": {
contentType: "text/plain",
validateParser: requirePromParser,
},
"plain-text-with-version": {
contentType: "text/plain; version=0.0.4",
validateParser: requirePromParser,
},
"some-other-valid-content-type": {
contentType: "text/html",
validateParser: requirePromParser,
},
} {
t.Run(name, func(t *testing.T) {
tt := tt // Copy to local variable before going parallel.
t.Parallel()
p, err := New([]byte{}, tt.contentType)
tt.validateParser(t, p)
if tt.err == "" {
require.NoError(t, err)
} else {
require.Error(t, err)
require.Contains(t, err.Error(), tt.err)
}
})
}
}

View file

@@ -248,13 +248,13 @@ func (p *OpenMetricsParser) Next() (Entry, error) {
case tEOF:
return EntryInvalid, errors.New("data does not end with # EOF")
case tHelp, tType, tUnit:
switch t := p.nextToken(); t {
switch t2 := p.nextToken(); t2 {
case tMName:
p.offsets = append(p.offsets, p.l.start, p.l.i)
default:
return EntryInvalid, parseError("expected metric name after HELP", t)
return EntryInvalid, parseError("expected metric name after "+t.String(), t2)
}
switch t := p.nextToken(); t {
switch t2 := p.nextToken(); t2 {
case tText:
if len(p.l.buf()) > 1 {
p.text = p.l.buf()[1 : len(p.l.buf())-1]
@@ -262,7 +262,7 @@ func (p *OpenMetricsParser) Next() (Entry, error) {
p.text = []byte{}
}
default:
return EntryInvalid, parseError("expected text in HELP", t)
return EntryInvalid, fmt.Errorf("expected text in %s", t.String())
}
switch t {
case tType:

View file

@@ -334,6 +334,14 @@ func TestOpenMetricsParseErrors(t *testing.T) {
input: "# TYPE c counter\n#EOF\n",
err: "\"INVALID\" \" \" is not a valid start token",
},
{
input: "# TYPE \n#EOF\n",
err: "expected metric name after TYPE, got \"INVALID\"",
},
{
input: "# TYPE m\n#EOF\n",
err: "expected text in TYPE",
},
{
input: "# UNIT metric suffix\n#EOF\n",
err: "unit not a suffix of metric \"metric\"",
@@ -346,9 +354,21 @@ func TestOpenMetricsParseErrors(t *testing.T) {
input: "# UNIT m suffix\n#EOF\n",
err: "unit not a suffix of metric \"m\"",
},
{
input: "# UNIT \n#EOF\n",
err: "expected metric name after UNIT, got \"INVALID\"",
},
{
input: "# UNIT m\n#EOF\n",
err: "expected text in UNIT",
},
{
input: "# HELP \n#EOF\n",
err: "expected metric name after HELP, got \"INVALID\"",
},
{
input: "# HELP m\n#EOF\n",
err: "expected text in HELP, got \"INVALID\"",
err: "expected text in HELP",
},
{
input: "a\t1\n#EOF\n",

View file

@@ -278,13 +278,13 @@ func (p *PromParser) Next() (Entry, error) {
return p.Next()
case tHelp, tType:
switch t := p.nextToken(); t {
switch t2 := p.nextToken(); t2 {
case tMName:
p.offsets = append(p.offsets, p.l.start, p.l.i)
default:
return EntryInvalid, parseError("expected metric name after HELP", t)
return EntryInvalid, parseError("expected metric name after "+t.String(), t2)
}
switch t := p.nextToken(); t {
switch t2 := p.nextToken(); t2 {
case tText:
if len(p.l.buf()) > 1 {
p.text = p.l.buf()[1:]
@@ -292,7 +292,7 @@ func (p *PromParser) Next() (Entry, error) {
p.text = []byte{}
}
default:
return EntryInvalid, parseError("expected text in HELP", t)
return EntryInvalid, fmt.Errorf("expected text in %s", t.String())
}
switch t {
case tType:

View file

@@ -270,6 +270,14 @@ func TestPromParseErrors(t *testing.T) {
input: `{a="ok"} 1`,
err: `"INVALID" is not a valid start token`,
},
{
input: "# TYPE #\n#EOF\n",
err: "expected metric name after TYPE, got \"INVALID\"",
},
{
input: "# HELP #\n#EOF\n",
err: "expected metric name after HELP, got \"INVALID\"",
},
}
for i, c := range cases {

10
prompb/buf.lock Normal file
View file

@@ -0,0 +1,10 @@
# Generated by buf. DO NOT EDIT.
version: v1
deps:
- remote: buf.build
owner: gogo
repository: protobuf
branch: main
commit: 4df00b267f944190a229ce3695781e99
digest: b1-sjLgsg7CzrkOrIjBDh3s-l0aMjE6oqTj85-OsoopKAw=
create_time: 2021-08-10T00:14:28.345069Z

18
prompb/buf.yaml Normal file
View file

@@ -0,0 +1,18 @@
version: v1
name: buf.build/prometheus/prometheus
lint:
ignore_only:
ENUM_VALUE_PREFIX:
- remote.proto
- types.proto
ENUM_ZERO_VALUE_SUFFIX:
- remote.proto
- types.proto
PACKAGE_DIRECTORY_MATCH:
- remote.proto
- types.proto
PACKAGE_VERSION_SUFFIX:
- remote.proto
- types.proto
deps:
- buf.build/gogo/protobuf

View file

@@ -217,7 +217,7 @@ func BenchmarkRangeQuery(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
qry, err := engine.NewRangeQuery(
stor, c.expr,
stor, nil, c.expr,
time.Unix(int64((numIntervals-c.steps)*10), 0),
time.Unix(int64(numIntervals*10), 0), time.Second*10)
if err != nil {

View file

@@ -20,7 +20,6 @@ import (
"fmt"
"math"
"reflect"
"regexp"
"runtime"
"sort"
"strconv"
@@ -29,11 +28,13 @@
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/opentracing/opentracing-go"
"github.com/grafana/regexp"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/uber/jaeger-client-go"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
@@ -119,13 +120,18 @@ type Query interface {
// Statement returns the parsed statement of the query.
Statement() parser.Statement
// Stats returns statistics about the lifetime of the query.
Stats() *stats.QueryTimers
Stats() *stats.Statistics
// Cancel signals that a running query execution should be aborted.
Cancel()
// String returns the original query string.
String() string
}
type QueryOpts struct {
// Enables recording per-step statistics if the engine has it enabled as well. Disabled by default.
EnablePerStepStats bool
}
// query implements the Query interface.
type query struct {
// Underlying data provider.
@@ -136,6 +142,8 @@ type query struct {
stmt parser.Statement
// Timer stats for the query execution.
stats *stats.QueryTimers
// Sample stats for the query execution.
sampleStats *stats.QuerySamples
// Result matrix for reuse.
matrix Matrix
// Cancellation function for the query.
@@ -160,8 +168,11 @@ func (q *query) String() string {
}
// Stats implements the Query interface.
func (q *query) Stats() *stats.QueryTimers {
return q.stats
func (q *query) Stats() *stats.Statistics {
return &stats.Statistics{
Timers: q.stats,
Samples: q.sampleStats,
}
}
// Cancel implements the Query interface.
@@ -180,8 +191,8 @@ func (q *query) Close() {
// Exec implements the Query interface.
func (q *query) Exec(ctx context.Context) *Result {
if span := opentracing.SpanFromContext(ctx); span != nil {
span.SetTag(queryTag, q.stmt.String())
if span := trace.SpanFromContext(ctx); span != nil {
span.SetAttributes(attribute.String(queryTag, q.stmt.String()))
}
// Exec query.
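The OpenTelemetry calls replacing the OpenTracing/Jaeger ones follow this shape; a self-contained sketch (the tracer name and attribute key are placeholders):

```go
package main

import (
	"context"

	"go.opentelemetry.io/otel"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/trace"
)

// annotate attaches the query string to the span stored in the context,
// mirroring the change above.
func annotate(ctx context.Context, queryString string) {
	if span := trace.SpanFromContext(ctx); span != nil {
		span.SetAttributes(attribute.String("query", queryString))
	}
}

func main() {
	ctx, span := otel.Tracer("example").Start(context.Background(), "exec")
	defer span.End()
	annotate(ctx, "up == 1")
}
```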
@@ -208,13 +219,32 @@ func contextErr(err error, env string) error {
}
}
// QueryTracker provides access to two features:
//
// 1) Tracking of active queries. If the PromQL engine crashes while executing a query, that query should still be
// present in the tracker on restart, and hence be logged. After the logging on restart, the tracker gets emptied.
//
// 2) Enforcement of the maximum number of concurrent queries.
type QueryTracker interface {
// GetMaxConcurrent returns maximum number of concurrent queries that are allowed by this tracker.
GetMaxConcurrent() int
// Insert inserts a query into the query tracker. This call must block if the maximum number of queries is already running.
// If Insert doesn't return an error, then the returned integer value must be used in a subsequent Delete call.
// Insert should return an error if the context is finished before the query can proceed; the integer value returned in that case should be ignored by the caller.
Insert(ctx context.Context, query string) (int, error)
// Delete removes a query from the activity tracker. insertIndex is the value returned by the Insert call.
Delete(insertIndex int)
}
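A minimal sketch of a custom tracker that only enforces the concurrency limit via a buffered-channel semaphore, with no crash logging; this is an assumed example, not the built-in ActiveQueryTracker:

```go
package main

import (
	"context"
	"fmt"

	"github.com/prometheus/prometheus/promql"
)

// semTracker caps the number of concurrent queries with a buffered channel;
// it does not persist queries for crash logging.
type semTracker struct {
	sem chan struct{}
}

var _ promql.QueryTracker = (*semTracker)(nil)

func newSemTracker(max int) *semTracker {
	return &semTracker{sem: make(chan struct{}, max)}
}

func (t *semTracker) GetMaxConcurrent() int { return cap(t.sem) }

// Insert blocks until a slot is free or the context is done.
func (t *semTracker) Insert(ctx context.Context, query string) (int, error) {
	select {
	case t.sem <- struct{}{}:
		return 0, nil // no per-query index needed in this sketch
	case <-ctx.Done():
		return 0, ctx.Err()
	}
}

// Delete releases the slot taken by Insert.
func (t *semTracker) Delete(insertIndex int) { <-t.sem }

func main() {
	t := newSemTracker(2)
	idx, err := t.Insert(context.Background(), "up")
	fmt.Println(idx, err)
	t.Delete(idx)
}
```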
// EngineOpts contains configuration options used when creating a new Engine.
type EngineOpts struct {
Logger log.Logger
Reg prometheus.Registerer
MaxSamples int
Timeout time.Duration
ActiveQueryTracker *ActiveQueryTracker
ActiveQueryTracker QueryTracker
// LookbackDelta determines the time since the last sample after which a time
// series is considered stale.
LookbackDelta time.Duration
@@ -223,11 +253,21 @@ type EngineOpts struct {
// a subquery in milliseconds if no step in range vector was specified `[30m:<step>]`.
NoStepSubqueryIntervalFn func(rangeMillis int64) int64
// EnableAtModifier if true enables @ modifier. Disabled otherwise.
// EnableAtModifier if true enables @ modifier. Disabled otherwise. This
// is supposed to be enabled for regular PromQL (as of Prometheus v2.33)
// but the option to disable it is still provided here for those using
// the Engine outside of Prometheus.
EnableAtModifier bool
// EnableNegativeOffset if true enables negative (-) offset values. Disabled otherwise.
// EnableNegativeOffset if true enables negative (-) offset
// values. Disabled otherwise. This is supposed to be enabled for
// regular PromQL (as of Prometheus v2.33) but the option to disable it
// is still provided here for those using the Engine outside of
// Prometheus.
EnableNegativeOffset bool
// EnablePerStepStats if true allows for per-step stats to be computed on request. Disabled otherwise.
EnablePerStepStats bool
}
// Engine handles the lifetime of queries from beginning to end.
@@ -237,13 +277,14 @@ type Engine struct {
metrics *engineMetrics
timeout time.Duration
maxSamplesPerQuery int
activeQueryTracker *ActiveQueryTracker
activeQueryTracker QueryTracker
queryLogger QueryLogger
queryLoggerLock sync.RWMutex
lookbackDelta time.Duration
noStepSubqueryIntervalFn func(rangeMillis int64) int64
enableAtModifier bool
enableNegativeOffset bool
enablePerStepStats bool
}
// NewEngine returns a new engine.
@@ -326,6 +367,7 @@ func NewEngine(opts EngineOpts) *Engine {
noStepSubqueryIntervalFn: opts.NoStepSubqueryIntervalFn,
enableAtModifier: opts.EnableAtModifier,
enableNegativeOffset: opts.EnableNegativeOffset,
enablePerStepStats: opts.EnablePerStepStats,
}
}
@@ -353,12 +395,12 @@ func (ng *Engine) SetQueryLogger(l QueryLogger) {
}
// NewInstantQuery returns an evaluation query for the given expression at the given time.
func (ng *Engine) NewInstantQuery(q storage.Queryable, qs string, ts time.Time) (Query, error) {
func (ng *Engine) NewInstantQuery(q storage.Queryable, opts *QueryOpts, qs string, ts time.Time) (Query, error) {
expr, err := parser.ParseExpr(qs)
if err != nil {
return nil, err
}
qry, err := ng.newQuery(q, expr, ts, ts, 0)
qry, err := ng.newQuery(q, opts, expr, ts, ts, 0)
if err != nil {
return nil, err
}
@@ -369,7 +411,7 @@ func (ng *Engine) NewInstantQuery(q storage.Queryable, qs string, ts time.Time)
// NewRangeQuery returns an evaluation query for the given time range and with
// the resolution set by the interval.
func (ng *Engine) NewRangeQuery(q storage.Queryable, qs string, start, end time.Time, interval time.Duration) (Query, error) {
func (ng *Engine) NewRangeQuery(q storage.Queryable, opts *QueryOpts, qs string, start, end time.Time, interval time.Duration) (Query, error) {
expr, err := parser.ParseExpr(qs)
if err != nil {
return nil, err
@@ -377,7 +419,7 @@ func (ng *Engine) NewRangeQuery(q storage.Queryable, qs string, start, end time.
if expr.Type() != parser.ValueTypeVector && expr.Type() != parser.ValueTypeScalar {
return nil, errors.Errorf("invalid expression type %q for range query, must be Scalar or instant Vector", parser.DocumentedType(expr.Type()))
}
qry, err := ng.newQuery(q, expr, start, end, interval)
qry, err := ng.newQuery(q, opts, expr, start, end, interval)
if err != nil {
return nil, err
}
@@ -386,11 +428,16 @@ func (ng *Engine) NewRangeQuery(q storage.Queryable, qs string, start, end time.
return qry, nil
}
func (ng *Engine) newQuery(q storage.Queryable, expr parser.Expr, start, end time.Time, interval time.Duration) (*query, error) {
func (ng *Engine) newQuery(q storage.Queryable, opts *QueryOpts, expr parser.Expr, start, end time.Time, interval time.Duration) (*query, error) {
if err := ng.validateOpts(expr); err != nil {
return nil, err
}
// Default to empty QueryOpts if not provided.
if opts == nil {
opts = &QueryOpts{}
}
es := &parser.EvalStmt{
Expr: PreprocessExpr(expr, start, end),
Start: start,
@@ -401,6 +448,7 @@ func (ng *Engine) newQuery(q storage.Queryable, expr parser.Expr, start, end tim
stmt: es,
ng: ng,
stats: stats.NewQueryTimers(),
sampleStats: stats.NewQuerySamples(ng.enablePerStepStats && opts.EnablePerStepStats),
queryable: q,
}
return qry, nil
@@ -468,6 +516,7 @@ func (ng *Engine) newTestQuery(f func(context.Context) error) Query {
stmt: parser.TestStmt(f),
ng: ng,
stats: stats.NewQueryTimers(),
sampleStats: stats.NewQuerySamples(ng.enablePerStepStats),
}
return qry
}
@@ -499,10 +548,8 @@ func (ng *Engine) exec(ctx context.Context, q *query) (v parser.Value, ws storag
f = append(f, "error", err)
}
f = append(f, "stats", stats.NewQueryStats(q.Stats()))
if span := opentracing.SpanFromContext(ctx); span != nil {
if spanCtx, ok := span.Context().(jaeger.SpanContext); ok {
f = append(f, "spanID", spanCtx.SpanID())
}
if span := trace.SpanFromContext(ctx); span != nil {
f = append(f, "spanID", span.SpanContext().SpanID())
}
if origin := ctx.Value(QueryOrigin{}); origin != nil {
for k, v := range origin.(map[string]interface{}) {
@@ -591,8 +638,10 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval
maxSamples: ng.maxSamplesPerQuery,
logger: ng.logger,
lookbackDelta: ng.lookbackDelta,
samplesStats: query.sampleStats,
noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn,
}
query.sampleStats.InitStepTracking(start, start, 1)
val, warnings, err := evaluator.Eval(s.Expr)
if err != nil {
@@ -641,8 +690,10 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval
maxSamples: ng.maxSamplesPerQuery,
logger: ng.logger,
lookbackDelta: ng.lookbackDelta,
samplesStats: query.sampleStats,
noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn,
}
query.sampleStats.InitStepTracking(evaluator.startTimestamp, evaluator.endTimestamp, evaluator.interval)
val, warnings, err := evaluator.Eval(s.Expr)
if err != nil {
return nil, warnings, err
@@ -872,6 +923,7 @@ type evaluator struct {
currentSamples int
logger log.Logger
lookbackDelta time.Duration
samplesStats *stats.QuerySamples
noStepSubqueryIntervalFn func(rangeMillis int64) int64
}
@@ -1068,6 +1120,7 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper)
}
}
args[i] = vectors[i]
ev.samplesStats.UpdatePeak(ev.currentSamples)
}
// Make the function call.
@@ -1083,10 +1136,12 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper)
// When we reset currentSamples to tempNumSamples during the next iteration of the loop it also
// needs to include the samples from the result here, as they're still in memory.
tempNumSamples += len(result)
ev.samplesStats.UpdatePeak(ev.currentSamples)
if ev.currentSamples > ev.maxSamples {
ev.error(ErrTooManySamples(env))
}
ev.samplesStats.UpdatePeak(ev.currentSamples)
// If this could be an instant query, shortcut so as not to change sort order.
if ev.endTimestamp == ev.startTimestamp {
@@ -1096,6 +1151,7 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper)
mat[i] = Series{Metric: s.Metric, Points: []Point{s.Point}}
}
ev.currentSamples = originalNumSamples + mat.TotalSamples()
ev.samplesStats.UpdatePeak(ev.currentSamples)
return mat, warnings
}
@@ -1128,13 +1184,20 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper)
mat = append(mat, ss)
}
ev.currentSamples = originalNumSamples + mat.TotalSamples()
ev.samplesStats.UpdatePeak(ev.currentSamples)
return mat, warnings
}
// evalSubquery evaluates the given SubqueryExpr and returns an equivalent
// evaluated MatrixSelector in its place. Note that the Name and LabelMatchers are not set.
func (ev *evaluator) evalSubquery(subq *parser.SubqueryExpr) (*parser.MatrixSelector, int, storage.Warnings) {
samplesStats := ev.samplesStats
// Avoid double-counting samples when running a subquery; those samples will be counted in a later stage.
ev.samplesStats = ev.samplesStats.NewChild()
val, ws := ev.eval(subq)
// But do incorporate the peak from the subquery
samplesStats.UpdatePeakFromSubquery(ev.samplesStats)
ev.samplesStats = samplesStats
mat := val.(Matrix)
vs := &parser.VectorSelector{
OriginalOffset: subq.OriginalOffset,
@@ -1169,8 +1232,9 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, storage.Warnings) {
numSteps := int((ev.endTimestamp-ev.startTimestamp)/ev.interval) + 1
// Create a new span to help investigate inner evaluation performance.
span, _ := opentracing.StartSpanFromContext(ev.ctx, stats.InnerEvalTime.SpanOperation()+" eval "+reflect.TypeOf(expr).String())
defer span.Finish()
ctxWithSpan, span := otel.Tracer("").Start(ev.ctx, stats.InnerEvalTime.SpanOperation()+" eval "+reflect.TypeOf(expr).String())
ev.ctx = ctxWithSpan
defer span.End()
switch e := expr.(type) {
case *parser.AggregateExpr:
@@ -1339,6 +1403,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, storage.Warnings) {
enh.Ts = ts
// Make the function call.
outVec := call(inArgs, e.Args, enh)
ev.samplesStats.IncrementSamplesAtStep(step, len(points))
enh.Out = outVec[:0]
if len(outVec) > 0 {
ss.Points = append(ss.Points, Point{V: outVec[0].Point.V, H: outVec[0].Point.H, T: ts})
@@ -1356,7 +1421,9 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, storage.Warnings) {
} else {
putPointSlice(ss.Points)
}
ev.samplesStats.UpdatePeak(ev.currentSamples)
}
ev.samplesStats.UpdatePeak(ev.currentSamples)
ev.currentSamples -= len(points)
putPointSlice(points)
@@ -1490,11 +1557,13 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, storage.Warnings) {
Points: getPointSlice(numSteps),
}
for ts := ev.startTimestamp; ts <= ev.endTimestamp; ts += ev.interval {
for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval {
step++
_, v, h, ok := ev.vectorSelectorSingle(it, e, ts)
if ok {
if ev.currentSamples < ev.maxSamples {
ss.Points = append(ss.Points, Point{V: v, H: h, T: ts})
ev.samplesStats.IncrementSamplesAtStep(step, 1)
ev.currentSamples++
} else {
ev.error(ErrTooManySamples(env))
@@ -1508,6 +1577,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, storage.Warnings) {
putPointSlice(ss.Points)
}
}
ev.samplesStats.UpdatePeak(ev.currentSamples)
return mat, ws
case *parser.MatrixSelector:
@@ -1526,6 +1596,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, storage.Warnings) {
maxSamples: ev.maxSamples,
logger: ev.logger,
lookbackDelta: ev.lookbackDelta,
samplesStats: ev.samplesStats.NewChild(),
noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn,
}
@@ -1551,6 +1622,8 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, storage.Warnings) {
res, ws := newEv.eval(e.Expr)
ev.currentSamples = newEv.currentSamples
ev.samplesStats.UpdatePeakFromSubquery(newEv.samplesStats)
ev.samplesStats.IncrementSamplesAtTimestamp(ev.endTimestamp, newEv.samplesStats.TotalSamples)
return res, ws
case *parser.StepInvariantExpr:
switch ce := e.Expr.(type) {
@@ -1567,10 +1640,16 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, storage.Warnings) {
maxSamples: ev.maxSamples,
logger: ev.logger,
lookbackDelta: ev.lookbackDelta,
samplesStats: ev.samplesStats.NewChild(),
noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn,
}
res, ws := newEv.eval(e.Expr)
ev.currentSamples = newEv.currentSamples
ev.samplesStats.UpdatePeakFromSubquery(newEv.samplesStats)
for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts = ts + ev.interval {
step++
ev.samplesStats.IncrementSamplesAtStep(step, newEv.samplesStats.TotalSamples)
}
switch e.Expr.(type) {
case *parser.MatrixSelector, *parser.SubqueryExpr:
// We do not duplicate results for range selectors since result is a matrix
@@ -1601,6 +1680,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, storage.Warnings) {
}
}
}
ev.samplesStats.UpdatePeak(ev.currentSamples)
return res, ws
}
@@ -1626,12 +1706,14 @@ func (ev *evaluator) vectorSelector(node *parser.VectorSelector, ts int64) (Vect
})
ev.currentSamples++
ev.samplesStats.IncrementSamplesAtTimestamp(ts, 1)
if ev.currentSamples > ev.maxSamples {
ev.error(ErrTooManySamples(env))
}
}
}
ev.samplesStats.UpdatePeak(ev.currentSamples)
return vec, ws
}
@@ -1713,6 +1795,7 @@ func (ev *evaluator) matrixSelector(node *parser.MatrixSelector) (Matrix, storag
}
ss.Points = ev.matrixIterSlice(it, mint, maxt, getPointSlice(16))
ev.samplesStats.IncrementSamplesAtTimestamp(ev.startTimestamp, len(ss.Points))
if len(ss.Points) > 0 {
matrix = append(matrix, ss)
@@ -1813,6 +1896,7 @@ loop:
ev.currentSamples++
}
}
ev.samplesStats.UpdatePeak(ev.currentSamples)
return out
}
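Putting the new signatures together, a sketch of creating a query with per-step stats enabled and reading the collected statistics afterwards (mirroring the test usage below; wiring up an Engine and a storage.Queryable is elided):

```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/prometheus/prometheus/promql"
	"github.com/prometheus/prometheus/storage"
)

// runWithStats sketches the new opts parameter and Statistics result.
// eng must have been created with EnablePerStepStats: true; q is any
// storage.Queryable.
func runWithStats(eng *promql.Engine, q storage.Queryable) error {
	opts := &promql.QueryOpts{EnablePerStepStats: true}
	qry, err := eng.NewInstantQuery(q, opts, "up", time.Now())
	if err != nil {
		return err
	}
	res := qry.Exec(context.Background())
	if res.Err != nil {
		return res.Err
	}
	st := qry.Stats() // now a *stats.Statistics: timers plus sample counts
	fmt.Println("total samples:", st.Samples.TotalSamples)
	fmt.Println("peak samples:", st.Samples.PeakSamples)
	return nil
}

func main() {} // constructing the Engine and Queryable is elided here
```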

View file

@@ -25,6 +25,9 @@ import (
"time"
"github.com/go-kit/log"
"github.com/prometheus/prometheus/util/stats"
"github.com/stretchr/testify/require"
"go.uber.org/goleak"
@@ -229,14 +232,14 @@ func TestQueryError(t *testing.T) {
ctx, cancelCtx := context.WithCancel(context.Background())
defer cancelCtx()
vectorQuery, err := engine.NewInstantQuery(queryable, "foo", time.Unix(1, 0))
vectorQuery, err := engine.NewInstantQuery(queryable, nil, "foo", time.Unix(1, 0))
require.NoError(t, err)
res := vectorQuery.Exec(ctx)
require.Error(t, res.Err, "expected error on failed select but got none")
require.True(t, errors.Is(res.Err, errStorage), "expected error doesn't match")
matrixQuery, err := engine.NewInstantQuery(queryable, "foo[1m]", time.Unix(1, 0))
matrixQuery, err := engine.NewInstantQuery(queryable, nil, "foo[1m]", time.Unix(1, 0))
require.NoError(t, err)
res = matrixQuery.Exec(ctx)
@@ -563,9 +566,9 @@ func TestSelectHintsSetCorrectly(t *testing.T) {
err error
)
if tc.end == 0 {
query, err = engine.NewInstantQuery(hintsRecorder, tc.query, timestamp.Time(tc.start))
query, err = engine.NewInstantQuery(hintsRecorder, nil, tc.query, timestamp.Time(tc.start))
} else {
query, err = engine.NewRangeQuery(hintsRecorder, tc.query, timestamp.Time(tc.start), timestamp.Time(tc.end), time.Second)
query, err = engine.NewRangeQuery(hintsRecorder, nil, tc.query, timestamp.Time(tc.start), timestamp.Time(tc.end), time.Second)
}
require.NoError(t, err)
@@ -723,9 +726,9 @@ load 10s
var err error
var qry Query
if c.Interval == 0 {
qry, err = test.QueryEngine().NewInstantQuery(test.Queryable(), c.Query, c.Start)
qry, err = test.QueryEngine().NewInstantQuery(test.Queryable(), nil, c.Query, c.Start)
} else {
qry, err = test.QueryEngine().NewRangeQuery(test.Queryable(), c.Query, c.Start, c.End, c.Interval)
qry, err = test.QueryEngine().NewRangeQuery(test.Queryable(), nil, c.Query, c.Start, c.End, c.Interval)
}
require.NoError(t, err)
@@ -740,6 +743,493 @@ load 10s
}
}
func TestQueryStatistics(t *testing.T) {
test, err := NewTest(t, `
load 10s
metricWith1SampleEvery10Seconds 1+1x100
metricWith3SampleEvery10Seconds{a="1",b="1"} 1+1x100
metricWith3SampleEvery10Seconds{a="2",b="2"} 1+1x100
metricWith3SampleEvery10Seconds{a="3",b="2"} 1+1x100
`)
require.NoError(t, err)
defer test.Close()
err = test.Run()
require.NoError(t, err)
cases := []struct {
Query string
SkipMaxCheck bool
TotalSamples int
TotalSamplesPerStep stats.TotalSamplesPerStep
PeakSamples int
Start time.Time
End time.Time
Interval time.Duration
}{
{
Query: `"literal string"`,
SkipMaxCheck: true, // This can't fail from a max samples limit.
Start: time.Unix(21, 0),
TotalSamples: 0,
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 0,
},
},
{
Query: "1",
Start: time.Unix(21, 0),
TotalSamples: 0,
PeakSamples: 1,
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 0,
},
},
{
Query: "metricWith1SampleEvery10Seconds",
Start: time.Unix(21, 0),
PeakSamples: 1,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 1,
},
},
{
// The timestamp function has special handling.
Query: "timestamp(metricWith1SampleEvery10Seconds)",
Start: time.Unix(21, 0),
PeakSamples: 2,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 1,
},
},
{
Query: "metricWith1SampleEvery10Seconds",
Start: time.Unix(22, 0),
PeakSamples: 1,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
22000: 1, // Aligned to the step time, not the sample time.
},
},
{
Query: "metricWith1SampleEvery10Seconds offset 10s",
Start: time.Unix(21, 0),
PeakSamples: 1,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 1,
},
},
{
Query: "metricWith1SampleEvery10Seconds @ 15",
Start: time.Unix(21, 0),
PeakSamples: 1,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 1,
},
},
{
Query: `metricWith3SampleEvery10Seconds{a="1"}`,
Start: time.Unix(21, 0),
PeakSamples: 1,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 1,
},
},
{
Query: `metricWith3SampleEvery10Seconds{a="1"} @ 19`,
Start: time.Unix(21, 0),
PeakSamples: 1,
TotalSamples: 1, // 1 sample / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 1,
},
},
{
Query: `metricWith3SampleEvery10Seconds{a="1"}[20s] @ 19`,
Start: time.Unix(21, 0),
PeakSamples: 2,
TotalSamples: 2, // (1 sample / 10 seconds) * 20s
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 2,
},
},
{
Query: "metricWith3SampleEvery10Seconds",
Start: time.Unix(21, 0),
PeakSamples: 3,
TotalSamples: 3, // 3 samples / 10 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
21000: 3,
},
},
{
Query: "metricWith1SampleEvery10Seconds[60s]",
Start: time.Unix(201, 0),
PeakSamples: 6,
TotalSamples: 6, // 1 sample / 10 seconds * 60 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 6,
},
},
{
Query: "max_over_time(metricWith1SampleEvery10Seconds[59s])[20s:5s]",
Start: time.Unix(201, 0),
PeakSamples: 10,
TotalSamples: 24, // (1 sample / 10 seconds * 60 seconds) * 20/5 subquery steps (using 59s so we always return 6 samples
// as if we ran a query at :00 looking back 60 seconds we would return 7 samples;
// see next test).
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 24,
},
},
{
Query: "max_over_time(metricWith1SampleEvery10Seconds[60s])[20s:5s]",
Start: time.Unix(201, 0),
PeakSamples: 11,
TotalSamples: 26, // (1 sample / 10 seconds * 60 seconds) * 20/5 + 2, as
// max_over_time(metricWith1SampleEvery10Seconds[60s]) @ 190 and 200 will return 7 samples.
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 26,
},
},
{
Query: "metricWith1SampleEvery10Seconds[60s] @ 30",
Start: time.Unix(201, 0),
PeakSamples: 4,
TotalSamples: 4, // The @ modifier forces the evaluation to 30 seconds, so it brings 4 datapoints (0, 10, 20, 30 seconds) * 1 series
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 4,
},
},
{
Query: "sum(max_over_time(metricWith3SampleEvery10Seconds[60s] @ 30))",
Start: time.Unix(201, 0),
PeakSamples: 7,
TotalSamples: 12, // The @ modifier forces the evaluation to 30 seconds, so it brings 4 datapoints (0, 10, 20, 30 seconds) * 3 series
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
},
},
{
Query: "sum by (b) (max_over_time(metricWith3SampleEvery10Seconds[60s] @ 30))",
Start: time.Unix(201, 0),
PeakSamples: 8,
TotalSamples: 12, // The @ modifier forces the evaluation to 30 seconds, so it brings 4 datapoints (0, 10, 20, 30 seconds) * 3 series
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
},
},
{
Query: "metricWith1SampleEvery10Seconds[60s] offset 10s",
Start: time.Unix(201, 0),
PeakSamples: 6,
TotalSamples: 6, // 1 sample / 10 seconds * 60 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 6,
},
},
{
Query: "metricWith3SampleEvery10Seconds[60s]",
Start: time.Unix(201, 0),
PeakSamples: 18,
TotalSamples: 18, // 3 sample / 10 seconds * 60 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 18,
},
},
{
Query: "max_over_time(metricWith1SampleEvery10Seconds[60s])",
Start: time.Unix(201, 0),
PeakSamples: 7,
TotalSamples: 6, // 1 sample / 10 seconds * 60 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 6,
},
},
{
Query: "absent_over_time(metricWith1SampleEvery10Seconds[60s])",
Start: time.Unix(201, 0),
PeakSamples: 7,
TotalSamples: 6, // 1 sample / 10 seconds * 60 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 6,
},
},
{
Query: "max_over_time(metricWith3SampleEvery10Seconds[60s])",
Start: time.Unix(201, 0),
PeakSamples: 9,
TotalSamples: 18, // 3 sample / 10 seconds * 60 seconds
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 18,
},
},
{
Query: "metricWith1SampleEvery10Seconds[60s:5s]",
Start: time.Unix(201, 0),
PeakSamples: 12,
TotalSamples: 12, // 1 sample per query * 12 queries (60/5)
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
},
},
{
Query: "metricWith1SampleEvery10Seconds[60s:5s] offset 10s",
Start: time.Unix(201, 0),
PeakSamples: 12,
TotalSamples: 12, // 1 sample per query * 12 queries (60/5)
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
},
},
{
Query: "max_over_time(metricWith3SampleEvery10Seconds[60s:5s])",
Start: time.Unix(201, 0),
PeakSamples: 51,
TotalSamples: 36, // 3 sample per query * 12 queries (60/5)
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 36,
},
},
{
Query: "sum(max_over_time(metricWith3SampleEvery10Seconds[60s:5s])) + sum(max_over_time(metricWith3SampleEvery10Seconds[60s:5s]))",
Start: time.Unix(201, 0),
PeakSamples: 52,
TotalSamples: 72, // 2 * (3 sample per query * 12 queries (60/5))
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 72,
},
},
{
Query: `metricWith3SampleEvery10Seconds{a="1"}`,
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 4,
TotalSamples: 4, // 1 sample per query * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 1,
206000: 1,
211000: 1,
216000: 1,
},
},
{
Query: `metricWith3SampleEvery10Seconds{a="1"}`,
Start: time.Unix(204, 0),
End: time.Unix(223, 0),
Interval: 5 * time.Second,
PeakSamples: 4,
TotalSamples: 4, // 1 sample per query * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
204000: 1, // aligned to the step time, not the sample time
209000: 1,
214000: 1,
219000: 1,
},
},
{
// The timestamp function has special handling.
Query: "timestamp(metricWith1SampleEvery10Seconds)",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 5,
TotalSamples: 4, // (1 sample / 10 seconds) * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 1,
206000: 1,
211000: 1,
216000: 1,
},
},
{
Query: `max_over_time(metricWith3SampleEvery10Seconds{a="1"}[10s])`,
Start: time.Unix(991, 0),
End: time.Unix(1021, 0),
Interval: 10 * time.Second,
PeakSamples: 2,
TotalSamples: 2, // 1 sample per query * 2 steps with data
TotalSamplesPerStep: stats.TotalSamplesPerStep{
991000: 1,
1001000: 1,
1011000: 0,
1021000: 0,
},
},
{
Query: `metricWith3SampleEvery10Seconds{a="1"} offset 10s`,
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 4,
TotalSamples: 4, // 1 sample per query * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 1,
206000: 1,
211000: 1,
216000: 1,
},
},
{
Query: "max_over_time(metricWith3SampleEvery10Seconds[60s] @ 30)",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 12,
TotalSamples: 48, // The @ modifier forces the evaluation timestamp to 30 seconds, so it brings 4 datapoints (0, 10, 20, 30 seconds) * 3 series * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
206000: 12,
211000: 12,
216000: 12,
},
},
{
Query: `metricWith3SampleEvery10Seconds`,
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
PeakSamples: 12,
Interval: 5 * time.Second,
TotalSamples: 12, // 3 sample per query * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 3,
206000: 3,
211000: 3,
216000: 3,
},
},
{
Query: `max_over_time(metricWith3SampleEvery10Seconds[60s])`,
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 18,
TotalSamples: 72, // (3 sample / 10 seconds * 60 seconds) * 4 steps = 72
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 18,
206000: 18,
211000: 18,
216000: 18,
},
},
{
Query: "max_over_time(metricWith3SampleEvery10Seconds[60s:5s])",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 72,
TotalSamples: 144, // 3 sample per query * 12 queries (60/5) * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 36,
206000: 36,
211000: 36,
216000: 36,
},
},
{
Query: "max_over_time(metricWith1SampleEvery10Seconds[60s:5s])",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 32,
TotalSamples: 48, // 1 sample per query * 12 queries (60/5) * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
206000: 12,
211000: 12,
216000: 12,
},
},
{
Query: "sum by (b) (max_over_time(metricWith1SampleEvery10Seconds[60s:5s]))",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 32,
TotalSamples: 48, // 1 sample per query * 12 queries (60/5) * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 12,
206000: 12,
211000: 12,
216000: 12,
},
},
{
Query: "sum(max_over_time(metricWith3SampleEvery10Seconds[60s:5s])) + sum(max_over_time(metricWith3SampleEvery10Seconds[60s:5s]))",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 76,
TotalSamples: 288, // 2 * (3 sample per query * 12 queries (60/5) * 4 steps)
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 72,
206000: 72,
211000: 72,
216000: 72,
},
},
{
Query: "sum(max_over_time(metricWith3SampleEvery10Seconds[60s:5s])) + sum(max_over_time(metricWith1SampleEvery10Seconds[60s:5s]))",
Start: time.Unix(201, 0),
End: time.Unix(220, 0),
Interval: 5 * time.Second,
PeakSamples: 72,
TotalSamples: 192, // (1 sample per query * 12 queries (60/5) + 3 sample per query * 12 queries (60/5)) * 4 steps
TotalSamplesPerStep: stats.TotalSamplesPerStep{
201000: 48,
206000: 48,
211000: 48,
216000: 48,
},
},
}
engine := test.QueryEngine()
engine.enablePerStepStats = true
origMaxSamples := engine.maxSamplesPerQuery
for _, c := range cases {
t.Run(c.Query, func(t *testing.T) {
opts := &QueryOpts{EnablePerStepStats: true}
engine.maxSamplesPerQuery = origMaxSamples
runQuery := func(expErr error) *stats.Statistics {
var err error
var qry Query
if c.Interval == 0 {
qry, err = engine.NewInstantQuery(test.Queryable(), opts, c.Query, c.Start)
} else {
qry, err = engine.NewRangeQuery(test.Queryable(), opts, c.Query, c.Start, c.End, c.Interval)
}
require.NoError(t, err)
res := qry.Exec(test.Context())
require.Equal(t, expErr, res.Err)
return qry.Stats()
}
stats := runQuery(nil)
require.Equal(t, c.TotalSamples, stats.Samples.TotalSamples, "Total samples mismatch")
require.Equal(t, &c.TotalSamplesPerStep, stats.Samples.TotalSamplesPerStepMap(), "Total samples per time mismatch")
require.Equal(t, c.PeakSamples, stats.Samples.PeakSamples, "Peak samples mismatch")
// Check that the peak is correct by setting the max to one less.
if c.SkipMaxCheck {
return
}
engine.maxSamplesPerQuery = stats.Samples.PeakSamples - 1
runQuery(ErrTooManySamples(env))
})
}
}
func TestMaxQuerySamples(t *testing.T) {
test, err := NewTest(t, `
load 10s
@@ -896,14 +1386,19 @@ load 10s
var err error
var qry Query
if c.Interval == 0 {
qry, err = engine.NewInstantQuery(test.Queryable(), c.Query, c.Start)
qry, err = engine.NewInstantQuery(test.Queryable(), nil, c.Query, c.Start)
} else {
qry, err = engine.NewRangeQuery(test.Queryable(), c.Query, c.Start, c.End, c.Interval)
qry, err = engine.NewRangeQuery(test.Queryable(), nil, c.Query, c.Start, c.End, c.Interval)
}
require.NoError(t, err)
res := qry.Exec(test.Context())
stats := qry.Stats()
require.Equal(t, expError, res.Err)
require.NotNil(t, stats)
if expError == nil {
require.Equal(t, c.MaxSamples, stats.Samples.PeakSamples, "peak samples mismatch for query %q", c.Query)
}
}
// Within limit.
@@ -1132,9 +1627,9 @@ load 1ms
var err error
var qry Query
if c.end == 0 {
qry, err = test.QueryEngine().NewInstantQuery(test.Queryable(), c.query, start)
qry, err = test.QueryEngine().NewInstantQuery(test.Queryable(), nil, c.query, start)
} else {
qry, err = test.QueryEngine().NewRangeQuery(test.Queryable(), c.query, start, end, interval)
qry, err = test.QueryEngine().NewRangeQuery(test.Queryable(), nil, c.query, start, end, interval)
}
require.NoError(t, err)
@@ -1455,7 +1950,7 @@ func TestSubquerySelector(t *testing.T) {
engine := test.QueryEngine()
for _, c := range tst.cases {
t.Run(c.Query, func(t *testing.T) {
qry, err := engine.NewInstantQuery(test.Queryable(), c.Query, c.Start)
qry, err := engine.NewInstantQuery(test.Queryable(), nil, c.Query, c.Start)
require.NoError(t, err)
res := qry.Exec(test.Context())
@@ -2462,8 +2957,8 @@ func TestEngineOptsValidation(t *testing.T) {
for _, c := range cases {
eng := NewEngine(c.opts)
_, err1 := eng.NewInstantQuery(nil, c.query, time.Unix(10, 0))
_, err2 := eng.NewRangeQuery(nil, c.query, time.Unix(0, 0), time.Unix(10, 0), time.Second)
_, err1 := eng.NewInstantQuery(nil, nil, c.query, time.Unix(10, 0))
_, err2 := eng.NewRangeQuery(nil, nil, c.query, time.Unix(0, 0), time.Unix(10, 0), time.Second)
if c.fail {
require.Equal(t, c.expError, err1)
require.Equal(t, c.expError, err2)
@@ -2610,7 +3105,7 @@ func TestRangeQuery(t *testing.T) {
err = test.Run()
require.NoError(t, err)
qry, err := test.QueryEngine().NewRangeQuery(test.Queryable(), c.Query, c.Start, c.End, c.Interval)
qry, err := test.QueryEngine().NewRangeQuery(test.Queryable(), nil, c.Query, c.Start, c.End, c.Interval)
require.NoError(t, err)
res := qry.Exec(test.Context())
@@ -2641,7 +3136,7 @@ func TestSparseHistogramRate(t *testing.T) {
engine := test.QueryEngine()
queryString := fmt.Sprintf("rate(%s[1m])", seriesName)
qry, err := engine.NewInstantQuery(test.Queryable(), queryString, timestamp.Time(int64(5*time.Minute/time.Millisecond)))
qry, err := engine.NewInstantQuery(test.Queryable(), nil, queryString, timestamp.Time(int64(5*time.Minute/time.Millisecond)))
require.NoError(t, err)
res := qry.Exec(test.Context())
require.NoError(t, res.Err)
@@ -2882,7 +3377,7 @@ func TestSparseHistogram_HistogramQuantile(t *testing.T) {
for j, sc := range c.subCases {
t.Run(fmt.Sprintf("%d %s", j, sc.quantile), func(t *testing.T) {
queryString := fmt.Sprintf("histogram_quantile(%s, %s)", sc.quantile, seriesName)
qry, err := engine.NewInstantQuery(test.Queryable(), queryString, timestamp.Time(ts))
qry, err := engine.NewInstantQuery(test.Queryable(), nil, queryString, timestamp.Time(ts))
require.NoError(t, err)
res := qry.Exec(test.Context())
@@ -3007,7 +3502,7 @@ func TestSparseHistogram_Sum_AddOperator(t *testing.T) {
require.NoError(t, app.Commit())
queryAndCheck := func(queryString string) {
qry, err := engine.NewInstantQuery(test.Queryable(), queryString, timestamp.Time(ts))
qry, err := engine.NewInstantQuery(test.Queryable(), nil, queryString, timestamp.Time(ts))
require.NoError(t, err)
res := qry.Exec(test.Context())
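
All of the call-site hunks above follow from one API change: NewInstantQuery and NewRangeQuery now take a *QueryOpts as their second argument, and existing callers pass nil to keep the old behavior. A minimal sketch of the new call shape, written as a hypothetical helper inside the promql package (the helper name and error handling are illustrative, not part of the diff):

```go
package promql

import (
	"context"
	"time"

	"github.com/prometheus/prometheus/storage"
	"github.com/prometheus/prometheus/util/stats"
)

// queryWithStats runs an instant query with per-step stats enabled.
// Per-step stats are only collected when both EngineOpts.EnablePerStepStats
// and QueryOpts.EnablePerStepStats are set, as the test above demonstrates.
func queryWithStats(eng *Engine, q storage.Queryable, expr string, ts time.Time) (*stats.Statistics, error) {
	qry, err := eng.NewInstantQuery(q, &QueryOpts{EnablePerStepStats: true}, expr, ts)
	if err != nil {
		return nil, err
	}
	res := qry.Exec(context.Background())
	if res.Err != nil {
		return nil, res.Err
	}
	return qry.Stats(), nil
}
```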

View file

@@ -15,12 +15,12 @@ package promql
import (
"math"
"regexp"
"sort"
"strconv"
"strings"
"time"
"github.com/grafana/regexp"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
@@ -868,7 +868,7 @@ func funcPredictLinear(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
q := vals[0].(Vector)[0].V
inVec := vals[1].(Vector)
sigf := signatureFunc(false, enh.lblBuf, excludedLabels...)
sigf := signatureFunc(false, enh.lblBuf, labels.BucketLabel)
ignoreSignature := make(map[string]bool) // For signatures having both new and old histograms.
if enh.signatureToMetricWithBuckets == nil {
@@ -934,11 +934,14 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) Vector {
ignoreSignature[l] = true
continue
}
// Add the metric name (which is always removed) to the signature to prevent combining multiple histograms
// with the same label set. See https://github.com/prometheus/prometheus/issues/9910
l = l + el.Metric.Get(model.MetricNameLabel)
mb, ok := enh.signatureToMetricWithBuckets[l]
if !ok {
el.Metric = labels.NewBuilder(el.Metric).
Del(labels.BucketLabel, labels.MetricName).
Del(excludedLabels...).
Labels()
mb = &metricWithBuckets{el.Metric, nil}
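
The metric name is always dropped from histogram_quantile output, so before this fix two different classic histograms that agreed on every remaining label were merged into one bucket set (prometheus/prometheus#9910). Appending the name to the grouping signature, as the hunk above does, keeps them apart. A self-contained sketch of the idea (the sig helper is illustrative, not the engine's signatureFunc):

```go
package main

import "fmt"

// sig builds a grouping key from the labels histogram_quantile keeps,
// i.e. everything except "le" and "__name__".
func sig(lbls map[string]string) string {
	s := ""
	for _, k := range []string{"job", "instance"} { // fixed order for the sketch
		s += lbls[k] + "\xff"
	}
	return s
}

func main() {
	a := map[string]string{"__name__": "req_duration_seconds_bucket", "job": "api", "instance": "i1", "le": "0.5"}
	b := map[string]string{"__name__": "db_duration_seconds_bucket", "job": "api", "instance": "i1", "le": "0.5"}

	fmt.Println(sig(a) == sig(b)) // true: the two histograms collide
	// Appending the (always-removed) metric name separates them,
	// which is exactly what the hunk above does:
	fmt.Println(sig(a)+a["__name__"] == sig(b)+b["__name__"]) // false
}
```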

View file

@@ -55,7 +55,7 @@ func TestDeriv(t *testing.T) {
require.NoError(t, a.Commit())
query, err := engine.NewInstantQuery(storage, "deriv(foo[30m])", timestamp.Time(1493712846939))
query, err := engine.NewInstantQuery(storage, nil, "deriv(foo[30m])", timestamp.Time(1493712846939))
require.NoError(t, err)
result := query.Exec(context.Background())

View file

@@ -57,7 +57,13 @@ const (
)
func fuzzParseMetricWithContentType(in []byte, contentType string) int {
p := textparse.New(in, contentType)
p, warning := textparse.New(in, contentType)
if warning != nil {
// An invalid content type is being passed, which should not happen
// in this context.
panic(warning)
}
var err error
for {
_, err = p.Next()
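
textparse.New now returns the parser together with an error for an unusable Content-Type; the fuzz harness panics because its inputs should always carry a valid one. A non-fuzz caller would check the error instead. A sketch under that assumption (the import path is the one Prometheus uses around this release; treat it and the helper as illustrative):

```go
package main

import (
	"errors"
	"fmt"
	"io"

	"github.com/prometheus/prometheus/model/textparse" // assumed path
)

// parseAll drains a scrape body, surfacing an invalid Content-Type as an
// error instead of panicking like the fuzz target above.
func parseAll(body []byte, contentType string) error {
	p, err := textparse.New(body, contentType)
	if err != nil {
		// e.g. "application/openmetrics-text; charset=UTF-8; charset=utf-8"
		// is rejected with a "duplicate parameter name" error.
		return fmt.Errorf("unsupported content type %q: %w", contentType, err)
	}
	for {
		if _, err := p.Next(); err != nil {
			if errors.Is(err, io.EOF) {
				return nil // end of input
			}
			return err
		}
	}
}

func main() {
	fmt.Println(parseAll([]byte("up 1\n"), "text/plain; version=0.0.4"))
}
```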

promql/fuzz_test.go (new file, 39 lines)
View file

@@ -0,0 +1,39 @@
// Copyright 2022 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Only build when go-fuzz is in use
//go:build gofuzz
// +build gofuzz
package promql
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestfuzzParseMetricWithContentTypePanicOnInvalid(t *testing.T) {
defer func() {
if p := recover(); p == nil {
t.Error("invalid content type should panic")
} else {
err, ok := p.(error)
require.True(t, ok)
require.Contains(t, err.Error(), "duplicate parameter name")
}
}()
const invalidContentType = "application/openmetrics-text; charset=UTF-8; charset=utf-8"
fuzzParseMetricWithContentType([]byte{}, invalidContentType)
}

View file

@@ -611,7 +611,6 @@ var tests = []struct {
{ // Nested Subquery.
input: `min_over_time(rate(foo{bar="baz"}[2s])[5m:])[4m:3s]`,
expected: []Item{
{IDENTIFIER, 0, `min_over_time`},
{LEFT_PAREN, 13, `(`},
{IDENTIFIER, 14, `rate`},
@@ -660,7 +659,6 @@ var tests = []struct {
{
input: `min_over_time(rate(foo{bar="baz"}[2s])[5m:] offset 6m)[4m:3s]`,
expected: []Item{
{IDENTIFIER, 0, `min_over_time`},
{LEFT_PAREN, 13, `(`},
{IDENTIFIER, 14, `rate`},

View file

@@ -73,10 +73,15 @@ type metricWithHistograms struct {
//
// If the highest bucket is not +Inf, NaN is returned.
//
// If q==NaN, NaN is returned.
//
// If q<0, -Inf is returned.
//
// If q>1, +Inf is returned.
func bucketQuantile(q float64, buckets buckets) float64 {
if math.IsNaN(q) {
return math.NaN()
}
if q < 0 {
return math.Inf(-1)
}
@@ -254,10 +259,11 @@ func ensureMonotonic(buckets buckets) {
//
// The Vector will be sorted.
// If 'values' has zero elements, NaN is returned.
// If q==NaN, NaN is returned.
// If q<0, -Inf is returned.
// If q>1, +Inf is returned.
func quantile(q float64, values vectorByValueHeap) float64 {
if len(values) == 0 {
if len(values) == 0 || math.IsNaN(q) {
return math.NaN()
}
if q < 0 {
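
The new doc comments pin down the edge-case contract shared by bucketQuantile and quantile: NaN in, NaN out; q below 0 yields -Inf; q above 1 yields +Inf; and a classic histogram whose highest bucket is not +Inf yields NaN. A runnable sketch of just those guards (interpolation inside buckets is deliberately elided; the minimum-bucket-count check is an assumption beyond the comments shown):

```go
package main

import (
	"fmt"
	"math"
)

type bucket struct {
	upperBound float64
	count      float64
}

// guards mirrors the documented early returns; the boolean reports whether
// a guard fired. A real implementation would go on to rank interpolation.
func guards(q float64, buckets []bucket) (float64, bool) {
	switch {
	case math.IsNaN(q):
		return math.NaN(), true
	case q < 0:
		return math.Inf(-1), true
	case q > 1:
		return math.Inf(+1), true
	}
	// Assumed: at least two buckets, the last of which must be +Inf.
	if len(buckets) < 2 || !math.IsInf(buckets[len(buckets)-1].upperBound, +1) {
		return math.NaN(), true
	}
	return 0, false
}

func main() {
	bs := []bucket{{0.5, 10}, {math.Inf(+1), 12}}
	for _, q := range []float64{math.NaN(), -0.3, 1.7, 0.9} {
		v, fired := guards(q, bs)
		fmt.Printf("q=%v -> %v (guard=%v)\n", q, v, fired)
	}
}
```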

View file

@@ -83,7 +83,11 @@ func logUnfinishedQueries(filename string, filesize int, logger log.Logger) {
func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, error) {
file, err := os.OpenFile(filename, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o666)
if err != nil {
level.Error(logger).Log("msg", "Error opening query log file", "file", filename, "err", err)
absPath, pathErr := filepath.Abs(filename)
if pathErr != nil {
absPath = filename
}
level.Error(logger).Log("msg", "Error opening query log file", "file", absPath, "err", err)
return nil, err
}
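
The query-logger hunk makes the failure message self-locating: a bare relative filename in "Error opening query log file" depends on the working directory, so the code now resolves an absolute path first and falls back to the original name only if resolution itself fails. The same pattern in isolation (standard library only; the path is illustrative):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

func main() {
	filename := "data/active_query.log" // illustrative
	f, err := os.OpenFile(filename, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o666)
	if err != nil {
		// Log an absolute path so the message identifies the file no matter
		// where the process was started; keep the relative name if Abs fails.
		absPath, pathErr := filepath.Abs(filename)
		if pathErr != nil {
			absPath = filename
		}
		fmt.Fprintf(os.Stderr, "error opening query log file %q: %v\n", absPath, err)
		return
	}
	defer f.Close()
	fmt.Println("opened", filename)
}
```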

View file

@@ -17,9 +17,9 @@ import (
"context"
"io/ioutil"
"os"
"regexp"
"testing"
"github.com/grafana/regexp"
"github.com/stretchr/testify/require"
)

View file

@@ -18,11 +18,11 @@ import (
"fmt"
"io/ioutil"
"math"
"regexp"
"strconv"
"strings"
"time"
"github.com/grafana/regexp"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
@@ -533,7 +533,7 @@ func (t *Test) exec(tc testCommand) error {
}
queries = append([]atModifierTestCase{{expr: cmd.expr, evalTime: cmd.start}}, queries...)
for _, iq := range queries {
q, err := t.QueryEngine().NewInstantQuery(t.storage, iq.expr, iq.evalTime)
q, err := t.QueryEngine().NewInstantQuery(t.storage, nil, iq.expr, iq.evalTime)
if err != nil {
return err
}
@@ -555,7 +555,7 @@ func (t *Test) exec(tc testCommand) error {
// Check query returns same result in range mode,
// by checking against the middle step.
q, err = t.queryEngine.NewRangeQuery(t.storage, iq.expr, iq.evalTime.Add(-time.Minute), iq.evalTime.Add(time.Minute), time.Minute)
q, err = t.queryEngine.NewRangeQuery(t.storage, nil, iq.expr, iq.evalTime.Add(-time.Minute), iq.evalTime.Add(time.Minute), time.Minute)
if err != nil {
return err
}
@@ -613,6 +613,8 @@ func (t *Test) clear() {
Timeout: 100 * time.Second,
NoStepSubqueryIntervalFn: func(int64) int64 { return durationMilliseconds(1 * time.Minute) },
EnableAtModifier: true,
EnableNegativeOffset: true,
EnablePerStepStats: true,
}
t.queryEngine = NewEngine(opts)
@@ -680,7 +682,9 @@ type LazyLoader struct {
// LazyLoaderOpts are options for the lazy loader.
type LazyLoaderOpts struct {
// Disabled PromQL engine features.
// Both of these must be set to true for regular PromQL (as of
// Prometheus v2.33). They can still be disabled here for legacy and
// other uses.
EnableAtModifier, EnableNegativeOffset bool
}
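
Taken together, the test.go hunks configure the test engine the way a current caller would: at modifier and negative offset enabled (standard for regular PromQL as of v2.33, per the comment above) and per-step stats switched on. A sketch of an equivalent engine using the fields visible in this diff (the helper name, the MaxSamples value, and the inlined interval function are illustrative):

```go
package promql

import "time"

// newTestLikeEngine mirrors the options Test.clear() now sets up.
func newTestLikeEngine() *Engine {
	return NewEngine(EngineOpts{
		MaxSamples:               10000, // illustrative
		Timeout:                  100 * time.Second,
		NoStepSubqueryIntervalFn: func(int64) int64 { return (1 * time.Minute).Milliseconds() },
		EnableAtModifier:         true, // standard for regular PromQL as of v2.33
		EnableNegativeOffset:     true, // standard for regular PromQL as of v2.33
		EnablePerStepStats:       true, // queries must also set QueryOpts.EnablePerStepStats
	})
}
```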

View file

@@ -399,6 +399,11 @@ eval instant at 1m quantile without(point)((scalar(foo)), data)
{test="three samples"} 1.6
{test="uneven samples"} 2.8
eval instant at 1m quantile without(point)(NaN, data)
{test="two samples"} NaN
{test="three samples"} NaN
{test="uneven samples"} NaN
# Tests for group.
clear

Some files were not shown because too many files have changed in this diff.