Mirror of https://github.com/prometheus/prometheus.git (synced 2025-02-21 03:16:00 -08:00)

Merge pull request #5681 from prometheus/beorn7/mixin: Merge master into mixin
This commit is contained in: commit 498d31e178
@@ -6,17 +6,28 @@ executors:
  # should also be updated.
  golang:
    docker:
      - image: circleci/golang:1.11
      - image: circleci/golang:1.12

jobs:
  test:
    executor: golang
    resource_class: large

    steps:
      - checkout
      - run: make promu
      - run: make check_license style unused staticcheck build check_assets
      - run:
          command: make check_license style unused lint build check_assets
          environment:
            # Run garbage collection more aggressively to avoid getting OOMed during the lint phase.
            GOGC: "20"
      - run:
          command: |
            curl -s -L https://github.com/protocolbuffers/protobuf/releases/download/v3.5.1/protoc-3.5.1-linux-x86_64.zip > /tmp/protoc.zip
            unzip -d /tmp /tmp/protoc.zip
            chmod +x /tmp/bin/protoc
            echo 'export PATH=/tmp/bin:$PATH' >> $BASH_ENV
            source $BASH_ENV
            make proto
      - run: git diff --exit-code
      - store_artifacts:
          path: prometheus

@@ -46,11 +57,11 @@ jobs:

    steps:
      - checkout
      - setup_remote_docker
      - setup_remote_docker:
          version: 18.06.0-ce
      - run: docker run --privileged linuxkit/binfmt:v0.6
      - attach_workspace:
          at: .
      - run: ln -s .build/linux-amd64/prometheus prometheus
      - run: ln -s .build/linux-amd64/promtool promtool
      - run: make docker
      - run: make docker DOCKER_REPO=quay.io/prometheus
      - run: docker images
@@ -58,16 +69,17 @@ jobs:
      - run: docker login -u $QUAY_LOGIN -p $QUAY_PASSWORD quay.io
      - run: make docker-publish
      - run: make docker-publish DOCKER_REPO=quay.io/prometheus
      - run: make docker-manifest
      - run: make docker-manifest DOCKER_REPO=quay.io/prometheus

  docker_hub_release_tags:
    executor: golang

    steps:
      - checkout
      - setup_remote_docker
      - run: mkdir -v -p ${HOME}/bin
      - run: curl -L 'https://github.com/aktau/github-release/releases/download/v0.7.2/linux-amd64-github-release.tar.bz2' | tar xvjf - --strip-components 3 -C ${HOME}/bin
      - run: echo 'export PATH=${HOME}/bin:${PATH}' >> ${BASH_ENV}
      - setup_remote_docker:
          version: 18.06.0-ce
      - run: docker run --privileged linuxkit/binfmt:v0.6
      - attach_workspace:
          at: .
      - run: make promu
@@ -77,19 +89,29 @@ jobs:
      - store_artifacts:
          path: .tarballs
          destination: releases
      - run: ln -s .build/linux-amd64/prometheus prometheus
      - run: ln -s .build/linux-amd64/promtool promtool
      - run: make docker DOCKER_IMAGE_TAG=$CIRCLE_TAG
      - run: make docker DOCKER_IMAGE_TAG=$CIRCLE_TAG DOCKER_REPO=quay.io/prometheus
      - run: docker login -u $DOCKER_LOGIN -p $DOCKER_PASSWORD
      - run: docker login -u $QUAY_LOGIN -p $QUAY_PASSWORD quay.io
      - run: make docker-publish DOCKER_IMAGE_TAG="$CIRCLE_TAG"
      - run: make docker-publish DOCKER_IMAGE_TAG="$CIRCLE_TAG" DOCKER_REPO=quay.io/prometheus
      - run: make docker-manifest DOCKER_IMAGE_TAG="$CIRCLE_TAG"
      - run: make docker-manifest DOCKER_IMAGE_TAG="$CIRCLE_TAG" DOCKER_REPO=quay.io/prometheus
      - run: |
          if [[ "$CIRCLE_TAG" =~ ^v[0-9]+(\.[0-9]+){2}$ ]]; then
            make docker-tag-latest DOCKER_IMAGE_TAG="$CIRCLE_TAG"
            make docker-tag-latest DOCKER_IMAGE_TAG="$CIRCLE_TAG" DOCKER_REPO=quay.io/prometheus
            make docker-publish DOCKER_IMAGE_TAG="latest"
            make docker-publish DOCKER_IMAGE_TAG="latest" DOCKER_REPO=quay.io/prometheus
            make docker-manifest DOCKER_IMAGE_TAG="latest"
            make docker-manifest DOCKER_IMAGE_TAG="latest" DOCKER_REPO=quay.io/prometheus
          fi
      - run: make docker-publish
      - run: make docker-publish DOCKER_REPO=quay.io/prometheus

  makefile_sync:
    executor: golang
    steps:
      - checkout
      - run: ./scripts/sync_makefiles.sh

workflows:
  version: 2

@@ -104,6 +126,7 @@ workflows:
          tags:
            only: /.*/
      - docker_hub_master:
          context: org-context
          requires:
            - test
            - build

@@ -111,6 +134,7 @@ workflows:
          branches:
            only: master
      - docker_hub_release_tags:
          context: org-context
          requires:
            - test
            - build

@@ -119,3 +143,14 @@ workflows:
            only: /^v[0-9]+(\.[0-9]+){2}(-.+|[^-.]*)$/
          branches:
            ignore: /.*/
  nightly:
    triggers:
      - schedule:
          cron: "0 0 * * *"
          filters:
            branches:
              only:
                - master
    jobs:
      - makefile_sync:
          context: org-context

@@ -3,3 +3,5 @@ data/
.tarballs/

!.build/linux-amd64/
!.build/linux-armv7/
!.build/linux-arm64/

.github/lock.yml (vendored, new file, 35 lines)
@@ -0,0 +1,35 @@
# Configuration for Lock Threads - https://github.com/dessant/lock-threads

# Number of days of inactivity before a closed issue or pull request is locked
daysUntilLock: 180

# Skip issues and pull requests created before a given timestamp. Timestamp must
# follow ISO 8601 (`YYYY-MM-DD`). Set to `false` to disable
skipCreatedBefore: false

# Issues and pull requests with these labels will be ignored. Set to `[]` to disable
exemptLabels: []

# Label to add before locking, such as `outdated`. Set to `false` to disable
lockLabel: false

# Comment to post before locking. Set to `false` to disable
lockComment: false

# Assign `resolved` as the reason for locking. Set to `false` to disable
setLockReason: false

# Limit to only `issues` or `pulls`
only: issues

# Optionally, specify configuration settings just for `issues` or `pulls`
# issues:
#   exemptLabels:
#     - help-wanted
#   lockLabel: outdated

# pulls:
#   daysUntilLock: 30

# Repository to extend settings from
# _extends: repo
.gitignore (vendored, 7 lines changed)
@@ -1,9 +1,7 @@
*#
.#*
*-stamp
/*.yaml
/*.yml
/*.rules
*.exe

/prometheus

@@ -12,12 +10,9 @@ benchmark.txt
/data
/cmd/prometheus/data
/cmd/prometheus/debug
/.build
/.release
/.tarballs

!/circle.yml
!/.travis.yml
!/.promu.yml
!/.golangci.yml
/documentation/examples/remote_storage/remote_storage_adapter/remote_storage_adapter
/documentation/examples/remote_storage/example_write_adapter/example_writer_adapter

.golangci.yml (new file, 13 lines)
@@ -0,0 +1,13 @@
run:
  modules-download-mode: vendor
  deadline: 5m

issues:
  exclude-rules:
    - path: _test.go
      linters:
        - errcheck

linters-settings:
  errcheck:
    exclude: scripts/errcheck_excludes.txt
.promu.yml (12 lines changed)
@@ -1,7 +1,7 @@
go:
  # Whenever the Go version is updated here, .travis.yml and
  # .circle/config.yml should also be updated.
  version: 1.11
  version: 1.12
repository:
  path: github.com/prometheus/prometheus
build:

@@ -12,11 +12,11 @@ build:
      path: ./cmd/promtool
  flags: -mod=vendor -a -tags netgo
  ldflags: |
    -X {{repoPath}}/vendor/github.com/prometheus/common/version.Version={{.Version}}
    -X {{repoPath}}/vendor/github.com/prometheus/common/version.Revision={{.Revision}}
    -X {{repoPath}}/vendor/github.com/prometheus/common/version.Branch={{.Branch}}
    -X {{repoPath}}/vendor/github.com/prometheus/common/version.BuildUser={{user}}@{{host}}
    -X {{repoPath}}/vendor/github.com/prometheus/common/version.BuildDate={{date "20060102-15:04:05"}}
    -X github.com/prometheus/common/version.Version={{.Version}}
    -X github.com/prometheus/common/version.Revision={{.Revision}}
    -X github.com/prometheus/common/version.Branch={{.Branch}}
    -X github.com/prometheus/common/version.BuildUser={{user}}@{{host}}
    -X github.com/prometheus/common/version.BuildDate={{date "20060102-15:04:05"}}
tarball:
  files:
    - consoles

.travis.yml (12 lines changed)
@@ -1,14 +1,18 @@
sudo: false

language: go

# Whenever the Go version is updated here, .circleci/config.yml and .promu.yml
# should also be updated.
go:
- 1.11.x
- 1.12.x

go_import_path: github.com/prometheus/prometheus

# This ensures that the local cache is filled before running the CI.
# travis_retry retries the command 3 times if it fails as we've experienced
# random issues on Travis.
before_install:
- travis_retry make deps

script:
- make check_license style unused test staticcheck check_assets
- make check_license style unused test lint check_assets
- git diff --exit-code

CHANGELOG.md (198 lines changed)
@@ -1,3 +1,185 @@
## 2.10.0 / 2019-05-25

* [CHANGE/BUGFIX] API: Encode alert values as string to correctly represent Inf/NaN. #5582
* [FEATURE] Template expansion: Make external labels available as `$externalLabels` in alert and console template expansion. #5463
* [FEATURE] TSDB: Add `prometheus_tsdb_wal_segment_current` metric for the WAL segment index that TSDB is currently writing to. tsdb#601
* [FEATURE] Scrape: Add `scrape_series_added` per-scrape metric. #5546
* [ENHANCEMENT] Discovery/kubernetes: Add labels `__meta_kubernetes_endpoint_node_name` and `__meta_kubernetes_endpoint_hostname`. #5571
* [ENHANCEMENT] Discovery/azure: Add label `__meta_azure_machine_public_ip`. #5475
* [ENHANCEMENT] TSDB: Simplify mergedPostings.Seek, resulting in better performance if there are many posting lists. tsdb#595
* [ENHANCEMENT] Log filesystem type on startup. #5558
* [ENHANCEMENT] Cmd/promtool: Use POST requests for Query and QueryRange. client_golang#557
* [ENHANCEMENT] Web: Sort alerts by group name. #5448
* [ENHANCEMENT] Console templates: Add convenience variables `$rawParams`, `$params`, `$path`. #5463
* [BUGFIX] TSDB: Don't panic when running out of disk space and recover nicely from the condition. tsdb#582
* [BUGFIX] TSDB: Correctly handle empty labels. tsdb#594
* [BUGFIX] TSDB: Don't crash on an unknown tombstone reference. tsdb#604
* [BUGFIX] Storage/remote: Remove queue-manager specific metrics if queue no longer exists. #5445 #5485 #5555
* [BUGFIX] PromQL: Correctly display `{__name__="a"}`. #5552
* [BUGFIX] Discovery/kubernetes: Use `service` rather than `ingress` as the name for the service workqueue. #5520
* [BUGFIX] Discovery/azure: Don't panic on a VM with a public IP. #5587
* [BUGFIX] Discovery/triton: Always read HTTP body to completion. #5596
* [BUGFIX] Web: Fixed Content-Type for js and css instead of using `/etc/mime.types`. #5551

## 2.9.2 / 2019-04-24

* [BUGFIX] Make sure subquery range is taken into account for selection #5467
* [BUGFIX] Exhaust every request body before closing it #5166
* [BUGFIX] Cmd/promtool: return errors from rule evaluations #5483
* [BUGFIX] Remote Storage: string interner should not panic in release #5487
* [BUGFIX] Fix memory allocation regression in mergedPostings.Seek tsdb#586

## 2.9.1 / 2019-04-16

* [BUGFIX] Discovery/kubernetes: fix missing label sanitization #5462
* [BUGFIX] Remote_write: Prevent reshard concurrent with calling stop #5460

## 2.9.0 / 2019-04-15

This release uses Go 1.12, which includes a change in how memory is released
to Linux. This will cause RSS to be reported as higher, however this is harmless
and the memory is available to the kernel when it needs it.

* [CHANGE/ENHANCEMENT] Update Consul to support catalog.ServiceMultipleTags. #5151
* [FEATURE] Add honor_timestamps scrape option. #5304
* [ENHANCEMENT] Discovery/kubernetes: add present labels for labels/annotations. #5443
* [ENHANCEMENT] OpenStack SD: Add ProjectID and UserID meta labels. #5431
* [ENHANCEMENT] Add GODEBUG and retention to the runtime page. #5324 #5322
* [ENHANCEMENT] Add support for POSTing to /series endpoint. #5422
* [ENHANCEMENT] Support PUT methods for Lifecycle and Admin APIs. #5376
* [ENHANCEMENT] Scrape: Add global jitter for HA server. #5181
* [ENHANCEMENT] Check for cancellation on every step of a range evaluation. #5131
* [ENHANCEMENT] String interning for labels & values in the remote_write path. #5316
* [ENHANCEMENT] Don't lose the scrape cache on a failed scrape. #5414
* [ENHANCEMENT] Reload cert files from disk automatically. common#173
* [ENHANCEMENT] Use fixed length millisecond timestamp format for logs. common#172
* [ENHANCEMENT] Performance improvements for postings. tsdb#509 tsdb#572
* [BUGFIX] Remote Write: fix checkpoint reading. #5429
* [BUGFIX] Check if label value is valid when unmarshaling external labels from YAML. #5316
* [BUGFIX] Promparse: sort all labels when parsing. #5372
* [BUGFIX] Reload rules: copy state on both name and labels. #5368
* [BUGFIX] Exponentiation operator to drop metric name in result of operation. #5329
* [BUGFIX] Config: resolve more file paths. #5284
* [BUGFIX] Promtool: resolve relative paths in alert test files. #5336
* [BUGFIX] Set TLSHandshakeTimeout in HTTP transport. common#179
* [BUGFIX] Use fsync to be more resilient to machine crashes. tsdb#573 tsdb#578
* [BUGFIX] Keep series that are still in WAL in checkpoints. tsdb#577
* [BUGFIX] Fix output sample values for scalar-to-vector comparison operations. #5454

## 2.8.1 / 2019-03-28

* [BUGFIX] Display the job labels in `/targets` which was removed accidentally. #5406

## 2.8.0 / 2019-03-12

This release uses Write-Ahead Logging (WAL) for the remote_write API. This currently causes a slight increase in memory usage, which will be addressed in future releases.

* [CHANGE] Default time retention is used only when no size based retention is specified. These are flags where time retention is specified by the flag `--storage.tsdb.retention` and size retention by `--storage.tsdb.retention.size`. #5216
* [CHANGE] `prometheus_tsdb_storage_blocks_bytes_total` is now `prometheus_tsdb_storage_blocks_bytes`. prometheus/tsdb#506
* [FEATURE] [EXPERIMENTAL] Time overlapping blocks are now allowed; vertical compaction and vertical query merge. It is an optional feature which is controlled by the `--storage.tsdb.allow-overlapping-blocks` flag, disabled by default. prometheus/tsdb#370
* [ENHANCEMENT] Use the WAL for remote_write API. #4588
* [ENHANCEMENT] Query performance improvements. prometheus/tsdb#531
* [ENHANCEMENT] UI enhancements with upgrade to Bootstrap 4. #5226
* [ENHANCEMENT] Reduce time that Alertmanagers are in flux when reloaded. #5126
* [ENHANCEMENT] Limit number of metrics displayed on UI to 10000. #5139
* [ENHANCEMENT] (1) Remember All/Unhealthy choice on target-overview when reloading page. (2) Resize text-input area on Graph page on mouseclick. #5201
* [ENHANCEMENT] In `histogram_quantile` merge buckets with equivalent le values. #5158.
* [ENHANCEMENT] Show list of offending labels in the error message in many-to-many scenarios. #5189
* [ENHANCEMENT] Show `Storage Retention` criteria in effect on `/status` page. #5322
* [BUGFIX] Fix sorting of rule groups. #5260
* [BUGFIX] Fix support for password_file and bearer_token_file in Kubernetes SD. #5211
* [BUGFIX] Scrape: catch errors when creating HTTP clients #5182. Adds new metrics:
  * `prometheus_target_scrape_pools_total`
  * `prometheus_target_scrape_pools_failed_total`
  * `prometheus_target_scrape_pool_reloads_total`
  * `prometheus_target_scrape_pool_reloads_failed_total`
* [BUGFIX] Fix panic when aggregator param is not a literal. #5290

## 2.7.2 / 2019-03-02

* [BUGFIX] `prometheus_rule_group_last_evaluation_timestamp_seconds` is now a unix timestamp. #5186

## 2.7.1 / 2019-01-31

This release has a fix for a Stored DOM XSS vulnerability that can be triggered when using the query history functionality. Thanks to Dor Tumarkin from Checkmarx for reporting it.

* [BUGFIX/SECURITY] Fix a Stored DOM XSS vulnerability with query history. #5163
* [BUGFIX] `prometheus_rule_group_last_duration_seconds` now reports seconds instead of nanoseconds. #5153
* [BUGFIX] Make sure the targets are consistently sorted in the targets page. #5161

## 2.7.0 / 2019-01-28

We're rolling back the Dockerfile changes introduced in 2.6.0. If you made changes to your docker deployment in 2.6.0, you will need to roll them back. This release also adds experimental support for disk size based retention. To accommodate that we are deprecating the flag `storage.tsdb.retention` in favour of `storage.tsdb.retention.time`. We print a warning if the flag is in use, but it will function without breaking until Prometheus 3.0.
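
As a purely illustrative sketch (the values are made up, not part of this changelog), the new retention flags can be combined:

```
prometheus --storage.tsdb.retention.time=30d --storage.tsdb.retention.size=512GB
```

Both limits apply at once: enabling size based retention does not disable the time based policy.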

* [CHANGE] Rollback Dockerfile to version at 2.5.0. Rollback of the breaking change introduced in 2.6.0. #5122
* [FEATURE] Add subqueries to PromQL. #4831
* [FEATURE] [EXPERIMENTAL] Add support for disk size based retention. Note that we don't consider the WAL size which could be significant and the time based retention policy also applies. #5109 prometheus/tsdb#343
* [FEATURE] Add CORS origin flag. #5011
* [ENHANCEMENT] Consul SD: Add tagged address to the discovery metadata. #5001
* [ENHANCEMENT] Kubernetes SD: Add service external IP and external name to the discovery metadata. #4940
* [ENHANCEMENT] Azure SD: Add support for Managed Identity authentication. #4590
* [ENHANCEMENT] Azure SD: Add tenant and subscription IDs to the discovery metadata. #4969
* [ENHANCEMENT] OpenStack SD: Add support for application credentials based authentication. #4968
* [ENHANCEMENT] Add metric for number of rule groups loaded. #5090
* [BUGFIX] Avoid duplicate tests for alert unit tests. #4964
* [BUGFIX] Don't depend on given order when comparing samples in alert unit testing. #5049
* [BUGFIX] Make sure the retention period doesn't overflow. #5112
* [BUGFIX] Make sure the blocks don't get very large. #5112
* [BUGFIX] Don't generate blocks with no samples. prometheus/tsdb#374
* [BUGFIX] Reintroduce metric for WAL corruptions. prometheus/tsdb#473

## 2.6.1 / 2019-01-15

* [BUGFIX] Azure SD: Fix discovery getting stuck sometimes. #5088
* [BUGFIX] Marathon SD: Use `Tasks.Ports` when `RequirePorts` is `false`. #5026
* [BUGFIX] Promtool: Fix "out-of-order sample" errors when testing rules. #5069

## 2.6.0 / 2018-12-17

* [CHANGE] Remove default flags from the container's entrypoint, run Prometheus from `/etc/prometheus` and symlink the storage directory to `/etc/prometheus/data`. #4976
* [CHANGE] Promtool: Remove the `update` command. #3839
* [FEATURE] Add JSON log format via the `--log.format` flag. #4876
* [FEATURE] API: Add /api/v1/labels endpoint to get all label names. #4835
* [FEATURE] Web: Allow setting the page's title via the `--web.ui-title` flag. #4841
* [ENHANCEMENT] Add `prometheus_tsdb_lowest_timestamp_seconds`, `prometheus_tsdb_head_min_time_seconds` and `prometheus_tsdb_head_max_time_seconds` metrics. #4888
* [ENHANCEMENT] Add `rule_group_last_evaluation_timestamp_seconds` metric. #4852
* [ENHANCEMENT] Add `prometheus_template_text_expansion_failures_total` and `prometheus_template_text_expansions_total` metrics. #4747
* [ENHANCEMENT] Set consistent User-Agent header in outgoing requests. #4891
* [ENHANCEMENT] Azure SD: Error out at load time when authentication parameters are missing. #4907
* [ENHANCEMENT] EC2 SD: Add the machine's private DNS name to the discovery metadata. #4693
* [ENHANCEMENT] EC2 SD: Add the operating system's platform to the discovery metadata. #4663
* [ENHANCEMENT] Kubernetes SD: Add the pod's phase to the discovery metadata. #4824
* [ENHANCEMENT] Kubernetes SD: Log Kubernetes messages. #4931
* [ENHANCEMENT] Promtool: Collect CPU and trace profiles. #4897
* [ENHANCEMENT] Promtool: Support writing output as JSON. #4848
* [ENHANCEMENT] Remote Read: Return available data if remote read fails partially. #4832
* [ENHANCEMENT] Remote Write: Improve queue performance. #4772
* [ENHANCEMENT] Remote Write: Add min_shards parameter to set the minimum number of shards. #4924
* [ENHANCEMENT] TSDB: Improve WAL reading. #4953
* [ENHANCEMENT] TSDB: Memory improvements. #4953
* [ENHANCEMENT] Web: Log stack traces on panic. #4221
* [ENHANCEMENT] Web UI: Add copy to clipboard button for configuration. #4410
* [ENHANCEMENT] Web UI: Support console queries at specific times. #4764
* [ENHANCEMENT] Web UI: group targets by job then instance. #4898 #4806
* [BUGFIX] Deduplicate handler labels for HTTP metrics. #4732
* [BUGFIX] Fix leaked queriers causing shutdowns to hang. #4922
* [BUGFIX] Fix configuration loading panics on nil pointer slice elements. #4942
* [BUGFIX] API: Correctly skip mismatching targets on /api/v1/targets/metadata. #4905
* [BUGFIX] API: Better rounding for incoming query timestamps. #4941
* [BUGFIX] Azure SD: Fix panic. #4867
* [BUGFIX] Console templates: Fix hover when the metric has a null value. #4906
* [BUGFIX] Discovery: Remove all targets when the scrape configuration gets empty. #4819
* [BUGFIX] Marathon SD: Fix leaked connections. #4915
* [BUGFIX] Marathon SD: Use 'hostPort' member of portMapping to construct target endpoints. #4887
* [BUGFIX] PromQL: Fix a goroutine leak in the lexer/parser. #4858
* [BUGFIX] Scrape: Pass through content-type for non-compressed output. #4912
* [BUGFIX] Scrape: Fix deadlock in the scrape's manager. #4894
* [BUGFIX] Scrape: Scrape targets at fixed intervals even after Prometheus restarts. #4926
* [BUGFIX] TSDB: Support restored snapshots including the head properly. #4953
* [BUGFIX] TSDB: Repair WAL when the last record in a segment is torn. #4953
* [BUGFIX] TSDB: Fix unclosed file readers on Windows systems. #4997
* [BUGFIX] Web: Avoid proxy to connect to the local gRPC server. #4572

## 2.5.0 / 2018-11-06

* [CHANGE] Group targets by scrape config instead of job name. #4806 #4526

@@ -60,7 +242,7 @@ This release includes multiple bugfixes and features. Further, the WAL implement
* [ENHANCEMENT] Send EndsAt along with the alert to Alertmanager #4550
* [ENHANCEMENT] Limit the samples returned by remote read endpoint #4532
* [ENHANCEMENT] Limit the data read in through remote read #4239
* [ENHANCEMENT] Coalesce identical SD configuations #3912
* [ENHANCEMENT] Coalesce identical SD configurations #3912
* [ENHANCEMENT] `promtool`: Add new commands for debugging and querying #4247 #4308 #4346 #4454
* [ENHANCEMENT] Update console examples for node_exporter v0.16.0 #4208
* [ENHANCEMENT] Optimize PromQL aggregations #4248

@@ -90,13 +272,13 @@ This release includes multiple bugfixes and features. Further, the WAL implement
* [BUGFIX] discovery/kubernetes/ingress: fix scheme discovery #4329
* [BUGFIX] Fix race in zookeeper sd #4355
* [BUGFIX] Better timeout handling in promql #4291 #4300
* [BUGFIX] Propogate errors when selecting series from the tsdb #4136
* [BUGFIX] Propagate errors when selecting series from the tsdb #4136

## 2.3.1 / 2018-06-19

* [BUGFIX] Avoid infinite loop on duplicate NaN values. #4275
* [BUGFIX] Fix nil pointer dereference when using various API endpoints #4282
* [BUGFIX] config: set target group source index during unmarshalling #4245
* [BUGFIX] config: set target group source index during unmarshaling #4245
* [BUGFIX] discovery/file: fix logging #4178
* [BUGFIX] kubernetes_sd: fix namespace filtering #4285
* [BUGFIX] web: restore old path prefix behavior #4273
@@ -110,7 +292,7 @@ This release includes multiple bugfixes and features. Further, the WAL implement
* [FEATURE] Add security headers to HTTP server responses
* [FEATURE] Pass query hints via remote read API
* [FEATURE] Basic auth passwords can now be configured via file across all configuration
* [ENHANCEMENT] Optimise PromQL and API serialization for memory usage and allocations
* [ENHANCEMENT] Optimize PromQL and API serialization for memory usage and allocations
* [ENHANCEMENT] Limit number of dropped targets in web UI
* [ENHANCEMENT] Consul and EC2 service discovery allow using server-side filtering for performance improvement
* [ENHANCEMENT] Add advanced filtering configuration to EC2 service discovery
@@ -133,7 +315,7 @@ This release includes multiple bugfixes and features. Further, the WAL implement

* [BUGFIX] Fix data loss in TSDB on compaction
* [BUGFIX] Correctly stop timer in remote-write path
* [BUGFIX] Fix deadlock triggerd by loading targets page
* [BUGFIX] Fix deadlock triggered by loading targets page
* [BUGFIX] Fix incorrect buffering of samples on range selection queries
* [BUGFIX] Handle large index files on windows properly

@@ -526,7 +708,7 @@ This is a breaking change to the Kubernetes service discovery.
* [ENHANCEMENT] Message on empty Alerts page.
* [ENHANCEMENT] Various internal code refactorings and clean-ups.
* [ENHANCEMENT] Various improvements in the build system.
* [BUGFIX] Catch errors when unmarshalling delta/doubleDelta encoded chunks.
* [BUGFIX] Catch errors when unmarshaling delta/doubleDelta encoded chunks.
* [BUGFIX] Fix data race in lexer and lexer test.
* [BUGFIX] Trim stray whitespace from bearer token file.
* [BUGFIX] Avoid divide-by-zero panic on query_range?step=0.

@@ -1118,7 +1300,7 @@ All changes:
  from embedding into the binary. Those files are only used for debugging,
  and then you can use -web.use-local-assets. By including fewer files, the
  RAM usage during compilation is much more manageable.
* [ENHANCEMENT] Help link points to http://prometheus.github.io now.
* [ENHANCEMENT] Help link points to https://prometheus.github.io now.
* [FEATURE] Consoles for haproxy and cloudwatch.
* [BUGFIX] Several fixes to graphs in consoles.
* [CLEANUP] Removed a file size check that did not check anything.

@@ -1211,4 +1393,4 @@ All changes:
* [BUGFIX] Built from Go 1.2.1, which has internal fixes to race conditions in garbage collection handling.
* [ENHANCEMENT] Internal storage interface refactoring that allows building e.g. the `rule_checker` tool without LevelDB dynamic library dependencies.
* [ENHANCEMENT] Cleanups around shutdown handling.
* [PERFORMANCE] Preparations for better memory reuse during marshalling / unmarshalling.
* [PERFORMANCE] Preparations for better memory reuse during marshaling / unmarshaling.

@@ -17,7 +17,7 @@ Prometheus uses GitHub to manage reviews of pull requests.
  Comments](https://code.google.com/p/go-wiki/wiki/CodeReviewComments)
  and the _Formatting and style_ section of Peter Bourgon's [Go: Best
  Practices for Production
  Environments](http://peter.bourgon.org/go-in-production/#formatting-and-style).
  Environments](https://peter.bourgon.org/go-in-production/#formatting-and-style).

* Be sure to sign off on the [DCO](https://github.com/probot/dco#how-it-works)

@@ -40,7 +40,9 @@ go build ./cmd/prometheus/
make test # Make sure all the tests pass before you commit and push :)
```

All our issues are regularly tagged so that you can also filter down the issues involving the components you want to work on. For our labelling policy refer [the wiki page](https://github.com/prometheus/prometheus/wiki/Label-Names-and-Descriptions).
We use [`golangci-lint`](https://github.com/golangci/golangci-lint) for linting the code. If it reports an issue and you think that the warning needs to be disregarded or is a false-positive, you can add a special comment `//nolint:linter1[,linter2,...]` before the offending line. Use this sparingly though, fixing the code to comply with the linter's recommendation is in general the preferred course of action.
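
As a hypothetical illustration (the file name and function below are invented for this sketch), such a comment silences only the named linter on that single line:

```go
package main

import "os"

func main() {
	f, err := os.Open("prometheus.yml")
	if err != nil {
		return
	}
	// errcheck would normally flag the unchecked Close error here.
	defer f.Close() //nolint:errcheck // Close error deliberately ignored in this sketch.
}
```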

All our issues are regularly tagged so that you can also filter down the issues involving the components you want to work on. For our labeling policy refer to [the wiki page](https://github.com/prometheus/prometheus/wiki/Label-Names-and-Descriptions).

## Pull Request Checklist

@@ -54,7 +56,7 @@ All our issues are regularly tagged so that you can also filter down the issues

## Dependency management

The Prometheus project uses [Go modules](https://golang.org/cmd/go/#hdr-Modules__module_versions__and_more) to manage dependencies on external packages. This requires a working Go environment with version 1.11 or greater installed.
The Prometheus project uses [Go modules](https://golang.org/cmd/go/#hdr-Modules__module_versions__and_more) to manage dependencies on external packages. This requires a working Go environment with version 1.12 or greater installed.

All dependencies are vendored in the `vendor/` directory.
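
A typical dependency update, sketched with an illustrative module path, keeps `go.mod`, `go.sum`, and `vendor/` in sync:

```sh
GO111MODULE=on go get example.com/some/module@v1.2.3
GO111MODULE=on go mod tidy    # prune unused requirements
GO111MODULE=on go mod vendor  # refresh the vendor/ directory
```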

Dockerfile (18 lines changed)
@@ -1,8 +1,12 @@
FROM quay.io/prometheus/busybox:latest
ARG ARCH="amd64"
ARG OS="linux"
FROM quay.io/prometheus/busybox-${OS}-${ARCH}:latest
LABEL maintainer="The Prometheus Authors <prometheus-developers@googlegroups.com>"

COPY prometheus /bin/prometheus
COPY promtool /bin/promtool
ARG ARCH="amd64"
ARG OS="linux"
COPY .build/${OS}-${ARCH}/prometheus /bin/prometheus
COPY .build/${OS}-${ARCH}/promtool /bin/promtool
COPY documentation/examples/prometheus.yml /etc/prometheus/prometheus.yml
COPY console_libraries/ /usr/share/prometheus/console_libraries/
COPY consoles/ /usr/share/prometheus/consoles/

@@ -15,8 +19,8 @@ USER nobody
EXPOSE 9090
VOLUME [ "/prometheus" ]
WORKDIR /prometheus
ENTRYPOINT [ "/bin/prometheus", \
ENTRYPOINT [ "/bin/prometheus" ]
CMD [ "--config.file=/etc/prometheus/prometheus.yml", \
    "--storage.tsdb.path=/prometheus", \
    "--web.console.libraries=/etc/prometheus/console_libraries", \
    "--web.console.templates=/etc/prometheus/consoles", \
    "--config.file=/etc/prometheus/prometheus.yml" ]
    "--web.console.libraries=/usr/share/prometheus/console_libraries", \
    "--web.console.templates=/usr/share/prometheus/consoles" ]

@@ -1,6 +1,6 @@
Maintainers of this repository with their focus areas:

* Brian Brazil <brian.brazil@robustperception.io> @brian-brazil: Console templates; semantics of PromQL, service discovery, and relabeling.
* Fabian Reinartz <fabian.reinartz@coreos.com> @fabxc: PromQL parsing and evaluation; implementation of retrieval, alert notification, and service discovery.
* Fabian Reinartz <freinartz@google.com> @fabxc: PromQL parsing and evaluation; implementation of retrieval, alert notification, and service discovery.
* Julius Volz <julius.volz@gmail.com> @juliusv: Remote storage integrations; web UI.

Makefile (17 lines changed)
@@ -11,24 +11,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# Needs to be defined before including Makefile.common to auto-generate targets
DOCKER_ARCHS ?= amd64 armv7 arm64

include Makefile.common

STATICCHECK_IGNORE = \
  github.com/prometheus/prometheus/discovery/kubernetes/kubernetes.go:SA1019 \
  github.com/prometheus/prometheus/discovery/kubernetes/node.go:SA1019 \
  github.com/prometheus/prometheus/documentation/examples/remote_storage/remote_storage_adapter/main.go:SA1019 \
  github.com/prometheus/prometheus/pkg/textparse/promlex.l.go:SA4006 \
  github.com/prometheus/prometheus/pkg/textparse/openmetricslex.l.go:SA4006 \
  github.com/prometheus/prometheus/pkg/pool/pool.go:SA6002 \
  github.com/prometheus/prometheus/promql/engine.go:SA6002 \
  github.com/prometheus/prometheus/prompb/rpc.pb.gw.go:SA1019

DOCKER_IMAGE_NAME ?= prometheus

# Go modules needs the bzr binary because of the dependency on launchpad.net/gocheck.
$(eval $(call PRECHECK_COMMAND_template,bzr))
PRECHECK_OPTIONS_bzr = version

.PHONY: assets
assets:
	@echo ">> writing assets"

Makefile.common (142 lines changed)
@@ -29,12 +29,15 @@ GO ?= go
GOFMT ?= $(GO)fmt
FIRST_GOPATH := $(firstword $(subst :, ,$(shell $(GO) env GOPATH)))
GOOPTS ?=
GOHOSTOS ?= $(shell $(GO) env GOHOSTOS)
GOHOSTARCH ?= $(shell $(GO) env GOHOSTARCH)

GO_VERSION ?= $(shell $(GO) version)
GO_VERSION_NUMBER ?= $(word 3, $(GO_VERSION))
PRE_GO_111 ?= $(shell echo $(GO_VERSION_NUMBER) | grep -E 'go1\.(10|[0-9])\.')

unexport GOVENDOR
GOVENDOR :=
GO111MODULE :=
ifeq (, $(PRE_GO_111))
	ifneq (,$(wildcard go.mod))
		# Enforce Go modules support just in case the directory is inside GOPATH (and for Travis CI).

@@ -55,32 +58,58 @@ $(warning Some recipes may not work as expected as the current Go runtime is '$(
	# This repository isn't using Go modules (yet).
	GOVENDOR := $(FIRST_GOPATH)/bin/govendor
endif

unexport GO111MODULE
endif
PROMU := $(FIRST_GOPATH)/bin/promu
STATICCHECK := $(FIRST_GOPATH)/bin/staticcheck
pkgs = ./...

GO_VERSION ?= $(shell $(GO) version)
GO_BUILD_PLATFORM ?= $(subst /,-,$(lastword $(GO_VERSION)))
ifeq (arm, $(GOHOSTARCH))
	GOHOSTARM ?= $(shell GOARM= $(GO) env GOARM)
	GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH)v$(GOHOSTARM)
else
	GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH)
endif

PROMU_VERSION ?= 0.2.0
PROMU_VERSION ?= 0.4.0
PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz

GOLANGCI_LINT :=
GOLANGCI_LINT_OPTS ?=
GOLANGCI_LINT_VERSION ?= v1.16.0
# golangci-lint only supports linux, darwin and windows platforms on i386/amd64.
# windows isn't included here because of the path separator being different.
ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))
	ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386))
		GOLANGCI_LINT := $(FIRST_GOPATH)/bin/golangci-lint
	endif
endif

PREFIX ?= $(shell pwd)
BIN_DIR ?= $(shell pwd)
DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD))
DOCKERFILE_PATH ?= ./
DOCKER_REPO ?= prom

.PHONY: all
all: precheck style staticcheck unused build test
DOCKER_ARCHS ?= amd64

BUILD_DOCKER_ARCHS = $(addprefix common-docker-,$(DOCKER_ARCHS))
PUBLISH_DOCKER_ARCHS = $(addprefix common-docker-publish-,$(DOCKER_ARCHS))
TAG_DOCKER_ARCHS = $(addprefix common-docker-tag-latest-,$(DOCKER_ARCHS))

ifeq ($(GOHOSTARCH),amd64)
	ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux freebsd darwin windows))
		# Only supported on amd64
		test-flags := -race
	endif
endif

# This rule is used to forward a target like "build" to "common-build". This
# allows a new "build" target to be defined in a Makefile which includes this
# one and override "common-build" without override warnings.
%: common-% ;
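
For example, a Makefile that includes this one (a hypothetical sketch; recipe lines need tabs) can simply define its own target, and only undefined targets fall through to their common-* counterparts:

    include Makefile.common

    build:  # shadows the forwarded common-build, with no override warnings
        @echo "custom build step"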

.PHONY: common-all
common-all: precheck style check_license lint unused build test

.PHONY: common-style
common-style:
	@echo ">> checking code style"

@@ -102,6 +131,15 @@ common-check_license:
		exit 1; \
	fi

.PHONY: common-deps
common-deps:
	@echo ">> getting dependencies"
ifdef GO111MODULE
	GO111MODULE=$(GO111MODULE) $(GO) mod download
else
	$(GO) get $(GOOPTS) -t ./...
endif

.PHONY: common-test-short
common-test-short:
	@echo ">> running short tests"

@@ -110,26 +148,35 @@ common-test-short:
.PHONY: common-test
common-test:
	@echo ">> running all tests"
	GO111MODULE=$(GO111MODULE) $(GO) test -race $(GOOPTS) $(pkgs)
	GO111MODULE=$(GO111MODULE) $(GO) test $(test-flags) $(GOOPTS) $(pkgs)

.PHONY: common-format
common-format:
	@echo ">> formatting code"
	GO111MODULE=$(GO111MODULE) $(GO) fmt $(GOOPTS) $(pkgs)
	GO111MODULE=$(GO111MODULE) $(GO) fmt $(pkgs)

.PHONY: common-vet
common-vet:
	@echo ">> vetting code"
	GO111MODULE=$(GO111MODULE) $(GO) vet $(GOOPTS) $(pkgs)

.PHONY: common-staticcheck
common-staticcheck: $(STATICCHECK)
	@echo ">> running staticcheck"
.PHONY: common-lint
common-lint: $(GOLANGCI_LINT)
ifdef GOLANGCI_LINT
	@echo ">> running golangci-lint"
ifdef GO111MODULE
	GO111MODULE=$(GO111MODULE) $(STATICCHECK) -ignore "$(STATICCHECK_IGNORE)" -checks "SA*" $(pkgs)
	# 'go list' needs to be executed before staticcheck to prepopulate the modules cache.
	# Otherwise staticcheck might fail randomly for some reason not yet explained.
	GO111MODULE=$(GO111MODULE) $(GO) list -e -compiled -test=true -export=false -deps=true -find=false -tags= -- ./... > /dev/null
	GO111MODULE=$(GO111MODULE) $(GOLANGCI_LINT) run $(GOLANGCI_LINT_OPTS) $(pkgs)
else
	$(STATICCHECK) -ignore "$(STATICCHECK_IGNORE)" $(pkgs)
	$(GOLANGCI_LINT) run $(pkgs)
endif
endif

# For backward-compatibility.
.PHONY: common-staticcheck
common-staticcheck: lint

.PHONY: common-unused
common-unused: $(GOVENDOR)

@@ -140,8 +187,9 @@ else
ifdef GO111MODULE
	@echo ">> running check for unused/missing packages in go.mod"
	GO111MODULE=$(GO111MODULE) $(GO) mod tidy
ifeq (,$(wildcard vendor))
	@git diff --exit-code -- go.sum go.mod
ifneq (,$(wildcard vendor))
else
	@echo ">> running check for unused packages in vendor/"
	GO111MODULE=$(GO111MODULE) $(GO) mod vendor
	@git diff --exit-code -- go.sum go.mod vendor/

@@ -159,45 +207,50 @@ common-tarball: promu
	@echo ">> building release tarball"
	$(PROMU) tarball --prefix $(PREFIX) $(BIN_DIR)

.PHONY: common-docker
common-docker:
	docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" .
.PHONY: common-docker $(BUILD_DOCKER_ARCHS)
common-docker: $(BUILD_DOCKER_ARCHS)
$(BUILD_DOCKER_ARCHS): common-docker-%:
	docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" \
		--build-arg ARCH="$*" \
		--build-arg OS="linux" \
		$(DOCKERFILE_PATH)
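
As a usage sketch (the architecture list here is illustrative), one pattern target is generated per entry in DOCKER_ARCHS, so

    make common-docker DOCKER_ARCHS="amd64 arm64"

builds the common-docker-amd64 and common-docker-arm64 images through the pattern rule above.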

.PHONY: common-docker-publish
common-docker-publish:
	docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)"
.PHONY: common-docker-publish $(PUBLISH_DOCKER_ARCHS)
common-docker-publish: $(PUBLISH_DOCKER_ARCHS)
$(PUBLISH_DOCKER_ARCHS): common-docker-publish-%:
	docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)"

.PHONY: common-docker-tag-latest
common-docker-tag-latest:
	docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):latest"
.PHONY: common-docker-tag-latest $(TAG_DOCKER_ARCHS)
common-docker-tag-latest: $(TAG_DOCKER_ARCHS)
$(TAG_DOCKER_ARCHS): common-docker-tag-latest-%:
	docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest"

.PHONY: common-docker-manifest
common-docker-manifest:
	DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(DOCKER_IMAGE_TAG))
	DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)"

.PHONY: promu
promu: $(PROMU)

$(PROMU):
	curl -s -L $(PROMU_URL) | tar -xvz -C /tmp
	mkdir -v -p $(FIRST_GOPATH)/bin
	cp -v /tmp/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(PROMU)
	$(eval PROMU_TMP := $(shell mktemp -d))
	curl -s -L $(PROMU_URL) | tar -xvzf - -C $(PROMU_TMP)
	mkdir -p $(FIRST_GOPATH)/bin
	cp $(PROMU_TMP)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(FIRST_GOPATH)/bin/promu
	rm -r $(PROMU_TMP)

.PHONY: proto
proto:
	@echo ">> generating code from proto files"
	@./scripts/genproto.sh

.PHONY: $(STATICCHECK)
$(STATICCHECK):
ifdef GO111MODULE
	# Get staticcheck from a temporary directory to avoid modifying the local go.{mod,sum}.
	# See https://github.com/golang/go/issues/27643.
	# For now, we are using the next branch of staticcheck because master isn't compatible yet with Go modules.
	tmpModule=$$(mktemp -d 2>&1) && \
	mkdir -p $${tmpModule}/staticcheck && \
	cd "$${tmpModule}"/staticcheck && \
	GO111MODULE=on $(GO) mod init example.com/staticcheck && \
	GO111MODULE=on GOOS= GOARCH= $(GO) get -u honnef.co/go/tools/cmd/staticcheck@next && \
	rm -rf $${tmpModule};
else
	GOOS= GOARCH= GO111MODULE=off $(GO) get -u honnef.co/go/tools/cmd/staticcheck
ifdef GOLANGCI_LINT
$(GOLANGCI_LINT):
	mkdir -p $(FIRST_GOPATH)/bin
	curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/$(GOLANGCI_LINT_VERSION)/install.sh \
		| sed -e '/install -d/d' \
		| sh -s -- -b $(FIRST_GOPATH)/bin $(GOLANGCI_LINT_VERSION)
endif

ifdef GOVENDOR

@@ -212,9 +265,8 @@ precheck::
define PRECHECK_COMMAND_template =
precheck:: $(1)_precheck

PRECHECK_COMMAND_$(1) ?= $(1) $$(strip $$(PRECHECK_OPTIONS_$(1)))
.PHONE: $(1)_precheck
.PHONY: $(1)_precheck
$(1)_precheck:
	@if ! $$(PRECHECK_COMMAND_$(1)) 1>/dev/null 2>&1; then \
		echo "Execution of '$$(PRECHECK_COMMAND_$(1))' command failed. Is $(1) installed?"; \

NOTICE (8 lines changed)
@@ -2,13 +2,13 @@ The Prometheus systems and service monitoring server
Copyright 2012-2015 The Prometheus Authors

This product includes software developed at
SoundCloud Ltd. (http://soundcloud.com/).
SoundCloud Ltd. (https://soundcloud.com/).


The following components are included in this product:

Bootstrap
http://getbootstrap.com
https://getbootstrap.com
Copyright 2011-2014 Twitter, Inc.
Licensed under the MIT License

@@ -52,7 +52,7 @@ Copyright jQuery Foundation and other contributors
Licensed under the MIT License

Protocol Buffers for Go with Gadgets
http://github.com/gogo/protobuf/
https://github.com/gogo/protobuf/
Copyright (c) 2013, The GoGo Authors.
See source code for license details.

@@ -67,7 +67,7 @@ Copyright 2013 Matt T. Proud
Licensed under the Apache License, Version 2.0

DNS library in Go
http://miek.nl/posts/2014/Aug/16/go-dns-package/
https://miek.nl/2014/august/16/go-dns-package/
Copyright 2009 The Go Authors, 2011 Miek Gieben
See https://github.com/miekg/dns/blob/master/LICENSE for license details.

@@ -1,5 +1,6 @@
# Prometheus [Travis CI badge][travis]
# Prometheus

[Travis CI badge][travis]
[CircleCI badge][circleci]
[Quay badge][quay]
[Docker Hub badge][hub]

@@ -57,7 +58,7 @@ Prometheus will now be reachable at http://localhost:9090/.
### Building from source

To build Prometheus from the source code yourself you need to have a working
Go environment with [version 1.11 or greater installed](http://golang.org/doc/install).
Go environment with [version 1.12 or greater installed](https://golang.org/doc/install).

You can directly use the `go` tool to download and install the `prometheus`
and `promtool` binaries into your `GOPATH`:

@@ -86,7 +87,7 @@ The Makefile provides several targets:

## More information

* The source code is periodically indexed: [Prometheus Core](http://godoc.org/github.com/prometheus/prometheus).
* The source code is periodically indexed: [Prometheus Core](https://godoc.org/github.com/prometheus/prometheus).
* You will find a Travis CI configuration in `.travis.yml`.
* See the [Community page](https://prometheus.io/community) for how to reach the Prometheus developers and users on various communication channels.

RELEASE.md (35 lines changed)
@@ -1,28 +1,35 @@
# Releases

This page describes the release process and the currently planned schedule for upcoming releases as well as the respective release schepherds. Release shepards are chosen on a voluntary basis.
This page describes the release process and the currently planned schedule for upcoming releases as well as the respective release shepherd. Release shepherds are chosen on a voluntary basis.

## Release schedule

Release cadence of first pre-releases being cut is 6 weeks.

| release series | date of first pre-release (year-month-day) | release shepard |
| release series | date of first pre-release (year-month-day) | release shepherd |
|----------------|--------------------------------------------|---------------------------------------------|
| v2.4           | 2018-09-06                                 | Goutham Veeramachaneni (GitHub: @gouthamve) |
| v2.5           | 2018-10-24                                 | Frederic Branczyk (GitHub: @brancz)         |
| v2.6           | 2018-12-05                                 | Simon Pasquier (GitHub: @simonpasquier)     |
| v2.7           | 2019-01-16                                 | **searching for volunteer**                 |
| v2.7           | 2019-01-16                                 | Goutham Veeramachaneni (GitHub: @gouthamve) |
| v2.8           | 2019-02-27                                 | Ganesh Vernekar (GitHub: @codesome)         |
| v2.9           | 2019-04-10                                 | Brian Brazil (GitHub: @brian-brazil)        |
| v2.10          | 2019-05-22                                 | Björn Rabenstein (GitHub: @beorn7)          |
| v2.11          | 2019-07-03                                 | Frederic Branczyk (GitHub: @brancz)         |
| v2.12          | 2019-08-14                                 | Julius Volz (GitHub: @juliusv)              |
| v2.13          | 2019-09-25                                 | Krasi Georgiev (GitHub: @krasi-georgiev)    |
| v2.14          | 2019-11-06                                 | **searching for volunteer**                 |

If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.

## Release shepard responsibilities
## Release shepherd responsibilities

The release shepard is responsible for the entire release series of a minor release, meaning all pre- and patch releases of a minor release. The process starts with the initial pre-release.
The release shepherd is responsible for the entire release series of a minor release, meaning all pre- and patch releases of a minor release. The process formally starts with the initial pre-release, but some preparations should be done a few days in advance.

* The first pre-release is scheduled according to the above schedule.
* With the pre-release the release shepard is responsible for running and monitoring a benchmark run of the pre-release for 3 days, after which, if successful, the pre-release is promoted to a stable release.
* Once a pre-release has been released, the `master` branch of the repository is frozen for any feature work, only critical bug fix work concerning the minor release is merged.
* Pre-releases are done from `master`, after pre-releases are promoted to the stable release a `release-major.minor` branch is created.
* We aim to keep the master branch in a working state at all times. In principle, it should be possible to cut a release from master at any time. In practice, things might not work out as nicely. A few days before the pre-release is scheduled, the shepherd should check the state of master. Following their best judgement, the shepherd should try to expedite bug fixes that are still in progress but should make it into the release. On the other hand, the shepherd may hold back merging last-minute invasive and risky changes that are better suited for the next minor release.
* On the date listed in the table above, the release shepherd cuts the first pre-release (using the suffix `-rc.0`) and creates a new branch called `release-<major>.<minor>` starting at the commit tagged for the pre-release. In general, a pre-release is considered a release candidate (that's what `rc` stands for) and should therefore not contain any known bugs that are planned to be fixed in the final release.
* With the pre-release, the release shepherd is responsible for running and monitoring a benchmark run of the pre-release for 3 days, after which, if successful, the pre-release is promoted to a stable release.
* If regressions or critical bugs are detected, they need to get fixed before cutting a new pre-release (called `-rc.1`, `-rc.2`, etc.).

See the next section for details on cutting an individual release.

@@ -32,13 +39,13 @@ These instructions are currently valid for the Prometheus server, i.e. the [prom

### Branch management and versioning strategy

We use [Semantic Versioning](http://semver.org/).
We use [Semantic Versioning](https://semver.org/).

We maintain a separate branch for each minor release, named `release-<major>.<minor>`, e.g. `release-1.1`, `release-2.0`.

The usual flow is to merge new features and changes into the master branch and to merge bug fixes into the latest release branch. Bug fixes are then merged into master from the latest release branch. The master branch should always contain all commits from the latest release branch. Whether merging master back into a release branch makes more sense is left up to the shepard's judgement.
The usual flow is to merge new features and changes into the master branch and to merge bug fixes into the latest release branch. Bug fixes are then merged into master from the latest release branch. The master branch should always contain all commits from the latest release branch. As long as master hasn't deviated from the release branch, new commits can also go to master, followed by merging master back into the release branch.
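
As a sketch of that flow (branch and topic names are illustrative):

```sh
git checkout release-2.10   # land the bug fix on the latest release branch
git merge --no-ff my-bugfix
git checkout master         # then merge the release branch back into master
git merge release-2.10
```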

If a bug fix got accidentally merged into master, cherry-pick commits have to be created in the latest release branch, which then have to be merged back into master. Try to avoid that situation.
If a bug fix got accidentally merged into master after non-bug-fix changes in master, the bug-fix commits have to be cherry-picked into the release branch, which then have to be merged back into master. Try to avoid that situation.

Maintaining the release branches for older minor releases happens on a best effort basis.

@@ -88,13 +95,13 @@ If the release has happened in the latest release branch, merge the changes into

To update the docs, a PR needs to be created to `prometheus/docs`. See [this PR](https://github.com/prometheus/docs/pull/952/files) for inspiration.

Once the binaries have been uploaded, announce the release on `prometheus-users@googlegroups.com`. Start the subject with `[ANN]`. Check out previous announcement mails for inspiration.
Once the binaries have been uploaded, announce the release on `prometheus-announce@googlegroups.com`. (Please do not use `prometheus-users@googlegroups.com` for announcements anymore.) Check out previous announcement mails for inspiration.

### Pre-releases

The following changes to the above procedures apply:

* In line with [Semantic Versioning](http://semver.org/), append something like `-rc.0` to the version (with the corresponding changes to the tag name, the release name etc.).
* In line with [Semantic Versioning](https://semver.org/), append something like `-rc.0` to the version (with the corresponding changes to the tag name, the release name etc.).
* Tick the _This is a pre-release_ box when drafting the release in the Github UI.
* Still update `CHANGELOG.md`, but when you cut the final release later, merge all the changes from the pre-releases into the one final update.

@@ -26,6 +26,7 @@ import (
	"os"
	"os/signal"
	"path/filepath"
	"regexp"
	"runtime"
	"strings"
	"sync"

@@ -34,22 +35,23 @@ import (

	"github.com/go-kit/kit/log"
	"github.com/go-kit/kit/log/level"
	"github.com/oklog/oklog/pkg/group"
	conntrack "github.com/mwitkow/go-conntrack"
	"github.com/oklog/run"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"
	"github.com/prometheus/common/version"
	prom_runtime "github.com/prometheus/prometheus/pkg/runtime"
	"gopkg.in/alecthomas/kingpin.v2"
	k8s_runtime "k8s.io/apimachinery/pkg/util/runtime"

	"github.com/mwitkow/go-conntrack"
	"github.com/prometheus/common/promlog"
	"github.com/prometheus/common/version"
	kingpin "gopkg.in/alecthomas/kingpin.v2"
	"k8s.io/klog"

	promlogflag "github.com/prometheus/common/promlog/flag"
	"github.com/prometheus/prometheus/config"
	"github.com/prometheus/prometheus/discovery"
	sd_config "github.com/prometheus/prometheus/discovery/config"
	"github.com/prometheus/prometheus/notifier"
	"github.com/prometheus/prometheus/pkg/relabel"
	prom_runtime "github.com/prometheus/prometheus/pkg/runtime"
	"github.com/prometheus/prometheus/promql"
	"github.com/prometheus/prometheus/rules"
	"github.com/prometheus/prometheus/scrape"

@@ -69,10 +71,19 @@ var (
		Name: "prometheus_config_last_reload_success_timestamp_seconds",
		Help: "Timestamp of the last successful configuration reload.",
	})

	defaultRetentionString   = "15d"
	defaultRetentionDuration model.Duration
)

func init() {
	prometheus.MustRegister(version.NewCollector("prometheus"))

	var err error
	defaultRetentionDuration, err = model.ParseDuration(defaultRetentionString)
	if err != nil {
		panic(err)
	}
}

func main() {

@@ -81,6 +92,11 @@ func main() {
		runtime.SetMutexProfileFraction(20)
	}

	var (
		oldFlagRetentionDuration model.Duration
		newFlagRetentionDuration model.Duration
	)

	cfg := struct {
		configFile string

@ -99,13 +115,15 @@ func main() {
|
|||
queryMaxSamples int
|
||||
RemoteFlushDeadline model.Duration
|
||||
|
||||
prometheusURL string
|
||||
prometheusURL string
|
||||
corsRegexString string
|
||||
|
||||
logLevel promlog.AllowedLevel
|
||||
promlogConfig promlog.Config
|
||||
}{
|
||||
notifier: notifier.Options{
|
||||
Registerer: prometheus.DefaultRegisterer,
|
||||
},
|
||||
promlogConfig: promlog.Config{},
|
||||
}
|
||||
|
||||
a := kingpin.New(filepath.Base(os.Args[0]), "The Prometheus monitoring server")
|
||||
|
@ -150,6 +168,12 @@ func main() {
|
|||
a.Flag("web.console.libraries", "Path to the console library directory.").
|
||||
Default("console_libraries").StringVar(&cfg.web.ConsoleLibrariesPath)
|
||||
|
||||
a.Flag("web.page-title", "Document title of Prometheus instance.").
|
||||
Default("Prometheus Time Series Collection and Processing Server").StringVar(&cfg.web.PageTitle)
|
||||
|
||||
a.Flag("web.cors.origin", `Regex for CORS origin. It is fully anchored. Example: 'https?://(domain1|domain2)\.com'`).
|
||||
Default(".*").StringVar(&cfg.corsRegexString)
|
||||
|
||||
a.Flag("storage.tsdb.path", "Base path for metrics storage.").
|
||||
Default("data/").StringVar(&cfg.localStoragePath)
|
||||
|
||||
|
@ -157,15 +181,28 @@ func main() {
|
|||
Hidden().Default("2h").SetValue(&cfg.tsdb.MinBlockDuration)
|
||||
|
||||
a.Flag("storage.tsdb.max-block-duration",
|
||||
"Maximum duration compacted blocks may span. For use in testing. (Defaults to 10% of the retention period).").
|
||||
"Maximum duration compacted blocks may span. For use in testing. (Defaults to 10% of the retention period.)").
|
||||
Hidden().PlaceHolder("<duration>").SetValue(&cfg.tsdb.MaxBlockDuration)
|
||||
|
||||
a.Flag("storage.tsdb.retention", "How long to retain samples in storage.").
|
||||
Default("15d").SetValue(&cfg.tsdb.Retention)
|
||||
a.Flag("storage.tsdb.wal-segment-size",
|
||||
"Size at which to split the tsdb WAL segment files. Example: 100MB").
|
||||
Hidden().PlaceHolder("<bytes>").BytesVar(&cfg.tsdb.WALSegmentSize)
|
||||
|
||||
a.Flag("storage.tsdb.retention", "[DEPRECATED] How long to retain samples in storage. This flag has been deprecated, use \"storage.tsdb.retention.time\" instead.").
|
||||
SetValue(&oldFlagRetentionDuration)
|
||||
|
||||
a.Flag("storage.tsdb.retention.time", "How long to retain samples in storage. When this flag is set it overrides \"storage.tsdb.retention\". If neither this flag nor \"storage.tsdb.retention\" nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+".").
|
||||
SetValue(&newFlagRetentionDuration)
|
||||
|
||||
a.Flag("storage.tsdb.retention.size", "[EXPERIMENTAL] Maximum number of bytes that can be stored for blocks. Units supported: KB, MB, GB, TB, PB. This flag is experimental and can be changed in future releases.").
|
||||
BytesVar(&cfg.tsdb.MaxBytes)
|
||||
|
||||
a.Flag("storage.tsdb.no-lockfile", "Do not create lockfile in data directory.").
|
||||
Default("false").BoolVar(&cfg.tsdb.NoLockfile)
|
||||
|
||||
a.Flag("storage.tsdb.allow-overlapping-blocks", "[EXPERIMENTAL] Allow overlapping blocks, which in turn enables vertical compaction and vertical query merge.").
|
||||
Default("false").BoolVar(&cfg.tsdb.AllowOverlappingBlocks)
|
||||
|
||||
a.Flag("storage.remote.flush-deadline", "How long to wait flushing sample on shutdown or config reload.").
|
||||
Default("1m").PlaceHolder("<duration>").SetValue(&cfg.RemoteFlushDeadline)
|
||||
|
||||
|
@ -175,10 +212,10 @@ func main() {
|
|||
a.Flag("storage.remote.read-concurrent-limit", "Maximum number of concurrent remote read calls. 0 means no limit.").
|
||||
Default("10").IntVar(&cfg.web.RemoteReadConcurrencyLimit)
|
||||
|
||||
a.Flag("rules.alert.for-outage-tolerance", "Max time to tolerate prometheus outage for restoring 'for' state of alert.").
|
||||
a.Flag("rules.alert.for-outage-tolerance", "Max time to tolerate prometheus outage for restoring \"for\" state of alert.").
|
||||
Default("1h").SetValue(&cfg.outageTolerance)
|
||||
|
||||
a.Flag("rules.alert.for-grace-period", "Minimum duration between alert and restored 'for' state. This is maintained only for alerts with configured 'for' time greater than grace period.").
|
||||
a.Flag("rules.alert.for-grace-period", "Minimum duration between alert and restored \"for\" state. This is maintained only for alerts with configured \"for\" time greater than grace period.").
|
||||
Default("10m").SetValue(&cfg.forGracePeriod)
|
||||
|
||||
a.Flag("rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager.").
|
||||
|
@ -190,7 +227,7 @@ func main() {
|
|||
a.Flag("alertmanager.timeout", "Timeout for sending alerts to Alertmanager.").
|
||||
Default("10s").SetValue(&cfg.notifierTimeout)
|
||||
|
||||
a.Flag("query.lookback-delta", "The delta difference allowed for retrieving metrics during expression evaluations.").
|
||||
a.Flag("query.lookback-delta", "The maximum lookback duration for retrieving metrics during expression evaluations.").
|
||||
Default("5m").SetValue(&cfg.lookbackDelta)
|
||||
|
||||
a.Flag("query.timeout", "Maximum time a query may take before being aborted.").
|
||||
|
@ -198,10 +235,11 @@ func main() {
|
|||
|
||||
a.Flag("query.max-concurrency", "Maximum number of queries executed concurrently.").
|
||||
Default("20").IntVar(&cfg.queryConcurrency)
|
||||
a.Flag("query.max-samples", "Maximum number of samples a single query can load into memory. Note that queries will fail if they would load more samples than this into memory, so this also limits the number of samples a query can return.").
|
||||
|
||||
a.Flag("query.max-samples", "Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return.").
|
||||
Default("50000000").IntVar(&cfg.queryMaxSamples)
|
||||
|
||||
promlogflag.AddFlags(a, &cfg.logLevel)
|
||||
promlogflag.AddFlags(a, &cfg.promlogConfig)
|
||||
|
||||
_, err := a.Parse(os.Args[1:])
|
||||
if err != nil {
|
||||
|
@ -210,12 +248,20 @@ func main() {
|
|||
os.Exit(2)
|
||||
}
|
||||
|
||||
logger := promlog.New(&cfg.promlogConfig)
|
||||
|
||||
cfg.web.ExternalURL, err = computeExternalURL(cfg.prometheusURL, cfg.web.ListenAddress)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, errors.Wrapf(err, "parse external URL %q", cfg.prometheusURL))
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
cfg.web.CORSOrigin, err = compileCORSRegexString(cfg.corsRegexString)
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, errors.Wrapf(err, "could not compile CORS regex string %q", cfg.corsRegexString))
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
cfg.web.ReadTimeout = time.Duration(cfg.webTimeout)
|
||||
// Default -web.route-prefix to path of -web.external-url.
|
||||
if cfg.web.RoutePrefix == "" {
|
||||
|
@ -224,22 +270,54 @@ func main() {
|
|||
// RoutePrefix must always be at least '/'.
|
||||
cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/")
|
||||
|
||||
if cfg.tsdb.MaxBlockDuration == 0 {
|
||||
cfg.tsdb.MaxBlockDuration = cfg.tsdb.Retention / 10
|
||||
{ // Time retention settings.
|
||||
if oldFlagRetentionDuration != 0 {
|
||||
level.Warn(logger).Log("deprecation_notice", "'storage.tsdb.retention' flag is deprecated use 'storage.tsdb.retention.time' instead.")
|
||||
cfg.tsdb.RetentionDuration = oldFlagRetentionDuration
|
||||
}
|
||||
|
||||
// When the new flag is set it takes precedence.
|
||||
if newFlagRetentionDuration != 0 {
|
||||
cfg.tsdb.RetentionDuration = newFlagRetentionDuration
|
||||
}
|
||||
|
||||
if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 {
|
||||
cfg.tsdb.RetentionDuration = defaultRetentionDuration
|
||||
level.Info(logger).Log("msg", "no time or size retention was set so using the default time retention", "duration", defaultRetentionDuration)
|
||||
}
|
||||
|
||||
// Check for overflows. This limits our max retention to 100y.
|
||||
if cfg.tsdb.RetentionDuration < 0 {
|
||||
y, err := model.ParseDuration("100y")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
cfg.tsdb.RetentionDuration = y
|
||||
level.Warn(logger).Log("msg", "time retention value is too high. Limiting to: "+y.String())
|
||||
}
|
||||
}
|
||||
|
||||
{ // Max block size settings.
|
||||
if cfg.tsdb.MaxBlockDuration == 0 {
|
||||
maxBlockDuration, err := model.ParseDuration("31d")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
// When the time retention is set and not too big use to define the max block duration.
|
||||
if cfg.tsdb.RetentionDuration != 0 && cfg.tsdb.RetentionDuration/10 < maxBlockDuration {
|
||||
maxBlockDuration = cfg.tsdb.RetentionDuration / 10
|
||||
}
|
||||
|
||||
cfg.tsdb.MaxBlockDuration = maxBlockDuration
|
||||
}
|
||||
}
|
||||
|
||||
promql.LookbackDelta = time.Duration(cfg.lookbackDelta)
|
||||
promql.SetDefaultEvaluationInterval(time.Duration(config.DefaultGlobalConfig.EvaluationInterval))
|
||||
|
||||
logger := promlog.New(cfg.logLevel)
|
||||
|
||||
// XXX(fabxc): Kubernetes does background logging which we can only customize by modifying
|
||||
// a global variable.
|
||||
// Ultimately, here is the best place to set it.
|
||||
k8s_runtime.ErrorHandlers = []func(error){
|
||||
func(err error) {
|
||||
level.Error(log.With(logger, "component", "k8s_client_runtime")).Log("err", err)
|
||||
},
|
||||
}
|
||||
// Above level 6, the k8s client would log bearer tokens in clear-text.
|
||||
klog.ClampLevel(6)
|
||||
klog.SetLogger(log.With(logger, "component", "k8s_client_runtime"))
|
||||
|
||||
level.Info(logger).Log("msg", "Starting Prometheus", "version", version.Info())
|
||||
level.Info(logger).Log("build_context", version.BuildContext())
|
||||
|
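The retention block above encodes a small precedence rule: the deprecated flag is still honored, the new flag overrides it, and the 15d default only applies when neither time nor size retention was set at all. A minimal standalone sketch of just that rule (a hypothetical helper, not part of the diff; the int64 parameters stand in for the parsed flag values):

```go
package main

import "fmt"

// pickRetention mirrors the precedence in main(): deprecated flag first,
// then the new flag overrides it, then a default if nothing was set at all.
func pickRetention(oldFlag, newFlag, maxBytes, def int64) int64 {
	retention := int64(0)
	if oldFlag != 0 {
		retention = oldFlag // deprecated storage.tsdb.retention
	}
	if newFlag != 0 {
		retention = newFlag // storage.tsdb.retention.time wins
	}
	if retention == 0 && maxBytes == 0 {
		retention = def // neither time nor size retention set
	}
	return retention
}

func main() {
	const day = int64(24 * 60 * 60)
	fmt.Println(pickRetention(0, 0, 0, 15*day))          // default 15d
	fmt.Println(pickRetention(30*day, 0, 0, 15*day))     // deprecated flag honored
	fmt.Println(pickRetention(30*day, 7*day, 0, 15*day)) // new flag overrides
	fmt.Println(pickRetention(0, 0, 1<<30, 15*day))      // size-only: no time default
}
```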
@@ -249,7 +327,7 @@ func main() {

     var (
         localStorage  = &tsdb.ReadyStorage{}
-        remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), localStorage.StartTime, time.Duration(cfg.RemoteFlushDeadline))
+        remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, cfg.localStoragePath, time.Duration(cfg.RemoteFlushDeadline))
         fanoutStorage = storage.NewFanout(logger, localStorage, remoteStorage)
     )
@@ -257,7 +335,7 @@ func main() {
         ctxWeb, cancelWeb = context.WithCancel(context.Background())
         ctxRule           = context.Background()

-        notifier = notifier.NewManager(&cfg.notifier, log.With(logger, "component", "notifier"))
+        notifierManager = notifier.NewManager(&cfg.notifier, log.With(logger, "component", "notifier"))

         ctxScrape, cancelScrape = context.WithCancel(context.Background())
         discoveryManagerScrape  = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), discovery.Name("scrape"))
@@ -280,7 +358,7 @@ func main() {
         Appendable:  fanoutStorage,
         TSDB:        localStorage,
         QueryFunc:   rules.EngineQueryFunc(queryEngine, fanoutStorage),
-        NotifyFunc:  sendAlerts(notifier, cfg.web.ExternalURL.String()),
+        NotifyFunc:  sendAlerts(notifierManager, cfg.web.ExternalURL.String()),
         Context:     ctxRule,
         ExternalURL: cfg.web.ExternalURL,
         Registerer:  prometheus.DefaultRegisterer,
@@ -297,7 +375,8 @@ func main() {
     cfg.web.QueryEngine = queryEngine
     cfg.web.ScrapeManager = scrapeManager
     cfg.web.RuleManager = ruleManager
-    cfg.web.Notifier = notifier
+    cfg.web.Notifier = notifierManager
+    cfg.web.TSDBCfg = cfg.tsdb

     cfg.web.Version = &web.PrometheusVersion{
         Version: version.Version,
@@ -333,7 +412,6 @@ func main() {
         webHandler.ApplyConfig,
         // The Scrape and notifier managers need to reload before the Discovery manager as
         // they need to read the most updated config when receiving the new targets list.
-        notifier.ApplyConfig,
         scrapeManager.ApplyConfig,
         func(cfg *config.Config) error {
             c := make(map[string]sd_config.ServiceDiscoveryConfig)
@@ -342,6 +420,7 @@ func main() {
             }
             return discoveryManagerScrape.ApplyConfig(c)
         },
+        notifierManager.ApplyConfig,
         func(cfg *config.Config) error {
             c := make(map[string]sd_config.ServiceDiscoveryConfig)
             for _, v := range cfg.AlertingConfig.AlertmanagerConfigs {
@@ -355,17 +434,21 @@ func main() {
             return discoveryManagerNotify.ApplyConfig(c)
         },
         func(cfg *config.Config) error {
-            // Get all rule files matching the configuration oaths.
+            // Get all rule files matching the configuration paths.
             var files []string
             for _, pat := range cfg.RuleFiles {
                 fs, err := filepath.Glob(pat)
                 if err != nil {
                     // The only error can be a bad pattern.
-                    return fmt.Errorf("error retrieving rule files for %s: %s", pat, err)
+                    return errors.Wrapf(err, "error retrieving rule files for %s", pat)
                 }
                 files = append(files, fs...)
             }
-            return ruleManager.Update(time.Duration(cfg.GlobalConfig.EvaluationInterval), files)
+            return ruleManager.Update(
+                time.Duration(cfg.GlobalConfig.EvaluationInterval),
+                files,
+                cfg.GlobalConfig.ExternalLabels,
+            )
         },
     }
@@ -392,7 +475,7 @@ func main() {
         })
     }

-    var g group.Group
+    var g run.Group
     {
         // Termination handler.
         term := make(chan os.Signal, 1)
@@ -522,7 +605,7 @@ func main() {
                }

                if err := reloadConfig(cfg.configFile, logger, reloaders...); err != nil {
-                    return fmt.Errorf("error loading config from %q: %s", cfg.configFile, err)
+                    return errors.Wrapf(err, "error loading config from %q", cfg.configFile)
                }

                reloadReady.Close()
@@ -560,6 +643,11 @@ func main() {
        g.Add(
            func() error {
                level.Info(logger).Log("msg", "Starting TSDB ...")
+                if cfg.tsdb.WALSegmentSize != 0 {
+                    if cfg.tsdb.WALSegmentSize < 10*1024*1024 || cfg.tsdb.WALSegmentSize > 256*1024*1024 {
+                        return errors.New("flag 'storage.tsdb.wal-segment-size' must be set between 10MB and 256MB")
+                    }
+                }
                db, err := tsdb.Open(
                    cfg.localStoragePath,
                    log.With(logger, "component", "tsdb"),
@@ -567,9 +655,19 @@ func main() {
                    &cfg.tsdb,
                )
                if err != nil {
-                    return fmt.Errorf("opening storage failed: %s", err)
+                    return errors.Wrapf(err, "opening storage failed")
                }
+                level.Info(logger).Log("fs_type", prom_runtime.Statfs(cfg.localStoragePath))
                level.Info(logger).Log("msg", "TSDB started")
+                level.Debug(logger).Log("msg", "TSDB options",
+                    "MinBlockDuration", cfg.tsdb.MinBlockDuration,
+                    "MaxBlockDuration", cfg.tsdb.MaxBlockDuration,
+                    "MaxBytes", cfg.tsdb.MaxBytes,
+                    "NoLockfile", cfg.tsdb.NoLockfile,
+                    "RetentionDuration", cfg.tsdb.RetentionDuration,
+                    "WALSegmentSize", cfg.tsdb.WALSegmentSize,
+                    "AllowOverlappingBlocks", cfg.tsdb.AllowOverlappingBlocks,
+                )

                startTimeMargin := int64(2 * time.Duration(cfg.tsdb.MinBlockDuration).Seconds() * 1000)
                localStorage.Set(db, startTimeMargin)
@@ -590,7 +688,7 @@ func main() {
        g.Add(
            func() error {
                if err := webHandler.Run(ctxWeb); err != nil {
-                    return fmt.Errorf("error starting web server: %s", err)
+                    return errors.Wrapf(err, "error starting web server")
                }
                return nil
            },
@@ -612,12 +710,12 @@ func main() {
                // so we wait until the config is fully loaded.
                <-reloadReady.C

-                notifier.Run(discoveryManagerNotify.SyncCh())
+                notifierManager.Run(discoveryManagerNotify.SyncCh())
                level.Info(logger).Log("msg", "Notifier manager stopped")
                return nil
            },
            func(err error) {
-                notifier.Stop()
+                notifierManager.Stop()
            },
        )
    }
@@ -642,7 +740,7 @@ func reloadConfig(filename string, logger log.Logger, rls ...func(*config.Config

    conf, err := config.LoadFile(filename)
    if err != nil {
-        return fmt.Errorf("couldn't load configuration (--config.file=%q): %v", filename, err)
+        return errors.Wrapf(err, "couldn't load configuration (--config.file=%q)", filename)
    }

    failed := false
@@ -653,8 +751,10 @@ func reloadConfig(filename string, logger log.Logger, rls ...func(*config.Config
        }
    }
    if failed {
-        return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename)
+        return errors.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename)
    }

+    promql.SetDefaultEvaluationInterval(time.Duration(conf.GlobalConfig.EvaluationInterval))
    level.Info(logger).Log("msg", "Completed loading of configuration file", "filename", filename)
    return nil
 }
@@ -664,6 +764,15 @@ func startsOrEndsWithQuote(s string) bool {
        strings.HasSuffix(s, "\"") || strings.HasSuffix(s, "'")
 }

+// compileCORSRegexString compiles given string and adds anchors
+func compileCORSRegexString(s string) (*regexp.Regexp, error) {
+    r, err := relabel.NewRegexp(s)
+    if err != nil {
+        return nil, err
+    }
+    return r.Regexp, nil
+}
+
 // computeExternalURL computes a sanitized external URL from a raw input. It infers unset
 // URL parts from the OS and the given listen address.
 func computeExternalURL(u, listenAddr string) (*url.URL, error) {
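For context on `compileCORSRegexString`: going through the relabel package anchors the user-supplied pattern, so `--web.cors.origin` is matched against the whole `Origin` header value rather than any substring. A rough standard-library-only equivalent of that anchoring (a sketch, not the actual code path):

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Wrapping in ^(?:...)$ forces a whole-string match, like the
	// fully anchored CORS regex described in the flag help.
	pattern := `https?://(domain1|domain2)\.com`
	re := regexp.MustCompile("^(?:" + pattern + ")$")

	fmt.Println(re.MatchString("https://domain1.com"))        // true
	fmt.Println(re.MatchString("https://evil-domain1.com.x")) // false: no substring match
}
```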
@@ -680,7 +789,7 @@ func computeExternalURL(u, listenAddr string) (*url.URL, error) {
    }

    if startsOrEndsWithQuote(u) {
-        return nil, fmt.Errorf("URL must not begin or end with quotes")
+        return nil, errors.New("URL must not begin or end with quotes")
    }

    eu, err := url.Parse(u)
@@ -697,8 +806,12 @@ func computeExternalURL(u, listenAddr string) (*url.URL, error) {
    return eu, nil
 }

+type sender interface {
+    Send(alerts ...*notifier.Alert)
+}
+
 // sendAlerts implements the rules.NotifyFunc for a Notifier.
-func sendAlerts(n *notifier.Manager, externalURL string) rules.NotifyFunc {
+func sendAlerts(s sender, externalURL string) rules.NotifyFunc {
    return func(ctx context.Context, expr string, alerts ...*rules.Alert) {
        var res []*notifier.Alert
@@ -718,7 +831,7 @@ func sendAlerts(n *notifier.Manager, externalURL string) rules.NotifyFunc {
        }

        if len(alerts) > 0 {
-            n.Send(res...)
+            s.Send(res...)
        }
    }
 }
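The switch from a concrete `*notifier.Manager` to the one-method `sender` interface is what enables the `senderFunc` test double in the test file below. The pattern in isolation (a generic sketch, not Prometheus code):

```go
package main

import "fmt"

type alert struct{ name string }

// sender is the narrow interface the function depends on.
type sender interface {
	Send(alerts ...alert)
}

// senderFunc adapts a plain function to the interface, like http.HandlerFunc.
type senderFunc func(alerts ...alert)

func (s senderFunc) Send(alerts ...alert) { s(alerts...) }

func notifyAll(s sender, alerts ...alert) {
	if len(alerts) > 0 {
		s.Send(alerts...)
	}
}

func main() {
	// In a test, the fake records or asserts instead of doing real I/O.
	fake := senderFunc(func(alerts ...alert) {
		fmt.Println("got", len(alerts), "alerts")
	})
	notifyAll(fake, alert{"HighLoad"})
}
```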
cmd/prometheus/main_test.go
@@ -14,6 +14,7 @@
 package main

 import (
+    "context"
     "flag"
     "fmt"
     "net/http"
@@ -24,6 +25,9 @@ import (
     "testing"
     "time"

+    "github.com/prometheus/prometheus/notifier"
+    "github.com/prometheus/prometheus/pkg/labels"
+    "github.com/prometheus/prometheus/rules"
     "github.com/prometheus/prometheus/util/testutil"
 )
@@ -159,6 +163,10 @@ func TestComputeExternalURL(t *testing.T) {

 // Let's provide an invalid configuration file and verify the exit status indicates the error.
 func TestFailedStartupExitCode(t *testing.T) {
+    if testing.Short() {
+        t.Skip("skipping test in short mode.")
+    }
+
     fakeInputFile := "fake-input-file"
     expectedExitStatus := 1
@@ -173,3 +181,106 @@ func TestFailedStartupExitCode(t *testing.T) {
        t.Errorf("unable to retrieve the exit status for prometheus: %v", err)
    }
 }
+
+type senderFunc func(alerts ...*notifier.Alert)
+
+func (s senderFunc) Send(alerts ...*notifier.Alert) {
+    s(alerts...)
+}
+
+func TestSendAlerts(t *testing.T) {
+    testCases := []struct {
+        in  []*rules.Alert
+        exp []*notifier.Alert
+    }{
+        {
+            in: []*rules.Alert{
+                {
+                    Labels:      []labels.Label{{Name: "l1", Value: "v1"}},
+                    Annotations: []labels.Label{{Name: "a2", Value: "v2"}},
+                    ActiveAt:    time.Unix(1, 0),
+                    FiredAt:     time.Unix(2, 0),
+                    ValidUntil:  time.Unix(3, 0),
+                },
+            },
+            exp: []*notifier.Alert{
+                {
+                    Labels:       []labels.Label{{Name: "l1", Value: "v1"}},
+                    Annotations:  []labels.Label{{Name: "a2", Value: "v2"}},
+                    StartsAt:     time.Unix(2, 0),
+                    EndsAt:       time.Unix(3, 0),
+                    GeneratorURL: "http://localhost:9090/graph?g0.expr=up&g0.tab=1",
+                },
+            },
+        },
+        {
+            in: []*rules.Alert{
+                {
+                    Labels:      []labels.Label{{Name: "l1", Value: "v1"}},
+                    Annotations: []labels.Label{{Name: "a2", Value: "v2"}},
+                    ActiveAt:    time.Unix(1, 0),
+                    FiredAt:     time.Unix(2, 0),
+                    ResolvedAt:  time.Unix(4, 0),
+                },
+            },
+            exp: []*notifier.Alert{
+                {
+                    Labels:       []labels.Label{{Name: "l1", Value: "v1"}},
+                    Annotations:  []labels.Label{{Name: "a2", Value: "v2"}},
+                    StartsAt:     time.Unix(2, 0),
+                    EndsAt:       time.Unix(4, 0),
+                    GeneratorURL: "http://localhost:9090/graph?g0.expr=up&g0.tab=1",
+                },
+            },
+        },
+        {
+            in: []*rules.Alert{},
+        },
+    }
+
+    for i, tc := range testCases {
+        tc := tc
+        t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
+            senderFunc := senderFunc(func(alerts ...*notifier.Alert) {
+                if len(tc.in) == 0 {
+                    t.Fatalf("sender called with 0 alert")
+                }
+                testutil.Equals(t, tc.exp, alerts)
+            })
+            sendAlerts(senderFunc, "http://localhost:9090")(context.TODO(), "up", tc.in...)
+        })
+    }
+}
+
+func TestWALSegmentSizeBounds(t *testing.T) {
+    if testing.Short() {
+        t.Skip("skipping test in short mode.")
+    }
+
+    for size, expectedExitStatus := range map[string]int{"9MB": 1, "257MB": 1, "10": 2, "1GB": 1, "12MB": 0} {
+        prom := exec.Command(promPath, "--storage.tsdb.wal-segment-size="+size, "--config.file="+promConfig)
+        err := prom.Start()
+        testutil.Ok(t, err)
+
+        if expectedExitStatus == 0 {
+            done := make(chan error, 1)
+            go func() { done <- prom.Wait() }()
+            select {
+            case err := <-done:
+                t.Errorf("prometheus should be still running: %v", err)
+            case <-time.After(5 * time.Second):
+                prom.Process.Signal(os.Interrupt)
+            }
+            continue
+        }
+
+        err = prom.Wait()
+        testutil.NotOk(t, err, "")
+        if exitError, ok := err.(*exec.ExitError); ok {
+            status := exitError.Sys().(syscall.WaitStatus)
+            testutil.Equals(t, expectedExitStatus, status.ExitStatus())
+        } else {
+            t.Errorf("unable to retrieve the exit status for prometheus: %v", err)
+        }
+    }
+}
cmd/promtool/archive.go
@@ -16,18 +16,13 @@ package main
 import (
     "archive/tar"
     "compress/gzip"
-    "fmt"
     "os"
+
+    "github.com/pkg/errors"
 )

 const filePerm = 0644

-type archiver interface {
-    write(filename string, b []byte) error
-    close() error
-    filename() string
-}
-
 type tarGzFileWriter struct {
     tarWriter *tar.Writer
     gzWriter  *gzip.Writer
@@ -37,7 +32,7 @@ type tarGzFileWriter struct {
 func newTarGzFileWriter(archiveName string) (*tarGzFileWriter, error) {
     file, err := os.Create(archiveName)
     if err != nil {
-        return nil, fmt.Errorf("error creating archive %q: %s", archiveName, err)
+        return nil, errors.Wrapf(err, "error creating archive %q", archiveName)
     }
     gzw := gzip.NewWriter(file)
     tw := tar.NewWriter(gzw)
@@ -72,7 +67,3 @@ func (w *tarGzFileWriter) write(filename string, b []byte) error {
    }
    return nil
 }
-
-func (w *tarGzFileWriter) filename() string {
-    return w.file.Name()
-}
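archive.go layers a tar writer on top of a gzip writer; the close order (tar, then gzip, then file) matters, otherwise the archive footers are never flushed and the result is truncated. A self-contained sketch of the same layering (illustrative only, not the promtool code):

```go
package main

import (
	"archive/tar"
	"compress/gzip"
	"log"
	"os"
)

func main() {
	f, err := os.Create("debug-example.tar.gz")
	if err != nil {
		log.Fatal(err)
	}
	gzw := gzip.NewWriter(f) // outer layer: gzip
	tw := tar.NewWriter(gzw) // inner layer: tar

	body := []byte("hello")
	// Each entry needs a header carrying its size before the bytes are written.
	hdr := &tar.Header{Name: "hello.txt", Mode: 0644, Size: int64(len(body))}
	if err := tw.WriteHeader(hdr); err != nil {
		log.Fatal(err)
	}
	if _, err := tw.Write(body); err != nil {
		log.Fatal(err)
	}

	// Close innermost first so each layer flushes into the next.
	for _, c := range []interface{ Close() error }{tw, gzw, f} {
		if err := c.Close(); err != nil {
			log.Fatal(err)
		}
	}
}
```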
cmd/promtool/debug.go
@@ -14,112 +14,56 @@
 package main

 import (
-    "bytes"
     "fmt"
+    "io/ioutil"
     "net/http"
-    "os"

-    "github.com/google/pprof/profile"
+    "github.com/pkg/errors"
 )

 type debugWriterConfig struct {
     serverURL      string
     tarballName    string
-    pathToFileName map[string]string
-    postProcess    func(b []byte) ([]byte, error)
+    endPointGroups []endpointsGroup
 }

-type debugWriter struct {
-    archiver
-    httpClient
-    requestToFile map[*http.Request]string
-    postProcess   func(b []byte) ([]byte, error)
-}
-
-func newDebugWriter(cfg debugWriterConfig) (*debugWriter, error) {
-    client, err := newPrometheusHTTPClient(cfg.serverURL)
-    if err != nil {
-        return nil, err
-    }
+func debugWrite(cfg debugWriterConfig) error {
     archiver, err := newTarGzFileWriter(cfg.tarballName)
     if err != nil {
-        return nil, err
+        return errors.Wrap(err, "error creating a new archiver")
     }
-    reqs := make(map[*http.Request]string)
-    for path, filename := range cfg.pathToFileName {
-        req, err := http.NewRequest(http.MethodGet, client.urlJoin(path), nil)
-        if err != nil {
-            return nil, err
-        }
-        reqs[req] = filename
-    }
-    return &debugWriter{
-        archiver,
-        client,
-        reqs,
-        cfg.postProcess,
-    }, nil
-}
-
-func (w *debugWriter) Write() int {
-    for req, filename := range w.requestToFile {
-        _, body, err := w.do(req)
-        if err != nil {
-            fmt.Fprintln(os.Stderr, "error executing HTTP request:", err)
-            return 1
+    for _, endPointGroup := range cfg.endPointGroups {
+        for url, filename := range endPointGroup.urlToFilename {
+            url := cfg.serverURL + url
+            fmt.Println("collecting:", url)
+            res, err := http.Get(url)
+            if err != nil {
+                return errors.Wrap(err, "error executing HTTP request")
+            }
+            body, err := ioutil.ReadAll(res.Body)
+            res.Body.Close()
+            if err != nil {
+                return errors.Wrap(err, "error reading the response body")
+            }
+
+            if endPointGroup.postProcess != nil {
+                body, err = endPointGroup.postProcess(body)
+                if err != nil {
+                    return errors.Wrap(err, "error post-processing HTTP response body")
+                }
+            }
+            if err := archiver.write(filename, body); err != nil {
+                return errors.Wrap(err, "error writing into the archive")
+            }
        }
-
-        buf, err := w.postProcess(body)
-        if err != nil {
-            fmt.Fprintln(os.Stderr, "error post-processing HTTP response body:", err)
-            return 1
-        }
-
-        if err := w.archiver.write(filename, buf); err != nil {
-            fmt.Fprintln(os.Stderr, "error writing into archive:", err)
-            return 1
-        }
    }

-    if err := w.close(); err != nil {
-        fmt.Fprintln(os.Stderr, "error closing archiver:", err)
-        return 1
+    if err := archiver.close(); err != nil {
+        return errors.Wrap(err, "error closing archive writer")
    }

-    fmt.Printf("Compiling debug information complete, all files written in %q.\n", w.filename())
-    return 0
-}
-
-func validate(b []byte) (*profile.Profile, error) {
-    p, err := profile.Parse(bytes.NewReader(b))
-    if err != nil {
-        return nil, err
-    }
-    return p, nil
-}
-
-var pprofPostProcess = func(b []byte) ([]byte, error) {
-    p, err := validate(b)
-    if err != nil {
-        return nil, err
-    }
-    var buf bytes.Buffer
-    if err := p.WriteUncompressed(&buf); err != nil {
-        return nil, err
-    }
-    fmt.Println(p.String())
-    return buf.Bytes(), nil
-}
-
-var metricsPostProcess = func(b []byte) ([]byte, error) {
-    fmt.Println(string(b))
-    return b, nil
-}
-
-var allPostProcess = func(b []byte) ([]byte, error) {
-    _, err := validate(b)
-    if err != nil {
-        return metricsPostProcess(b)
-    }
-    return pprofPostProcess(b)
+    fmt.Printf("Compiling debug information complete, all files written in %q.\n", cfg.tarballName)
+    return nil
 }
cmd/promtool/http.go (deleted)
@@ -1,58 +0,0 @@
-// Copyright 2015 The Prometheus Authors
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import (
-    "context"
-    "fmt"
-    "net/http"
-    "time"
-
-    "github.com/prometheus/client_golang/api"
-)
-
-const defaultTimeout = 2 * time.Minute
-
-type httpClient interface {
-    do(req *http.Request) (*http.Response, []byte, error)
-    urlJoin(path string) string
-}
-
-type prometheusHTTPClient struct {
-    requestTimeout time.Duration
-    httpClient     api.Client
-}
-
-func newPrometheusHTTPClient(serverURL string) (*prometheusHTTPClient, error) {
-    hc, err := api.NewClient(api.Config{
-        Address: serverURL,
-    })
-    if err != nil {
-        return nil, fmt.Errorf("error creating HTTP client: %s", err)
-    }
-    return &prometheusHTTPClient{
-        requestTimeout: defaultTimeout,
-        httpClient:     hc,
-    }, nil
-}
-
-func (c *prometheusHTTPClient) do(req *http.Request) (*http.Response, []byte, error) {
-    ctx, cancel := context.WithTimeout(context.Background(), c.requestTimeout)
-    defer cancel()
-    return c.httpClient.Do(ctx, req)
-}
-
-func (c *prometheusHTTPClient) urlJoin(path string) string {
-    return c.httpClient.URL(path, nil).String()
-}
cmd/promtool/http_test.go (deleted)
@@ -1,56 +0,0 @@
-// Copyright 2015 The Prometheus Authors
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import "testing"
-
-func TestURLJoin(t *testing.T) {
-
-    testCases := []struct {
-        inputHost string
-        inputPath string
-        expected  string
-    }{
-        {"http://host", "path", "http://host/path"},
-        {"http://host", "path/", "http://host/path"},
-        {"http://host", "/path", "http://host/path"},
-        {"http://host", "/path/", "http://host/path"},
-
-        {"http://host/", "path", "http://host/path"},
-        {"http://host/", "path/", "http://host/path"},
-        {"http://host/", "/path", "http://host/path"},
-        {"http://host/", "/path/", "http://host/path"},
-
-        {"https://host", "path", "https://host/path"},
-        {"https://host", "path/", "https://host/path"},
-        {"https://host", "/path", "https://host/path"},
-        {"https://host", "/path/", "https://host/path"},
-
-        {"https://host/", "path", "https://host/path"},
-        {"https://host/", "path/", "https://host/path"},
-        {"https://host/", "/path", "https://host/path"},
-        {"https://host/", "/path/", "https://host/path"},
-    }
-    for i, c := range testCases {
-        client, err := newPrometheusHTTPClient(c.inputHost)
-        if err != nil {
-            panic(err)
-        }
-        actual := client.urlJoin(c.inputPath)
-        if actual != c.expected {
-            t.Errorf("Error on case %d: %v(actual) != %v(expected)", i, actual, c.expected)
-        }
-        t.Logf("Case %d: %v(actual) == %v(expected)", i, actual, c.expected)
-    }
-}
cmd/promtool/main.go
@@ -14,6 +14,7 @@
 package main

 import (
+    "bytes"
     "context"
     "encoding/json"
     "fmt"
@@ -25,13 +26,15 @@ import (
     "strings"
     "time"

-    "gopkg.in/alecthomas/kingpin.v2"
-
+    "github.com/google/pprof/profile"
+    "github.com/pkg/errors"
     "github.com/prometheus/client_golang/api"
-    "github.com/prometheus/client_golang/api/prometheus/v1"
+    v1 "github.com/prometheus/client_golang/api/prometheus/v1"
     config_util "github.com/prometheus/common/config"
     "github.com/prometheus/common/model"
     "github.com/prometheus/common/version"
+    kingpin "gopkg.in/alecthomas/kingpin.v2"

     "github.com/prometheus/prometheus/config"
     "github.com/prometheus/prometheus/pkg/rulefmt"
     "github.com/prometheus/prometheus/util/promlint"
@@ -199,10 +202,10 @@ func checkConfig(filename string) ([]string, error) {
            // If an explicit file was given, error if it is not accessible.
            if !strings.Contains(rf, "*") {
                if len(rfs) == 0 {
-                    return nil, fmt.Errorf("%q does not point to an existing file", rf)
+                    return nil, errors.Errorf("%q does not point to an existing file", rf)
                }
                if err := checkFileExists(rfs[0]); err != nil {
-                    return nil, fmt.Errorf("error checking rule file %q: %s", rfs[0], err)
+                    return nil, errors.Wrapf(err, "error checking rule file %q", rfs[0])
                }
            }
            ruleFiles = append(ruleFiles, rfs...)
@@ -210,7 +213,7 @@ func checkConfig(filename string) ([]string, error) {

    for _, scfg := range cfg.ScrapeConfigs {
        if err := checkFileExists(scfg.HTTPClientConfig.BearerTokenFile); err != nil {
-            return nil, fmt.Errorf("error checking bearer token file %q: %s", scfg.HTTPClientConfig.BearerTokenFile, err)
+            return nil, errors.Wrapf(err, "error checking bearer token file %q", scfg.HTTPClientConfig.BearerTokenFile)
        }

        if err := checkTLSConfig(scfg.HTTPClientConfig.TLSConfig); err != nil {
@@ -218,7 +221,7 @@ func checkConfig(filename string) ([]string, error) {
        }

        for _, kd := range scfg.ServiceDiscoveryConfig.KubernetesSDConfigs {
-            if err := checkTLSConfig(kd.TLSConfig); err != nil {
+            if err := checkTLSConfig(kd.HTTPClientConfig.TLSConfig); err != nil {
                return nil, err
            }
        }
@@ -244,17 +247,17 @@ func checkConfig(filename string) ([]string, error) {

 func checkTLSConfig(tlsConfig config_util.TLSConfig) error {
    if err := checkFileExists(tlsConfig.CertFile); err != nil {
-        return fmt.Errorf("error checking client cert file %q: %s", tlsConfig.CertFile, err)
+        return errors.Wrapf(err, "error checking client cert file %q", tlsConfig.CertFile)
    }
    if err := checkFileExists(tlsConfig.KeyFile); err != nil {
-        return fmt.Errorf("error checking client key file %q: %s", tlsConfig.KeyFile, err)
+        return errors.Wrapf(err, "error checking client key file %q", tlsConfig.KeyFile)
    }

    if len(tlsConfig.CertFile) > 0 && len(tlsConfig.KeyFile) == 0 {
-        return fmt.Errorf("client cert file %q specified without client key file", tlsConfig.CertFile)
+        return errors.Errorf("client cert file %q specified without client key file", tlsConfig.CertFile)
    }
    if len(tlsConfig.KeyFile) > 0 && len(tlsConfig.CertFile) == 0 {
-        return fmt.Errorf("client key file %q specified without client cert file", tlsConfig.KeyFile)
+        return errors.Errorf("client key file %q specified without client cert file", tlsConfig.KeyFile)
    }

    return nil
@@ -507,64 +510,88 @@ func parseTime(s string) (time.Time, error) {
    if t, err := time.Parse(time.RFC3339Nano, s); err == nil {
        return t, nil
    }
-    return time.Time{}, fmt.Errorf("cannot parse %q to a valid timestamp", s)
+    return time.Time{}, errors.Errorf("cannot parse %q to a valid timestamp", s)
 }

-func debugPprof(url string) int {
-    w, err := newDebugWriter(debugWriterConfig{
-        serverURL:   url,
-        tarballName: "debug.tar.gz",
-        pathToFileName: map[string]string{
-            "/debug/pprof/block":        "block.pb",
-            "/debug/pprof/goroutine":    "goroutine.pb",
-            "/debug/pprof/heap":         "heap.pb",
-            "/debug/pprof/mutex":        "mutex.pb",
-            "/debug/pprof/threadcreate": "threadcreate.pb",
+type endpointsGroup struct {
+    urlToFilename map[string]string
+    postProcess   func(b []byte) ([]byte, error)
+}
+
+var (
+    pprofEndpoints = []endpointsGroup{
+        {
+            urlToFilename: map[string]string{
+                "/debug/pprof/profile?seconds=30": "cpu.pb",
+                "/debug/pprof/block":              "block.pb",
+                "/debug/pprof/goroutine":          "goroutine.pb",
+                "/debug/pprof/heap":               "heap.pb",
+                "/debug/pprof/mutex":              "mutex.pb",
+                "/debug/pprof/threadcreate":       "threadcreate.pb",
+            },
+            postProcess: func(b []byte) ([]byte, error) {
+                p, err := profile.Parse(bytes.NewReader(b))
+                if err != nil {
+                    return nil, err
+                }
+                var buf bytes.Buffer
+                if err := p.WriteUncompressed(&buf); err != nil {
+                    return nil, errors.Wrap(err, "writing the profile to the buffer")
+                }
+
+                return buf.Bytes(), nil
+            },
        },
-        postProcess: pprofPostProcess,
-    })
-    if err != nil {
-        fmt.Fprintln(os.Stderr, "error creating debug writer:", err)
+        {
+            urlToFilename: map[string]string{
+                "/debug/pprof/trace?seconds=30": "trace.pb",
+            },
+        },
+    }
+    metricsEndpoints = []endpointsGroup{
+        {
+            urlToFilename: map[string]string{
+                "/metrics": "metrics.txt",
+            },
+        },
+    }
+    allEndpoints = append(pprofEndpoints, metricsEndpoints...)
+)
+
+func debugPprof(url string) int {
+    if err := debugWrite(debugWriterConfig{
+        serverURL:      url,
+        tarballName:    "debug.tar.gz",
+        endPointGroups: pprofEndpoints,
+    }); err != nil {
+        fmt.Fprintln(os.Stderr, "error completing debug command:", err)
        return 1
    }
-    return w.Write()
+    return 0
 }

 func debugMetrics(url string) int {
-    w, err := newDebugWriter(debugWriterConfig{
-        serverURL:   url,
-        tarballName: "debug.tar.gz",
-        pathToFileName: map[string]string{
-            "/metrics": "metrics.txt",
-        },
-        postProcess: metricsPostProcess,
-    })
-    if err != nil {
-        fmt.Fprintln(os.Stderr, "error creating debug writer:", err)
+    if err := debugWrite(debugWriterConfig{
+        serverURL:      url,
+        tarballName:    "debug.tar.gz",
+        endPointGroups: metricsEndpoints,
+    }); err != nil {
+        fmt.Fprintln(os.Stderr, "error completing debug command:", err)
        return 1
    }
-    return w.Write()
+    return 0
 }

 func debugAll(url string) int {
-    w, err := newDebugWriter(debugWriterConfig{
-        serverURL:   url,
-        tarballName: "debug.tar.gz",
-        pathToFileName: map[string]string{
-            "/debug/pprof/block":        "block.pb",
-            "/debug/pprof/goroutine":    "goroutine.pb",
-            "/debug/pprof/heap":         "heap.pb",
-            "/debug/pprof/mutex":        "mutex.pb",
-            "/debug/pprof/threadcreate": "threadcreate.pb",
-            "/metrics":                  "metrics.txt",
-        },
-        postProcess: allPostProcess,
-    })
-    if err != nil {
-        fmt.Fprintln(os.Stderr, "error creating debug writer:", err)
+    if err := debugWrite(debugWriterConfig{
+        serverURL:      url,
+        tarballName:    "debug.tar.gz",
+        endPointGroups: allEndpoints,
+    }); err != nil {
+        fmt.Fprintln(os.Stderr, "error completing debug command:", err)
        return 1
    }
-    return w.Write()
+    return 0
 }

 type printer interface {
@@ -583,7 +610,7 @@ func (p *promqlPrinter) printSeries(val []model.LabelSet) {
        fmt.Println(v)
    }
 }
-func (j *promqlPrinter) printLabelValues(val model.LabelValues) {
+func (p *promqlPrinter) printLabelValues(val model.LabelValues) {
    for _, v := range val {
        fmt.Println(v)
    }
@@ -592,11 +619,14 @@ func (j *promqlPrinter) printLabelValues(val model.LabelValues) {
 type jsonPrinter struct{}

 func (j *jsonPrinter) printValue(v model.Value) {
+    //nolint:errcheck
    json.NewEncoder(os.Stdout).Encode(v)
 }
 func (j *jsonPrinter) printSeries(v []model.LabelSet) {
+    //nolint:errcheck
    json.NewEncoder(os.Stdout).Encode(v)
 }
 func (j *jsonPrinter) printLabelValues(v model.LabelValues) {
+    //nolint:errcheck
    json.NewEncoder(os.Stdout).Encode(v)
 }
cmd/promtool/main_test.go
@@ -17,26 +17,27 @@ import (
     "fmt"
     "net/http"
     "net/http/httptest"
-    "net/url"
     "testing"
     "time"
 )

 func TestQueryRange(t *testing.T) {
-    s, getURL := mockServer(200, `{"status": "success", "data": {"resultType": "matrix", "result": []}}`)
+    s, getRequest := mockServer(200, `{"status": "success", "data": {"resultType": "matrix", "result": []}}`)
     defer s.Close()

     p := &promqlPrinter{}
     exitCode := QueryRange(s.URL, "up", "0", "300", 0, p)
     expectedPath := "/api/v1/query_range"
-    if getURL().Path != expectedPath {
-        t.Errorf("unexpected URL path %s (wanted %s)", getURL().Path, expectedPath)
+    gotPath := getRequest().URL.Path
+    if gotPath != expectedPath {
+        t.Errorf("unexpected URL path %s (wanted %s)", gotPath, expectedPath)
     }
-    actual := getURL().Query().Get("query")
+    form := getRequest().Form
+    actual := form.Get("query")
     if actual != "up" {
         t.Errorf("unexpected value %s for query", actual)
     }
-    actual = getURL().Query().Get("step")
+    actual = form.Get("step")
     if actual != "1.000" {
         t.Errorf("unexpected value %s for step", actual)
     }
@@ -45,14 +46,16 @@ func TestQueryRange(t *testing.T) {
     }

     exitCode = QueryRange(s.URL, "up", "0", "300", 10*time.Millisecond, p)
-    if getURL().Path != expectedPath {
-        t.Errorf("unexpected URL path %s (wanted %s)", getURL().Path, expectedPath)
+    gotPath = getRequest().URL.Path
+    if gotPath != expectedPath {
+        t.Errorf("unexpected URL path %s (wanted %s)", gotPath, expectedPath)
     }
-    actual = getURL().Query().Get("query")
+    form = getRequest().Form
+    actual = form.Get("query")
     if actual != "up" {
         t.Errorf("unexpected value %s for query", actual)
     }
-    actual = getURL().Query().Get("step")
+    actual = form.Get("step")
     if actual != "0.010" {
         t.Errorf("unexpected value %s for step", actual)
     }
@@ -61,16 +64,17 @@ func TestQueryRange(t *testing.T) {
     }
 }

-func mockServer(code int, body string) (*httptest.Server, func() *url.URL) {
-    var u *url.URL
+func mockServer(code int, body string) (*httptest.Server, func() *http.Request) {
+    var req *http.Request
     server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-        u = r.URL
+        r.ParseForm()
+        req = r
         w.WriteHeader(code)
         fmt.Fprintln(w, body)
     }))

-    f := func() *url.URL {
-        return u
+    f := func() *http.Request {
+        return req
     }
     return server, f
 }
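The rewritten `mockServer` hands back the whole `*http.Request` after calling `ParseForm`, so the test can assert on form-encoded parameters (which may arrive in a POST body) and not just the URL query string. The capture technique on its own (a sketch; the endpoint path and values are placeholders):

```go
package main

import (
	"fmt"
	"log"
	"net/http"
	"net/http/httptest"
	"net/url"
)

func main() {
	var got *http.Request
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		r.ParseForm() // merges URL query and POST body into r.Form
		got = r
		w.WriteHeader(200)
	}))
	defer srv.Close()

	_, err := http.PostForm(srv.URL+"/api/v1/query_range",
		url.Values{"query": {"up"}, "step": {"1.000"}})
	if err != nil {
		log.Fatal(err)
	}
	// Both the path and the form values are now inspectable.
	fmt.Println(got.URL.Path, got.Form.Get("query"), got.Form.Get("step"))
}
```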
cmd/promtool/unittest.go
@@ -18,13 +18,16 @@ import (
     "fmt"
     "io/ioutil"
     "os"
+    "path/filepath"
     "reflect"
     "sort"
     "strconv"
     "strings"
     "time"

-    "gopkg.in/yaml.v2"
+    "github.com/go-kit/kit/log"
+    "github.com/pkg/errors"
+    yaml "gopkg.in/yaml.v2"

     "github.com/prometheus/prometheus/pkg/labels"
     "github.com/prometheus/prometheus/promql"
@@ -67,6 +70,9 @@ func ruleUnitTest(filename string) []error {
    if err := yaml.UnmarshalStrict(b, &unitTestInp); err != nil {
        return []error{err}
    }
+    if err := resolveAndGlobFilepaths(filepath.Dir(filename), &unitTestInp); err != nil {
+        return []error{err}
+    }

    if unitTestInp.EvaluationInterval == 0 {
        unitTestInp.EvaluationInterval = 1 * time.Minute
@@ -84,7 +90,7 @@ func ruleUnitTest(filename string) []error {
    groupOrderMap := make(map[string]int)
    for i, gn := range unitTestInp.GroupEvalOrder {
        if _, ok := groupOrderMap[gn]; ok {
-            return []error{fmt.Errorf("Group name repeated in evaluation order: %s", gn)}
+            return []error{errors.Errorf("group name repeated in evaluation order: %s", gn)}
        }
        groupOrderMap[gn] = i
    }
@@ -124,6 +130,27 @@ func (utf *unitTestFile) maxEvalTime() time.Duration {
    return maxd
 }

+// resolveAndGlobFilepaths joins all relative paths in a configuration
+// with a given base directory and replaces all globs with matching files.
+func resolveAndGlobFilepaths(baseDir string, utf *unitTestFile) error {
+    for i, rf := range utf.RuleFiles {
+        if rf != "" && !filepath.IsAbs(rf) {
+            utf.RuleFiles[i] = filepath.Join(baseDir, rf)
+        }
+    }
+
+    var globbedFiles []string
+    for _, rf := range utf.RuleFiles {
+        m, err := filepath.Glob(rf)
+        if err != nil {
+            return err
+        }
+        globbedFiles = append(globbedFiles, m...)
+    }
+    utf.RuleFiles = globbedFiles
+    return nil
+}
+
 // testGroup is a group of input series and tests associated with it.
 type testGroup struct {
     Interval time.Duration `yaml:"interval"`
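`resolveAndGlobFilepaths` combines two steps: anchor relative `rule_files` entries at the test file's directory, then expand glob patterns. The same logic in isolation (a sketch with hypothetical paths):

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	baseDir := "/etc/prometheus/tests" // hypothetical location of the unit test file
	ruleFiles := []string{"alerts.yml", "rules/*.yml", "/abs/path/extra.yml"}

	// Step 1: anchor relative paths at the test file's directory;
	// absolute paths are left alone.
	for i, rf := range ruleFiles {
		if rf != "" && !filepath.IsAbs(rf) {
			ruleFiles[i] = filepath.Join(baseDir, rf)
		}
	}

	// Step 2: expand globs; patterns that match nothing simply drop out.
	var resolved []string
	for _, rf := range ruleFiles {
		m, err := filepath.Glob(rf)
		if err != nil {
			fmt.Println("bad pattern:", rf, err)
			continue
		}
		resolved = append(resolved, m...)
	}
	fmt.Println(resolved)
}
```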
@@ -135,27 +162,23 @@ type testGroup struct {
 // test performs the unit tests.
 func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, groupOrderMap map[string]int, ruleFiles ...string) []error {
     // Setup testing suite.
-    suite, err := promql.NewTest(nil, tg.seriesLoadingString())
+    suite, err := promql.NewLazyLoader(nil, tg.seriesLoadingString())
     if err != nil {
         return []error{err}
     }
     defer suite.Close()

-    err = suite.Run()
-    if err != nil {
-        return []error{err}
-    }
-
     // Load the rule files.
     opts := &rules.ManagerOptions{
         QueryFunc:  rules.EngineQueryFunc(suite.QueryEngine(), suite.Storage()),
         Appendable: suite.Storage(),
         Context:    context.Background(),
         NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {},
-        Logger:     &dummyLogger{},
+        Logger:     log.NewNopLogger(),
     }
     m := rules.NewManager(opts)
-    groupsMap, ers := m.LoadGroups(tg.Interval, ruleFiles...)
+    // TODO(beorn7): Provide a way to pass in external labels.
+    groupsMap, ers := m.LoadGroups(tg.Interval, nil, ruleFiles...)
     if ers != nil {
         return ers
     }
@@ -165,14 +188,14 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou
     // All this preparation is so that we can test alerts as we evaluate the rules.
     // This avoids storing them in memory, as the number of evals might be high.

-    // All the `eval_time` for which we have unit tests.
-    var alertEvalTimes []time.Duration
+    // All the `eval_time` for which we have unit tests for alerts.
+    alertEvalTimesMap := map[time.Duration]struct{}{}
     // Map of all the eval_time+alertname combination present in the unit tests.
     alertsInTest := make(map[time.Duration]map[string]struct{})
     // Map of all the unit tests for given eval_time.
     alertTests := make(map[time.Duration][]alertTestCase)
     for _, alert := range tg.AlertRuleTests {
-        alertEvalTimes = append(alertEvalTimes, alert.EvalTime)
+        alertEvalTimesMap[alert.EvalTime] = struct{}{}

         if _, ok := alertsInTest[alert.EvalTime]; !ok {
             alertsInTest[alert.EvalTime] = make(map[string]struct{})
@@ -181,6 +204,10 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou

         alertTests[alert.EvalTime] = append(alertTests[alert.EvalTime], alert)
     }
+    alertEvalTimes := make([]time.Duration, 0, len(alertEvalTimesMap))
+    for k := range alertEvalTimesMap {
+        alertEvalTimes = append(alertEvalTimes, k)
+    }
     sort.Slice(alertEvalTimes, func(i, j int) bool {
         return alertEvalTimes[i] < alertEvalTimes[j]
     })
@@ -191,8 +218,23 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou
     var errs []error
     for ts := mint; ts.Before(maxt); ts = ts.Add(evalInterval) {
         // Collects the alerts asked for unit testing.
-        for _, g := range groups {
-            g.Eval(suite.Context(), ts)
+        suite.WithSamplesTill(ts, func(err error) {
+            if err != nil {
+                errs = append(errs, err)
+                return
+            }
+            for _, g := range groups {
+                g.Eval(suite.Context(), ts)
+                for _, r := range g.Rules() {
+                    if r.LastError() != nil {
+                        errs = append(errs, errors.Errorf("    rule: %s, time: %s, err: %v",
+                            r.Name(), ts.Sub(time.Unix(0, 0)), r.LastError()))
+                    }
+                }
+            }
+        })
+        if len(errs) > 0 {
+            return errs
         }

         for {
@@ -253,14 +295,14 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou
             }

             if gotAlerts.Len() != expAlerts.Len() {
-                errs = append(errs, fmt.Errorf("    alertname:%s, time:%s, \n        exp:%#v, \n        got:%#v",
+                errs = append(errs, errors.Errorf("    alertname:%s, time:%s, \n        exp:%#v, \n        got:%#v",
                     testcase.Alertname, testcase.EvalTime.String(), expAlerts.String(), gotAlerts.String()))
             } else {
                 sort.Sort(gotAlerts)
                 sort.Sort(expAlerts)

                 if !reflect.DeepEqual(expAlerts, gotAlerts) {
-                    errs = append(errs, fmt.Errorf("    alertname:%s, time:%s, \n        exp:%#v, \n        got:%#v",
+                    errs = append(errs, errors.Errorf("    alertname:%s, time:%s, \n        exp:%#v, \n        got:%#v",
                         testcase.Alertname, testcase.EvalTime.String(), expAlerts.String(), gotAlerts.String()))
                 }
             }
@@ -276,7 +318,7 @@ Outer:
             got, err := query(suite.Context(), testCase.Expr, mint.Add(testCase.EvalTime),
                 suite.QueryEngine(), suite.Queryable())
             if err != nil {
-                errs = append(errs, fmt.Errorf("    expr:'%s', time:%s, err:%s", testCase.Expr,
+                errs = append(errs, errors.Errorf("    expr:'%s', time:%s, err:%s", testCase.Expr,
                     testCase.EvalTime.String(), err.Error()))
                 continue
             }
@@ -293,7 +335,7 @@ Outer:
             for _, s := range testCase.ExpSamples {
                 lb, err := promql.ParseMetric(s.Labels)
                 if err != nil {
-                    errs = append(errs, fmt.Errorf("    expr:'%s', time:%s, err:%s", testCase.Expr,
+                    errs = append(errs, errors.Errorf("    expr:'%s', time:%s, err:%s", testCase.Expr,
                         testCase.EvalTime.String(), err.Error()))
                     continue Outer
                 }
@@ -303,8 +345,14 @@ Outer:
                 })
             }

+            sort.Slice(expSamples, func(i, j int) bool {
+                return labels.Compare(expSamples[i].Labels, expSamples[j].Labels) <= 0
+            })
+            sort.Slice(gotSamples, func(i, j int) bool {
+                return labels.Compare(gotSamples[i].Labels, gotSamples[j].Labels) <= 0
+            })
             if !reflect.DeepEqual(expSamples, gotSamples) {
-                errs = append(errs, fmt.Errorf("    expr:'%s', time:%s, \n        exp:%#v, \n        got:%#v", testCase.Expr,
+                errs = append(errs, errors.Errorf("    expr:'%s', time:%s, \n        exp:%#v, \n        got:%#v", testCase.Expr,
                     testCase.EvalTime.String(), parsedSamplesString(expSamples), parsedSamplesString(gotSamples)))
             }
         }
@@ -383,7 +431,7 @@ func query(ctx context.Context, qs string, t time.Time, engine *promql.Engine, q
             Metric: labels.Labels{},
         }}, nil
     default:
-        return nil, fmt.Errorf("rule result is not a vector or scalar")
+        return nil, errors.New("rule result is not a vector or scalar")
     }
 }
@@ -468,9 +516,3 @@ func parsedSamplesString(pss []parsedSample) string {
 func (ps *parsedSample) String() string {
     return ps.Labels.String() + " " + strconv.FormatFloat(ps.Value, 'E', -1, 64)
 }
-
-type dummyLogger struct{}
-
-func (l *dummyLogger) Log(keyvals ...interface{}) error {
-    return nil
-}
config/config.go
@@ -22,15 +22,18 @@ import (
     "strings"
     "time"

+    "github.com/pkg/errors"
     config_util "github.com/prometheus/common/config"
     "github.com/prometheus/common/model"
+    yaml "gopkg.in/yaml.v2"

     sd_config "github.com/prometheus/prometheus/discovery/config"
-    "gopkg.in/yaml.v2"
+    "github.com/prometheus/prometheus/pkg/labels"
+    "github.com/prometheus/prometheus/pkg/relabel"
 )

 var (
-    patRulePath   = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`)
-    relabelTarget = regexp.MustCompile(`^(?:(?:[a-zA-Z_]|\$(?:\{\w+\}|\w+))+\w*)+$`)
+    patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`)
 )

 // Load parses the YAML input s into a Config.
@@ -57,7 +60,7 @@ func LoadFile(filename string) (*Config, error) {
    }
    cfg, err := Load(string(content))
    if err != nil {
-        return nil, fmt.Errorf("parsing YAML file %s: %v", filename, err)
+        return nil, errors.Wrapf(err, "parsing YAML file %s", filename)
    }
    resolveFilepaths(filepath.Dir(filename), cfg)
    return cfg, nil
@@ -81,9 +84,10 @@ var (
     DefaultScrapeConfig = ScrapeConfig{
         // ScrapeTimeout and ScrapeInterval default to the
         // configured globals.
-        MetricsPath: "/metrics",
-        Scheme:      "http",
-        HonorLabels: false,
+        MetricsPath:     "/metrics",
+        Scheme:          "http",
+        HonorLabels:     false,
+        HonorTimestamps: true,
     }

     // DefaultAlertmanagerConfig is the default alertmanager configuration.
@@ -92,14 +96,6 @@ var (
         Timeout: model.Duration(10 * time.Second),
     }

-    // DefaultRelabelConfig is the default Relabel configuration.
-    DefaultRelabelConfig = RelabelConfig{
-        Action:      RelabelReplace,
-        Separator:   ";",
-        Regex:       MustNewRegexp("(.*)"),
-        Replacement: "$1",
-    }
-
     // DefaultRemoteWriteConfig is the default remote write configuration.
     DefaultRemoteWriteConfig = RemoteWriteConfig{
         RemoteTimeout: model.Duration(30 * time.Second),
@@ -111,15 +107,16 @@ var (
         // With a maximum of 1000 shards, assuming an average of 100ms remote write
         // time and 100 samples per batch, we will be able to push 1M samples/s.
         MaxShards:         1000,
+        MinShards:         1,
         MaxSamplesPerSend: 100,

-        // By default, buffer 100 batches, which at 100ms per batch is 10s. At
-        // 1000 shards, this will buffer 10M samples total.
-        Capacity:          100 * 100,
+        // Each shard will have a max of 10 samples pending in it's channel, plus the pending
+        // samples that have been enqueued. Theoretically we should only ever have about 110 samples
+        // per shard pending. At 1000 shards that's 110k.
+        Capacity:          10,
         BatchSendDeadline: model.Duration(5 * time.Second),

-        // Max number of times to retry a batch on recoverable errors.
-        MaxRetries: 3,
+        // Backoff times for retrying a batch of samples on recoverable errors.
+        MinBackoff: model.Duration(30 * time.Millisecond),
+        MaxBackoff: model.Duration(100 * time.Millisecond),
     }
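The new `Capacity` comment is doing back-of-the-envelope queue math; spelled out with the same numbers (nothing beyond what the comment states):

```go
package main

import "fmt"

func main() {
	const (
		capacityPerShard  = 10   // new per-shard channel capacity
		maxSamplesPerSend = 100  // roughly one enqueued batch in flight
		maxShards         = 1000 // upper bound on shard count
	)
	perShardPending := capacityPerShard + maxSamplesPerSend // ≈110 samples pending per shard
	fmt.Println(perShardPending * maxShards)                // ≈110000 samples at 1000 shards
}
```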
@ -158,30 +155,34 @@ func resolveFilepaths(baseDir string, cfg *Config) {
|
|||
cfg.RuleFiles[i] = join(rf)
|
||||
}
|
||||
|
||||
tlsPaths := func(cfg *config_util.TLSConfig) {
|
||||
cfg.CAFile = join(cfg.CAFile)
|
||||
cfg.CertFile = join(cfg.CertFile)
|
||||
cfg.KeyFile = join(cfg.KeyFile)
|
||||
}
|
||||
clientPaths := func(scfg *config_util.HTTPClientConfig) {
|
||||
if scfg.BasicAuth != nil {
|
||||
scfg.BasicAuth.PasswordFile = join(scfg.BasicAuth.PasswordFile)
|
||||
}
|
||||
scfg.BearerTokenFile = join(scfg.BearerTokenFile)
|
||||
scfg.TLSConfig.CAFile = join(scfg.TLSConfig.CAFile)
|
||||
scfg.TLSConfig.CertFile = join(scfg.TLSConfig.CertFile)
|
||||
scfg.TLSConfig.KeyFile = join(scfg.TLSConfig.KeyFile)
|
||||
tlsPaths(&scfg.TLSConfig)
|
||||
}
|
||||
sdPaths := func(cfg *sd_config.ServiceDiscoveryConfig) {
|
||||
for _, kcfg := range cfg.KubernetesSDConfigs {
|
||||
kcfg.BearerTokenFile = join(kcfg.BearerTokenFile)
|
||||
kcfg.TLSConfig.CAFile = join(kcfg.TLSConfig.CAFile)
|
||||
kcfg.TLSConfig.CertFile = join(kcfg.TLSConfig.CertFile)
|
||||
kcfg.TLSConfig.KeyFile = join(kcfg.TLSConfig.KeyFile)
|
||||
clientPaths(&kcfg.HTTPClientConfig)
|
||||
}
|
||||
for _, mcfg := range cfg.MarathonSDConfigs {
|
||||
mcfg.AuthTokenFile = join(mcfg.AuthTokenFile)
|
||||
mcfg.HTTPClientConfig.BearerTokenFile = join(mcfg.HTTPClientConfig.BearerTokenFile)
|
||||
mcfg.HTTPClientConfig.TLSConfig.CAFile = join(mcfg.HTTPClientConfig.TLSConfig.CAFile)
|
||||
mcfg.HTTPClientConfig.TLSConfig.CertFile = join(mcfg.HTTPClientConfig.TLSConfig.CertFile)
|
||||
mcfg.HTTPClientConfig.TLSConfig.KeyFile = join(mcfg.HTTPClientConfig.TLSConfig.KeyFile)
|
||||
clientPaths(&mcfg.HTTPClientConfig)
|
||||
}
|
||||
for _, consulcfg := range cfg.ConsulSDConfigs {
|
||||
consulcfg.TLSConfig.CAFile = join(consulcfg.TLSConfig.CAFile)
|
||||
consulcfg.TLSConfig.CertFile = join(consulcfg.TLSConfig.CertFile)
|
||||
consulcfg.TLSConfig.KeyFile = join(consulcfg.TLSConfig.KeyFile)
|
||||
tlsPaths(&consulcfg.TLSConfig)
|
||||
}
|
||||
for _, cfg := range cfg.OpenstackSDConfigs {
|
||||
tlsPaths(&cfg.TLSConfig)
|
||||
}
|
||||
for _, cfg := range cfg.TritonSDConfigs {
|
||||
tlsPaths(&cfg.TLSConfig)
|
||||
}
|
||||
for _, filecfg := range cfg.FileSDConfigs {
|
||||
for i, fn := range filecfg.Files {
|
||||
|
@ -198,6 +199,12 @@ func resolveFilepaths(baseDir string, cfg *Config) {
|
|||
clientPaths(&cfg.HTTPClientConfig)
|
||||
sdPaths(&cfg.ServiceDiscoveryConfig)
|
||||
}
|
||||
for _, cfg := range cfg.RemoteReadConfigs {
|
||||
clientPaths(&cfg.HTTPClientConfig)
|
||||
}
|
||||
for _, cfg := range cfg.RemoteWriteConfigs {
|
||||
clientPaths(&cfg.HTTPClientConfig)
|
||||
}
|
||||
}
|
||||
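All of these path helpers funnel through the same join closure, which is defined at the top of resolveFilepaths and falls outside this excerpt. A self-contained sketch of the assumed behavior — resolve relative paths against the config file's directory, leave absolute and empty paths alone:

package main

import (
	"fmt"
	"path/filepath"
)

// joinDir mirrors the join helper assumed by resolveFilepaths: non-empty
// relative paths are resolved against the directory of the loaded config.
func joinDir(baseDir, fp string) string {
	if len(fp) > 0 && !filepath.IsAbs(fp) {
		return filepath.Join(baseDir, fp)
	}
	return fp
}

func main() {
	fmt.Println(joinDir("/etc/prometheus", "testdata/valid_ca_file")) // /etc/prometheus/testdata/valid_ca_file
	fmt.Println(joinDir("/etc/prometheus", "/abs/ca.pem"))            // /abs/ca.pem (unchanged)
	fmt.Println(joinDir("/etc/prometheus", ""))                       // "" (unchanged)
}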
|
||||
func (c Config) String() string {
|
||||
|
@ -227,19 +234,22 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||
|
||||
for _, rf := range c.RuleFiles {
|
||||
if !patRulePath.MatchString(rf) {
|
||||
return fmt.Errorf("invalid rule file path %q", rf)
|
||||
return errors.Errorf("invalid rule file path %q", rf)
|
||||
}
|
||||
}
|
||||
// Do global overrides and validate unique names.
|
||||
jobNames := map[string]struct{}{}
|
||||
for _, scfg := range c.ScrapeConfigs {
|
||||
if scfg == nil {
|
||||
return errors.New("empty or null scrape config section")
|
||||
}
|
||||
// First set the correct scrape interval, then check that the timeout
|
||||
// (inferred or explicit) is not greater than that.
|
||||
if scfg.ScrapeInterval == 0 {
|
||||
scfg.ScrapeInterval = c.GlobalConfig.ScrapeInterval
|
||||
}
|
||||
if scfg.ScrapeTimeout > scfg.ScrapeInterval {
|
||||
return fmt.Errorf("scrape timeout greater than scrape interval for scrape config with job name %q", scfg.JobName)
|
||||
return errors.Errorf("scrape timeout greater than scrape interval for scrape config with job name %q", scfg.JobName)
|
||||
}
|
||||
if scfg.ScrapeTimeout == 0 {
|
||||
if c.GlobalConfig.ScrapeTimeout > scfg.ScrapeInterval {
|
||||
|
@ -250,10 +260,20 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||
}
|
||||
|
||||
if _, ok := jobNames[scfg.JobName]; ok {
|
||||
return fmt.Errorf("found multiple scrape configs with job name %q", scfg.JobName)
|
||||
return errors.Errorf("found multiple scrape configs with job name %q", scfg.JobName)
|
||||
}
|
||||
jobNames[scfg.JobName] = struct{}{}
|
||||
}
|
||||
for _, rwcfg := range c.RemoteWriteConfigs {
|
||||
if rwcfg == nil {
|
||||
return errors.New("empty or null remote write config section")
|
||||
}
|
||||
}
|
||||
for _, rrcfg := range c.RemoteReadConfigs {
|
||||
if rrcfg == nil {
|
||||
return errors.New("empty or null remote read config section")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
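The nil checks added here (and the matching ones for scrape and relabel configs) guard against YAML lists whose items are empty — a bare "-" entry unmarshals into a nil pointer — and are exercised by the new empty_*_config.bad.yml fixtures added further down.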
|
||||
|
@ -267,7 +287,7 @@ type GlobalConfig struct {
|
|||
// How frequently to evaluate rules by default.
|
||||
EvaluationInterval model.Duration `yaml:"evaluation_interval,omitempty"`
|
||||
// The labels to add to any timeseries that this Prometheus instance scrapes.
|
||||
ExternalLabels model.LabelSet `yaml:"external_labels,omitempty"`
|
||||
ExternalLabels labels.Labels `yaml:"external_labels,omitempty"`
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
|
@ -280,13 +300,22 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||
return err
|
||||
}
|
||||
|
||||
for _, l := range gc.ExternalLabels {
|
||||
if !model.LabelName(l.Name).IsValid() {
|
||||
return errors.Errorf("%q is not a valid label name", l.Name)
|
||||
}
|
||||
if !model.LabelValue(l.Value).IsValid() {
|
||||
return errors.Errorf("%q is not a valid label value", l.Value)
|
||||
}
|
||||
}
|
||||
|
||||
// First set the correct scrape interval, then check that the timeout
|
||||
// (inferred or explicit) is not greater than that.
|
||||
if gc.ScrapeInterval == 0 {
|
||||
gc.ScrapeInterval = DefaultGlobalConfig.ScrapeInterval
|
||||
}
|
||||
if gc.ScrapeTimeout > gc.ScrapeInterval {
|
||||
return fmt.Errorf("global scrape timeout greater than scrape interval")
|
||||
return errors.New("global scrape timeout greater than scrape interval")
|
||||
}
|
||||
if gc.ScrapeTimeout == 0 {
|
||||
if DefaultGlobalConfig.ScrapeTimeout > gc.ScrapeInterval {
|
||||
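Switching ExternalLabels from model.LabelSet (a map) to labels.Labels (a name-sorted slice) is why the expected test config below lists {foo=bar} before {monitor=codelab}, and why per-label name and value validation now has to be done explicitly in the loop above.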
|
@ -316,6 +345,8 @@ type ScrapeConfig struct {
|
|||
JobName string `yaml:"job_name"`
|
||||
// Indicator whether the scraped metrics should remain unmodified.
|
||||
HonorLabels bool `yaml:"honor_labels,omitempty"`
|
||||
// Indicator whether the scraped timestamps should be respected.
|
||||
HonorTimestamps bool `yaml:"honor_timestamps"`
|
||||
// A set of query parameters with which the target is scraped.
|
||||
Params url.Values `yaml:"params,omitempty"`
|
||||
// How frequently to scrape the targets of this scrape config.
|
||||
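honor_timestamps is the new knob introduced by this change: it defaults to true (see DefaultScrapeConfig above), and the badfederation job added to conf.good.yml further down exercises honor_timestamps: false together with metrics_path: /federate.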
|
@ -336,9 +367,9 @@ type ScrapeConfig struct {
|
|||
HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"`
|
||||
|
||||
// List of target relabel configurations.
|
||||
RelabelConfigs []*RelabelConfig `yaml:"relabel_configs,omitempty"`
|
||||
RelabelConfigs []*relabel.Config `yaml:"relabel_configs,omitempty"`
|
||||
// List of metric relabel configurations.
|
||||
MetricRelabelConfigs []*RelabelConfig `yaml:"metric_relabel_configs,omitempty"`
|
||||
MetricRelabelConfigs []*relabel.Config `yaml:"metric_relabel_configs,omitempty"`
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
|
@ -350,7 +381,7 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||
return err
|
||||
}
|
||||
if len(c.JobName) == 0 {
|
||||
return fmt.Errorf("job_name is empty")
|
||||
return errors.New("job_name is empty")
|
||||
}
|
||||
|
||||
// The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer.
|
||||
|
@ -360,6 +391,13 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||
return err
|
||||
}
|
||||
|
||||
// The UnmarshalYAML method of ServiceDiscoveryConfig is not being called because it's not a pointer.
|
||||
// We cannot make it a pointer as the parser panics for inlined pointer structs.
|
||||
// Thus we just do its validation here.
|
||||
if err := c.ServiceDiscoveryConfig.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Check for users putting URLs in target groups.
|
||||
if len(c.RelabelConfigs) == 0 {
|
||||
for _, tg := range c.ServiceDiscoveryConfig.StaticConfigs {
|
||||
|
@ -371,6 +409,17 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||
}
|
||||
}
|
||||
|
||||
for _, rlcfg := range c.RelabelConfigs {
|
||||
if rlcfg == nil {
|
||||
return errors.New("empty or null target relabeling rule in scrape config")
|
||||
}
|
||||
}
|
||||
for _, rlcfg := range c.MetricRelabelConfigs {
|
||||
if rlcfg == nil {
|
||||
return errors.New("empty or null metric relabeling rule in scrape config")
|
||||
}
|
||||
}
|
||||
|
||||
// Add index to the static config target groups for unique identification
|
||||
// within scrape pool.
|
||||
for i, tg := range c.ServiceDiscoveryConfig.StaticConfigs {
|
||||
|
@ -382,7 +431,7 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
|||
|
||||
// AlertingConfig configures alerting and alertmanager related configs.
|
||||
type AlertingConfig struct {
|
||||
AlertRelabelConfigs []*RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
|
||||
AlertRelabelConfigs []*relabel.Config `yaml:"alert_relabel_configs,omitempty"`
|
||||
AlertmanagerConfigs []*AlertmanagerConfig `yaml:"alertmanagers,omitempty"`
|
||||
}
|
||||
|
||||
|
@ -392,7 +441,16 @@ func (c *AlertingConfig) UnmarshalYAML(unmarshal func(interface{}) error) error
|
|||
// by the default due to the YAML parser behavior for empty blocks.
|
||||
*c = AlertingConfig{}
|
||||
type plain AlertingConfig
|
||||
return unmarshal((*plain)(c))
|
||||
if err := unmarshal((*plain)(c)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, rlcfg := range c.AlertRelabelConfigs {
|
||||
if rlcfg == nil {
|
||||
return errors.New("empty or null alert relabeling rule")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// AlertmanagerConfig configures how Alertmanagers can be discovered and communicated with.
|
||||
|
@ -411,7 +469,7 @@ type AlertmanagerConfig struct {
|
|||
Timeout model.Duration `yaml:"timeout,omitempty"`
|
||||
|
||||
// List of Alertmanager relabel configurations.
|
||||
RelabelConfigs []*RelabelConfig `yaml:"relabel_configs,omitempty"`
|
||||
RelabelConfigs []*relabel.Config `yaml:"relabel_configs,omitempty"`
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
|
@ -429,6 +487,13 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er
|
|||
return err
|
||||
}
|
||||
|
||||
// The UnmarshalYAML method of ServiceDiscoveryConfig is not being called because it's not a pointer.
|
||||
// We cannot make it a pointer as the parser panics for inlined pointer structs.
|
||||
// Thus we just do its validation here.
|
||||
if err := c.ServiceDiscoveryConfig.Validate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Check for users putting URLs in target groups.
|
||||
if len(c.RelabelConfigs) == 0 {
|
||||
for _, tg := range c.ServiceDiscoveryConfig.StaticConfigs {
|
||||
|
@ -440,6 +505,12 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er
|
|||
}
|
||||
}
|
||||
|
||||
for _, rlcfg := range c.RelabelConfigs {
|
||||
if rlcfg == nil {
|
||||
return errors.New("empty or null Alertmanager target relabeling rule")
|
||||
}
|
||||
}
|
||||
|
||||
// Add index to the static config target groups for unique identification
|
||||
// within scrape pool.
|
||||
for i, tg := range c.ServiceDiscoveryConfig.StaticConfigs {
|
||||
|
@ -453,7 +524,7 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er
|
|||
func CheckTargetAddress(address model.LabelValue) error {
|
||||
// For now check for a URL, we may want to expand this later.
|
||||
if strings.Contains(string(address), "/") {
|
||||
return fmt.Errorf("%q is not a valid hostname", address)
|
||||
return errors.Errorf("%q is not a valid hostname", address)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
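The "/" check above means a plain host:port such as localhost:9090 passes, while anything URL-shaped like http://localhost:9090/metrics is rejected — which is what the "users putting URLs in target groups" validation in the scrape and Alertmanager configs relies on.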
|
@ -470,151 +541,11 @@ type FileSDConfig struct {
|
|||
RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
|
||||
}
|
||||
|
||||
// RelabelAction is the action to be performed on relabeling.
|
||||
type RelabelAction string
|
||||
|
||||
const (
|
||||
// RelabelReplace performs a regex replacement.
|
||||
RelabelReplace RelabelAction = "replace"
|
||||
// RelabelKeep drops targets for which the input does not match the regex.
|
||||
RelabelKeep RelabelAction = "keep"
|
||||
// RelabelDrop drops targets for which the input does match the regex.
|
||||
RelabelDrop RelabelAction = "drop"
|
||||
// RelabelHashMod sets a label to the modulus of a hash of labels.
|
||||
RelabelHashMod RelabelAction = "hashmod"
|
||||
// RelabelLabelMap copies labels to other labelnames based on a regex.
|
||||
RelabelLabelMap RelabelAction = "labelmap"
|
||||
// RelabelLabelDrop drops any label matching the regex.
|
||||
RelabelLabelDrop RelabelAction = "labeldrop"
|
||||
// RelabelLabelKeep drops any label not matching the regex.
|
||||
RelabelLabelKeep RelabelAction = "labelkeep"
|
||||
)
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (a *RelabelAction) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
var s string
|
||||
if err := unmarshal(&s); err != nil {
|
||||
return err
|
||||
}
|
||||
switch act := RelabelAction(strings.ToLower(s)); act {
|
||||
case RelabelReplace, RelabelKeep, RelabelDrop, RelabelHashMod, RelabelLabelMap, RelabelLabelDrop, RelabelLabelKeep:
|
||||
*a = act
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("unknown relabel action %q", s)
|
||||
}
|
||||
|
||||
// RelabelConfig is the configuration for relabeling of target label sets.
|
||||
type RelabelConfig struct {
|
||||
// A list of labels from which values are taken and concatenated
|
||||
// with the configured separator in order.
|
||||
SourceLabels model.LabelNames `yaml:"source_labels,flow,omitempty"`
|
||||
// Separator is the string between concatenated values from the source labels.
|
||||
Separator string `yaml:"separator,omitempty"`
|
||||
// Regex against which the concatenation is matched.
|
||||
Regex Regexp `yaml:"regex,omitempty"`
|
||||
// Modulus to take of the hash of concatenated values from the source labels.
|
||||
Modulus uint64 `yaml:"modulus,omitempty"`
|
||||
// TargetLabel is the label to which the resulting string is written in a replacement.
|
||||
// Regexp interpolation is allowed for the replace action.
|
||||
TargetLabel string `yaml:"target_label,omitempty"`
|
||||
// Replacement is the regex replacement pattern to be used.
|
||||
Replacement string `yaml:"replacement,omitempty"`
|
||||
// Action is the action to be performed for the relabeling.
|
||||
Action RelabelAction `yaml:"action,omitempty"`
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (c *RelabelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
*c = DefaultRelabelConfig
|
||||
type plain RelabelConfig
|
||||
if err := unmarshal((*plain)(c)); err != nil {
|
||||
return err
|
||||
}
|
||||
if c.Regex.Regexp == nil {
|
||||
c.Regex = MustNewRegexp("")
|
||||
}
|
||||
if c.Modulus == 0 && c.Action == RelabelHashMod {
|
||||
return fmt.Errorf("relabel configuration for hashmod requires non-zero modulus")
|
||||
}
|
||||
if (c.Action == RelabelReplace || c.Action == RelabelHashMod) && c.TargetLabel == "" {
|
||||
return fmt.Errorf("relabel configuration for %s action requires 'target_label' value", c.Action)
|
||||
}
|
||||
if c.Action == RelabelReplace && !relabelTarget.MatchString(c.TargetLabel) {
|
||||
return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action)
|
||||
}
|
||||
if c.Action == RelabelLabelMap && !relabelTarget.MatchString(c.Replacement) {
|
||||
return fmt.Errorf("%q is invalid 'replacement' for %s action", c.Replacement, c.Action)
|
||||
}
|
||||
if c.Action == RelabelHashMod && !model.LabelName(c.TargetLabel).IsValid() {
|
||||
return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action)
|
||||
}
|
||||
|
||||
if c.Action == RelabelLabelDrop || c.Action == RelabelLabelKeep {
|
||||
if c.SourceLabels != nil ||
|
||||
c.TargetLabel != DefaultRelabelConfig.TargetLabel ||
|
||||
c.Modulus != DefaultRelabelConfig.Modulus ||
|
||||
c.Separator != DefaultRelabelConfig.Separator ||
|
||||
c.Replacement != DefaultRelabelConfig.Replacement {
|
||||
return fmt.Errorf("%s action requires only 'regex', and no other fields", c.Action)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Regexp encapsulates a regexp.Regexp and makes it YAML marshallable.
|
||||
type Regexp struct {
|
||||
*regexp.Regexp
|
||||
original string
|
||||
}
|
||||
|
||||
// NewRegexp creates a new anchored Regexp and returns an error if the
|
||||
// passed-in regular expression does not compile.
|
||||
func NewRegexp(s string) (Regexp, error) {
|
||||
regex, err := regexp.Compile("^(?:" + s + ")$")
|
||||
return Regexp{
|
||||
Regexp: regex,
|
||||
original: s,
|
||||
}, err
|
||||
}
|
||||
|
||||
// MustNewRegexp works like NewRegexp, but panics if the regular expression does not compile.
|
||||
func MustNewRegexp(s string) Regexp {
|
||||
re, err := NewRegexp(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return re
|
||||
}
|
||||
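This Regexp wrapper (removed here, but the same construction is kept in pkg/relabel after the move) anchors every pattern. A small sketch of the anchoring behavior:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// NewRegexp compiles "^(?:" + s + ")$", so a pattern has to match the
	// whole input, not just a substring.
	re := regexp.MustCompile("^(?:" + "expensive.*" + ")$")
	fmt.Println(re.MatchString("expensive_metric_total")) // true
	fmt.Println(re.MatchString("not_expensive"))          // false, despite the substring match
}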
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (re *Regexp) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||
var s string
|
||||
if err := unmarshal(&s); err != nil {
|
||||
return err
|
||||
}
|
||||
r, err := NewRegexp(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*re = r
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalYAML implements the yaml.Marshaler interface.
|
||||
func (re Regexp) MarshalYAML() (interface{}, error) {
|
||||
if re.original != "" {
|
||||
return re.original, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// RemoteWriteConfig is the configuration for writing to remote storage.
|
||||
type RemoteWriteConfig struct {
|
||||
URL *config_util.URL `yaml:"url"`
|
||||
RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"`
|
||||
WriteRelabelConfigs []*RelabelConfig `yaml:"write_relabel_configs,omitempty"`
|
||||
URL *config_util.URL `yaml:"url"`
|
||||
RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"`
|
||||
WriteRelabelConfigs []*relabel.Config `yaml:"write_relabel_configs,omitempty"`
|
||||
|
||||
// We cannot do proper Go type embedding below as the parser will then parse
|
||||
// values arbitrarily into the overflow maps of further-down types.
|
||||
|
@ -630,7 +561,12 @@ func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
|
|||
return err
|
||||
}
|
||||
if c.URL == nil {
|
||||
return fmt.Errorf("url for remote_write is empty")
|
||||
return errors.New("url for remote_write is empty")
|
||||
}
|
||||
for _, rlcfg := range c.WriteRelabelConfigs {
|
||||
if rlcfg == nil {
|
||||
return errors.New("empty or null relabeling rule in remote write config")
|
||||
}
|
||||
}
|
||||
|
||||
// The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer.
|
||||
|
@ -648,15 +584,15 @@ type QueueConfig struct {
|
|||
// Max number of shards, i.e. amount of concurrency.
|
||||
MaxShards int `yaml:"max_shards,omitempty"`
|
||||
|
||||
// Min number of shards, i.e. amount of concurrency.
|
||||
MinShards int `yaml:"min_shards,omitempty"`
|
||||
|
||||
// Maximum number of samples per send.
|
||||
MaxSamplesPerSend int `yaml:"max_samples_per_send,omitempty"`
|
||||
|
||||
// Maximum time sample will wait in buffer.
|
||||
BatchSendDeadline model.Duration `yaml:"batch_send_deadline,omitempty"`
|
||||
|
||||
// Max number of times to retry a batch on recoverable errors.
|
||||
MaxRetries int `yaml:"max_retries,omitempty"`
|
||||
|
||||
// On recoverable errors, backoff exponentially.
|
||||
MinBackoff model.Duration `yaml:"min_backoff,omitempty"`
|
||||
MaxBackoff model.Duration `yaml:"max_backoff,omitempty"`
|
||||
|
@ -684,7 +620,7 @@ func (c *RemoteReadConfig) UnmarshalYAML(unmarshal func(interface{}) error) erro
|
|||
return err
|
||||
}
|
||||
if c.URL == nil {
|
||||
return fmt.Errorf("url for remote_read is empty")
|
||||
return errors.New("url for remote_read is empty")
|
||||
}
|
||||
// The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer.
|
||||
// We cannot make it a pointer as the parser panics for inlined pointer structs.
|
||||
config/config_test.go
@ -23,7 +23,13 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
config_util "github.com/prometheus/common/config"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"gopkg.in/yaml.v2"
|
||||
|
||||
"github.com/prometheus/prometheus/discovery/azure"
|
||||
sd_config "github.com/prometheus/prometheus/discovery/config"
|
||||
"github.com/prometheus/prometheus/discovery/consul"
|
||||
"github.com/prometheus/prometheus/discovery/dns"
|
||||
"github.com/prometheus/prometheus/discovery/ec2"
|
||||
|
@ -34,12 +40,9 @@ import (
|
|||
"github.com/prometheus/prometheus/discovery/targetgroup"
|
||||
"github.com/prometheus/prometheus/discovery/triton"
|
||||
"github.com/prometheus/prometheus/discovery/zookeeper"
|
||||
|
||||
config_util "github.com/prometheus/common/config"
|
||||
"github.com/prometheus/common/model"
|
||||
sd_config "github.com/prometheus/prometheus/discovery/config"
|
||||
"github.com/prometheus/prometheus/pkg/labels"
|
||||
"github.com/prometheus/prometheus/pkg/relabel"
|
||||
"github.com/prometheus/prometheus/util/testutil"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
func mustParseURL(u string) *config_util.URL {
|
||||
|
@ -56,9 +59,9 @@ var expectedConf = &Config{
|
|||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
EvaluationInterval: model.Duration(30 * time.Second),
|
||||
|
||||
ExternalLabels: model.LabelSet{
|
||||
"monitor": "codelab",
|
||||
"foo": "bar",
|
||||
ExternalLabels: labels.Labels{
|
||||
{Name: "foo", Value: "bar"},
|
||||
{Name: "monitor", Value: "codelab"},
|
||||
},
|
||||
},
|
||||
|
||||
|
@ -71,13 +74,13 @@ var expectedConf = &Config{
|
|||
{
|
||||
URL: mustParseURL("http://remote1/push"),
|
||||
RemoteTimeout: model.Duration(30 * time.Second),
|
||||
WriteRelabelConfigs: []*RelabelConfig{
|
||||
WriteRelabelConfigs: []*relabel.Config{
|
||||
{
|
||||
SourceLabels: model.LabelNames{"__name__"},
|
||||
Separator: ";",
|
||||
Regex: MustNewRegexp("expensive.*"),
|
||||
Regex: relabel.MustNewRegexp("expensive.*"),
|
||||
Replacement: "$1",
|
||||
Action: RelabelDrop,
|
||||
Action: relabel.Drop,
|
||||
},
|
||||
},
|
||||
QueueConfig: DefaultQueueConfig,
|
||||
|
@ -86,6 +89,12 @@ var expectedConf = &Config{
|
|||
URL: mustParseURL("http://remote2/push"),
|
||||
RemoteTimeout: model.Duration(30 * time.Second),
|
||||
QueueConfig: DefaultQueueConfig,
|
||||
HTTPClientConfig: config_util.HTTPClientConfig{
|
||||
TLSConfig: config_util.TLSConfig{
|
||||
CertFile: filepath.FromSlash("testdata/valid_cert_file"),
|
||||
KeyFile: filepath.FromSlash("testdata/valid_key_file"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
|
@ -100,6 +109,12 @@ var expectedConf = &Config{
|
|||
RemoteTimeout: model.Duration(1 * time.Minute),
|
||||
ReadRecent: false,
|
||||
RequiredMatchers: model.LabelSet{"job": "special"},
|
||||
HTTPClientConfig: config_util.HTTPClientConfig{
|
||||
TLSConfig: config_util.TLSConfig{
|
||||
CertFile: filepath.FromSlash("testdata/valid_cert_file"),
|
||||
KeyFile: filepath.FromSlash("testdata/valid_key_file"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
|
@ -107,9 +122,10 @@ var expectedConf = &Config{
|
|||
{
|
||||
JobName: "prometheus",
|
||||
|
||||
HonorLabels: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
HonorLabels: true,
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: DefaultScrapeConfig.MetricsPath,
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
|
@ -145,33 +161,33 @@ var expectedConf = &Config{
|
|||
},
|
||||
},
|
||||
|
||||
RelabelConfigs: []*RelabelConfig{
|
||||
RelabelConfigs: []*relabel.Config{
|
||||
{
|
||||
SourceLabels: model.LabelNames{"job", "__meta_dns_name"},
|
||||
TargetLabel: "job",
|
||||
Separator: ";",
|
||||
Regex: MustNewRegexp("(.*)some-[regex]"),
|
||||
Regex: relabel.MustNewRegexp("(.*)some-[regex]"),
|
||||
Replacement: "foo-${1}",
|
||||
Action: RelabelReplace,
|
||||
Action: relabel.Replace,
|
||||
}, {
|
||||
SourceLabels: model.LabelNames{"abc"},
|
||||
TargetLabel: "cde",
|
||||
Separator: ";",
|
||||
Regex: DefaultRelabelConfig.Regex,
|
||||
Replacement: DefaultRelabelConfig.Replacement,
|
||||
Action: RelabelReplace,
|
||||
Regex: relabel.DefaultRelabelConfig.Regex,
|
||||
Replacement: relabel.DefaultRelabelConfig.Replacement,
|
||||
Action: relabel.Replace,
|
||||
}, {
|
||||
TargetLabel: "abc",
|
||||
Separator: ";",
|
||||
Regex: DefaultRelabelConfig.Regex,
|
||||
Regex: relabel.DefaultRelabelConfig.Regex,
|
||||
Replacement: "static",
|
||||
Action: RelabelReplace,
|
||||
Action: relabel.Replace,
|
||||
}, {
|
||||
TargetLabel: "abc",
|
||||
Separator: ";",
|
||||
Regex: MustNewRegexp(""),
|
||||
Regex: relabel.MustNewRegexp(""),
|
||||
Replacement: "static",
|
||||
Action: RelabelReplace,
|
||||
Action: relabel.Replace,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -179,9 +195,10 @@ var expectedConf = &Config{
|
|||
|
||||
JobName: "service-x",
|
||||
|
||||
ScrapeInterval: model.Duration(50 * time.Second),
|
||||
ScrapeTimeout: model.Duration(5 * time.Second),
|
||||
SampleLimit: 1000,
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(50 * time.Second),
|
||||
ScrapeTimeout: model.Duration(5 * time.Second),
|
||||
SampleLimit: 1000,
|
||||
|
||||
HTTPClientConfig: config_util.HTTPClientConfig{
|
||||
BasicAuth: &config_util.BasicAuth{
|
||||
|
@ -212,64 +229,65 @@ var expectedConf = &Config{
|
|||
},
|
||||
},
|
||||
|
||||
RelabelConfigs: []*RelabelConfig{
|
||||
RelabelConfigs: []*relabel.Config{
|
||||
{
|
||||
SourceLabels: model.LabelNames{"job"},
|
||||
Regex: MustNewRegexp("(.*)some-[regex]"),
|
||||
Regex: relabel.MustNewRegexp("(.*)some-[regex]"),
|
||||
Separator: ";",
|
||||
Replacement: DefaultRelabelConfig.Replacement,
|
||||
Action: RelabelDrop,
|
||||
Replacement: relabel.DefaultRelabelConfig.Replacement,
|
||||
Action: relabel.Drop,
|
||||
},
|
||||
{
|
||||
SourceLabels: model.LabelNames{"__address__"},
|
||||
TargetLabel: "__tmp_hash",
|
||||
Regex: DefaultRelabelConfig.Regex,
|
||||
Replacement: DefaultRelabelConfig.Replacement,
|
||||
Regex: relabel.DefaultRelabelConfig.Regex,
|
||||
Replacement: relabel.DefaultRelabelConfig.Replacement,
|
||||
Modulus: 8,
|
||||
Separator: ";",
|
||||
Action: RelabelHashMod,
|
||||
Action: relabel.HashMod,
|
||||
},
|
||||
{
|
||||
SourceLabels: model.LabelNames{"__tmp_hash"},
|
||||
Regex: MustNewRegexp("1"),
|
||||
Regex: relabel.MustNewRegexp("1"),
|
||||
Separator: ";",
|
||||
Replacement: DefaultRelabelConfig.Replacement,
|
||||
Action: RelabelKeep,
|
||||
Replacement: relabel.DefaultRelabelConfig.Replacement,
|
||||
Action: relabel.Keep,
|
||||
},
|
||||
{
|
||||
Regex: MustNewRegexp("1"),
|
||||
Regex: relabel.MustNewRegexp("1"),
|
||||
Separator: ";",
|
||||
Replacement: DefaultRelabelConfig.Replacement,
|
||||
Action: RelabelLabelMap,
|
||||
Replacement: relabel.DefaultRelabelConfig.Replacement,
|
||||
Action: relabel.LabelMap,
|
||||
},
|
||||
{
|
||||
Regex: MustNewRegexp("d"),
|
||||
Regex: relabel.MustNewRegexp("d"),
|
||||
Separator: ";",
|
||||
Replacement: DefaultRelabelConfig.Replacement,
|
||||
Action: RelabelLabelDrop,
|
||||
Replacement: relabel.DefaultRelabelConfig.Replacement,
|
||||
Action: relabel.LabelDrop,
|
||||
},
|
||||
{
|
||||
Regex: MustNewRegexp("k"),
|
||||
Regex: relabel.MustNewRegexp("k"),
|
||||
Separator: ";",
|
||||
Replacement: DefaultRelabelConfig.Replacement,
|
||||
Action: RelabelLabelKeep,
|
||||
Replacement: relabel.DefaultRelabelConfig.Replacement,
|
||||
Action: relabel.LabelKeep,
|
||||
},
|
||||
},
|
||||
MetricRelabelConfigs: []*RelabelConfig{
|
||||
MetricRelabelConfigs: []*relabel.Config{
|
||||
{
|
||||
SourceLabels: model.LabelNames{"__name__"},
|
||||
Regex: MustNewRegexp("expensive_metric.*"),
|
||||
Regex: relabel.MustNewRegexp("expensive_metric.*"),
|
||||
Separator: ";",
|
||||
Replacement: DefaultRelabelConfig.Replacement,
|
||||
Action: RelabelDrop,
|
||||
Replacement: relabel.DefaultRelabelConfig.Replacement,
|
||||
Action: relabel.Drop,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
JobName: "service-y",
|
||||
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: DefaultScrapeConfig.MetricsPath,
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
|
@ -280,7 +298,7 @@ var expectedConf = &Config{
|
|||
Server: "localhost:1234",
|
||||
Token: "mysecret",
|
||||
Services: []string{"nginx", "cache", "mysql"},
|
||||
ServiceTag: "canary",
|
||||
ServiceTags: []string{"canary", "v1"},
|
||||
NodeMeta: map[string]string{"rack": "123"},
|
||||
TagSeparator: consul.DefaultSDConfig.TagSeparator,
|
||||
Scheme: "https",
|
||||
|
@ -296,22 +314,23 @@ var expectedConf = &Config{
|
|||
},
|
||||
},
|
||||
|
||||
RelabelConfigs: []*RelabelConfig{
|
||||
RelabelConfigs: []*relabel.Config{
|
||||
{
|
||||
SourceLabels: model.LabelNames{"__meta_sd_consul_tags"},
|
||||
Regex: MustNewRegexp("label:([^=]+)=([^,]+)"),
|
||||
Regex: relabel.MustNewRegexp("label:([^=]+)=([^,]+)"),
|
||||
Separator: ",",
|
||||
TargetLabel: "${1}",
|
||||
Replacement: "${2}",
|
||||
Action: RelabelReplace,
|
||||
Action: relabel.Replace,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
JobName: "service-z",
|
||||
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: model.Duration(10 * time.Second),
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: model.Duration(10 * time.Second),
|
||||
|
||||
MetricsPath: "/metrics",
|
||||
Scheme: "http",
|
||||
|
@ -328,8 +347,9 @@ var expectedConf = &Config{
|
|||
{
|
||||
JobName: "service-kubernetes",
|
||||
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: DefaultScrapeConfig.MetricsPath,
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
|
@ -339,9 +359,15 @@ var expectedConf = &Config{
|
|||
{
|
||||
APIServer: kubernetesSDHostURL(),
|
||||
Role: kubernetes.RoleEndpoint,
|
||||
BasicAuth: &config_util.BasicAuth{
|
||||
Username: "myusername",
|
||||
Password: "mysecret",
|
||||
HTTPClientConfig: config_util.HTTPClientConfig{
|
||||
BasicAuth: &config_util.BasicAuth{
|
||||
Username: "myusername",
|
||||
Password: "mysecret",
|
||||
},
|
||||
TLSConfig: config_util.TLSConfig{
|
||||
CertFile: filepath.FromSlash("testdata/valid_cert_file"),
|
||||
KeyFile: filepath.FromSlash("testdata/valid_key_file"),
|
||||
},
|
||||
},
|
||||
NamespaceDiscovery: kubernetes.NamespaceDiscovery{},
|
||||
},
|
||||
|
@ -351,11 +377,18 @@ var expectedConf = &Config{
|
|||
{
|
||||
JobName: "service-kubernetes-namespaces",
|
||||
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: DefaultScrapeConfig.MetricsPath,
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
HTTPClientConfig: config_util.HTTPClientConfig{
|
||||
BasicAuth: &config_util.BasicAuth{
|
||||
Username: "myusername",
|
||||
PasswordFile: filepath.FromSlash("testdata/valid_password_file"),
|
||||
},
|
||||
},
|
||||
|
||||
ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{
|
||||
KubernetesSDConfigs: []*kubernetes.SDConfig{
|
||||
|
@ -374,8 +407,9 @@ var expectedConf = &Config{
|
|||
{
|
||||
JobName: "service-marathon",
|
||||
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: DefaultScrapeConfig.MetricsPath,
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
|
@ -401,8 +435,9 @@ var expectedConf = &Config{
|
|||
{
|
||||
JobName: "service-ec2",
|
||||
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: DefaultScrapeConfig.MetricsPath,
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
|
@ -433,8 +468,9 @@ var expectedConf = &Config{
|
|||
{
|
||||
JobName: "service-azure",
|
||||
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: DefaultScrapeConfig.MetricsPath,
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
|
@ -442,13 +478,14 @@ var expectedConf = &Config{
|
|||
ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{
|
||||
AzureSDConfigs: []*azure.SDConfig{
|
||||
{
|
||||
Environment: "AzurePublicCloud",
|
||||
SubscriptionID: "11AAAA11-A11A-111A-A111-1111A1111A11",
|
||||
TenantID: "BBBB222B-B2B2-2B22-B222-2BB2222BB2B2",
|
||||
ClientID: "333333CC-3C33-3333-CCC3-33C3CCCCC33C",
|
||||
ClientSecret: "mysecret",
|
||||
RefreshInterval: model.Duration(5 * time.Minute),
|
||||
Port: 9100,
|
||||
Environment: "AzurePublicCloud",
|
||||
SubscriptionID: "11AAAA11-A11A-111A-A111-1111A1111A11",
|
||||
TenantID: "BBBB222B-B2B2-2B22-B222-2BB2222BB2B2",
|
||||
ClientID: "333333CC-3C33-3333-CCC3-33C3CCCCC33C",
|
||||
ClientSecret: "mysecret",
|
||||
AuthenticationMethod: "OAuth",
|
||||
RefreshInterval: model.Duration(5 * time.Minute),
|
||||
Port: 9100,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -456,8 +493,9 @@ var expectedConf = &Config{
|
|||
{
|
||||
JobName: "service-nerve",
|
||||
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: DefaultScrapeConfig.MetricsPath,
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
|
@ -475,8 +513,9 @@ var expectedConf = &Config{
|
|||
{
|
||||
JobName: "0123service-xxx",
|
||||
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: DefaultScrapeConfig.MetricsPath,
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
|
@ -492,11 +531,33 @@ var expectedConf = &Config{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
JobName: "badfederation",
|
||||
|
||||
HonorTimestamps: false,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: "/federate",
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
|
||||
ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{
|
||||
StaticConfigs: []*targetgroup.Group{
|
||||
{
|
||||
Targets: []model.LabelSet{
|
||||
{model.AddressLabel: "localhost:9090"},
|
||||
},
|
||||
Source: "0",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
JobName: "測試",
|
||||
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: DefaultScrapeConfig.MetricsPath,
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
|
@ -515,8 +576,9 @@ var expectedConf = &Config{
|
|||
{
|
||||
JobName: "service-triton",
|
||||
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: DefaultScrapeConfig.MetricsPath,
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
|
@ -542,8 +604,9 @@ var expectedConf = &Config{
|
|||
{
|
||||
JobName: "service-openstack",
|
||||
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
HonorTimestamps: true,
|
||||
ScrapeInterval: model.Duration(15 * time.Second),
|
||||
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
|
||||
|
||||
MetricsPath: DefaultScrapeConfig.MetricsPath,
|
||||
Scheme: DefaultScrapeConfig.Scheme,
|
||||
|
@ -556,9 +619,9 @@ var expectedConf = &Config{
|
|||
Port: 80,
|
||||
RefreshInterval: model.Duration(60 * time.Second),
|
||||
TLSConfig: config_util.TLSConfig{
|
||||
CAFile: "valid_ca_file",
|
||||
CertFile: "valid_cert_file",
|
||||
KeyFile: "valid_key_file",
|
||||
CAFile: "testdata/valid_ca_file",
|
||||
CertFile: "testdata/valid_cert_file",
|
||||
KeyFile: "testdata/valid_key_file",
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -598,10 +661,10 @@ func TestLoadConfig(t *testing.T) {
|
|||
testutil.Ok(t, err)
|
||||
|
||||
expectedConf.original = c.original
|
||||
testutil.Equals(t, expectedConf, c)
|
||||
assert.Equal(t, expectedConf, c)
|
||||
}
|
||||
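TestLoadConfig now compares via testify's assert.Equal instead of testutil.Equals, presumably for its readable diff output on a struct as large as expectedConf. A minimal usage sketch:

package config_test

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestEqualSketch(t *testing.T) {
	// On mismatch, assert.Equal reports a field-by-field diff rather than
	// two opaque %v dumps.
	assert.Equal(t, []string{"a", "b"}, []string{"a", "b"})
}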
|
||||
// YAML marshalling must not reveal authentication credentials.
|
||||
// YAML marshaling must not reveal authentication credentials.
|
||||
func TestElideSecrets(t *testing.T) {
|
||||
c, err := LoadFile("testdata/conf.good.yml")
|
||||
testutil.Ok(t, err)
|
||||
|
@ -627,6 +690,11 @@ func TestLoadConfigRuleFilesAbsolutePath(t *testing.T) {
|
|||
testutil.Equals(t, ruleFilesExpectedConf, c)
|
||||
}
|
||||
|
||||
func TestKubernetesEmptyAPIServer(t *testing.T) {
|
||||
_, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml")
|
||||
testutil.Ok(t, err)
|
||||
}
|
||||
|
||||
var expectedErrors = []struct {
|
||||
filename string
|
||||
errMsg string
|
||||
|
@ -646,6 +714,9 @@ var expectedErrors = []struct {
|
|||
}, {
|
||||
filename: "labelname2.bad.yml",
|
||||
errMsg: `"not:allowed" is not a valid label name`,
|
||||
}, {
|
||||
filename: "labelvalue.bad.yml",
|
||||
errMsg: `"\xff" is not a valid label value`,
|
||||
}, {
|
||||
filename: "regex.bad.yml",
|
||||
errMsg: "error parsing regexp",
|
||||
|
@ -697,6 +768,9 @@ var expectedErrors = []struct {
|
|||
}, {
|
||||
filename: "bearertoken_basicauth.bad.yml",
|
||||
errMsg: "at most one of basic_auth, bearer_token & bearer_token_file must be configured",
|
||||
}, {
|
||||
filename: "kubernetes_http_config_without_api_server.bad.yml",
|
||||
errMsg: "to use custom HTTP client configuration please provide the 'api_server' URL explicitly",
|
||||
}, {
|
||||
filename: "kubernetes_bearertoken.bad.yml",
|
||||
errMsg: "at most one of bearer_token & bearer_token_file must be configured",
|
||||
|
@ -751,6 +825,62 @@ var expectedErrors = []struct {
|
|||
filename: "section_key_dup.bad.yml",
|
||||
errMsg: "field scrape_configs already set in type config.plain",
|
||||
},
|
||||
{
|
||||
filename: "azure_client_id_missing.bad.yml",
|
||||
errMsg: "azure SD configuration requires a client_id",
|
||||
},
|
||||
{
|
||||
filename: "azure_client_secret_missing.bad.yml",
|
||||
errMsg: "azure SD configuration requires a client_secret",
|
||||
},
|
||||
{
|
||||
filename: "azure_subscription_id_missing.bad.yml",
|
||||
errMsg: "azure SD configuration requires a subscription_id",
|
||||
},
|
||||
{
|
||||
filename: "azure_tenant_id_missing.bad.yml",
|
||||
errMsg: "azure SD configuration requires a tenant_id",
|
||||
},
|
||||
{
|
||||
filename: "azure_authentication_method.bad.yml",
|
||||
errMsg: "unknown authentication_type \"invalid\". Supported types are \"OAuth\" or \"ManagedIdentity\"",
|
||||
},
|
||||
{
|
||||
filename: "empty_scrape_config.bad.yml",
|
||||
errMsg: "empty or null scrape config section",
|
||||
},
|
||||
{
|
||||
filename: "empty_rw_config.bad.yml",
|
||||
errMsg: "empty or null remote write config section",
|
||||
},
|
||||
{
|
||||
filename: "empty_rr_config.bad.yml",
|
||||
errMsg: "empty or null remote read config section",
|
||||
},
|
||||
{
|
||||
filename: "empty_target_relabel_config.bad.yml",
|
||||
errMsg: "empty or null target relabeling rule",
|
||||
},
|
||||
{
|
||||
filename: "empty_metric_relabel_config.bad.yml",
|
||||
errMsg: "empty or null metric relabeling rule",
|
||||
},
|
||||
{
|
||||
filename: "empty_alert_relabel_config.bad.yml",
|
||||
errMsg: "empty or null alert relabeling rule",
|
||||
},
|
||||
{
|
||||
filename: "empty_alertmanager_relabel_config.bad.yml",
|
||||
errMsg: "empty or null Alertmanager target relabeling rule",
|
||||
},
|
||||
{
|
||||
filename: "empty_rw_relabel_config.bad.yml",
|
||||
errMsg: "empty or null relabeling rule in remote write config",
|
||||
},
|
||||
{
|
||||
filename: "empty_static_config.bad.yml",
|
||||
errMsg: "empty or null section in static_configs",
|
||||
},
|
||||
}
|
||||
|
||||
func TestBadConfigs(t *testing.T) {
|
||||
|
@ -793,33 +923,6 @@ func TestEmptyGlobalBlock(t *testing.T) {
|
|||
testutil.Equals(t, exp, *c)
|
||||
}
|
||||
|
||||
func TestTargetLabelValidity(t *testing.T) {
|
||||
tests := []struct {
|
||||
str string
|
||||
valid bool
|
||||
}{
|
||||
{"-label", false},
|
||||
{"label", true},
|
||||
{"label${1}", true},
|
||||
{"${1}label", true},
|
||||
{"${1}", true},
|
||||
{"${1}label", true},
|
||||
{"${", false},
|
||||
{"$", false},
|
||||
{"${}", false},
|
||||
{"foo${", false},
|
||||
{"$1", true},
|
||||
{"asd$2asd", true},
|
||||
{"-foo${1}bar-", false},
|
||||
{"_${1}_", true},
|
||||
{"foo${bar}foo", true},
|
||||
}
|
||||
for _, test := range tests {
|
||||
testutil.Assert(t, relabelTarget.Match([]byte(test.str)) == test.valid,
|
||||
"Expected %q to be %v", test.str, test.valid)
|
||||
}
|
||||
}
|
||||
|
||||
func kubernetesSDHostURL() config_util.URL {
|
||||
tURL, _ := url.Parse("https://localhost:1234")
|
||||
return config_util.URL{URL: tURL}
|
||||
|
|
config/testdata/azure_authentication_method.bad.yml (vendored, new file, 4 lines)
|
@ -0,0 +1,4 @@
|
|||
scrape_configs:
|
||||
- azure_sd_configs:
|
||||
- authentication_method: invalid
|
||||
subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11
|
config/testdata/azure_client_id_missing.bad.yml (vendored, new file, 7 lines)
|
@ -0,0 +1,7 @@
|
|||
scrape_configs:
|
||||
- job_name: azure
|
||||
azure_sd_configs:
|
||||
- subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11
|
||||
tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2
|
||||
client_id:
|
||||
client_secret: mysecret
|
config/testdata/azure_client_secret_missing.bad.yml (vendored, new file, 7 lines)
|
@ -0,0 +1,7 @@
|
|||
scrape_configs:
|
||||
- job_name: azure
|
||||
azure_sd_configs:
|
||||
- subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11
|
||||
tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2
|
||||
client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C
|
||||
client_secret:
|
config/testdata/azure_subscription_id_missing.bad.yml (vendored, new file, 7 lines)
|
@ -0,0 +1,7 @@
|
|||
scrape_configs:
|
||||
- job_name: azure
|
||||
azure_sd_configs:
|
||||
- subscription_id:
|
||||
tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2
|
||||
client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C
|
||||
client_secret: mysecret
|
config/testdata/azure_tenant_id_missing.bad.yml (vendored, new file, 7 lines)
|
@ -0,0 +1,7 @@
|
|||
scrape_configs:
|
||||
- job_name: azure
|
||||
azure_sd_configs:
|
||||
- subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11
|
||||
tenant_id:
|
||||
client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C
|
||||
client_secret: mysecret
|
config/testdata/conf.good.yml (vendored, 28 lines changed)
|
@ -19,6 +19,9 @@ remote_write:
|
|||
regex: expensive.*
|
||||
action: drop
|
||||
- url: http://remote2/push
|
||||
tls_config:
|
||||
cert_file: valid_cert_file
|
||||
key_file: valid_key_file
|
||||
|
||||
remote_read:
|
||||
- url: http://remote1/read
|
||||
|
@ -27,6 +30,9 @@ remote_read:
|
|||
read_recent: false
|
||||
required_matchers:
|
||||
job: special
|
||||
tls_config:
|
||||
cert_file: valid_cert_file
|
||||
key_file: valid_key_file
|
||||
|
||||
scrape_configs:
|
||||
- job_name: prometheus
|
||||
|
@ -122,7 +128,7 @@ scrape_configs:
|
|||
- server: 'localhost:1234'
|
||||
token: mysecret
|
||||
services: ['nginx', 'cache', 'mysql']
|
||||
tag: "canary"
|
||||
tags: ["canary", "v1"]
|
||||
node_meta:
|
||||
rack: "123"
|
||||
allow_stale: true
|
||||
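This exercises the consul SD change above: the singular tag option becomes tags, so a service must now carry every listed tag (here both canary and v1) to be discovered; the ServiceTag → ServiceTags rename in the expected config mirrors it.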
|
@ -153,6 +159,9 @@ scrape_configs:
|
|||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
api_server: 'https://localhost:1234'
|
||||
tls_config:
|
||||
cert_file: valid_cert_file
|
||||
key_file: valid_key_file
|
||||
|
||||
basic_auth:
|
||||
username: 'myusername'
|
||||
|
@ -167,6 +176,11 @@ scrape_configs:
|
|||
names:
|
||||
- default
|
||||
|
||||
basic_auth:
|
||||
username: 'myusername'
|
||||
password_file: valid_password_file
|
||||
|
||||
|
||||
- job_name: service-marathon
|
||||
marathon_sd_configs:
|
||||
- servers:
|
||||
|
@ -196,6 +210,7 @@ scrape_configs:
|
|||
- job_name: service-azure
|
||||
azure_sd_configs:
|
||||
- environment: AzurePublicCloud
|
||||
authentication_method: OAuth
|
||||
subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11
|
||||
tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2
|
||||
client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C
|
||||
|
@ -215,6 +230,13 @@ scrape_configs:
|
|||
- targets:
|
||||
- localhost:9090
|
||||
|
||||
- job_name: badfederation
|
||||
honor_timestamps: false
|
||||
metrics_path: /federate
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost:9090
|
||||
|
||||
- job_name: 測試
|
||||
metrics_path: /metrics
|
||||
static_configs:
|
||||
|
@ -230,8 +252,8 @@ scrape_configs:
|
|||
refresh_interval: 1m
|
||||
version: 1
|
||||
tls_config:
|
||||
cert_file: testdata/valid_cert_file
|
||||
key_file: testdata/valid_key_file
|
||||
cert_file: valid_cert_file
|
||||
key_file: valid_key_file
|
||||
|
||||
- job_name: service-openstack
|
||||
openstack_sd_configs:
|
||||
|
|
config/testdata/empty_alert_relabel_config.bad.yml (vendored, new file, 3 lines)
|
@ -0,0 +1,3 @@
|
|||
alerting:
|
||||
alert_relabel_configs:
|
||||
-
|
config/testdata/empty_alertmanager_relabel_config.bad.yml (vendored, new file, 4 lines)
|
@ -0,0 +1,4 @@
|
|||
alerting:
|
||||
alertmanagers:
|
||||
- relabel_configs:
|
||||
-
|
config/testdata/empty_metric_relabel_config.bad.yml (vendored, new file, 4 lines)
|
@ -0,0 +1,4 @@
|
|||
scrape_configs:
|
||||
- job_name: "test"
|
||||
metric_relabel_configs:
|
||||
-
|
config/testdata/empty_rr_config.bad.yml (vendored, new file, 2 lines)
|
@ -0,0 +1,2 @@
|
|||
remote_read:
|
||||
-
|
config/testdata/empty_rw_config.bad.yml (vendored, new file, 2 lines)
|
@ -0,0 +1,2 @@
|
|||
remote_write:
|
||||
-
|
config/testdata/empty_rw_relabel_config.bad.yml (vendored, new file, 4 lines)
|
@ -0,0 +1,4 @@
|
|||
remote_write:
|
||||
- url: "foo"
|
||||
write_relabel_configs:
|
||||
-
|
config/testdata/empty_scrape_config.bad.yml (vendored, new file, 2 lines)
|
@ -0,0 +1,2 @@
|
|||
scrape_configs:
|
||||
-
|
config/testdata/empty_static_config.bad.yml (vendored, new file, 4 lines)
|
@ -0,0 +1,4 @@
|
|||
scrape_configs:
|
||||
- job_name: "test"
|
||||
static_configs:
|
||||
-
|
config/testdata/empty_target_relabel_config.bad.yml (vendored, new file, 4 lines)
|
@ -0,0 +1,4 @@
|
|||
scrape_configs:
|
||||
- job_name: "test"
|
||||
relabel_configs:
|
||||
-
|
config/testdata/kubernetes_empty_apiserver.good.yml (vendored, new file, 4 lines)
|
@ -0,0 +1,4 @@
|
|||
scrape_configs:
|
||||
- job_name: prometheus
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
config/testdata/kubernetes_http_config_without_api_server.bad.yml (vendored, new file, 5 lines)
|
@ -0,0 +1,5 @@
|
|||
scrape_configs:
|
||||
- job_name: prometheus
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
bearer_token: 1234
|
config/testdata/labelvalue.bad.yml (vendored, new file, 3 lines)
|
@ -0,0 +1,3 @@
|
|||
global:
|
||||
external_labels:
|
||||
name: !!binary "/w=="
|
console_libraries/menu.lib
@ -2,33 +2,37 @@
|
|||
|
||||
{{/* Navbar, should be passed . */}}
|
||||
{{ define "navbar" }}
|
||||
<nav class="navbar navbar-inverse navbar-static-top">
|
||||
<nav class="navbar fixed-top navbar-expand-sm navbar-dark bg-dark">
|
||||
<div class="container-fluid">
|
||||
<!-- Brand and toggle get grouped for better mobile display -->
|
||||
<div class="navbar-header">
|
||||
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
|
||||
<span class="sr-only">Toggle navigation</span>
|
||||
<span class="icon-bar"></span>
|
||||
<span class="icon-bar"></span>
|
||||
<span class="icon-bar"></span>
|
||||
<button type="button" class="navbar-toggler" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false" aria-controls="navbar-nav" aria-label="toggle navigation">
|
||||
<span class="navbar-toggler-icon"></span>
|
||||
|
||||
|
||||
|
||||
|
||||
</button>
|
||||
<a class="navbar-brand" href="{{ pathPrefix }}/">Prometheus</a>
|
||||
</div>
|
||||
|
||||
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
|
||||
<ul class="nav navbar-nav">
|
||||
<li><a href="{{ pathPrefix }}/alerts">Alerts</a></li>
|
||||
<li><a href="https://www.pagerduty.com/">PagerDuty</a></li>
|
||||
</div>
|
||||
</ul>
|
||||
<li class="nav-item"><a class="nav-link" href="{{ pathPrefix }}/alerts">Alerts</a></li>
|
||||
<li class="nav-item"><a class="nav-link" href="https://www.pagerduty.com/">PagerDuty</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
{{ end }}
|
||||
|
||||
{{/* LHS menu, should be passed . */}}
|
||||
{{ define "menu" }}
|
||||
<div class="prom_lhs_menu">
|
||||
<ul>
|
||||
<div class="prom_lhs_menu row">
|
||||
<nav class="col-md-2 md-block bg-dark sidebar prom_lhs_menu_nav">
|
||||
<div class="sidebar-sticky">
|
||||
<ul class="nav flex-column">
|
||||
|
||||
{{ template "_menuItem" (args . "index.html.example" "Overview") }}
|
||||
|
||||
{{ if query "up{job='node'}" }}
|
||||
|
@ -36,15 +40,15 @@
|
|||
{{ if match "^node" .Path }}
|
||||
{{ if .Params.instance }}
|
||||
<ul>
|
||||
<li {{ if eq .Path "node-overview.html" }}class="prom_lhs_menu_selected"{{ end }}>
|
||||
<a href="node-overview.html?instance={{ .Params.instance }}">{{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</a>
|
||||
<li {{ if eq .Path "node-overview.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
|
||||
<a class="nav-link" href="node-overview.html?instance={{ .Params.instance }}">{{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</a>
|
||||
</li>
|
||||
<ul>
|
||||
<li {{ if eq .Path "node-cpu.html" }}class="prom_lhs_menu_selected"{{ end }}>
|
||||
<a href="node-cpu.html?instance={{ .Params.instance }}">CPU</a>
|
||||
<li {{ if eq .Path "node-cpu.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
|
||||
<a class="nav-link" href="node-cpu.html?instance={{ .Params.instance }}">CPU</a>
|
||||
</li>
|
||||
<li {{ if eq .Path "node-disk.html" }}class="prom_lhs_menu_selected"{{ end }}>
|
||||
<a href="node-disk.html?instance={{ .Params.instance }}">Disk</a>
|
||||
<li {{ if eq .Path "node-disk.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
|
||||
<a class="nav-link" href="node-disk.html?instance={{ .Params.instance }}">Disk</a>
|
||||
</li>
|
||||
</ul>
|
||||
</ul>
|
||||
|
@ -57,8 +61,8 @@
|
|||
{{ if match "^prometheus" .Path }}
|
||||
{{ if .Params.instance }}
|
||||
<ul>
|
||||
<li {{ if eq .Path "prometheus-overview.html" }}class="prom_lhs_menu_selected"{{ end }}>
|
||||
<a href="prometheus-overview.html?instance={{ .Params.instance }}">{{.Params.instance }}</a>
|
||||
<li {{ if eq .Path "prometheus-overview.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
|
||||
<a class="nav-link" href="prometheus-overview.html?instance={{ .Params.instance }}">{{.Params.instance }}</a>
|
||||
</li>
|
||||
</ul>
|
||||
{{ end }}
|
||||
|
@ -67,10 +71,12 @@
|
|||
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
{{/* Helper, pass (args . path name) */}}
|
||||
{{ define "_menuItem" }}
|
||||
<li {{ if eq .arg0.Path .arg1 }} class="prom_lhs_menu_selected" {{ end }}><a href="{{ .arg1 }}">{{ .arg2 }}</a></li>
|
||||
<li {{ if eq .arg0.Path .arg1 }} class="prom_lhs_menu_selected nav-item" {{ end }}><a class="nav-link" href="{{ .arg1 }}">{{ .arg2 }}</a></li>
|
||||
{{ end }}
|
||||
|
||||
console_libraries/prom.lib
@@ -2,13 +2,15 @@
{{/* Load Prometheus console library JS/CSS. Should go in <head> */}}
{{ define "prom_console_head" }}
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/static/vendor/rickshaw/rickshaw.min.css">
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/static/vendor/bootstrap-3.3.1/css/bootstrap.min.css">
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/static/vendor/bootstrap-4.3.1/css/bootstrap.min.css">
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/static/css/prom_console.css">
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/static/vendor/bootstrap4-glyphicons/css/bootstrap-glyphicons.min.css">
<script src="{{ pathPrefix }}/static/vendor/rickshaw/vendor/d3.v3.js"></script>
<script src="{{ pathPrefix }}/static/vendor/rickshaw/vendor/d3.layout.min.js"></script>
<script src="{{ pathPrefix }}/static/vendor/rickshaw/rickshaw.min.js"></script>
<script src="{{ pathPrefix }}/static/vendor/js/jquery.min.js"></script>
<script src="{{ pathPrefix }}/static/vendor/bootstrap-3.3.1/js/bootstrap.min.js"></script>
<script src="{{ pathPrefix }}/static/vendor/js/jquery-3.3.1.min.js"></script>
<script src="{{ pathPrefix }}/static/vendor/js/popper.min.js"></script>
<script src="{{ pathPrefix }}/static/vendor/bootstrap-4.3.1/js/bootstrap.min.js"></script>

<script>
var PATH_PREFIX = "{{ pathPrefix }}";

@@ -17,13 +19,15 @@ var PATH_PREFIX = "{{ pathPrefix }}";
{{ end }}

{{/* Top of all pages. */}}
{{ define "head" }}
<html>
{{ define "head" -}}
<!doctype html>
<html lang="en">
<head>
{{ template "prom_console_head" }}
</head>
<body>
{{ template "navbar" . }}

{{ template "menu" . }}
{{ end }}

@@ -32,6 +36,7 @@ var PATH_PREFIX = "{{ pathPrefix }}";
{{ define "humanizeNoSmallPrefix" }}{{ if and (lt . 1.0) (gt . -1.0) }}{{ printf "%.3g" . }}{{ else }}{{ humanize . }}{{ end }}{{ end }}
{{ define "humanize1024" }}{{ humanize1024 . }}{{ end }}
{{ define "humanizeDuration" }}{{ humanizeDuration . }}{{ end }}
{{ define "humanizePercentage" }}{{ humanizePercentage . }}{{ end }}
{{ define "humanizeTimestamp" }}{{ humanizeTimestamp . }}{{ end }}
{{ define "printf.1f" }}{{ printf "%.1f" . }}{{ end }}
{{ define "printf.3g" }}{{ printf "%.3g" . }}{{ end }}

@@ -50,7 +55,7 @@ renderTemplate is the name of the template to use to render the value.

{{ define "prom_right_table_head" }}
<div class="prom_console_rhs">
<table class="table table-bordered table-hover table-condensed">
<table class="table table-bordered table-hover table-sm">
{{ end }}
{{ define "prom_right_table_tail" }}
</table>

@@ -76,7 +81,7 @@ renderTemplate is the name of the template to use to render the value.

{{ define "prom_content_head" }}
<div class="prom_console_content">
<div class="container">
<div class="container-fluid">
{{ template "prom_graph_timecontrol" . }}
{{ end }}
{{ define "prom_content_tail" }}

@@ -87,34 +92,32 @@ renderTemplate is the name of the template to use to render the value.
{{ define "prom_graph_timecontrol" }}
<div class="prom_graph_timecontrol">
<div class="prom_graph_timecontrol_inner">
<div class="prom_graph_timecontrol_group">
<button class="btn btn-default pull-left" type="button" id="prom_graph_duration_shrink" title="Shrink the time range.">
<div class="prom_graph_timecontrol_group ">
<button class="btn btn-light pull-left" type="button" id="prom_graph_duration_shrink" title="Shrink the time range.">
<i class="glyphicon glyphicon-minus"></i>
</button>
<input class="input pull-left" size="3" title="Time range of graph" type="text" id="prom_graph_duration">
<button class="btn btn-default pull-left" type="button" id="prom_graph_duration_grow" title="Grow the time range.">
</button><!-- Comments between elements to remove spaces
--><input class="input pull-left align-middle" size="3" title="Time range of graph" type="text" id="prom_graph_duration"><!--
--><button class="btn btn-light pull-left" type="button" id="prom_graph_duration_grow" title="Grow the time range.">
<i class="glyphicon glyphicon-plus"></i>
</button>
</div>

<div class="prom_graph_timecontrol_group">
<button class="btn btn-default pull-left" type="button" id="prom_graph_time_back" title="Rewind the end time.">
<div class="prom_graph_timecontrol_group ">
<button class="btn btn-light pull-left" type="button" id="prom_graph_time_back" title="Rewind the end time.">
<i class="glyphicon glyphicon-backward"></i>
</button>
<input class="input pull-left" title="End time of graph" placeholder="Until" type="text" id="prom_graph_time_end" size="16" value="">
<button class="btn btn-default pull-left" type="button" id="prom_graph_time_forward" title="Advance the end time.">
</button><!--
--><input class="input pull-left align-middle" title="End time of graph" placeholder="Until" type="text" id="prom_graph_time_end" size="16" value=""><!--
--><button class="btn btn-light pull-left" type="button" id="prom_graph_time_forward" title="Advance the end time.">
<i class="glyphicon glyphicon-forward"></i>
</button>
</div>

<div class="prom_graph_timecontrol_group">
<div class="prom_graph_timecontrol_group ">
<div class="btn-group dropup prom_graph_timecontrol_refresh pull-left">
<button type="button" class="btn btn-default pull-left" id="prom_graph_refresh_button" title="Refresh.">
<button type="button" class="btn btn-light pull-left" id="prom_graph_refresh_button" title="Refresh.">
<i class="glyphicon glyphicon-repeat"></i>
<span class="icon-repeat"></span>
(<span id="prom_graph_refresh_button_value">Off</span>)
</button>
<button type="button" class="btn btn-default pull-left dropdown-toggle" data-toggle="dropdown" title="Set autorefresh." aria-expanded="false">
<button type="button" class="btn btn-light pull-left dropdown-toggle" data-toggle="dropdown" title="Set autorefresh." aria-haspopup="true" aria-expanded="false">
<span class="caret"></span>
</button>
<ul class="dropdown-menu" id="prom_graph_refresh_intervals" role="menu">
@@ -8,7 +8,7 @@
<p>These are example consoles for Prometheus.</p>

<p>These consoles expect exporters to have the following job labels:</p>
<table class="table table-condensed table-striped table-bordered" style="width: 0%">
<table class="table table-sm table-striped table-bordered" style="width: 0%">
<tr>
<th>Exporter</th>
<th>Job label</th>
@@ -60,6 +60,7 @@
{{ template "prom_right_table_tail" }}

{{ template "prom_content_head" . }}
<div class="prom_content_div">
<h1>Prometheus Overview - {{ .Params.instance }}</h1>

<h3>Ingested Samples</h3>

@@ -89,7 +90,7 @@
yUnits: "/s",
})
</script>

</div>
{{ template "prom_content_tail" . }}

{{ template "tail" }}
@@ -10,7 +10,7 @@
{{ template "prom_content_head" . }}
<h1>Prometheus</h1>

<table class="table table-condensed table-striped table-bordered" style="width: 0%">
<table class="table table-sm table-striped table-bordered" style="width: 0%">
<tr>
<th>Prometheus</th>
<th>Up</th>

@@ -27,6 +27,7 @@
{{ else }}
<tr><td colspan=4>No devices found.</td></tr>
{{ end }}
</table>

{{ template "prom_content_tail" . }}
@@ -1,4 +1,4 @@
### Service Discovery
# Service Discovery

This directory contains the service discovery (SD) component of Prometheus.

@@ -15,7 +15,7 @@ what makes a good SD and covers some of the common implementation issues.

The first question to be asked is does it make sense to add this particular
SD? An SD mechanism should be reasonably well established, and at a minimum in
use across multiple organisations. It should allow discovering of machines
use across multiple organizations. It should allow discovering of machines
and/or services running somewhere. When exactly an SD is popular enough to
justify being added to Prometheus natively is an open question.

@@ -59,7 +59,7 @@ label with the host:port of the target (preferably an IP address to avoid DNS
lookups). No other labelnames should be exposed.

It is very common for initial pull requests for new SDs to include hardcoded
assumptions that make sense for the the author's setup. SD should be generic,
assumptions that make sense for the author's setup. SD should be generic,
any customisation should be handled via relabelling. There should be basically
no business logic, filtering, or transformations of the data from the SD beyond
that which is needed to fit it into the metadata data model.

@@ -131,23 +131,23 @@ the Prometheus server will be able to see them.

### The SD interface

A Service Discovery (SD) mechanism has to discover targets and provide them to Prometheus. We expect similar targets to be grouped together, in the form of a [`TargetGroup`](https://godoc.org/github.com/prometheus/prometheus/config#TargetGroup). The SD mechanism sends the targets down to prometheus as list of `TargetGroups`.
A Service Discovery (SD) mechanism has to discover targets and provide them to Prometheus. We expect similar targets to be grouped together, in the form of a [target group](https://godoc.org/github.com/prometheus/prometheus/discovery/targetgroup#Group). The SD mechanism sends the targets down to prometheus as list of target groups.

An SD mechanism has to implement the `Discoverer` Interface:
```go
type Discoverer interface {
Run(ctx context.Context, up chan<- []*config.TargetGroup)
Run(ctx context.Context, up chan<- []*targetgroup.Group)
}
```

Prometheus will call the `Run()` method on a provider to initialise the discovery mechanism. The mechanism will then send *all* the `TargetGroup`s into the channel.
Now the mechanism will watch for changes. For each update it can send all `TargetGroup`s, or only changed and new `TargetGroup`s, down the channel. `Manager` will handle
Prometheus will call the `Run()` method on a provider to initialize the discovery mechanism. The mechanism will then send *all* the target groups into the channel.
Now the mechanism will watch for changes. For each update it can send all target groups, or only changed and new target groups, down the channel. `Manager` will handle
both cases.

For example if we had a discovery mechanism and it retrieves the following groups:

```
[]config.TargetGroup{
[]targetgroup.Group{
{
Targets: []model.LabelSet{
{

@@ -187,11 +187,11 @@ For example if we had a discovery mechanism and it retrieves the following group
}
```

Here there are two `TargetGroups` one group with source `file1` and another with `file2`. The grouping is implementation specific and could even be one target per group. But, one has to make sure every target group sent by an SD instance should have a `Source` which is unique across all the `TargetGroup`s of that SD instance.
Here there are two target groups one group with source `file1` and another with `file2`. The grouping is implementation specific and could even be one target per group. But, one has to make sure every target group sent by an SD instance should have a `Source` which is unique across all the target groups of that SD instance.

In this case, both the `TargetGroup`s are sent down the channel the first time `Run()` is called. Now, for an update, we need to send the whole _changed_ `TargetGroup` down the channel. i.e, if the target with `hostname: demo-postgres-2` goes away, we send:
In this case, both the target groups are sent down the channel the first time `Run()` is called. Now, for an update, we need to send the whole _changed_ target group down the channel. i.e, if the target with `hostname: demo-postgres-2` goes away, we send:
```
&config.TargetGroup{
&targetgroup.Group{
Targets: []model.LabelSet{
{
"__instance__": "10.11.122.11:6001",

@@ -209,7 +209,7 @@ down the channel.

If all the targets in a group go away, we need to send the target groups with empty `Targets` down the channel. i.e, if all targets with `job: postgres` go away, we send:
```
&config.TargetGroup{
&targetgroup.Group{
Targets: nil,
"Source": "file2",
}
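[Editorial note] To make the README's interface contract concrete, here is a minimal sketch, not part of this commit, of a `Discoverer` that sends one fixed target group and then waits for cancellation. The package name, source string, and target address are made-up placeholders; only the `Discoverer` signature and the `targetgroup.Group` fields come from the diff above.

```go
package example

import (
	"context"

	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

// staticDiscoverer sends a single static target group, then blocks
// until Prometheus cancels the context.
type staticDiscoverer struct{}

func (staticDiscoverer) Run(ctx context.Context, up chan<- []*targetgroup.Group) {
	tg := &targetgroup.Group{
		Source:  "example", // must be unique within this SD instance
		Targets: []model.LabelSet{{model.AddressLabel: "10.0.0.1:9100"}},
	}
	select {
	case up <- []*targetgroup.Group{tg}:
	case <-ctx.Done():
		return
	}
	<-ctx.Done() // a real mechanism would keep watching for changes here
}
```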
@@ -19,65 +19,68 @@ import (
"net"
"net/http"
"strings"
"sync"
"time"

"github.com/Azure/azure-sdk-for-go/arm/compute"
"github.com/Azure/azure-sdk-for-go/arm/network"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-10-01/network"
"github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/adal"
"github.com/Azure/go-autorest/autorest/azure"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/pkg/errors"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"

"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
)

const (
azureLabel = model.MetaLabelPrefix + "azure_"
azureLabelSubscriptionID = azureLabel + "subscription_id"
azureLabelTenantID = azureLabel + "tenant_id"
azureLabelMachineID = azureLabel + "machine_id"
azureLabelMachineResourceGroup = azureLabel + "machine_resource_group"
azureLabelMachineName = azureLabel + "machine_name"
azureLabelMachineOSType = azureLabel + "machine_os_type"
azureLabelMachineLocation = azureLabel + "machine_location"
azureLabelMachinePrivateIP = azureLabel + "machine_private_ip"
azureLabelMachinePublicIP = azureLabel + "machine_public_ip"
azureLabelMachineTag = azureLabel + "machine_tag_"
azureLabelMachineScaleSet = azureLabel + "machine_scale_set"

authMethodOAuth = "OAuth"
authMethodManagedIdentity = "ManagedIdentity"
)

var (
azureSDRefreshFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_azure_refresh_failures_total",
Help: "Number of Azure-SD refresh failures.",
})
azureSDRefreshDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Name: "prometheus_sd_azure_refresh_duration_seconds",
Help: "The duration of a Azure-SD refresh in seconds.",
})

// DefaultSDConfig is the default Azure SD configuration.
DefaultSDConfig = SDConfig{
Port: 80,
RefreshInterval: model.Duration(5 * time.Minute),
Environment: azure.PublicCloud.Name,
}
)
// DefaultSDConfig is the default Azure SD configuration.
var DefaultSDConfig = SDConfig{
Port: 80,
RefreshInterval: model.Duration(5 * time.Minute),
Environment: azure.PublicCloud.Name,
AuthenticationMethod: authMethodOAuth,
}

// SDConfig is the configuration for Azure based service discovery.
type SDConfig struct {
Environment string `yaml:"environment,omitempty"`
Port int `yaml:"port"`
SubscriptionID string `yaml:"subscription_id"`
TenantID string `yaml:"tenant_id,omitempty"`
ClientID string `yaml:"client_id,omitempty"`
ClientSecret config_util.Secret `yaml:"client_secret,omitempty"`
RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
Environment string `yaml:"environment,omitempty"`
Port int `yaml:"port"`
SubscriptionID string `yaml:"subscription_id"`
TenantID string `yaml:"tenant_id,omitempty"`
ClientID string `yaml:"client_id,omitempty"`
ClientSecret config_util.Secret `yaml:"client_secret,omitempty"`
RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
AuthenticationMethod string `yaml:"authentication_method,omitempty"`
}

func validateAuthParam(param, name string) error {
if len(param) == 0 {
return errors.Errorf("azure SD configuration requires a %s", name)
}
return nil
}

// UnmarshalYAML implements the yaml.Unmarshaler interface.

@@ -88,24 +91,35 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
if err != nil {
return err
}
if c.SubscriptionID == "" {
return fmt.Errorf("Azure SD configuration requires a subscription_id")

if err = validateAuthParam(c.SubscriptionID, "subscription_id"); err != nil {
return err
}

if c.AuthenticationMethod == authMethodOAuth {
if err = validateAuthParam(c.TenantID, "tenant_id"); err != nil {
return err
}
if err = validateAuthParam(c.ClientID, "client_id"); err != nil {
return err
}
if err = validateAuthParam(string(c.ClientSecret), "client_secret"); err != nil {
return err
}
}

if c.AuthenticationMethod != authMethodOAuth && c.AuthenticationMethod != authMethodManagedIdentity {
return errors.Errorf("unknown authentication_type %q. Supported types are %q or %q", c.AuthenticationMethod, authMethodOAuth, authMethodManagedIdentity)
}

return nil
}
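[Editorial note] For illustration of the new `authentication_method` field: with managed identity, the OAuth credential fields may stay empty and the validation above still passes, since tenant/client/secret are only checked under `authMethodOAuth`. The values below are hypothetical placeholders, not from this diff.

```go
package main

import (
	"time"

	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/discovery/azure"
)

func main() {
	// Start from the package defaults and switch to managed identity.
	cfg := azure.DefaultSDConfig
	cfg.SubscriptionID = "00000000-0000-0000-0000-000000000000" // placeholder
	cfg.Port = 9100
	cfg.RefreshInterval = model.Duration(5 * time.Minute)
	cfg.AuthenticationMethod = "ManagedIdentity" // no tenant/client/secret needed
	_ = cfg
}
```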
func init() {
prometheus.MustRegister(azureSDRefreshDuration)
prometheus.MustRegister(azureSDRefreshFailuresCount)
}

// Discovery periodically performs Azure-SD requests. It implements
// the Discoverer interface.
type Discovery struct {
cfg *SDConfig
interval time.Duration
port int
logger log.Logger
*refresh.Discovery
logger log.Logger
cfg *SDConfig
port int
}

// NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets.

@@ -113,42 +127,18 @@ func NewDiscovery(cfg *SDConfig, logger log.Logger) *Discovery {
if logger == nil {
logger = log.NewNopLogger()
}
return &Discovery{
cfg: cfg,
interval: time.Duration(cfg.RefreshInterval),
port: cfg.Port,
logger: logger,
}
}

// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
ticker := time.NewTicker(d.interval)
defer ticker.Stop()

for {
select {
case <-ctx.Done():
return
default:
}

tg, err := d.refresh()
if err != nil {
level.Error(d.logger).Log("msg", "Unable to refresh during Azure discovery", "err", err)
} else {
select {
case <-ctx.Done():
case ch <- []*targetgroup.Group{tg}:
}
}

select {
case <-ticker.C:
case <-ctx.Done():
return
}
d := &Discovery{
cfg: cfg,
port: cfg.Port,
logger: logger,
}
d.Discovery = refresh.NewDiscovery(
logger,
"azure",
time.Duration(cfg.RefreshInterval),
d.refresh,
)
return d
}
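[Editorial note] The hand-rolled ticker loop deleted above moves into the shared `discovery/refresh` helper, whose internals are not shown in this diff. The sketch below is a hedged approximation of what such a helper does, reconstructed from the deleted loop; the function name is invented for illustration.

```go
package refreshsketch

import (
	"context"
	"time"

	"github.com/go-kit/kit/log"
	"github.com/go-kit/kit/log/level"

	"github.com/prometheus/prometheus/discovery/targetgroup"
)

// runLoop approximates the shared helper: call refreshf on every tick
// and forward the groups, exactly like the loop deleted above.
func runLoop(ctx context.Context, interval time.Duration, logger log.Logger,
	refreshf func(context.Context) ([]*targetgroup.Group, error),
	ch chan<- []*targetgroup.Group) {

	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		tgs, err := refreshf(ctx)
		if err != nil {
			level.Error(logger).Log("msg", "refresh failed", "err", err)
		} else {
			select {
			case ch <- tgs:
			case <-ctx.Done():
				return
			}
		}
		select {
		case <-ticker.C:
		case <-ctx.Done():
			return
		}
	}
}
```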
// azureClient represents multiple Azure Resource Manager providers.

@@ -170,13 +160,30 @@ func createAzureClient(cfg SDConfig) (azureClient, error) {
resourceManagerEndpoint := env.ResourceManagerEndpoint

var c azureClient
oauthConfig, err := adal.NewOAuthConfig(activeDirectoryEndpoint, cfg.TenantID)
if err != nil {
return azureClient{}, err
}
spt, err := adal.NewServicePrincipalToken(*oauthConfig, cfg.ClientID, string(cfg.ClientSecret), resourceManagerEndpoint)
if err != nil {
return azureClient{}, err

var spt *adal.ServicePrincipalToken

switch cfg.AuthenticationMethod {
case authMethodManagedIdentity:
msiEndpoint, err := adal.GetMSIVMEndpoint()
if err != nil {
return azureClient{}, err
}

spt, err = adal.NewServicePrincipalTokenFromMSI(msiEndpoint, resourceManagerEndpoint)
if err != nil {
return azureClient{}, err
}
case authMethodOAuth:
oauthConfig, err := adal.NewOAuthConfig(activeDirectoryEndpoint, cfg.TenantID)
if err != nil {
return azureClient{}, err
}

spt, err = adal.NewServicePrincipalToken(*oauthConfig, cfg.ClientID, string(cfg.ClientSecret), resourceManagerEndpoint)
if err != nil {
return azureClient{}, err
}
}

bearerAuthorizer := autorest.NewBearerAuthorizer(spt)

@@ -204,25 +211,25 @@ type azureResource struct {

// virtualMachine represents an Azure virtual machine (which can also be created by a VMSS)
type virtualMachine struct {
ID string
Name string
Type string
Location string
OsType string
ScaleSet string
Tags map[string]*string
NetworkProfile compute.NetworkProfile
ID string
Name string
Type string
Location string
OsType string
ScaleSet string
Tags map[string]*string
NetworkInterfaces []string
}

// Create a new azureResource object from an ID string.
func newAzureResourceFromID(id string, logger log.Logger) (azureResource, error) {
// Resource IDs have the following format.
// /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME
// or if embeded resource then
// or if embedded resource then
// /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME/TYPE/NAME
s := strings.Split(id, "/")
if len(s) != 9 && len(s) != 11 {
err := fmt.Errorf("invalid ID '%s'. Refusing to create azureResource", id)
err := errors.Errorf("invalid ID '%s'. Refusing to create azureResource", id)
level.Error(logger).Log("err", err)
return azureResource{}, err
}

@@ -233,39 +240,31 @@ func newAzureResourceFromID(id string, logger log.Logger) (azureResource, error)
}, nil
}

func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
defer level.Debug(d.logger).Log("msg", "Azure discovery completed")

t0 := time.Now()
defer func() {
azureSDRefreshDuration.Observe(time.Since(t0).Seconds())
if err != nil {
azureSDRefreshFailuresCount.Inc()
}
}()
tg = &targetgroup.Group{}
client, err := createAzureClient(*d.cfg)
if err != nil {
return tg, fmt.Errorf("could not create Azure client: %s", err)
return nil, errors.Wrap(err, "could not create Azure client")
}

machines, err := client.getVMs()
machines, err := client.getVMs(ctx)
if err != nil {
return tg, fmt.Errorf("could not get virtual machines: %s", err)
return nil, errors.Wrap(err, "could not get virtual machines")
}

level.Debug(d.logger).Log("msg", "Found virtual machines during Azure discovery.", "count", len(machines))

// Load the vms managed by scale sets.
scaleSets, err := client.getScaleSets()
scaleSets, err := client.getScaleSets(ctx)
if err != nil {
return tg, fmt.Errorf("could not get virtual machine scale sets: %s", err)
return nil, errors.Wrap(err, "could not get virtual machine scale sets")
}

for _, scaleSet := range scaleSets {
scaleSetVms, err := client.getScaleSetVMs(scaleSet)
scaleSetVms, err := client.getScaleSetVMs(ctx, scaleSet)
if err != nil {
return tg, fmt.Errorf("could not get virtual machine scale set vms: %s", err)
return nil, errors.Wrap(err, "could not get virtual machine scale set vms")
}
machines = append(machines, scaleSetVms...)
}

@@ -277,9 +276,12 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
err error
}

var wg sync.WaitGroup
wg.Add(len(machines))
ch := make(chan target, len(machines))
for i, vm := range machines {
go func(i int, vm virtualMachine) {
defer wg.Done()
r, err := newAzureResourceFromID(vm.ID, d.logger)
if err != nil {
ch <- target{labelSet: nil, err: err}

@@ -287,6 +289,8 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
}

labels := model.LabelSet{
azureLabelSubscriptionID: model.LabelValue(d.cfg.SubscriptionID),
azureLabelTenantID: model.LabelValue(d.cfg.TenantID),
azureLabelMachineID: model.LabelValue(vm.ID),
azureLabelMachineName: model.LabelValue(vm.Name),
azureLabelMachineOSType: model.LabelValue(vm.OsType),

@@ -306,37 +310,44 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
}

// Get the IP address information via separate call to the network provider.
for _, nic := range *vm.NetworkProfile.NetworkInterfaces {
networkInterface, err := client.getNetworkInterfaceByID(*nic.ID)
for _, nicID := range vm.NetworkInterfaces {
networkInterface, err := client.getNetworkInterfaceByID(ctx, nicID)

if err != nil {
level.Error(d.logger).Log("msg", "Unable to get network interface", "name", *nic.ID, "err", err)
level.Error(d.logger).Log("msg", "Unable to get network interface", "name", nicID, "err", err)
ch <- target{labelSet: nil, err: err}
// Get out of this routine because we cannot continue without a network interface.
return
}

if networkInterface.InterfacePropertiesFormat == nil {
continue
}

// Unfortunately Azure does not return information on whether a VM is deallocated.
// This information is available via another API call however the Go SDK does not
// yet support this. On deallocated machines, this value happens to be nil so it
// is a cheap and easy way to determine if a machine is allocated or not.
if networkInterface.Properties.Primary == nil {
if networkInterface.Primary == nil {
level.Debug(d.logger).Log("msg", "Skipping deallocated virtual machine", "machine", vm.Name)
ch <- target{}
return
}

if *networkInterface.Properties.Primary {
for _, ip := range *networkInterface.Properties.IPConfigurations {
if ip.Properties.PrivateIPAddress != nil {
labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.Properties.PrivateIPAddress)
address := net.JoinHostPort(*ip.Properties.PrivateIPAddress, fmt.Sprintf("%d", d.port))
if *networkInterface.Primary {
for _, ip := range *networkInterface.IPConfigurations {
if ip.PublicIPAddress != nil && ip.PublicIPAddress.PublicIPAddressPropertiesFormat != nil {
labels[azureLabelMachinePublicIP] = model.LabelValue(*ip.PublicIPAddress.IPAddress)
}
if ip.PrivateIPAddress != nil {
labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.PrivateIPAddress)
address := net.JoinHostPort(*ip.PrivateIPAddress, fmt.Sprintf("%d", d.port))
labels[model.AddressLabel] = model.LabelValue(address)
ch <- target{labelSet: labels, err: nil}
return
}
// If we made it here, we don't have a private IP which should be impossible.
// Return an empty target and error to ensure an all or nothing situation.
err = fmt.Errorf("unable to find a private IP for VM %s", vm.Name)
err = errors.Errorf("unable to find a private IP for VM %s", vm.Name)
ch <- target{labelSet: nil, err: err}
return
}

@@ -345,140 +356,142 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
}(i, vm)
}

for range machines {
tgt := <-ch
wg.Wait()
close(ch)

var tg targetgroup.Group
for tgt := range ch {
if tgt.err != nil {
return nil, fmt.Errorf("unable to complete Azure service discovery: %s", err)
return nil, errors.Wrap(err, "unable to complete Azure service discovery")
}
if tgt.labelSet != nil {
tg.Targets = append(tg.Targets, tgt.labelSet)
}
}

return tg, nil
return []*targetgroup.Group{&tg}, nil
}

func (client *azureClient) getVMs() ([]virtualMachine, error) {
func (client *azureClient) getVMs(ctx context.Context) ([]virtualMachine, error) {
var vms []virtualMachine
result, err := client.vm.ListAll()
result, err := client.vm.ListAll(ctx)
if err != nil {
return vms, fmt.Errorf("could not list virtual machines: %s", err)
return nil, errors.Wrap(err, "could not list virtual machines")
}

for _, vm := range *result.Value {
vms = append(vms, mapFromVM(vm))
}

// If we still have results, keep going until we have no more.
for result.NextLink != nil {
result, err = client.vm.ListAllNextResults(result)
if err != nil {
return vms, fmt.Errorf("could not list virtual machines: %s", err)
}

for _, vm := range *result.Value {
for result.NotDone() {
for _, vm := range result.Values() {
vms = append(vms, mapFromVM(vm))
}
err = result.NextWithContext(ctx)
if err != nil {
return nil, errors.Wrap(err, "could not list virtual machines")
}
}

return vms, nil
}
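[Editorial note] The listing helpers in this file switch from the old `NextLink`/`ListAllNextResults` pagination to the page iterator the 2018-10-01 SDK returns (`NotDone`/`Values`/`NextWithContext`). A self-contained sketch of that iteration shape, with a stand-in pager type invented purely for illustration:

```go
package main

import (
	"context"
	"fmt"
)

// pager mimics the iterator shape of the 2018-10-01 SDK result types;
// it is a stand-in for illustration only.
type pager struct {
	pages [][]string
	idx   int
}

func (p *pager) NotDone() bool    { return p.idx < len(p.pages) }
func (p *pager) Values() []string { return p.pages[p.idx] }
func (p *pager) NextWithContext(ctx context.Context) error {
	p.idx++
	return ctx.Err()
}

func main() {
	result := &pager{pages: [][]string{{"vm-1", "vm-2"}, {"vm-3"}}}
	ctx := context.Background()
	var vms []string
	for result.NotDone() {
		vms = append(vms, result.Values()...) // consume the current page
		if err := result.NextWithContext(ctx); err != nil {
			return
		}
	}
	fmt.Println(vms) // [vm-1 vm-2 vm-3]
}
```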
func (client *azureClient) getScaleSets() ([]compute.VirtualMachineScaleSet, error) {
func (client *azureClient) getScaleSets(ctx context.Context) ([]compute.VirtualMachineScaleSet, error) {
var scaleSets []compute.VirtualMachineScaleSet
result, err := client.vmss.ListAll()
result, err := client.vmss.ListAll(ctx)
if err != nil {
return scaleSets, fmt.Errorf("could not list virtual machine scale sets: %s", err)
return nil, errors.Wrap(err, "could not list virtual machine scale sets")
}
scaleSets = append(scaleSets, *result.Value...)

for result.NextLink != nil {
result, err = client.vmss.ListAllNextResults(result)
for result.NotDone() {
scaleSets = append(scaleSets, result.Values()...)
err = result.NextWithContext(ctx)
if err != nil {
return scaleSets, fmt.Errorf("could not list virtual machine scale sets: %s", err)
return nil, errors.Wrap(err, "could not list virtual machine scale sets")
}
scaleSets = append(scaleSets, *result.Value...)
}

return scaleSets, nil
}

func (client *azureClient) getScaleSetVMs(scaleSet compute.VirtualMachineScaleSet) ([]virtualMachine, error) {
func (client *azureClient) getScaleSetVMs(ctx context.Context, scaleSet compute.VirtualMachineScaleSet) ([]virtualMachine, error) {
var vms []virtualMachine
//TODO do we really need to fetch the resourcegroup this way?
r, err := newAzureResourceFromID(*scaleSet.ID, nil)

if err != nil {
return vms, fmt.Errorf("could not parse scale set ID: %s", err)
return nil, errors.Wrap(err, "could not parse scale set ID")
}

result, err := client.vmssvm.List(r.ResourceGroup, *(scaleSet.Name), "", "", "")
result, err := client.vmssvm.List(ctx, r.ResourceGroup, *(scaleSet.Name), "", "", "")
if err != nil {
return vms, fmt.Errorf("could not list virtual machine scale set vms: %s", err)
return nil, errors.Wrap(err, "could not list virtual machine scale set vms")
}

for _, vm := range *result.Value {
vms = append(vms, mapFromVMScaleSetVM(vm, *scaleSet.Name))
}

for result.NextLink != nil {
result, err = client.vmssvm.ListNextResults(result)
if err != nil {
return vms, fmt.Errorf("could not list virtual machine scale set vms: %s", err)
}

for _, vm := range *result.Value {
for result.NotDone() {
for _, vm := range result.Values() {
vms = append(vms, mapFromVMScaleSetVM(vm, *scaleSet.Name))
}
err = result.NextWithContext(ctx)
if err != nil {
return nil, errors.Wrap(err, "could not list virtual machine scale set vms")
}
}

return vms, nil
}

func mapFromVM(vm compute.VirtualMachine) virtualMachine {
osType := string(vm.Properties.StorageProfile.OsDisk.OsType)
osType := string(vm.StorageProfile.OsDisk.OsType)
tags := map[string]*string{}
networkInterfaces := []string{}

if vm.Tags != nil {
tags = *(vm.Tags)
tags = vm.Tags
}

if vm.NetworkProfile != nil {
for _, vmNIC := range *(vm.NetworkProfile.NetworkInterfaces) {
networkInterfaces = append(networkInterfaces, *vmNIC.ID)
}
}

return virtualMachine{
ID: *(vm.ID),
Name: *(vm.Name),
Type: *(vm.Type),
Location: *(vm.Location),
OsType: osType,
ScaleSet: "",
Tags: tags,
NetworkProfile: *(vm.Properties.NetworkProfile),
ID: *(vm.ID),
Name: *(vm.Name),
Type: *(vm.Type),
Location: *(vm.Location),
OsType: osType,
ScaleSet: "",
Tags: tags,
NetworkInterfaces: networkInterfaces,
}
}

func mapFromVMScaleSetVM(vm compute.VirtualMachineScaleSetVM, scaleSetName string) virtualMachine {
osType := string(vm.Properties.StorageProfile.OsDisk.OsType)
osType := string(vm.StorageProfile.OsDisk.OsType)
tags := map[string]*string{}
networkInterfaces := []string{}

if vm.Tags != nil {
tags = *(vm.Tags)
tags = vm.Tags
}

if vm.NetworkProfile != nil {
for _, vmNIC := range *(vm.NetworkProfile.NetworkInterfaces) {
networkInterfaces = append(networkInterfaces, *vmNIC.ID)
}
}

return virtualMachine{
ID: *(vm.ID),
Name: *(vm.Name),
Type: *(vm.Type),
Location: *(vm.Location),
OsType: osType,
ScaleSet: scaleSetName,
Tags: tags,
NetworkProfile: *(vm.Properties.NetworkProfile),
ID: *(vm.ID),
Name: *(vm.Name),
Type: *(vm.Type),
Location: *(vm.Location),
OsType: osType,
ScaleSet: scaleSetName,
Tags: tags,
NetworkInterfaces: networkInterfaces,
}
}

func (client *azureClient) getNetworkInterfaceByID(networkInterfaceID string) (network.Interface, error) {
func (client *azureClient) getNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*network.Interface, error) {
result := network.Interface{}
queryParameters := map[string]interface{}{
"api-version": client.nic.APIVersion,
"api-version": "2018-10-01",
}

preparer := autorest.CreatePreparer(

@@ -486,21 +499,20 @@ func (client *azureClient) getNetworkInterfaceByID(networkInterfaceID string) (n
autorest.WithBaseURL(client.nic.BaseURI),
autorest.WithPath(networkInterfaceID),
autorest.WithQueryParameters(queryParameters))
req, err := preparer.Prepare(&http.Request{})
req, err := preparer.Prepare((&http.Request{}).WithContext(ctx))
if err != nil {
return result, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", nil, "Failure preparing request")
return nil, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", nil, "Failure preparing request")
}

resp, err := client.nic.GetSender(req)
if err != nil {
result.Response = autorest.Response{Response: resp}
return result, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", resp, "Failure sending request")
return nil, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", resp, "Failure sending request")
}

result, err = client.nic.GetResponder(resp)
if err != nil {
err = autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", resp, "Failure responding to request")
return nil, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", resp, "Failure responding to request")
}

return result, nil
return &result, nil
}
@@ -17,7 +17,7 @@ import (
"reflect"
"testing"

"github.com/Azure/azure-sdk-for-go/arm/compute"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute"
)

func TestMapFromVMWithEmptyTags(t *testing.T) {

@@ -25,7 +25,9 @@ func TestMapFromVMWithEmptyTags(t *testing.T) {
name := "name"
vmType := "type"
location := "westeurope"
networkProfile := compute.NetworkProfile{}
networkProfile := compute.NetworkProfile{
NetworkInterfaces: &[]compute.NetworkInterfaceReference{},
}
properties := &compute.VirtualMachineProperties{
StorageProfile: &compute.StorageProfile{
OsDisk: &compute.OSDisk{

@@ -36,22 +38,22 @@ func TestMapFromVMWithEmptyTags(t *testing.T) {
}

testVM := compute.VirtualMachine{
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: nil,
Properties: properties,
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: nil,
VirtualMachineProperties: properties,
}

expectedVM := virtualMachine{
ID: id,
Name: name,
Type: vmType,
Location: location,
OsType: "Linux",
Tags: map[string]*string{},
NetworkProfile: networkProfile,
ID: id,
Name: name,
Type: vmType,
Location: location,
OsType: "Linux",
Tags: map[string]*string{},
NetworkInterfaces: []string{},
}

actualVM := mapFromVM(testVM)

@@ -69,7 +71,9 @@ func TestMapFromVMWithTags(t *testing.T) {
tags := map[string]*string{
"prometheus": new(string),
}
networkProfile := compute.NetworkProfile{}
networkProfile := compute.NetworkProfile{
NetworkInterfaces: &[]compute.NetworkInterfaceReference{},
}
properties := &compute.VirtualMachineProperties{
StorageProfile: &compute.StorageProfile{
OsDisk: &compute.OSDisk{

@@ -80,22 +84,22 @@ func TestMapFromVMWithTags(t *testing.T) {
}

testVM := compute.VirtualMachine{
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: &tags,
Properties: properties,
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: tags,
VirtualMachineProperties: properties,
}

expectedVM := virtualMachine{
ID: id,
Name: name,
Type: vmType,
Location: location,
OsType: "Linux",
Tags: tags,
NetworkProfile: networkProfile,
ID: id,
Name: name,
Type: vmType,
Location: location,
OsType: "Linux",
Tags: tags,
NetworkInterfaces: []string{},
}

actualVM := mapFromVM(testVM)

@@ -110,7 +114,9 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) {
name := "name"
vmType := "type"
location := "westeurope"
networkProfile := compute.NetworkProfile{}
networkProfile := compute.NetworkProfile{
NetworkInterfaces: &[]compute.NetworkInterfaceReference{},
}
properties := &compute.VirtualMachineScaleSetVMProperties{
StorageProfile: &compute.StorageProfile{
OsDisk: &compute.OSDisk{

@@ -121,24 +127,24 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) {
}

testVM := compute.VirtualMachineScaleSetVM{
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: nil,
Properties: properties,
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: nil,
VirtualMachineScaleSetVMProperties: properties,
}

scaleSet := "testSet"
expectedVM := virtualMachine{
ID: id,
Name: name,
Type: vmType,
Location: location,
OsType: "Linux",
Tags: map[string]*string{},
NetworkProfile: networkProfile,
ScaleSet: scaleSet,
ID: id,
Name: name,
Type: vmType,
Location: location,
OsType: "Linux",
Tags: map[string]*string{},
NetworkInterfaces: []string{},
ScaleSet: scaleSet,
}

actualVM := mapFromVMScaleSetVM(testVM, scaleSet)

@@ -156,7 +162,9 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
tags := map[string]*string{
"prometheus": new(string),
}
networkProfile := compute.NetworkProfile{}
networkProfile := compute.NetworkProfile{
NetworkInterfaces: &[]compute.NetworkInterfaceReference{},
}
properties := &compute.VirtualMachineScaleSetVMProperties{
StorageProfile: &compute.StorageProfile{
OsDisk: &compute.OSDisk{

@@ -167,24 +175,24 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
}

testVM := compute.VirtualMachineScaleSetVM{
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: &tags,
Properties: properties,
ID: &id,
Name: &name,
Type: &vmType,
Location: &location,
Tags: tags,
VirtualMachineScaleSetVMProperties: properties,
}

scaleSet := "testSet"
expectedVM := virtualMachine{
ID: id,
Name: name,
Type: vmType,
Location: location,
OsType: "Linux",
Tags: tags,
NetworkProfile: networkProfile,
ScaleSet: scaleSet,
ID: id,
Name: name,
Type: vmType,
Location: location,
OsType: "Linux",
Tags: tags,
NetworkInterfaces: []string{},
ScaleSet: scaleSet,
}

actualVM := mapFromVMScaleSetVM(testVM, scaleSet)
@@ -14,6 +14,8 @@
package config

import (
"github.com/pkg/errors"

"github.com/prometheus/prometheus/discovery/azure"
"github.com/prometheus/prometheus/discovery/consul"
"github.com/prometheus/prometheus/discovery/dns"

@@ -58,8 +60,67 @@ type ServiceDiscoveryConfig struct {
TritonSDConfigs []*triton.SDConfig `yaml:"triton_sd_configs,omitempty"`
}

// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (c *ServiceDiscoveryConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
type plain ServiceDiscoveryConfig
return unmarshal((*plain)(c))
// Validate validates the ServiceDiscoveryConfig.
func (c *ServiceDiscoveryConfig) Validate() error {
for _, cfg := range c.AzureSDConfigs {
if cfg == nil {
return errors.New("empty or null section in azure_sd_configs")
}
}
for _, cfg := range c.ConsulSDConfigs {
if cfg == nil {
return errors.New("empty or null section in consul_sd_configs")
}
}
for _, cfg := range c.DNSSDConfigs {
if cfg == nil {
return errors.New("empty or null section in dns_sd_configs")
}
}
for _, cfg := range c.EC2SDConfigs {
if cfg == nil {
return errors.New("empty or null section in ec2_sd_configs")
}
}
for _, cfg := range c.FileSDConfigs {
if cfg == nil {
return errors.New("empty or null section in file_sd_configs")
}
}
for _, cfg := range c.GCESDConfigs {
if cfg == nil {
return errors.New("empty or null section in gce_sd_configs")
}
}
for _, cfg := range c.KubernetesSDConfigs {
if cfg == nil {
return errors.New("empty or null section in kubernetes_sd_configs")
}
}
for _, cfg := range c.MarathonSDConfigs {
if cfg == nil {
return errors.New("empty or null section in marathon_sd_configs")
}
}
for _, cfg := range c.NerveSDConfigs {
if cfg == nil {
return errors.New("empty or null section in nerve_sd_configs")
}
}
for _, cfg := range c.OpenstackSDConfigs {
if cfg == nil {
return errors.New("empty or null section in openstack_sd_configs")
}
}
for _, cfg := range c.ServersetSDConfigs {
if cfg == nil {
return errors.New("empty or null section in serverset_sd_configs")
}
}
for _, cfg := range c.StaticConfigs {
if cfg == nil {
return errors.New("empty or null section in static_configs")
}
}
return nil
}
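[Editorial note] A hedged sketch of why this `Validate` method exists: an empty YAML list entry unmarshals to a nil pointer, which previously surfaced as a crash or confusing error later. The import path for the package above is assumed from the surrounding hunks; everything else mirrors the code shown.

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/discovery/azure"
	sdconfig "github.com/prometheus/prometheus/discovery/config" // assumed path for the package above
)

func main() {
	// An empty YAML list entry ("azure_sd_configs:\n  -") unmarshals to a
	// nil pointer; Validate now rejects it with a targeted error message.
	var c sdconfig.ServiceDiscoveryConfig
	c.AzureSDConfigs = []*azure.SDConfig{nil}
	fmt.Println(c.Validate()) // empty or null section in azure_sd_configs
}
```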
@@ -25,10 +25,12 @@ import (
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
consul "github.com/hashicorp/consul/api"
"github.com/mwitkow/go-conntrack"
conntrack "github.com/mwitkow/go-conntrack"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"

"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
)

@@ -55,6 +57,8 @@ const (
servicePortLabel = model.MetaLabelPrefix + "consul_service_port"
// datacenterLabel is the name of the label containing the datacenter ID.
datacenterLabel = model.MetaLabelPrefix + "consul_dc"
// taggedAddressesLabel is the prefix for the labels mapping to a target's tagged addresses.
taggedAddressesLabel = model.MetaLabelPrefix + "consul_tagged_address_"
// serviceIDLabel is the name of the label containing the service ID.
serviceIDLabel = model.MetaLabelPrefix + "consul_service_id"

@@ -71,9 +75,10 @@ var (
})
rpcDuration = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Name: "sd_consul_rpc_duration_seconds",
Help: "The duration of a Consul RPC call in seconds.",
Namespace: namespace,
Name: "sd_consul_rpc_duration_seconds",
Help: "The duration of a Consul RPC call in seconds.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
},
[]string{"endpoint", "call"},
)

@@ -111,9 +116,8 @@ type SDConfig struct {
// The list of services for which targets are discovered.
// Defaults to all services if empty.
Services []string `yaml:"services,omitempty"`
// An optional tag used to filter instances inside a service. A single tag is supported
// here to match the Consul API.
ServiceTag string `yaml:"tag,omitempty"`
// A list of tags used to filter instances inside a service. Services must contain all tags in the list.
ServiceTags []string `yaml:"tags,omitempty"`
// Desired node metadata.
NodeMeta map[string]string `yaml:"node_meta,omitempty"`

@@ -129,7 +133,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err
}
if strings.TrimSpace(c.Server) == "" {
return fmt.Errorf("Consul SD configuration requires a server address")
return errors.New("consul SD configuration requires a server address")
}
return nil
}

@@ -150,7 +154,7 @@ type Discovery struct {
clientDatacenter string
tagSeparator string
watchedServices []string // Set of services which will be discovered.
watchedTag string // A tag used to filter instances of a service.
watchedTags []string // Tags used to filter instances of a service.
watchedNodeMeta map[string]string
allowStale bool
refreshInterval time.Duration

@@ -200,7 +204,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
client: client,
tagSeparator: conf.TagSeparator,
watchedServices: conf.Services,
watchedTag: conf.ServiceTag,
watchedTags: conf.ServiceTags,
watchedNodeMeta: conf.NodeMeta,
allowStale: conf.AllowStale,
refreshInterval: time.Duration(conf.RefreshInterval),

@@ -236,16 +240,20 @@ func (d *Discovery) shouldWatchFromName(name string) bool {
// *all* services. Details in https://github.com/prometheus/prometheus/pull/3814
func (d *Discovery) shouldWatchFromTags(tags []string) bool {
// If there's no fixed set of watched tags, we watch everything.
if d.watchedTag == "" {
if len(d.watchedTags) == 0 {
return true
}

for _, tag := range tags {
if d.watchedTag == tag {
return true
tagOuter:
for _, wtag := range d.watchedTags {
for _, tag := range tags {
if wtag == tag {
continue tagOuter
}
}
return false
}
return false
return true
}

// Get the local datacenter if not specified.
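[Editorial note] The rewritten `shouldWatchFromTags` changes the semantics from "any single tag matches" to "every watched tag must be present on the service". A standalone sketch of that subset check (function name invented for illustration; the logic mirrors the labeled loop above):

```go
package main

import "fmt"

// matchesAllTags mirrors the new shouldWatchFromTags: every watched
// tag must appear among the service's tags for it to be watched.
func matchesAllTags(watched, serviceTags []string) bool {
	for _, w := range watched {
		found := false
		for _, t := range serviceTags {
			if w == t {
				found = true
				break
			}
		}
		if !found {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(matchesAllTags([]string{"http", "v1"}, []string{"http", "v1", "canary"})) // true
	fmt.Println(matchesAllTags([]string{"http", "v1"}, []string{"http"}))                 // false
}
```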
@@ -265,7 +273,7 @@ func (d *Discovery) getDatacenter() error {

dc, ok := info["Config"]["Datacenter"].(string)
if !ok {
err := fmt.Errorf("Invalid value '%v' for Config.Datacenter", info["Config"]["Datacenter"])
err := errors.Errorf("invalid value '%v' for Config.Datacenter", info["Config"]["Datacenter"])
level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err)
return err
}

@@ -304,7 +312,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
}
d.initialize(ctx)

if len(d.watchedServices) == 0 || d.watchedTag != "" {
if len(d.watchedServices) == 0 || len(d.watchedTags) != 0 {
// We need to watch the catalog.
ticker := time.NewTicker(d.refreshInterval)

@@ -322,7 +330,6 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
<-ticker.C
}
}

} else {
// We only have fully defined services.
for _, name := range d.watchedServices {

@@ -335,17 +342,18 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
// Watch the catalog for new services we would like to watch. This is called only
// when we don't know yet the names of the services and need to ask Consul the
// entire list of services.
func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.Group, lastIndex *uint64, services map[string]func()) error {
func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.Group, lastIndex *uint64, services map[string]func()) {
catalog := d.client.Catalog()
level.Debug(d.logger).Log("msg", "Watching services", "tag", d.watchedTag)
level.Debug(d.logger).Log("msg", "Watching services", "tags", d.watchedTags)

t0 := time.Now()
srvs, meta, err := catalog.Services(&consul.QueryOptions{
opts := &consul.QueryOptions{
WaitIndex: *lastIndex,
WaitTime: watchTimeout,
AllowStale: d.allowStale,
NodeMeta: d.watchedNodeMeta,
})
}
srvs, meta, err := catalog.Services(opts.WithContext(ctx))
elapsed := time.Since(t0)
rpcDuration.WithLabelValues("catalog", "services").Observe(elapsed.Seconds())

@@ -353,11 +361,11 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err)
rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return err
return
}
// If the index equals the previous one, the watch timed out with no update.
if meta.LastIndex == *lastIndex {
return nil
return
}
*lastIndex = meta.LastIndex

@@ -389,18 +397,17 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
// Send clearing target group.
select {
case <-ctx.Done():
return ctx.Err()
return
case ch <- []*targetgroup.Group{{Source: name}}:
}
}
}
return nil
}

// consulService contains data belonging to the same service.
type consulService struct {
name string
tag string
tags []string
labels model.LabelSet
discovery *Discovery
client *consul.Client

@@ -414,7 +421,7 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G
discovery: d,
client: d.client,
name: name,
tag: d.watchedTag,
tags: d.watchedTags,
labels: model.LabelSet{
serviceLabel: model.LabelValue(name),
datacenterLabel: model.LabelValue(d.clientDatacenter),

@@ -434,43 +441,47 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G
return
default:
srv.watch(ctx, ch, catalog, &lastIndex)
<-ticker.C
select {
case <-ticker.C:
case <-ctx.Done():
}
}
}
}()
}

// Get updates for a service.
func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Group, catalog *consul.Catalog, lastIndex *uint64) error {
level.Debug(srv.logger).Log("msg", "Watching service", "service", srv.name, "tag", srv.tag)
func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Group, catalog *consul.Catalog, lastIndex *uint64) {
level.Debug(srv.logger).Log("msg", "Watching service", "service", srv.name, "tags", srv.tags)

t0 := time.Now()
nodes, meta, err := catalog.Service(srv.name, srv.tag, &consul.QueryOptions{
opts := &consul.QueryOptions{
WaitIndex: *lastIndex,
WaitTime: watchTimeout,
AllowStale: srv.discovery.allowStale,
NodeMeta: srv.discovery.watchedNodeMeta,
})
}
nodes, meta, err := catalog.ServiceMultipleTags(srv.name, srv.tags, opts.WithContext(ctx))
elapsed := time.Since(t0)
rpcDuration.WithLabelValues("catalog", "service").Observe(elapsed.Seconds())

// Check the context before in order to exit early.
select {
case <-ctx.Done():
return ctx.Err()
return
default:
// Continue.
}

if err != nil {
level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tag", srv.tag, "err", err)
level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", srv.tags, "err", err)
rpcFailuresCount.Inc()
time.Sleep(retryInterval)
return err
return
}
// If the index equals the previous one, the watch timed out with no update.
if meta.LastIndex == *lastIndex {
return nil
return
}
*lastIndex = meta.LastIndex

@@ -487,7 +498,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr
var tags = srv.tagSeparator + strings.Join(node.ServiceTags, srv.tagSeparator) + srv.tagSeparator

// If the service address is not empty it should be used instead of the node address
// since the service may be registered remotely through a different node
// since the service may be registered remotely through a different node.
var addr string
if node.ServiceAddress != "" {
addr = net.JoinHostPort(node.ServiceAddress, fmt.Sprintf("%d", node.ServicePort))

@@ -505,25 +516,29 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr
serviceIDLabel: model.LabelValue(node.ServiceID),
}

// Add all key/value pairs from the node's metadata as their own labels
// Add all key/value pairs from the node's metadata as their own labels.
for k, v := range node.NodeMeta {
name := strutil.SanitizeLabelName(k)
labels[metaDataLabel+model.LabelName(name)] = model.LabelValue(v)
}

// Add all key/value pairs from the service's metadata as their own labels
// Add all key/value pairs from the service's metadata as their own labels.
for k, v := range node.ServiceMeta {
name := strutil.SanitizeLabelName(k)
labels[serviceMetaDataLabel+model.LabelName(name)] = model.LabelValue(v)
}

// Add all key/value pairs from the service's tagged addresses as their own labels.
for k, v := range node.TaggedAddresses {
name := strutil.SanitizeLabelName(k)
labels[taggedAddressesLabel+model.LabelName(name)] = model.LabelValue(v)
}

tgroup.Targets = append(tgroup.Targets, labels)
}

select {
case <-ctx.Done():
return ctx.Err()
case ch <- []*targetgroup.Group{&tgroup}:
}
return nil
}
|
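The watch method above relies on Consul blocking queries: passing the last seen index as WaitIndex makes the catalog call block server-side until something changes, so the loop wakes up only for real updates or timeouts. A minimal, self-contained sketch of that pattern (the helper name and the printing are illustrative, not the Prometheus code):

package main

import (
	"context"
	"fmt"

	consul "github.com/hashicorp/consul/api"
)

// watchOnce performs one blocking catalog query and advances lastIndex.
func watchOnce(ctx context.Context, catalog *consul.Catalog, name string, tags []string, lastIndex *uint64) error {
	opts := &consul.QueryOptions{
		WaitIndex:  *lastIndex, // block until the index moves past this value
		AllowStale: true,
	}
	nodes, meta, err := catalog.ServiceMultipleTags(name, tags, opts.WithContext(ctx))
	if err != nil {
		return err
	}
	if meta.LastIndex == *lastIndex {
		return nil // the watch timed out with no update
	}
	*lastIndex = meta.LastIndex
	for _, node := range nodes {
		fmt.Println(node.Node, node.ServiceAddress, node.ServicePort)
	}
	return nil
}

Calling watchOnce in a loop reproduces the long-polling behaviour without sleeping between updates; the WaitTime option bounds how long a single call may block.
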
@@ -34,7 +34,7 @@ func TestConfiguredService(t *testing.T) {
consulDiscovery, err := NewDiscovery(conf, nil)

if err != nil {
t.Errorf("Unexpected error when initialising discovery %v", err)
t.Errorf("Unexpected error when initializing discovery %v", err)
}
if !consulDiscovery.shouldWatch("configuredServiceName", []string{""}) {
t.Errorf("Expected service %s to be watched", "configuredServiceName")

@@ -46,13 +46,13 @@ func TestConfiguredService(t *testing.T) {

func TestConfiguredServiceWithTag(t *testing.T) {
conf := &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTag: "http",
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http"},
}
consulDiscovery, err := NewDiscovery(conf, nil)

if err != nil {
t.Errorf("Unexpected error when initialising discovery %v", err)
t.Errorf("Unexpected error when initializing discovery %v", err)
}
if consulDiscovery.shouldWatch("configuredServiceName", []string{""}) {
t.Errorf("Expected service %s to not be watched without tag", "configuredServiceName")

@@ -68,12 +68,102 @@ func TestConfiguredServiceWithTag(t *testing.T) {
}
}

func TestConfiguredServiceWithTags(t *testing.T) {
type testcase struct {
// What we've configured to watch.
conf *SDConfig
// The service we're checking if we should watch or not.
serviceName string
serviceTags []string
shouldWatch bool
}

cases := []testcase{
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1"},
},
serviceName: "configuredServiceName",
serviceTags: []string{""},
shouldWatch: false,
},
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1"},
},
serviceName: "configuredServiceName",
serviceTags: []string{"http", "v1"},
shouldWatch: true,
},
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1"},
},
serviceName: "nonConfiguredServiceName",
serviceTags: []string{""},
shouldWatch: false,
},
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1"},
},
serviceName: "nonConfiguredServiceName",
serviceTags: []string{"http, v1"},
shouldWatch: false,
},
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1"},
},
serviceName: "configuredServiceName",
serviceTags: []string{"http", "v1", "foo"},
shouldWatch: true,
},
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1", "foo"},
},
serviceName: "configuredServiceName",
serviceTags: []string{"http", "v1", "foo"},
shouldWatch: true,
},
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1"},
},
serviceName: "configuredServiceName",
serviceTags: []string{"http", "v1", "v1"},
shouldWatch: true,
},
}

for _, tc := range cases {
consulDiscovery, err := NewDiscovery(tc.conf, nil)

if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
ret := consulDiscovery.shouldWatch(tc.serviceName, tc.serviceTags)
if ret != tc.shouldWatch {
t.Errorf("Expected should watch? %t, got %t. Watched service and tags: %s %+v, input was %s %+v", tc.shouldWatch, ret, tc.conf.Services, tc.conf.ServiceTags, tc.serviceName, tc.serviceTags)
}

}
}

func TestNonConfiguredService(t *testing.T) {
conf := &SDConfig{}
consulDiscovery, err := NewDiscovery(conf, nil)

if err != nil {
t.Errorf("Unexpected error when initialising discovery %v", err)
t.Errorf("Unexpected error when initializing discovery %v", err)
}
if !consulDiscovery.shouldWatch("nonConfiguredServiceName", []string{""}) {
t.Errorf("Expected service %s to be watched", "nonConfiguredServiceName")

@@ -87,6 +177,7 @@ const (
"Node": "node1",
"Address": "1.1.1.1",
"Datacenter": "test-dc",
"TaggedAddresses": {"lan":"192.168.10.10","wan":"10.0.10.10"},
"NodeMeta": {"rack_name": "2304"},
"ServiceID": "test",
"ServiceName": "test",

@@ -194,7 +285,7 @@ func TestAllOptions(t *testing.T) {

config.Services = []string{"test"}
config.NodeMeta = map[string]string{"rack_name": "2304"}
config.ServiceTag = "tag1"
config.ServiceTags = []string{"tag1"}
config.AllowStale = true
config.Token = "fake-token"

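The table above pins down the shouldWatch contract for tags: a service qualifies only when every configured tag is present in its tag set, with extra or duplicated service tags ignored (the service name is checked separately). A plausible sketch of that check, written against the test expectations rather than copied from the package:

package consul

// tagsMatch reports whether every configured tag appears in serviceTags.
func tagsMatch(configured, serviceTags []string) bool {
	have := make(map[string]struct{}, len(serviceTags))
	for _, t := range serviceTags {
		have[t] = struct{}{}
	}
	for _, t := range configured {
		if _, ok := have[t]; !ok {
			return false
		}
	}
	return true
}

Against the cases above: {"http", "v1"} matches {"http", "v1", "foo"} and {"http", "v1", "v1"}, but not {""} and not the single malformed tag "http, v1".
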
@@ -24,8 +24,11 @@ import (
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/miekg/dns"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"

"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/prometheus/discovery/targetgroup"
)

@@ -76,16 +79,16 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err
}
if len(c.Names) == 0 {
return fmt.Errorf("DNS-SD config must contain at least one SRV record name")
return errors.New("DNS-SD config must contain at least one SRV record name")
}
switch strings.ToUpper(c.Type) {
case "SRV":
case "A", "AAAA":
if c.Port == 0 {
return fmt.Errorf("a port is required in DNS-SD configs for all record types except SRV")
return errors.New("a port is required in DNS-SD configs for all record types except SRV")
}
default:
return fmt.Errorf("invalid DNS-SD records type %s", c.Type)
return errors.Errorf("invalid DNS-SD records type %s", c.Type)
}
return nil
}

@@ -98,12 +101,13 @@ func init() {
// Discovery periodically performs DNS-SD requests. It implements
// the Discoverer interface.
type Discovery struct {
names []string
*refresh.Discovery
names []string
port int
qtype uint16
logger log.Logger

interval time.Duration
port int
qtype uint16
logger log.Logger
lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error)
}

// NewDiscovery returns a new Discovery which periodically refreshes its targets.

@@ -121,51 +125,52 @@ func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery {
case "SRV":
qtype = dns.TypeSRV
}
return &Discovery{
d := &Discovery{
names: conf.Names,
interval: time.Duration(conf.RefreshInterval),
qtype: qtype,
port: conf.Port,
logger: logger,
lookupFn: lookupWithSearchPath,
}
d.Discovery = refresh.NewDiscovery(
logger,
"dns",
time.Duration(conf.RefreshInterval),
d.refresh,
)
return d
}

// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
ticker := time.NewTicker(d.interval)
defer ticker.Stop()

// Get an initial set right away.
d.refreshAll(ctx, ch)

for {
select {
case <-ticker.C:
d.refreshAll(ctx, ch)
case <-ctx.Done():
return
}
}
}

func (d *Discovery) refreshAll(ctx context.Context, ch chan<- []*targetgroup.Group) {
var wg sync.WaitGroup
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
var (
wg sync.WaitGroup
ch = make(chan *targetgroup.Group)
tgs = make([]*targetgroup.Group, 0, len(d.names))
)

wg.Add(len(d.names))
for _, name := range d.names {
go func(n string) {
if err := d.refresh(ctx, n, ch); err != nil {
if err := d.refreshOne(ctx, n, ch); err != nil {
level.Error(d.logger).Log("msg", "Error refreshing DNS targets", "err", err)
}
wg.Done()
}(name)
}

wg.Wait()
go func() {
wg.Wait()
close(ch)
}()

for tg := range ch {
tgs = append(tgs, tg)
}
return tgs, nil
}

func (d *Discovery) refresh(ctx context.Context, name string, ch chan<- []*targetgroup.Group) error {
response, err := lookupWithSearchPath(name, d.qtype, d.logger)
func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targetgroup.Group) error {
response, err := d.lookupFn(name, d.qtype, d.logger)
dnsSDLookupsCount.Inc()
if err != nil {
dnsSDLookupFailuresCount.Inc()

@@ -178,7 +183,7 @@ func (d *Discovery) refresh(ctx context.Context, name string, ch chan<- []*targe
}

for _, record := range response.Answer {
target := model.LabelValue("")
var target model.LabelValue
switch addr := record.(type) {
case *dns.SRV:
// Remove the final dot from rooted DNS names to make them look more usual.

@@ -203,7 +208,7 @@ func (d *Discovery) refresh(ctx context.Context, name string, ch chan<- []*targe
select {
case <-ctx.Done():
return ctx.Err()
case ch <- []*targetgroup.Group{tg}:
case ch <- tg:
}

return nil

@@ -214,7 +219,7 @@ func (d *Discovery) refresh(ctx context.Context, name string, ch chan<- []*targe
//
// There are three possible outcomes:
//
// 1. One of the permutations of the given name is recognised as
// 1. One of the permutations of the given name is recognized as
// "valid" by the DNS, in which case we consider ourselves "done"
// and that answer is returned. Note that, due to the way the DNS
// handles "name has resource records, but none of the specified type",

@@ -239,7 +244,7 @@ func (d *Discovery) refresh(ctx context.Context, name string, ch chan<- []*targe
func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
conf, err := dns.ClientConfigFromFile(resolvConf)
if err != nil {
return nil, fmt.Errorf("could not load resolv.conf: %s", err)
return nil, errors.Wrap(err, "could not load resolv.conf")
}

allResponsesValid := true

@@ -265,7 +270,7 @@ func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Ms
return &dns.Msg{}, nil
}
// Outcome 3: boned.
return nil, fmt.Errorf("could not resolve %q: all servers responded with errors to at least one search domain", name)
return nil, errors.Errorf("could not resolve %q: all servers responded with errors to at least one search domain", name)
}

// lookupFromAnyServer uses all configured servers to try and resolve a specific

@@ -301,7 +306,7 @@ func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logg
}
}

return nil, fmt.Errorf("could not resolve %s: no servers returned a viable answer", name)
return nil, errors.Errorf("could not resolve %s: no servers returned a viable answer", name)
}

// askServerForName makes a request to a specific DNS server for a specific

@@ -317,19 +322,18 @@ func askServerForName(name string, queryType uint16, client *dns.Client, servAdd
}

response, _, err := client.Exchange(msg, servAddr)
if err == dns.ErrTruncated {
if err != nil {
return nil, err
}

if response.Truncated {
if client.Net == "tcp" {
return nil, fmt.Errorf("got truncated message on TCP (64kiB limit exceeded?)")
return nil, errors.New("got truncated message on TCP (64kiB limit exceeded?)")
}

client.Net = "tcp"
return askServerForName(name, queryType, client, servAddr, false)
}
if err != nil {
return nil, err
}
if msg.Id != response.Id {
return nil, fmt.Errorf("DNS ID mismatch, request: %d, response: %d", msg.Id, response.Id)
}

return response, nil
}

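The new refresh method replaces the old sequential refreshAll with a fan-out: one goroutine per name, a shared unbuffered channel for results, and a closer goroutine so the collecting loop ends exactly when all workers are done. The shape of that pattern in isolation (the per-name lookup function here is a stand-in):

package main

import (
	"context"
	"sync"

	"github.com/prometheus/prometheus/discovery/targetgroup"
)

func fanOut(ctx context.Context, names []string, one func(context.Context, string) *targetgroup.Group) []*targetgroup.Group {
	var (
		wg  sync.WaitGroup
		ch  = make(chan *targetgroup.Group)
		tgs = make([]*targetgroup.Group, 0, len(names))
	)
	wg.Add(len(names))
	for _, name := range names {
		go func(n string) {
			defer wg.Done()
			ch <- one(ctx, n)
		}(name)
	}
	// Close the channel only after every worker has sent, so the
	// range loop below terminates without losing results.
	go func() {
		wg.Wait()
		close(ch)
	}()
	for tg := range ch {
		tgs = append(tgs, tg)
	}
	return tgs
}

Closing from a separate goroutine is what lets the same goroutine that started the workers also collect their results; calling wg.Wait() before ranging would deadlock once the sends block on the unbuffered channel.
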
180 discovery/dns/dns_test.go Normal file

@@ -0,0 +1,180 @@
// Copyright 2019 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package dns

import (
"context"
"fmt"
"net"
"testing"
"time"

"github.com/go-kit/kit/log"
"github.com/miekg/dns"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"

"github.com/prometheus/prometheus/discovery/targetgroup"
)

func TestDNS(t *testing.T) {
testCases := []struct {
name string
config SDConfig
lookup func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error)

expected []*targetgroup.Group
}{
{
name: "A record query with error",
config: SDConfig{
Names: []string{"web.example.com."},
RefreshInterval: model.Duration(time.Minute),
Port: 80,
Type: "A",
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
return nil, fmt.Errorf("some error")
},
expected: []*targetgroup.Group{},
},
{
name: "A record query",
config: SDConfig{
Names: []string{"web.example.com."},
RefreshInterval: model.Duration(time.Minute),
Port: 80,
Type: "A",
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.A{A: net.IPv4(192, 0, 2, 2)},
},
},
nil
},
expected: []*targetgroup.Group{
&targetgroup.Group{
Source: "web.example.com.",
Targets: []model.LabelSet{
{"__address__": "192.0.2.2:80", "__meta_dns_name": "web.example.com."},
},
},
},
},
{
name: "AAAA record query",
config: SDConfig{
Names: []string{"web.example.com."},
RefreshInterval: model.Duration(time.Minute),
Port: 80,
Type: "AAAA",
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.AAAA{AAAA: net.IPv6loopback},
},
},
nil
},
expected: []*targetgroup.Group{
&targetgroup.Group{
Source: "web.example.com.",
Targets: []model.LabelSet{
{"__address__": "[::1]:80", "__meta_dns_name": "web.example.com."},
},
},
},
},
{
name: "SRV record query",
config: SDConfig{
Names: []string{"_mysql._tcp.db.example.com."},
RefreshInterval: model.Duration(time.Minute),
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.SRV{Port: 3306, Target: "db1.example.com."},
&dns.SRV{Port: 3306, Target: "db2.example.com."},
},
},
nil
},
expected: []*targetgroup.Group{
&targetgroup.Group{
Source: "_mysql._tcp.db.example.com.",
Targets: []model.LabelSet{
{"__address__": "db1.example.com:3306", "__meta_dns_name": "_mysql._tcp.db.example.com."},
{"__address__": "db2.example.com:3306", "__meta_dns_name": "_mysql._tcp.db.example.com."},
},
},
},
},
{
name: "SRV record query with unsupported resource records",
config: SDConfig{
Names: []string{"_mysql._tcp.db.example.com."},
RefreshInterval: model.Duration(time.Minute),
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.SRV{Port: 3306, Target: "db1.example.com."},
&dns.TXT{Txt: []string{"this should be discarded"}},
},
},
nil
},
expected: []*targetgroup.Group{
&targetgroup.Group{
Source: "_mysql._tcp.db.example.com.",
Targets: []model.LabelSet{
{"__address__": "db1.example.com:3306", "__meta_dns_name": "_mysql._tcp.db.example.com."},
},
},
},
},
{
name: "SRV record query with empty answer (NXDOMAIN)",
config: SDConfig{
Names: []string{"_mysql._tcp.db.example.com."},
RefreshInterval: model.Duration(time.Minute),
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
return &dns.Msg{}, nil
},
expected: []*targetgroup.Group{
&targetgroup.Group{
Source: "_mysql._tcp.db.example.com.",
},
},
},
}

for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
sd := NewDiscovery(tc.config, nil)
sd.lookupFn = tc.lookup

tgs, err := sd.refresh(context.Background())
require.NoError(t, err)
require.Equal(t, tc.expected, tgs)
})
}
}

@@ -25,13 +25,13 @@ import (
"github.com/aws/aws-sdk-go/aws/credentials/stscreds"
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/pkg/errors"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"

"github.com/aws/aws-sdk-go/service/ec2"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
)

@@ -46,6 +46,7 @@ const (
ec2LabelPlatform = ec2Label + "platform"
ec2LabelPublicDNS = ec2Label + "public_dns_name"
ec2LabelPublicIP = ec2Label + "public_ip"
ec2LabelPrivateDNS = ec2Label + "private_dns_name"
ec2LabelPrivateIP = ec2Label + "private_ip"
ec2LabelPrimarySubnetID = ec2Label + "primary_subnet_id"
ec2LabelSubnetID = ec2Label + "subnet_id"

@@ -54,23 +55,11 @@ const (
subnetSeparator = ","
)

var (
ec2SDRefreshFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_ec2_refresh_failures_total",
Help: "The number of EC2-SD scrape failures.",
})
ec2SDRefreshDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Name: "prometheus_sd_ec2_refresh_duration_seconds",
Help: "The duration of a EC2-SD refresh in seconds.",
})
// DefaultSDConfig is the default EC2 SD configuration.
DefaultSDConfig = SDConfig{
Port: 80,
RefreshInterval: model.Duration(60 * time.Second),
}
)
// DefaultSDConfig is the default EC2 SD configuration.
var DefaultSDConfig = SDConfig{
Port: 80,
RefreshInterval: model.Duration(60 * time.Second),
}

// Filter is the configuration for filtering EC2 instances.
type Filter struct {

@@ -107,33 +96,28 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
metadata := ec2metadata.New(sess)
region, err := metadata.Region()
if err != nil {
return fmt.Errorf("EC2 SD configuration requires a region")
return errors.New("EC2 SD configuration requires a region")
}
c.Region = region
}
for _, f := range c.Filters {
if len(f.Values) == 0 {
return fmt.Errorf("EC2 SD configuration filter values cannot be empty")
return errors.New("EC2 SD configuration filter values cannot be empty")
}
}
return nil
}

func init() {
prometheus.MustRegister(ec2SDRefreshFailuresCount)
prometheus.MustRegister(ec2SDRefreshDuration)
}

// Discovery periodically performs EC2-SD requests. It implements
// the Discoverer interface.
type Discovery struct {
*refresh.Discovery
aws *aws.Config
interval time.Duration
profile string
roleARN string
port int
filters []*Filter
logger log.Logger
}

// NewDiscovery returns a new EC2Discovery which periodically refreshes its targets.

@@ -145,7 +129,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery {
if logger == nil {
logger = log.NewNopLogger()
}
return &Discovery{
d := &Discovery{
aws: &aws.Config{
Endpoint: &conf.Endpoint,
Region: &conf.Region,

@@ -156,62 +140,23 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery {
filters: conf.Filters,
interval: time.Duration(conf.RefreshInterval),
port: conf.Port,
logger: logger,
}
d.Discovery = refresh.NewDiscovery(
logger,
"ec2",
time.Duration(conf.RefreshInterval),
d.refresh,
)
return d
}

// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
ticker := time.NewTicker(d.interval)
defer ticker.Stop()

// Get an initial set right away.
tg, err := d.refresh()
if err != nil {
level.Error(d.logger).Log("msg", "Refresh failed", "err", err)
} else {
select {
case ch <- []*targetgroup.Group{tg}:
case <-ctx.Done():
return
}
}

for {
select {
case <-ticker.C:
tg, err := d.refresh()
if err != nil {
level.Error(d.logger).Log("msg", "Refresh failed", "err", err)
continue
}

select {
case ch <- []*targetgroup.Group{tg}:
case <-ctx.Done():
return
}
case <-ctx.Done():
return
}
}
}

func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
t0 := time.Now()
defer func() {
ec2SDRefreshDuration.Observe(time.Since(t0).Seconds())
if err != nil {
ec2SDRefreshFailuresCount.Inc()
}
}()

func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
sess, err := session.NewSessionWithOptions(session.Options{
Config: *d.aws,
Profile: d.profile,
})
if err != nil {
return nil, fmt.Errorf("could not create aws session: %s", err)
return nil, errors.Wrap(err, "could not create aws session")
}

var ec2s *ec2.EC2

@@ -221,7 +166,7 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
} else {
ec2s = ec2.New(sess)
}
tg = &targetgroup.Group{
tg := &targetgroup.Group{
Source: *d.aws.Region,
}

@@ -235,7 +180,7 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {

input := &ec2.DescribeInstancesInput{Filters: filters}

if err = ec2s.DescribeInstancesPages(input, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool {
if err = ec2s.DescribeInstancesPagesWithContext(ctx, input, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool {
for _, r := range p.Reservations {
for _, inst := range r.Instances {
if inst.PrivateIpAddress == nil {

@@ -250,6 +195,9 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
}

labels[ec2LabelPrivateIP] = model.LabelValue(*inst.PrivateIpAddress)
if inst.PrivateDnsName != nil {
labels[ec2LabelPrivateDNS] = model.LabelValue(*inst.PrivateDnsName)
}
addr := net.JoinHostPort(*inst.PrivateIpAddress, fmt.Sprintf("%d", d.port))
labels[model.AddressLabel] = model.LabelValue(addr)

@@ -300,7 +248,7 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
}
return true
}); err != nil {
return nil, fmt.Errorf("could not describe instances: %s", err)
return nil, errors.Wrap(err, "could not describe instances")
}
return tg, nil
return []*targetgroup.Group{tg}, nil
}

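dns, ec2, and (further down) gce now embed *refresh.Discovery and hand it a refresh callback, so the hand-rolled Run loops with their duplicated ticker and context plumbing can be deleted. Presumably the shared helper runs a loop along these lines; this is a sketch of the pattern, not the refresh package itself:

package main

import (
	"context"
	"time"

	"github.com/prometheus/prometheus/discovery/targetgroup"
)

func run(ctx context.Context, interval time.Duration, refresh func(context.Context) ([]*targetgroup.Group, error), ch chan<- []*targetgroup.Group) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		// Refresh immediately on entry, then once per tick.
		if tgs, err := refresh(ctx); err == nil {
			select {
			case ch <- tgs:
			case <-ctx.Done():
				return
			}
		}
		select {
		case <-ticker.C:
		case <-ctx.Done():
			return
		}
	}
}
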
@@ -16,7 +16,6 @@ package file
import (
"context"
"encoding/json"
"errors"
"fmt"
"io/ioutil"
"os"

@@ -28,11 +27,13 @@ import (

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
fsnotify "gopkg.in/fsnotify/fsnotify.v1"
yaml "gopkg.in/yaml.v2"

"github.com/prometheus/prometheus/discovery/targetgroup"
"gopkg.in/fsnotify/fsnotify.v1"
"gopkg.in/yaml.v2"
)

var (

@@ -59,11 +60,11 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err
}
if len(c.Files) == 0 {
return fmt.Errorf("file service discovery config must contain at least one path name")
return errors.New("file service discovery config must contain at least one path name")
}
for _, name := range c.Files {
if !patFileSDName.MatchString(name) {
return fmt.Errorf("path name %q is not valid for file discovery", name)
return errors.Errorf("path name %q is not valid for file discovery", name)
}
}
return nil

@@ -134,8 +135,9 @@ func NewTimestampCollector() *TimestampCollector {
var (
fileSDScanDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Name: "prometheus_sd_file_scan_duration_seconds",
Help: "The duration of the File-SD scan in seconds.",
Name: "prometheus_sd_file_scan_duration_seconds",
Help: "The duration of the File-SD scan in seconds.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
})
fileSDReadErrorsCount = prometheus.NewCounter(
prometheus.CounterOpts{

@@ -382,7 +384,7 @@ func (d *Discovery) readFile(filename string) ([]*targetgroup.Group, error) {
return nil, err
}
default:
panic(fmt.Errorf("discovery.File.readFile: unhandled file extension %q", ext))
panic(errors.Errorf("discovery.File.readFile: unhandled file extension %q", ext))
}

for i, tg := range targetGroups {

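A recurring change in these files is swapping fmt.Errorf("...: %s", err) for github.com/pkg/errors, which keeps the underlying error attached as a cause instead of flattening it into a string. A small standalone illustration:

package main

import (
	"fmt"

	"github.com/pkg/errors"
)

func main() {
	base := errors.New("file service discovery config must contain at least one path name")
	// Wrap adds context while preserving the original error as the cause.
	wrapped := errors.Wrap(base, "could not load config")
	fmt.Println(wrapped)               // could not load config: file service discovery config ...
	fmt.Println(errors.Cause(wrapped)) // the original error
}
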
@@ -22,13 +22,13 @@ import (
"time"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
"golang.org/x/oauth2"
"golang.org/x/oauth2/google"
compute "google.golang.org/api/compute/v1"
"google.golang.org/api/option"

"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
)

@@ -50,24 +50,12 @@ const (
gceLabelMachineType = gceLabel + "machine_type"
)

var (
gceSDRefreshFailuresCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "prometheus_sd_gce_refresh_failures_total",
Help: "The number of GCE-SD refresh failures.",
})
gceSDRefreshDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Name: "prometheus_sd_gce_refresh_duration",
Help: "The duration of a GCE-SD refresh in seconds.",
})
// DefaultSDConfig is the default GCE SD configuration.
DefaultSDConfig = SDConfig{
Port: 80,
TagSeparator: ",",
RefreshInterval: model.Duration(60 * time.Second),
}
)
// DefaultSDConfig is the default GCE SD configuration.
var DefaultSDConfig = SDConfig{
Port: 80,
TagSeparator: ",",
RefreshInterval: model.Duration(60 * time.Second),
}

// SDConfig is the configuration for GCE based service discovery.
type SDConfig struct {

@@ -97,105 +85,59 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err
}
if c.Project == "" {
return fmt.Errorf("GCE SD configuration requires a project")
return errors.New("GCE SD configuration requires a project")
}
if c.Zone == "" {
return fmt.Errorf("GCE SD configuration requires a zone")
return errors.New("GCE SD configuration requires a zone")
}
return nil
}

func init() {
prometheus.MustRegister(gceSDRefreshFailuresCount)
prometheus.MustRegister(gceSDRefreshDuration)
}

// Discovery periodically performs GCE-SD requests. It implements
// the Discoverer interface.
type Discovery struct {
*refresh.Discovery
project string
zone string
filter string
client *http.Client
svc *compute.Service
isvc *compute.InstancesService
interval time.Duration
port int
tagSeparator string
logger log.Logger
}

// NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
gd := &Discovery{
d := &Discovery{
project: conf.Project,
zone: conf.Zone,
filter: conf.Filter,
interval: time.Duration(conf.RefreshInterval),
port: conf.Port,
tagSeparator: conf.TagSeparator,
logger: logger,
}
var err error
gd.client, err = google.DefaultClient(oauth2.NoContext, compute.ComputeReadonlyScope)
d.client, err = google.DefaultClient(context.Background(), compute.ComputeReadonlyScope)
if err != nil {
return nil, fmt.Errorf("error setting up communication with GCE service: %s", err)
return nil, errors.Wrap(err, "error setting up communication with GCE service")
}
gd.svc, err = compute.New(gd.client)
d.svc, err = compute.NewService(context.Background(), option.WithHTTPClient(d.client))
if err != nil {
return nil, fmt.Errorf("error setting up communication with GCE service: %s", err)
return nil, errors.Wrap(err, "error setting up communication with GCE service")
}
gd.isvc = compute.NewInstancesService(gd.svc)
return gd, nil
d.isvc = compute.NewInstancesService(d.svc)

d.Discovery = refresh.NewDiscovery(
logger,
"gce",
time.Duration(conf.RefreshInterval),
d.refresh,
)
return d, nil
}

// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
// Get an initial set right away.
tg, err := d.refresh()
if err != nil {
level.Error(d.logger).Log("msg", "Refresh failed", "err", err)
} else {
select {
case ch <- []*targetgroup.Group{tg}:
case <-ctx.Done():
}
}

ticker := time.NewTicker(d.interval)
defer ticker.Stop()

for {
select {
case <-ticker.C:
tg, err := d.refresh()
if err != nil {
level.Error(d.logger).Log("msg", "Refresh failed", "err", err)
continue
}
select {
case ch <- []*targetgroup.Group{tg}:
case <-ctx.Done():
}
case <-ctx.Done():
return
}
}
}

func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
t0 := time.Now()
defer func() {
gceSDRefreshDuration.Observe(time.Since(t0).Seconds())
if err != nil {
gceSDRefreshFailuresCount.Inc()
}
}()

tg = &targetgroup.Group{
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
tg := &targetgroup.Group{
Source: fmt.Sprintf("GCE_%s_%s", d.project, d.zone),
}

@@ -203,7 +145,7 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
if len(d.filter) > 0 {
ilc = ilc.Filter(d.filter)
}
err = ilc.Pages(context.TODO(), func(l *compute.InstanceList) error {
err := ilc.Pages(ctx, func(l *compute.InstanceList) error {
for _, inst := range l.Items {
if len(inst.NetworkInterfaces) == 0 {
continue

@@ -260,7 +202,7 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
return nil
})
if err != nil {
return tg, fmt.Errorf("error retrieving refresh targets from gce: %s", err)
return nil, errors.Wrap(err, "error retrieving refresh targets from gce")
}
return tg, nil
return []*targetgroup.Group{tg}, nil
}

@@ -18,6 +18,7 @@ import (
"time"

"github.com/prometheus/client_golang/prometheus"

"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/metrics"
"k8s.io/client-go/util/workqueue"

@@ -136,6 +137,22 @@ var (
},
[]string{"queue_name"},
)
clientGoWorkqueueUnfinishedWorkSecondsMetricVec = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: workqueueMetricsNamespace,
Name: "unfinished_work_seconds",
Help: "How long an item has remained unfinished in the work queue.",
},
[]string{"queue_name"},
)
clientGoWorkqueueLongestRunningProcessorMetricVec = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: workqueueMetricsNamespace,
Name: "longest_running_processor_seconds",
Help: "Duration of the longest running processor in the work queue.",
},
[]string{"queue_name"},
)
clientGoWorkqueueWorkDurationMetricVec = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: workqueueMetricsNamespace,

@@ -153,6 +170,7 @@ type noopMetric struct{}
func (noopMetric) Inc() {}
func (noopMetric) Dec() {}
func (noopMetric) Observe(float64) {}
func (noopMetric) Set(float64) {}

// Definition of client-go metrics adapters for HTTP requests observation
type clientGoRequestMetricAdapter struct{}

@@ -218,6 +236,8 @@ func (f *clientGoWorkqueueMetricsProvider) Register(registerer prometheus.Regist
registerer.MustRegister(clientGoWorkqueueAddsMetricVec)
registerer.MustRegister(clientGoWorkqueueLatencyMetricVec)
registerer.MustRegister(clientGoWorkqueueWorkDurationMetricVec)
registerer.MustRegister(clientGoWorkqueueUnfinishedWorkSecondsMetricVec)
registerer.MustRegister(clientGoWorkqueueLongestRunningProcessorMetricVec)
}

func (f *clientGoWorkqueueMetricsProvider) NewDepthMetric(name string) workqueue.GaugeMetric {

@@ -226,21 +246,48 @@ func (f *clientGoWorkqueueMetricsProvider) NewDepthMetric(name string) workqueue
func (f *clientGoWorkqueueMetricsProvider) NewAddsMetric(name string) workqueue.CounterMetric {
return clientGoWorkqueueAddsMetricVec.WithLabelValues(name)
}
func (f *clientGoWorkqueueMetricsProvider) NewLatencyMetric(name string) workqueue.SummaryMetric {
func (f *clientGoWorkqueueMetricsProvider) NewLatencyMetric(name string) workqueue.HistogramMetric {
metric := clientGoWorkqueueLatencyMetricVec.WithLabelValues(name)
// Convert microseconds to seconds for consistency across metrics.
return prometheus.ObserverFunc(func(v float64) {
metric.Observe(v / 1e6)
})
}
func (f *clientGoWorkqueueMetricsProvider) NewWorkDurationMetric(name string) workqueue.SummaryMetric {
func (f *clientGoWorkqueueMetricsProvider) NewWorkDurationMetric(name string) workqueue.HistogramMetric {
metric := clientGoWorkqueueWorkDurationMetricVec.WithLabelValues(name)
// Convert microseconds to seconds for consistency across metrics.
return prometheus.ObserverFunc(func(v float64) {
metric.Observe(v / 1e6)
})
}
func (f *clientGoWorkqueueMetricsProvider) NewUnfinishedWorkSecondsMetric(name string) workqueue.SettableGaugeMetric {
return clientGoWorkqueueUnfinishedWorkSecondsMetricVec.WithLabelValues(name)
}
func (f *clientGoWorkqueueMetricsProvider) NewLongestRunningProcessorSecondsMetric(name string) workqueue.SettableGaugeMetric {
return clientGoWorkqueueLongestRunningProcessorMetricVec.WithLabelValues(name)
}
func (clientGoWorkqueueMetricsProvider) NewRetriesMetric(name string) workqueue.CounterMetric {
// Retries are not used so the metric is ommited.
// Retries are not used so the metric is omitted.
return noopMetric{}
}
func (clientGoWorkqueueMetricsProvider) NewDeprecatedDepthMetric(name string) workqueue.GaugeMetric {
return noopMetric{}
}
func (clientGoWorkqueueMetricsProvider) NewDeprecatedAddsMetric(name string) workqueue.CounterMetric {
return noopMetric{}
}
func (clientGoWorkqueueMetricsProvider) NewDeprecatedLatencyMetric(name string) workqueue.SummaryMetric {
return noopMetric{}
}
func (f *clientGoWorkqueueMetricsProvider) NewDeprecatedWorkDurationMetric(name string) workqueue.SummaryMetric {
return noopMetric{}
}
func (f *clientGoWorkqueueMetricsProvider) NewDeprecatedUnfinishedWorkSecondsMetric(name string) workqueue.SettableGaugeMetric {
return noopMetric{}
}
func (f *clientGoWorkqueueMetricsProvider) NewDeprecatedLongestRunningProcessorMicrosecondsMetric(name string) workqueue.SettableGaugeMetric {
return noopMetric{}
}
func (clientGoWorkqueueMetricsProvider) NewDeprecatedRetriesMetric(name string) workqueue.CounterMetric {
return noopMetric{}
}

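The latency and work-duration adapters above use prometheus.ObserverFunc, which lets a plain function satisfy the Observer interface; that is how client-go's microsecond samples get rescaled to seconds on their way into the metric. A standalone version of the same trick (the metric name here is made up):

package main

import "github.com/prometheus/client_golang/prometheus"

func main() {
	vec := prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name: "queue_latency_seconds",
		Help: "Example latency histogram fed in microseconds.",
	}, []string{"queue_name"})
	prometheus.MustRegister(vec)

	metric := vec.WithLabelValues("demo")
	// The adapter converts each observed value from microseconds to seconds.
	obs := prometheus.ObserverFunc(func(v float64) {
		metric.Observe(v / 1e6)
	})
	obs.Observe(2500) // recorded as 0.0025 seconds
}
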
@@ -15,17 +15,18 @@ package kubernetes

import (
"context"
"fmt"
"net"
"strconv"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"

"github.com/prometheus/prometheus/discovery/targetgroup"
)

// Endpoints discovers new endpoint targets.

@@ -150,7 +151,7 @@ func (e *Endpoints) process(ctx context.Context, ch chan<- []*targetgroup.Group)

namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil {
level.Error(e.logger).Log("msg", "spliting key failed", "key", key)
level.Error(e.logger).Log("msg", "splitting key failed", "key", key)
return true
}

@@ -178,7 +179,7 @@ func convertToEndpoints(o interface{}) (*apiv1.Endpoints, error) {
return endpoints, nil
}

return nil, fmt.Errorf("Received unexpected object: %v", o)
return nil, errors.Errorf("received unexpected object: %v", o)
}

func endpointsSource(ep *apiv1.Endpoints) string {

@@ -191,6 +192,8 @@ func endpointsSourceFromNamespaceAndName(namespace, name string) string {

const (
endpointsNameLabel = metaLabelPrefix + "endpoints_name"
endpointNodeName = metaLabelPrefix + "endpoint_node_name"
endpointHostname = metaLabelPrefix + "endpoint_hostname"
endpointReadyLabel = metaLabelPrefix + "endpoint_ready"
endpointPortNameLabel = metaLabelPrefix + "endpoint_port_name"
endpointPortProtocolLabel = metaLabelPrefix + "endpoint_port_protocol"

@@ -229,6 +232,13 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
target[model.LabelName(endpointAddressTargetNameLabel)] = lv(addr.TargetRef.Name)
}

if addr.NodeName != nil {
target[model.LabelName(endpointNodeName)] = lv(*addr.NodeName)
}
if addr.Hostname != "" {
target[model.LabelName(endpointHostname)] = lv(addr.Hostname)
}

pod := e.resolvePodRef(addr.TargetRef)
if pod == nil {
// This target is not a Pod, so don't continue with Pod specific logic.

@@ -324,11 +334,12 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod {
p.Name = ref.Name

obj, exists, err := e.podStore.Get(p)
if err != nil || !exists {
return nil
}
if err != nil {
level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err)
return nil
}
if !exists {
return nil
}
return obj.(*apiv1.Pod)
}

@@ -339,11 +350,12 @@ func (e *Endpoints) addServiceLabels(ns, name string, tg *targetgroup.Group) {
svc.Name = name

obj, exists, err := e.serviceStore.Get(svc)
if !exists || err != nil {
return
}
if err != nil {
level.Error(e.logger).Log("msg", "retrieving service failed", "err", err)
return
}
if !exists {
return
}
svc = obj.(*apiv1.Service)

@@ -18,13 +18,14 @@ import (

"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
)

func makeEndpoints() *v1.Endpoints {
var nodeName = "foobar"
return &v1.Endpoints{
ObjectMeta: metav1.ObjectMeta{
Name: "testendpoints",

@@ -34,7 +35,9 @@ func makeEndpoints() *v1.Endpoints {
{
Addresses: []v1.EndpointAddress{
{
IP: "1.2.3.4",
IP: "1.2.3.4",
Hostname: "testendpoint1",
NodeName: &nodeName,
},
},
Ports: []v1.EndpointPort{

@@ -69,14 +72,13 @@ func makeEndpoints() *v1.Endpoints {
}

func TestEndpointsDiscoveryBeforeRun(t *testing.T) {
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{})
n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{})

k8sDiscoveryTest{
discovery: n,
beforeRun: func() {
obj := makeEndpoints()
c.CoreV1().Endpoints(obj.Namespace).Create(obj)
w.Endpoints().Add(obj)
},
expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{

@@ -84,6 +86,8 @@ func TestEndpointsDiscoveryBeforeRun(t *testing.T) {
Targets: []model.LabelSet{
{
"__address__": "1.2.3.4:9000",
"__meta_kubernetes_endpoint_hostname": "testendpoint1",
"__meta_kubernetes_endpoint_node_name": "foobar",
"__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",

@@ -148,7 +152,7 @@ func TestEndpointsDiscoveryAdd(t *testing.T) {
PodIP: "1.2.3.4",
},
}
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, obj)
n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, obj)

k8sDiscoveryTest{
discovery: n,

@@ -181,7 +185,6 @@ func TestEndpointsDiscoveryAdd(t *testing.T) {
},
}
c.CoreV1().Endpoints(obj.Namespace).Create(obj)
w.Endpoints().Add(obj)
},
expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{

@@ -232,14 +235,13 @@ func TestEndpointsDiscoveryAdd(t *testing.T) {
}

func TestEndpointsDiscoveryDelete(t *testing.T) {
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())

k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makeEndpoints()
c.CoreV1().Endpoints(obj.Namespace).Delete(obj.Name, &metav1.DeleteOptions{})
w.Endpoints().Delete(obj)
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{

@@ -251,7 +253,7 @@ func TestEndpointsDiscoveryDelete(t *testing.T) {
}

func TestEndpointsDiscoveryUpdate(t *testing.T) {
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())

k8sDiscoveryTest{
discovery: n,

@@ -293,7 +295,6 @@ func TestEndpointsDiscoveryUpdate(t *testing.T) {
},
}
c.CoreV1().Endpoints(obj.Namespace).Update(obj)
w.Endpoints().Modify(obj)
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{

@@ -323,7 +324,7 @@ func TestEndpointsDiscoveryUpdate(t *testing.T) {
}

func TestEndpointsDiscoveryEmptySubsets(t *testing.T) {
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())

k8sDiscoveryTest{
discovery: n,

@@ -336,7 +337,6 @@ func TestEndpointsDiscoveryEmptySubsets(t *testing.T) {
Subsets: []v1.EndpointSubset{},
}
c.CoreV1().Endpoints(obj.Namespace).Update(obj)
w.Endpoints().Modify(obj)
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{

@@ -352,7 +352,7 @@ func TestEndpointsDiscoveryEmptySubsets(t *testing.T) {
}

func TestEndpointsDiscoveryWithService(t *testing.T) {
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())

k8sDiscoveryTest{
discovery: n,

@@ -362,12 +362,11 @@ func TestEndpointsDiscoveryWithService(t *testing.T) {
Name: "testendpoints",
Namespace: "default",
Labels: map[string]string{
"app": "test",
"app/name": "test",
},
},
}
c.CoreV1().Services(obj.Namespace).Create(obj)
w.Services().Add(obj)
},
expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{

@@ -375,6 +374,8 @@ func TestEndpointsDiscoveryWithService(t *testing.T) {
Targets: []model.LabelSet{
{
"__address__": "1.2.3.4:9000",
"__meta_kubernetes_endpoint_hostname": "testendpoint1",
"__meta_kubernetes_endpoint_node_name": "foobar",
"__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",

@@ -393,10 +394,11 @@ func TestEndpointsDiscoveryWithService(t *testing.T) {
},
},
Labels: model.LabelSet{
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app": "test",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
},
Source: "endpoints/default/testendpoints",
},

@@ -405,7 +407,7 @@ func TestEndpointsDiscoveryWithService(t *testing.T) {
}

func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())

k8sDiscoveryTest{
discovery: n,

@@ -415,12 +417,11 @@ func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
Name: "testendpoints",
Namespace: "default",
Labels: map[string]string{
"app": "test",
"app/name": "test",
},
},
}
c.CoreV1().Services(obj.Namespace).Create(obj)
w.Services().Add(obj)
},
afterStart: func() {
obj := &v1.Service{

@@ -428,13 +429,12 @@ func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
Name: "testendpoints",
Namespace: "default",
Labels: map[string]string{
"app": "svc",
"app/name": "svc",
"component": "testing",
},
},
}
c.CoreV1().Services(obj.Namespace).Update(obj)
w.Services().Modify(obj)
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{

@@ -442,6 +442,8 @@ func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
Targets: []model.LabelSet{
{
"__address__": "1.2.3.4:9000",
"__meta_kubernetes_endpoint_hostname": "testendpoint1",
"__meta_kubernetes_endpoint_node_name": "foobar",
"__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",

@@ -460,11 +462,13 @@ func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
},
},
Labels: model.LabelSet{
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app": "svc",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_service_label_component": "testing",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app_name": "svc",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_service_label_component": "testing",
"__meta_kubernetes_service_labelpresent_component": "true",
},
Source: "endpoints/default/testendpoints",
},

@@ -540,7 +544,7 @@ func TestEndpointsDiscoveryNamespaces(t *testing.T) {
},
},
}
n, _, _ := makeDiscovery(RoleEndpoint, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}, objs...)
n, _ := makeDiscovery(RoleEndpoint, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}, objs...)

k8sDiscoveryTest{
discovery: n,

@@ -550,6 +554,8 @@ func TestEndpointsDiscoveryNamespaces(t *testing.T) {
Targets: []model.LabelSet{
{
"__address__": "1.2.3.4:9000",
"__meta_kubernetes_endpoint_hostname": "testendpoint1",
"__meta_kubernetes_endpoint_node_name": "foobar",
"__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true",

@@ -568,10 +574,11 @@ func TestEndpointsDiscoveryNamespaces(t *testing.T) {
},
},
Labels: model.LabelSet{
"__meta_kubernetes_namespace": "ns1",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app": "app1",
"__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_namespace": "ns1",
"__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app": "app1",
"__meta_kubernetes_service_labelpresent_app": "true",
"__meta_kubernetes_service_name": "testendpoints",
},
Source: "endpoints/ns1/testendpoints",
},

@@ -15,16 +15,17 @@ package kubernetes

import (
"context"
"fmt"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
"k8s.io/api/extensions/v1beta1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"

"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
)

// Ingress implements discovery of Kubernetes ingresss.

@@ -118,7 +119,7 @@ func convertToIngress(o interface{}) (*v1beta1.Ingress, error) {
return ingress, nil
}

return nil, fmt.Errorf("Received unexpected object: %v", o)
return nil, errors.Errorf("received unexpected object: %v", o)
}

func ingressSource(s *v1beta1.Ingress) string {

@@ -130,12 +131,14 @@ func ingressSourceFromNamespaceAndName(namespace, name string) string {
}

const (
ingressNameLabel = metaLabelPrefix + "ingress_name"
ingressLabelPrefix = metaLabelPrefix + "ingress_label_"
ingressAnnotationPrefix = metaLabelPrefix + "ingress_annotation_"
ingressSchemeLabel = metaLabelPrefix + "ingress_scheme"
ingressHostLabel = metaLabelPrefix + "ingress_host"
ingressPathLabel = metaLabelPrefix + "ingress_path"
ingressNameLabel = metaLabelPrefix + "ingress_name"
ingressLabelPrefix = metaLabelPrefix + "ingress_label_"
ingressLabelPresentPrefix = metaLabelPrefix + "ingress_labelpresent_"
ingressAnnotationPrefix = metaLabelPrefix + "ingress_annotation_"
ingressAnnotationPresentPrefix = metaLabelPrefix + "ingress_annotationpresent_"
ingressSchemeLabel = metaLabelPrefix + "ingress_scheme"
ingressHostLabel = metaLabelPrefix + "ingress_host"
ingressPathLabel = metaLabelPrefix + "ingress_path"
)

func ingressLabels(ingress *v1beta1.Ingress) model.LabelSet {

@@ -144,13 +147,15 @@ func ingressLabels(ingress *v1beta1.Ingress) model.LabelSet {
ls[namespaceLabel] = lv(ingress.Namespace)

for k, v := range ingress.Labels {
ln := strutil.SanitizeLabelName(ingressLabelPrefix + k)
ls[model.LabelName(ln)] = lv(v)
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(ingressLabelPrefix+ln)] = lv(v)
ls[model.LabelName(ingressLabelPresentPrefix+ln)] = presentValue
}

for k, v := range ingress.Annotations {
ln := strutil.SanitizeLabelName(ingressAnnotationPrefix + k)
ls[model.LabelName(ln)] = lv(v)
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(ingressAnnotationPrefix+ln)] = lv(v)
ls[model.LabelName(ingressAnnotationPresentPrefix+ln)] = presentValue
}
return ls
}

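The labelpresent change above sanitizes the raw key once and then emits two labels per key: the value label and a companion "...labelpresent_..." marker set to "true", so relabel rules can match on a label's existence even when its value is empty. A self-contained sketch using the same helpers:

package main

import (
	"fmt"

	"github.com/prometheus/common/model"

	"github.com/prometheus/prometheus/util/strutil"
)

func main() {
	labels := map[string]string{"test/label": "testvalue"}
	ls := model.LabelSet{}
	for k, v := range labels {
		ln := strutil.SanitizeLabelName(k) // "test/label" becomes "test_label"
		ls[model.LabelName("__meta_kubernetes_ingress_label_"+ln)] = model.LabelValue(v)
		ls[model.LabelName("__meta_kubernetes_ingress_labelpresent_"+ln)] = "true"
	}
	fmt.Println(ls)
}
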
@@ -36,8 +36,8 @@ func makeIngress(tls TLSMode) *v1beta1.Ingress {
ObjectMeta: metav1.ObjectMeta{
Name: "testingress",
Namespace: "default",
Labels: map[string]string{"testlabel": "testvalue"},
Annotations: map[string]string{"testannotation": "testannotationvalue"},
Labels: map[string]string{"test/label": "testvalue"},
Annotations: map[string]string{"test/annotation": "testannotationvalue"},
},
Spec: v1beta1.IngressSpec{
TLS: nil,

@@ -118,10 +118,12 @@ func expectedTargetGroups(ns string, tls TLSMode) map[string]*targetgroup.Group
},
},
Labels: model.LabelSet{
"__meta_kubernetes_ingress_name": "testingress",
"__meta_kubernetes_namespace": lv(ns),
"__meta_kubernetes_ingress_label_testlabel": "testvalue",
"__meta_kubernetes_ingress_annotation_testannotation": "testannotationvalue",
"__meta_kubernetes_ingress_name": "testingress",
"__meta_kubernetes_namespace": lv(ns),
"__meta_kubernetes_ingress_label_test_label": "testvalue",
"__meta_kubernetes_ingress_labelpresent_test_label": "true",
"__meta_kubernetes_ingress_annotation_test_annotation": "testannotationvalue",
"__meta_kubernetes_ingress_annotationpresent_test_annotation": "true",
},
Source: key,
},

@@ -129,14 +131,13 @@ func expectedTargetGroups(ns string, tls TLSMode) map[string]*targetgroup.Group
}

func TestIngressDiscoveryAdd(t *testing.T) {
n, c, w := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})
n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})

k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makeIngress(TLSNo)
c.ExtensionsV1beta1().Ingresses("default").Create(obj)
w.Ingresses().Add(obj)
},
expectedMaxItems: 1,
expectedRes: expectedTargetGroups("default", TLSNo),

@@ -144,14 +145,13 @@ func TestIngressDiscoveryAdd(t *testing.T) {
}

func TestIngressDiscoveryAddTLS(t *testing.T) {
n, c, w := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})
n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})

k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makeIngress(TLSYes)
c.ExtensionsV1beta1().Ingresses("default").Create(obj)
w.Ingresses().Add(obj)
},
expectedMaxItems: 1,
expectedRes: expectedTargetGroups("default", TLSYes),

@@ -159,14 +159,13 @@ func TestIngressDiscoveryAddTLS(t *testing.T) {
}

func TestIngressDiscoveryAddMixed(t *testing.T) {
n, c, w := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})
n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})

k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makeIngress(TLSMixed)
c.ExtensionsV1beta1().Ingresses("default").Create(obj)
w.Ingresses().Add(obj)
},
expectedMaxItems: 1,
expectedRes: expectedTargetGroups("default", TLSMixed),

@@ -174,7 +173,7 @@ func TestIngressDiscoveryAddMixed(t *testing.T) {
}

func TestIngressDiscoveryNamespaces(t *testing.T) {
n, c, w := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"ns1", "ns2"}})
n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"ns1", "ns2"}})

expected := expectedTargetGroups("ns1", TLSNo)
for k, v := range expectedTargetGroups("ns2", TLSNo) {

@@ -187,7 +186,6 @@ func TestIngressDiscoveryNamespaces(t *testing.T) {
obj := makeIngress(TLSNo)
obj.Namespace = ns
c.ExtensionsV1beta1().Ingresses(obj.Namespace).Create(obj)
w.Ingresses().Add(obj)
}
},
expectedMaxItems: 2,

@@ -15,18 +15,16 @@ package kubernetes

import (
"context"
"fmt"
"io/ioutil"
"reflect"
"sync"
"time"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"

apiv1 "k8s.io/api/core/v1"
extensionsv1beta1 "k8s.io/api/extensions/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

@@ -35,6 +33,8 @@ import (
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"

"github.com/prometheus/prometheus/discovery/targetgroup"
)

const (

@@ -43,6 +43,7 @@ const (
metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_"
namespaceLabel = metaLabelPrefix + "namespace"
metricsNamespace = "prometheus_sd_kubernetes"
presentValue = model.LabelValue("true")
)

var (

@@ -80,19 +81,16 @@ func (c *Role) UnmarshalYAML(unmarshal func(interface{}) error) error {
case RoleNode, RolePod, RoleService, RoleEndpoint, RoleIngress:
return nil
default:
return fmt.Errorf("Unknown Kubernetes SD role %q", *c)
return errors.Errorf("unknown Kubernetes SD role %q", *c)
}
}

// SDConfig is the configuration for Kubernetes service discovery.
type SDConfig struct {
APIServer config_util.URL `yaml:"api_server,omitempty"`
Role Role `yaml:"role"`
BasicAuth *config_util.BasicAuth `yaml:"basic_auth,omitempty"`
BearerToken config_util.Secret `yaml:"bearer_token,omitempty"`
BearerTokenFile string `yaml:"bearer_token_file,omitempty"`
TLSConfig config_util.TLSConfig `yaml:"tls_config,omitempty"`
NamespaceDiscovery NamespaceDiscovery `yaml:"namespaces,omitempty"`
APIServer config_util.URL `yaml:"api_server,omitempty"`
Role Role `yaml:"role"`
HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"`
NamespaceDiscovery NamespaceDiscovery `yaml:"namespaces,omitempty"`
}

// UnmarshalYAML implements the yaml.Unmarshaler interface.

@@ -104,18 +102,14 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err
}
if c.Role == "" {
return fmt.Errorf("role missing (one of: pod, service, endpoints, node)")
return errors.Errorf("role missing (one of: pod, service, endpoints, node, ingress)")
}
if len(c.BearerToken) > 0 && len(c.BearerTokenFile) > 0 {
return fmt.Errorf("at most one of bearer_token & bearer_token_file must be configured")
err = c.HTTPClientConfig.Validate()
if err != nil {
return err
}
if c.BasicAuth != nil && (len(c.BearerToken) > 0 || len(c.BearerTokenFile) > 0) {
return fmt.Errorf("at most one of basic_auth, bearer_token & bearer_token_file must be configured")
}
if c.APIServer.URL == nil &&
(c.BasicAuth != nil || c.BearerToken != "" || c.BearerTokenFile != "" ||
c.TLSConfig.CAFile != "" || c.TLSConfig.CertFile != "" || c.TLSConfig.KeyFile != "") {
return fmt.Errorf("to use custom authentication please provide the 'api_server' URL explicitly")
if c.APIServer.URL == nil && !reflect.DeepEqual(c.HTTPClientConfig, config_util.HTTPClientConfig{}) {
return errors.Errorf("to use custom HTTP client configuration please provide the 'api_server' URL explicitly")
}
return nil
}

@@ -137,7 +131,7 @@ func init() {
prometheus.MustRegister(eventCount)

// Initialize metric vectors.
for _, role := range []string{"endpoints", "node", "pod", "service"} {
for _, role := range []string{"endpoints", "node", "pod", "service", "ingress"} {
for _, evt := range []string{"add", "delete", "update"} {
eventCount.WithLabelValues(role, evt)
}

@@ -195,50 +189,19 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
if err != nil {
return nil, err
}
// Because the handling of configuration parameters changes
// we should inform the user when their currently configured values
// will be ignored due to precedence of InClusterConfig
level.Info(l).Log("msg", "Using pod service account via in-cluster config")

if conf.TLSConfig.CAFile != "" {
level.Warn(l).Log("msg", "Configured TLS CA file is ignored when using pod service account")
}
if conf.TLSConfig.CertFile != "" || conf.TLSConfig.KeyFile != "" {
level.Warn(l).Log("msg", "Configured TLS client certificate is ignored when using pod service account")
}
if conf.BearerToken != "" {
level.Warn(l).Log("msg", "Configured auth token is ignored when using pod service account")
}
if conf.BasicAuth != nil {
level.Warn(l).Log("msg", "Configured basic authentication credentials are ignored when using pod service account")
}
} else {
rt, err := config_util.NewRoundTripperFromConfig(conf.HTTPClientConfig, "kubernetes_sd")
if err != nil {
return nil, err
}
kcfg = &rest.Config{
Host: conf.APIServer.String(),
TLSClientConfig: rest.TLSClientConfig{
CAFile: conf.TLSConfig.CAFile,
CertFile: conf.TLSConfig.CertFile,
KeyFile: conf.TLSConfig.KeyFile,
Insecure: conf.TLSConfig.InsecureSkipVerify,
},
}
token := string(conf.BearerToken)
if conf.BearerTokenFile != "" {
bf, err := ioutil.ReadFile(conf.BearerTokenFile)
if err != nil {
return nil, err
}
token = string(bf)
}
kcfg.BearerToken = token

if conf.BasicAuth != nil {
kcfg.Username = conf.BasicAuth.Username
kcfg.Password = string(conf.BasicAuth.Password)
Host: conf.APIServer.String(),
Transport: rt,
}
}

kcfg.UserAgent = "prometheus/discovery"
kcfg.UserAgent = "Prometheus/discovery"

c, err := kubernetes.NewForConfig(kcfg)
if err != nil {

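With authentication folded into the inlined HTTPClientConfig, validation now rejects any custom HTTP client settings unless api_server is given, since the in-cluster configuration would silently override them. A sketch of the behavior, assuming gopkg.in/yaml.v2 (the error string is the one introduced above):

// Sketch: unmarshaling triggers the SDConfig validation shown in the diff.
var cfg SDConfig
err := yaml.UnmarshalStrict([]byte(`
role: endpoints
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
`), &cfg)
// err: "to use custom HTTP client configuration please provide the 'api_server' URL explicitly"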
@@ -16,7 +16,6 @@ package kubernetes
import (
"context"
"encoding/json"
"sync"
"testing"
"time"

@@ -24,72 +23,21 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/testutil"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/fake"
k8stesting "k8s.io/client-go/testing"
"k8s.io/client-go/tools/cache"
)

type watcherFactory struct {
sync.RWMutex
watchers map[schema.GroupVersionResource]*watch.FakeWatcher
}

func (wf *watcherFactory) watchFor(gvr schema.GroupVersionResource) *watch.FakeWatcher {
wf.Lock()
defer wf.Unlock()

var fakewatch *watch.FakeWatcher
fakewatch, ok := wf.watchers[gvr]
if !ok {
fakewatch = watch.NewFakeWithChanSize(128, true)
wf.watchers[gvr] = fakewatch
}
return fakewatch
}

func (wf *watcherFactory) Nodes() *watch.FakeWatcher {
return wf.watchFor(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "nodes"})
}

func (wf *watcherFactory) Ingresses() *watch.FakeWatcher {
return wf.watchFor(schema.GroupVersionResource{Group: "extensions", Version: "v1beta1", Resource: "ingresses"})
}

func (wf *watcherFactory) Endpoints() *watch.FakeWatcher {
return wf.watchFor(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "endpoints"})
}

func (wf *watcherFactory) Services() *watch.FakeWatcher {
return wf.watchFor(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "services"})
}

func (wf *watcherFactory) Pods() *watch.FakeWatcher {
return wf.watchFor(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"})
}

// makeDiscovery creates a kubernetes.Discovery instance for testing.
func makeDiscovery(role Role, nsDiscovery NamespaceDiscovery, objects ...runtime.Object) (*Discovery, kubernetes.Interface, *watcherFactory) {
func makeDiscovery(role Role, nsDiscovery NamespaceDiscovery, objects ...runtime.Object) (*Discovery, kubernetes.Interface) {
clientset := fake.NewSimpleClientset(objects...)
// Current client-go we are using does not support push event on
// Add/Update/Create, so we need to emit event manually.
// See https://github.com/kubernetes/kubernetes/issues/54075.
// TODO update client-go and related packages to kubernetes-1.10.0+
wf := &watcherFactory{
watchers: make(map[schema.GroupVersionResource]*watch.FakeWatcher),
}
clientset.PrependWatchReactor("*", func(action k8stesting.Action) (handled bool, ret watch.Interface, err error) {
gvr := action.GetResource()
return true, wf.watchFor(gvr), nil
})

return &Discovery{
client: clientset,
logger: log.NewNopLogger(),
role: role,
namespaceDiscovery: &nsDiscovery,
}, clientset, wf
}, clientset
}

type k8sDiscoveryTest struct {

@@ -106,6 +54,7 @@ type k8sDiscoveryTest struct {
}

func (d k8sDiscoveryTest) Run(t *testing.T) {
t.Helper()
ch := make(chan []*targetgroup.Group)
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel()

@@ -156,7 +105,7 @@ Loop:
case <-time.After(timeout):
// Because we use queue, an object that is created then
// deleted or updated may be processed only once.
// So possibliy we may skip events, timed out here.
// So possibly we may skip events, timed out here.
t.Logf("timed out, got %d (max: %d) items, some events are skipped", len(allTgs), max)
break Loop
}

@@ -176,6 +125,7 @@ Loop:
}

func requireTargetGroups(t *testing.T, expected, res map[string]*targetgroup.Group) {
t.Helper()
b1, err := json.Marshal(expected)
if err != nil {
panic(err)

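The removed watcherFactory plumbing existed only because the old vendored client-go did not deliver watch events for objects created through the fake clientset. After the client-go update this commit relies on, the fake object tracker feeds registered watchers directly, so a plain Create is enough for an informer to see the object. A rough sketch of the assumption the simplified makeDiscovery rests on:

// Sketch, assuming the updated k8s.io/client-go fake package.
clientset := fake.NewSimpleClientset()
w, _ := clientset.CoreV1().Pods("default").Watch(metav1.ListOptions{})
pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p", Namespace: "default"}}
clientset.CoreV1().Pods("default").Create(pod)
evt := <-w.ResultChan() // evt.Type == watch.Added, with no manual push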
@@ -15,18 +15,19 @@ package kubernetes

import (
"context"
"fmt"
"net"
"strconv"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"

"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
)

const (

@@ -127,7 +128,7 @@ func convertToNode(o interface{}) (*apiv1.Node, error) {
return node, nil
}

return nil, fmt.Errorf("Received unexpected object: %v", o)
return nil, errors.Errorf("received unexpected object: %v", o)
}

func nodeSource(n *apiv1.Node) string {

@@ -139,10 +140,12 @@ func nodeSourceFromName(name string) string {
}

const (
nodeNameLabel = metaLabelPrefix + "node_name"
nodeLabelPrefix = metaLabelPrefix + "node_label_"
nodeAnnotationPrefix = metaLabelPrefix + "node_annotation_"
nodeAddressPrefix = metaLabelPrefix + "node_address_"
nodeNameLabel = metaLabelPrefix + "node_name"
nodeLabelPrefix = metaLabelPrefix + "node_label_"
nodeLabelPresentPrefix = metaLabelPrefix + "node_labelpresent_"
nodeAnnotationPrefix = metaLabelPrefix + "node_annotation_"
nodeAnnotationPresentPrefix = metaLabelPrefix + "node_annotationpresent_"
nodeAddressPrefix = metaLabelPrefix + "node_address_"
)

func nodeLabels(n *apiv1.Node) model.LabelSet {

@@ -151,13 +154,15 @@ func nodeLabels(n *apiv1.Node) model.LabelSet {
ls[nodeNameLabel] = lv(n.Name)

for k, v := range n.Labels {
ln := strutil.SanitizeLabelName(nodeLabelPrefix + k)
ls[model.LabelName(ln)] = lv(v)
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(nodeLabelPrefix+ln)] = lv(v)
ls[model.LabelName(nodeLabelPresentPrefix+ln)] = presentValue
}

for k, v := range n.Annotations {
ln := strutil.SanitizeLabelName(nodeAnnotationPrefix + k)
ls[model.LabelName(ln)] = lv(v)
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(nodeAnnotationPrefix+ln)] = lv(v)
ls[model.LabelName(nodeAnnotationPresentPrefix+ln)] = presentValue
}
return ls
}

@@ -214,5 +219,5 @@ func nodeAddress(node *apiv1.Node) (string, map[apiv1.NodeAddressType][]string,
if addresses, ok := m[apiv1.NodeHostName]; ok {
return addresses[0], m, nil
}
return "", m, fmt.Errorf("host address unknown")
return "", m, errors.New("host address unknown")
}

@@ -19,7 +19,7 @@ import (

"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

@@ -51,7 +51,7 @@ func makeEnumeratedNode(i int) *v1.Node {
}

func TestNodeDiscoveryBeforeStart(t *testing.T) {
n, c, w := makeDiscovery(RoleNode, NamespaceDiscovery{})
n, c := makeDiscovery(RoleNode, NamespaceDiscovery{})

k8sDiscoveryTest{
discovery: n,

@@ -59,11 +59,10 @@ func TestNodeDiscoveryBeforeStart(t *testing.T) {
obj := makeNode(
"test",
"1.2.3.4",
map[string]string{"testlabel": "testvalue"},
map[string]string{"testannotation": "testannotationvalue"},
map[string]string{"test-label": "testvalue"},
map[string]string{"test-annotation": "testannotationvalue"},
)
c.CoreV1().Nodes().Create(obj)
w.Nodes().Add(obj)
},
expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{

@@ -76,9 +75,11 @@ func TestNodeDiscoveryBeforeStart(t *testing.T) {
},
},
Labels: model.LabelSet{
"__meta_kubernetes_node_name": "test",
"__meta_kubernetes_node_label_testlabel": "testvalue",
"__meta_kubernetes_node_annotation_testannotation": "testannotationvalue",
"__meta_kubernetes_node_name": "test",
"__meta_kubernetes_node_label_test_label": "testvalue",
"__meta_kubernetes_node_labelpresent_test_label": "true",
"__meta_kubernetes_node_annotation_test_annotation": "testannotationvalue",
"__meta_kubernetes_node_annotationpresent_test_annotation": "true",
},
Source: "node/test",
},

@@ -87,14 +88,13 @@ func TestNodeDiscoveryBeforeStart(t *testing.T) {
}

func TestNodeDiscoveryAdd(t *testing.T) {
n, c, w := makeDiscovery(RoleNode, NamespaceDiscovery{})
n, c := makeDiscovery(RoleNode, NamespaceDiscovery{})

k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makeEnumeratedNode(1)
c.CoreV1().Nodes().Create(obj)
w.Nodes().Add(obj)
},
expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{

@@ -117,13 +117,12 @@ func TestNodeDiscoveryAdd(t *testing.T) {

func TestNodeDiscoveryDelete(t *testing.T) {
obj := makeEnumeratedNode(0)
n, c, w := makeDiscovery(RoleNode, NamespaceDiscovery{}, obj)
n, c := makeDiscovery(RoleNode, NamespaceDiscovery{}, obj)

k8sDiscoveryTest{
discovery: n,
afterStart: func() {
c.CoreV1().Nodes().Delete(obj.Name, &metav1.DeleteOptions{})
w.Nodes().Delete(obj)
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{

@@ -135,14 +134,13 @@ func TestNodeDiscoveryDelete(t *testing.T) {
}

func TestNodeDiscoveryUpdate(t *testing.T) {
n, c, w := makeDiscovery(RoleNode, NamespaceDiscovery{})
n, c := makeDiscovery(RoleNode, NamespaceDiscovery{})

k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj1 := makeEnumeratedNode(0)
c.CoreV1().Nodes().Create(obj1)
w.Nodes().Add(obj1)
obj2 := makeNode(
"test0",
"1.2.3.4",

@@ -150,7 +148,6 @@ func TestNodeDiscoveryUpdate(t *testing.T) {
map[string]string{},
)
c.CoreV1().Nodes().Update(obj2)
w.Nodes().Modify(obj2)
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{

@@ -163,8 +160,9 @@ func TestNodeDiscoveryUpdate(t *testing.T) {
},
},
Labels: model.LabelSet{
"__meta_kubernetes_node_label_Unschedulable": "true",
"__meta_kubernetes_node_name": "test0",
"__meta_kubernetes_node_label_Unschedulable": "true",
"__meta_kubernetes_node_labelpresent_Unschedulable": "true",
"__meta_kubernetes_node_name": "test0",
},
Source: "node/test0",
},

@@ -15,13 +15,13 @@ package kubernetes

import (
"context"
"fmt"
"net"
"strconv"
"strings"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

@@ -131,7 +131,7 @@ func convertToPod(o interface{}) (*apiv1.Pod, error) {
return pod, nil
}

return nil, fmt.Errorf("Received unexpected object: %v", o)
return nil, errors.Errorf("received unexpected object: %v", o)
}

const (

@@ -141,10 +141,13 @@ const (
podContainerPortNameLabel = metaLabelPrefix + "pod_container_port_name"
podContainerPortNumberLabel = metaLabelPrefix + "pod_container_port_number"
podContainerPortProtocolLabel = metaLabelPrefix + "pod_container_port_protocol"
podContainerIsInit = metaLabelPrefix + "pod_container_init"
podReadyLabel = metaLabelPrefix + "pod_ready"
podPhaseLabel = metaLabelPrefix + "pod_phase"
podLabelPrefix = metaLabelPrefix + "pod_label_"
podLabelPresentPrefix = metaLabelPrefix + "pod_labelpresent_"
podAnnotationPrefix = metaLabelPrefix + "pod_annotation_"
podAnnotationPresentPrefix = metaLabelPrefix + "pod_annotationpresent_"
podNodeNameLabel = metaLabelPrefix + "pod_node_name"
podHostIPLabel = metaLabelPrefix + "pod_host_ip"
podUID = metaLabelPrefix + "pod_uid"

@@ -185,13 +188,15 @@ func podLabels(pod *apiv1.Pod) model.LabelSet {
}

for k, v := range pod.Labels {
ln := strutil.SanitizeLabelName(podLabelPrefix + k)
ls[model.LabelName(ln)] = lv(v)
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(podLabelPrefix+ln)] = lv(v)
ls[model.LabelName(podLabelPresentPrefix+ln)] = presentValue
}

for k, v := range pod.Annotations {
ln := strutil.SanitizeLabelName(podAnnotationPrefix + k)
ls[model.LabelName(ln)] = lv(v)
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(podAnnotationPrefix+ln)] = lv(v)
ls[model.LabelName(podAnnotationPresentPrefix+ln)] = presentValue
}

return ls

@@ -209,7 +214,10 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group {
tg.Labels = podLabels(pod)
tg.Labels[namespaceLabel] = lv(pod.Namespace)

for _, c := range pod.Spec.Containers {
containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
for i, c := range containers {
isInit := i >= len(pod.Spec.Containers)

// If no ports are defined for the container, create an anonymous
// target per container.
if len(c.Ports) == 0 {

@@ -218,6 +226,7 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group {
tg.Targets = append(tg.Targets, model.LabelSet{
model.AddressLabel: lv(pod.Status.PodIP),
podContainerNameLabel: lv(c.Name),
podContainerIsInit: lv(strconv.FormatBool(isInit)),
})
continue
}

@@ -232,6 +241,7 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group {
podContainerPortNumberLabel: lv(ports),
podContainerPortNameLabel: lv(port.Name),
podContainerPortProtocolLabel: lv(string(port.Protocol)),
podContainerIsInit: lv(strconv.FormatBool(isInit)),
})
}
}

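The pod builder above now walks regular and init containers as a single slice; whether a container is an init container is recovered purely from its index, which is what feeds the new __meta_kubernetes_pod_container_init label. The trick in isolation (a sketch, using the fixture container names from the tests below):

// Sketch: append leaves pod.Spec.Containers itself unchanged, so its
// original length still marks where the init containers start.
containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
for i, c := range containers {
	isInit := i >= len(pod.Spec.Containers)
	fmt.Printf("%s init=%t\n", c.Name, isInit) // "testcontainer init=false", "initcontainer init=true"
}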
@@ -19,7 +19,7 @@ import (

"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
)

@@ -33,8 +33,8 @@ func makeMultiPortPods() *v1.Pod {
ObjectMeta: metav1.ObjectMeta{
Name: "testpod",
Namespace: "default",
Labels: map[string]string{"testlabel": "testvalue"},
Annotations: map[string]string{"testannotation": "testannotationvalue"},
Labels: map[string]string{"test/label": "testvalue"},
Annotations: map[string]string{"test/annotation": "testannotationvalue"},
UID: types.UID("abc123"),
OwnerReferences: []metav1.OwnerReference{
{

@@ -117,6 +117,48 @@ func makePods() *v1.Pod {
}
}

func makeInitContainerPods() *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "testpod",
Namespace: "default",
UID: types.UID("abc123"),
},
Spec: v1.PodSpec{
NodeName: "testnode",
Containers: []v1.Container{
{
Name: "testcontainer",
Ports: []v1.ContainerPort{
{
Name: "testport",
Protocol: v1.ProtocolTCP,
ContainerPort: int32(9000),
},
},
},
},

InitContainers: []v1.Container{
{
Name: "initcontainer",
},
},
},
Status: v1.PodStatus{
PodIP: "1.2.3.4",
HostIP: "2.3.4.5",
Phase: "Pending",
Conditions: []v1.PodCondition{
{
Type: v1.PodReady,
Status: v1.ConditionFalse,
},
},
},
}
}

func expectedPodTargetGroups(ns string) map[string]*targetgroup.Group {
key := fmt.Sprintf("pod/%s/testpod", ns)
return map[string]*targetgroup.Group{

@@ -128,6 +170,7 @@ func expectedPodTargetGroups(ns string) map[string]*targetgroup.Group {
"__meta_kubernetes_pod_container_port_name": "testport",
"__meta_kubernetes_pod_container_port_number": "9000",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_container_init": "false",
},
},
Labels: model.LabelSet{

@@ -146,14 +189,13 @@ func expectedPodTargetGroups(ns string) map[string]*targetgroup.Group {
}

func TestPodDiscoveryBeforeRun(t *testing.T) {
n, c, w := makeDiscovery(RolePod, NamespaceDiscovery{})
n, c := makeDiscovery(RolePod, NamespaceDiscovery{})

k8sDiscoveryTest{
discovery: n,
beforeRun: func() {
obj := makeMultiPortPods()
c.CoreV1().Pods(obj.Namespace).Create(obj)
w.Pods().Add(obj)
},
expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{

@@ -165,6 +207,7 @@ func TestPodDiscoveryBeforeRun(t *testing.T) {
"__meta_kubernetes_pod_container_port_name": "testport0",
"__meta_kubernetes_pod_container_port_number": "9000",
"__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_container_init": "false",
},
{
"__address__": "1.2.3.4:9001",

@@ -172,25 +215,29 @@ func TestPodDiscoveryBeforeRun(t *testing.T) {
"__meta_kubernetes_pod_container_port_name": "testport1",
"__meta_kubernetes_pod_container_port_number": "9001",
"__meta_kubernetes_pod_container_port_protocol": "UDP",
"__meta_kubernetes_pod_container_init": "false",
},
{
"__address__": "1.2.3.4",
"__meta_kubernetes_pod_container_name": "testcontainer1",
"__meta_kubernetes_pod_container_init": "false",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_pod_name": "testpod",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_pod_label_testlabel": "testvalue",
"__meta_kubernetes_pod_annotation_testannotation": "testannotationvalue",
"__meta_kubernetes_pod_node_name": "testnode",
"__meta_kubernetes_pod_ip": "1.2.3.4",
"__meta_kubernetes_pod_host_ip": "2.3.4.5",
"__meta_kubernetes_pod_ready": "true",
"__meta_kubernetes_pod_phase": "Running",
"__meta_kubernetes_pod_uid": "abc123",
"__meta_kubernetes_pod_controller_kind": "testcontrollerkind",
"__meta_kubernetes_pod_controller_name": "testcontrollername",
"__meta_kubernetes_pod_name": "testpod",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_pod_label_test_label": "testvalue",
"__meta_kubernetes_pod_labelpresent_test_label": "true",
"__meta_kubernetes_pod_annotation_test_annotation": "testannotationvalue",
"__meta_kubernetes_pod_annotationpresent_test_annotation": "true",
"__meta_kubernetes_pod_node_name": "testnode",
"__meta_kubernetes_pod_ip": "1.2.3.4",
"__meta_kubernetes_pod_host_ip": "2.3.4.5",
"__meta_kubernetes_pod_ready": "true",
"__meta_kubernetes_pod_phase": "Running",
"__meta_kubernetes_pod_uid": "abc123",
"__meta_kubernetes_pod_controller_kind": "testcontrollerkind",
"__meta_kubernetes_pod_controller_name": "testcontrollername",
},
Source: "pod/default/testpod",
},

@@ -198,15 +245,39 @@ func TestPodDiscoveryBeforeRun(t *testing.T) {
}.Run(t)
}

func TestPodDiscoveryInitContainer(t *testing.T) {
n, c := makeDiscovery(RolePod, NamespaceDiscovery{})

ns := "default"
key := fmt.Sprintf("pod/%s/testpod", ns)
expected := expectedPodTargetGroups(ns)
expected[key].Targets = append(expected[key].Targets, model.LabelSet{
"__address__": "1.2.3.4",
"__meta_kubernetes_pod_container_name": "initcontainer",
"__meta_kubernetes_pod_container_init": "true",
})
expected[key].Labels["__meta_kubernetes_pod_phase"] = "Pending"
expected[key].Labels["__meta_kubernetes_pod_ready"] = "false"

k8sDiscoveryTest{
discovery: n,
beforeRun: func() {
obj := makeInitContainerPods()
c.CoreV1().Pods(obj.Namespace).Create(obj)
},
expectedMaxItems: 1,
expectedRes: expected,
}.Run(t)
}

func TestPodDiscoveryAdd(t *testing.T) {
n, c, w := makeDiscovery(RolePod, NamespaceDiscovery{})
n, c := makeDiscovery(RolePod, NamespaceDiscovery{})

k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makePods()
c.CoreV1().Pods(obj.Namespace).Create(obj)
w.Pods().Add(obj)
},
expectedMaxItems: 1,
expectedRes: expectedPodTargetGroups("default"),

@@ -215,14 +286,13 @@ func TestPodDiscoveryAdd(t *testing.T) {

func TestPodDiscoveryDelete(t *testing.T) {
obj := makePods()
n, c, w := makeDiscovery(RolePod, NamespaceDiscovery{}, obj)
n, c := makeDiscovery(RolePod, NamespaceDiscovery{}, obj)

k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makePods()
c.CoreV1().Pods(obj.Namespace).Delete(obj.Name, &metav1.DeleteOptions{})
w.Pods().Delete(obj)
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{

@@ -260,14 +330,13 @@ func TestPodDiscoveryUpdate(t *testing.T) {
HostIP: "2.3.4.5",
},
}
n, c, w := makeDiscovery(RolePod, NamespaceDiscovery{}, obj)
n, c := makeDiscovery(RolePod, NamespaceDiscovery{}, obj)

k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makePods()
c.CoreV1().Pods(obj.Namespace).Create(obj)
w.Pods().Modify(obj)
c.CoreV1().Pods(obj.Namespace).Update(obj)
},
expectedMaxItems: 2,
expectedRes: expectedPodTargetGroups("default"),

@@ -275,7 +344,7 @@ func TestPodDiscoveryUpdate(t *testing.T) {
}

func TestPodDiscoveryUpdateEmptyPodIP(t *testing.T) {
n, c, w := makeDiscovery(RolePod, NamespaceDiscovery{})
n, c := makeDiscovery(RolePod, NamespaceDiscovery{})
initialPod := makePods()

updatedPod := makePods()

@@ -285,11 +354,9 @@ func TestPodDiscoveryUpdateEmptyPodIP(t *testing.T) {
discovery: n,
beforeRun: func() {
c.CoreV1().Pods(initialPod.Namespace).Create(initialPod)
w.Pods().Add(initialPod)
},
afterStart: func() {
c.CoreV1().Pods(updatedPod.Namespace).Create(updatedPod)
w.Pods().Modify(updatedPod)
c.CoreV1().Pods(updatedPod.Namespace).Update(updatedPod)
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{

@@ -301,7 +368,7 @@ func TestPodDiscoveryUpdateEmptyPodIP(t *testing.T) {
}

func TestPodDiscoveryNamespaces(t *testing.T) {
n, c, w := makeDiscovery(RolePod, NamespaceDiscovery{Names: []string{"ns1", "ns2"}})
n, c := makeDiscovery(RolePod, NamespaceDiscovery{Names: []string{"ns1", "ns2"}})

expected := expectedPodTargetGroups("ns1")
for k, v := range expectedPodTargetGroups("ns2") {

@@ -314,7 +381,6 @@ func TestPodDiscoveryNamespaces(t *testing.T) {
pod := makePods()
pod.Namespace = ns
c.CoreV1().Pods(pod.Namespace).Create(pod)
w.Pods().Add(pod)
}
},
expectedMaxItems: 2,

@@ -15,12 +15,12 @@ package kubernetes

import (
"context"
"fmt"
"net"
"strconv"

"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"

@@ -43,7 +43,7 @@ func NewService(l log.Logger, inf cache.SharedInformer) *Service {
if l == nil {
l = log.NewNopLogger()
}
s := &Service{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("ingress")}
s := &Service{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("service")}
s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) {
eventCount.WithLabelValues("service", "add").Inc()

@@ -123,7 +123,7 @@ func convertToService(o interface{}) (*apiv1.Service, error) {
if ok {
return service, nil
}
return nil, fmt.Errorf("Received unexpected object: %v", o)
return nil, errors.Errorf("received unexpected object: %v", o)
}

func serviceSource(s *apiv1.Service) string {

@@ -135,11 +135,15 @@ func serviceSourceFromNamespaceAndName(namespace, name string) string {
}

const (
serviceNameLabel = metaLabelPrefix + "service_name"
serviceLabelPrefix = metaLabelPrefix + "service_label_"
serviceAnnotationPrefix = metaLabelPrefix + "service_annotation_"
servicePortNameLabel = metaLabelPrefix + "service_port_name"
servicePortProtocolLabel = metaLabelPrefix + "service_port_protocol"
serviceNameLabel = metaLabelPrefix + "service_name"
serviceLabelPrefix = metaLabelPrefix + "service_label_"
serviceLabelPresentPrefix = metaLabelPrefix + "service_labelpresent_"
serviceAnnotationPrefix = metaLabelPrefix + "service_annotation_"
serviceAnnotationPresentPrefix = metaLabelPrefix + "service_annotationpresent_"
servicePortNameLabel = metaLabelPrefix + "service_port_name"
servicePortProtocolLabel = metaLabelPrefix + "service_port_protocol"
serviceClusterIPLabel = metaLabelPrefix + "service_cluster_ip"
serviceExternalNameLabel = metaLabelPrefix + "service_external_name"
)

func serviceLabels(svc *apiv1.Service) model.LabelSet {

@@ -149,13 +153,15 @@ func serviceLabels(svc *apiv1.Service) model.LabelSet {
ls[namespaceLabel] = lv(svc.Namespace)

for k, v := range svc.Labels {
ln := strutil.SanitizeLabelName(serviceLabelPrefix + k)
ls[model.LabelName(ln)] = lv(v)
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(serviceLabelPrefix+ln)] = lv(v)
ls[model.LabelName(serviceLabelPresentPrefix+ln)] = presentValue
}

for k, v := range svc.Annotations {
ln := strutil.SanitizeLabelName(serviceAnnotationPrefix + k)
ls[model.LabelName(ln)] = lv(v)
ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(serviceAnnotationPrefix+ln)] = lv(v)
ls[model.LabelName(serviceAnnotationPresentPrefix+ln)] = presentValue
}
return ls
}

@@ -169,11 +175,19 @@ func (s *Service) buildService(svc *apiv1.Service) *targetgroup.Group {
for _, port := range svc.Spec.Ports {
addr := net.JoinHostPort(svc.Name+"."+svc.Namespace+".svc", strconv.FormatInt(int64(port.Port), 10))

tg.Targets = append(tg.Targets, model.LabelSet{
labelSet := model.LabelSet{
model.AddressLabel: lv(addr),
servicePortNameLabel: lv(port.Name),
servicePortProtocolLabel: lv(string(port.Protocol)),
})
}

if svc.Spec.Type == apiv1.ServiceTypeExternalName {
labelSet[serviceExternalNameLabel] = lv(svc.Spec.ExternalName)
} else {
labelSet[serviceClusterIPLabel] = lv(svc.Spec.ClusterIP)
}

tg.Targets = append(tg.Targets, labelSet)
}

return tg

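Since an ExternalName service is only a DNS alias and never carries a cluster IP, the branch above attaches exactly one of the two new address labels to each per-port target. A sketch of the two target shapes, with values copied from the test fixtures below:

// Sketch: the two per-port target shapes the tests below expect.
clusterIPTarget := model.LabelSet{
	"__address__":                          "testservice.default.svc:30900",
	"__meta_kubernetes_service_cluster_ip": "10.0.0.1",
}
externalNameTarget := model.LabelSet{
	"__address__":                             "testservice-external.default.svc:31900",
	"__meta_kubernetes_service_external_name": "FooExternalName",
}
_, _ = clusterIPTarget, externalNameTarget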
@@ -19,7 +19,7 @@ import (

"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

@@ -28,8 +28,8 @@ func makeMultiPortService() *v1.Service {
ObjectMeta: metav1.ObjectMeta{
Name: "testservice",
Namespace: "default",
Labels: map[string]string{"testlabel": "testvalue"},
Annotations: map[string]string{"testannotation": "testannotationvalue"},
Labels: map[string]string{"test-label": "testvalue"},
Annotations: map[string]string{"test-annotation": "testannotationvalue"},
},
Spec: v1.ServiceSpec{
Ports: []v1.ServicePort{

@@ -44,6 +44,8 @@ func makeMultiPortService() *v1.Service {
Port: int32(30901),
},
},
Type: v1.ServiceTypeClusterIP,
ClusterIP: "10.0.0.1",
},
}
}

@@ -62,6 +64,8 @@ func makeSuffixedService(suffix string) *v1.Service {
Port: int32(30900),
},
},
Type: v1.ServiceTypeClusterIP,
ClusterIP: "10.0.0.1",
},
}
}

@@ -70,24 +74,46 @@ func makeService() *v1.Service {
return makeSuffixedService("")
}

func makeExternalService() *v1.Service {
return &v1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "testservice-external",
Namespace: "default",
},
Spec: v1.ServiceSpec{
Ports: []v1.ServicePort{
{
Name: "testport",
Protocol: v1.ProtocolTCP,
Port: int32(31900),
},
},
Type: v1.ServiceTypeExternalName,
ExternalName: "FooExternalName",
},
}
}

func TestServiceDiscoveryAdd(t *testing.T) {
n, c, w := makeDiscovery(RoleService, NamespaceDiscovery{})
n, c := makeDiscovery(RoleService, NamespaceDiscovery{})

k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makeService()
c.CoreV1().Services(obj.Namespace).Create(obj)
w.Services().Add(obj)
obj = makeExternalService()
c.CoreV1().Services(obj.Namespace).Create(obj)
},
expectedMaxItems: 1,
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{
"svc/default/testservice": {
Targets: []model.LabelSet{
{
"__meta_kubernetes_service_port_protocol": "TCP",
"__address__": "testservice.default.svc:30900",
"__meta_kubernetes_service_port_name": "testport",
"__address__": "testservice.default.svc:30900",
"__meta_kubernetes_service_cluster_ip": "10.0.0.1",
"__meta_kubernetes_service_port_name": "testport",
},
},
Labels: model.LabelSet{

@@ -96,19 +122,33 @@ func TestServiceDiscoveryAdd(t *testing.T) {
},
Source: "svc/default/testservice",
},
"svc/default/testservice-external": {
Targets: []model.LabelSet{
{
"__meta_kubernetes_service_port_protocol": "TCP",
"__address__": "testservice-external.default.svc:31900",
"__meta_kubernetes_service_port_name": "testport",
"__meta_kubernetes_service_external_name": "FooExternalName",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_service_name": "testservice-external",
"__meta_kubernetes_namespace": "default",
},
Source: "svc/default/testservice-external",
},
},
}.Run(t)
}

func TestServiceDiscoveryDelete(t *testing.T) {
n, c, w := makeDiscovery(RoleService, NamespaceDiscovery{}, makeService())
n, c := makeDiscovery(RoleService, NamespaceDiscovery{}, makeService())

k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makeService()
c.CoreV1().Services(obj.Namespace).Delete(obj.Name, &metav1.DeleteOptions{})
w.Services().Delete(obj)
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{

@@ -120,14 +160,13 @@ func TestServiceDiscoveryDelete(t *testing.T) {
}

func TestServiceDiscoveryUpdate(t *testing.T) {
n, c, w := makeDiscovery(RoleService, NamespaceDiscovery{}, makeService())
n, c := makeDiscovery(RoleService, NamespaceDiscovery{}, makeService())

k8sDiscoveryTest{
discovery: n,
afterStart: func() {
obj := makeMultiPortService()
c.CoreV1().Services(obj.Namespace).Update(obj)
w.Services().Modify(obj)
},
expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{

@@ -135,20 +174,24 @@ func TestServiceDiscoveryUpdate(t *testing.T) {
Targets: []model.LabelSet{
{
"__meta_kubernetes_service_port_protocol": "TCP",
"__address__": "testservice.default.svc:30900",
"__meta_kubernetes_service_port_name": "testport0",
"__address__": "testservice.default.svc:30900",
"__meta_kubernetes_service_cluster_ip": "10.0.0.1",
"__meta_kubernetes_service_port_name": "testport0",
},
{
"__meta_kubernetes_service_port_protocol": "UDP",
"__address__": "testservice.default.svc:30901",
"__meta_kubernetes_service_port_name": "testport1",
"__address__": "testservice.default.svc:30901",
"__meta_kubernetes_service_cluster_ip": "10.0.0.1",
"__meta_kubernetes_service_port_name": "testport1",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_service_name": "testservice",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_service_label_testlabel": "testvalue",
"__meta_kubernetes_service_annotation_testannotation": "testannotationvalue",
"__meta_kubernetes_service_name": "testservice",
"__meta_kubernetes_namespace": "default",
"__meta_kubernetes_service_label_test_label": "testvalue",
"__meta_kubernetes_service_labelpresent_test_label": "true",
"__meta_kubernetes_service_annotation_test_annotation": "testannotationvalue",
"__meta_kubernetes_service_annotationpresent_test_annotation": "true",
},
Source: "svc/default/testservice",
},

@@ -157,7 +200,7 @@ func TestServiceDiscoveryUpdate(t *testing.T) {
}

func TestServiceDiscoveryNamespaces(t *testing.T) {
n, c, w := makeDiscovery(RoleService, NamespaceDiscovery{Names: []string{"ns1", "ns2"}})
n, c := makeDiscovery(RoleService, NamespaceDiscovery{Names: []string{"ns1", "ns2"}})

k8sDiscoveryTest{
discovery: n,

@@ -166,7 +209,6 @@ func TestServiceDiscoveryNamespaces(t *testing.T) {
obj := makeService()
obj.Namespace = ns
c.CoreV1().Services(obj.Namespace).Create(obj)
w.Services().Add(obj)
}
},
expectedMaxItems: 2,

@@ -175,8 +217,9 @@ func TestServiceDiscoveryNamespaces(t *testing.T) {
Targets: []model.LabelSet{
{
"__meta_kubernetes_service_port_protocol": "TCP",
"__address__": "testservice.ns1.svc:30900",
"__meta_kubernetes_service_port_name": "testport",
"__address__": "testservice.ns1.svc:30900",
"__meta_kubernetes_service_cluster_ip": "10.0.0.1",
"__meta_kubernetes_service_port_name": "testport",
},
},
Labels: model.LabelSet{

@@ -189,8 +232,9 @@ func TestServiceDiscoveryNamespaces(t *testing.T) {
Targets: []model.LabelSet{
{
"__meta_kubernetes_service_port_protocol": "TCP",
"__address__": "testservice.ns2.svc:30900",
"__meta_kubernetes_service_port_name": "testport",
"__address__": "testservice.ns2.svc:30900",
"__meta_kubernetes_service_cluster_ip": "10.0.0.1",
"__meta_kubernetes_service_port_name": "testport",
},
},
Labels: model.LabelSet{

@@ -41,11 +41,12 @@ import (
)

var (
failedConfigs = prometheus.NewCounter(
failedConfigs = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_configs_failed_total",
Help: "Total number of service discovery configurations that failed to load.",
},
[]string{"name"},
)
discoveredTargets = prometheus.NewGaugeVec(
prometheus.GaugeOpts{

@@ -54,23 +55,26 @@ var (
},
[]string{"name", "config"},
)
receivedUpdates = prometheus.NewCounter(
receivedUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_received_updates_total",
Help: "Total number of update events received from the SD providers.",
},
[]string{"name"},
)
delayedUpdates = prometheus.NewCounter(
delayedUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_updates_delayed_total",
Help: "Total number of update events that couldn't be sent immediately.",
},
[]string{"name"},
)
sentUpdates = prometheus.NewCounter(
sentUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_updates_total",
Help: "Total number of update events sent to the SD consumers.",
},
[]string{"name"},
)
)

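Promoting these counters to vecs keys every SD manager metric by the manager's name, so separate discovery managers stop sharing a single series ("scrape" and "notify" are the names Prometheus typically gives its two managers; treat them as an assumption here). A sketch of the resulting usage and series shape:

receivedUpdates.WithLabelValues("scrape").Inc() // prometheus_sd_received_updates_total{name="scrape"}
receivedUpdates.WithLabelValues("notify").Inc() // prometheus_sd_received_updates_total{name="notify"}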
@@ -226,7 +230,7 @@ func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targ
case <-ctx.Done():
return
case tgs, ok := <-updates:
receivedUpdates.Inc()
receivedUpdates.WithLabelValues(m.name).Inc()
if !ok {
level.Debug(m.logger).Log("msg", "discoverer channel closed", "provider", p.name)
return

@@ -255,11 +259,11 @@ func (m *Manager) sender() {
case <-ticker.C: // Some discoverers send updates too often so we throttle these with the ticker.
select {
case <-m.triggerSend:
sentUpdates.Inc()
sentUpdates.WithLabelValues(m.name).Inc()
select {
case m.syncCh <- m.allGroups():
default:
delayedUpdates.Inc()
delayedUpdates.WithLabelValues(m.name).Inc()
level.Debug(m.logger).Log("msg", "discovery receiver's channel was full so will retry the next cycle")
select {
case m.triggerSend <- struct{}{}:

@@ -314,11 +318,13 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
}

func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setName string) {
var added bool
add := func(cfg interface{}, newDiscoverer func() (Discoverer, error)) {
t := reflect.TypeOf(cfg).String()
for _, p := range m.providers {
if reflect.DeepEqual(cfg, p.config) {
p.subs = append(p.subs, setName)
added = true
return
}
}

@@ -326,7 +332,7 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam
d, err := newDiscoverer()
if err != nil {
level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", t)
failedConfigs.Inc()
failedConfigs.WithLabelValues(m.name).Inc()
return
}

@@ -337,6 +343,7 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam
subs: []string{setName},
}
m.providers = append(m.providers, &provider)
added = true
}

for _, c := range cfg.DNSSDConfigs {

@@ -401,7 +408,17 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam
}
if len(cfg.StaticConfigs) > 0 {
add(setName, func() (Discoverer, error) {
return &StaticProvider{cfg.StaticConfigs}, nil
return &StaticProvider{TargetGroups: cfg.StaticConfigs}, nil
})
}
if !added {
// Add an empty target group to force the refresh of the corresponding
// scrape pool and to notify the receiver that this target set has no
// current targets.
// It can happen because the combined set of SD configurations is empty
// or because we fail to instantiate all the SD configurations.
add(setName, func() (Discoverer, error) {
return &StaticProvider{TargetGroups: []*targetgroup.Group{{}}}, nil
})
}
}

@ -51,7 +51,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
|
|||
expectedTargets: nil,
|
||||
},
|
||||
{
|
||||
title: "Multips TPs no updates",
|
||||
title: "Multiple TPs no updates",
|
||||
updates: map[string][]update{
|
||||
"tp1": {},
|
||||
"tp2": {},
|
||||
|
@ -719,6 +719,7 @@ func assertEqualGroups(t *testing.T, got, expected []*targetgroup.Group, msg fun
|
|||
}
|
||||
|
||||
func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Group, poolKey poolKey, label string, present bool) {
|
||||
t.Helper()
|
||||
if _, ok := tSets[poolKey]; !ok {
|
||||
t.Fatalf("'%s' should be present in Pool keys: %v", poolKey, tSets)
|
||||
return
|
||||
|
@ -741,7 +742,7 @@ func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Grou
|
|||
if !present {
|
||||
msg = "not"
|
||||
}
|
||||
t.Fatalf("'%s' should %s be present in Targets labels: %v", label, msg, mergedTargets)
|
||||
t.Fatalf("%q should %s be present in Targets labels: %q", label, msg, mergedTargets)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -781,7 +782,7 @@ scrape_configs:
|
|||
- targets: ["foo:9090"]
|
||||
`
|
||||
if err := yaml.UnmarshalStrict([]byte(sTwo), cfg); err != nil {
|
||||
t.Fatalf("Unable to load YAML config sOne: %s", err)
|
||||
t.Fatalf("Unable to load YAML config sTwo: %s", err)
|
||||
}
|
||||
c = make(map[string]sd_config.ServiceDiscoveryConfig)
|
||||
for _, v := range cfg.ScrapeConfigs {
|
||||
|
@ -794,6 +795,67 @@ scrape_configs:
|
|||
verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "string/0"}, "{__address__=\"bar:9090\"}", false)
|
||||
}
|
||||
|
||||
// TestTargetSetRecreatesEmptyStaticConfigs ensures that reloading a config file after
|
||||
// removing all targets from the static_configs sends an update with empty targetGroups.
|
||||
// This is required to signal the receiver that this target set has no current targets.
|
||||
func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
|
||||
cfg := &config.Config{}
|
||||
|
||||
sOne := `
|
||||
scrape_configs:
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ["foo:9090"]
|
||||
`
|
||||
if err := yaml.UnmarshalStrict([]byte(sOne), cfg); err != nil {
|
||||
t.Fatalf("Unable to load YAML config sOne: %s", err)
|
||||
}
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
discoveryManager := NewManager(ctx, log.NewNopLogger())
|
||||
discoveryManager.updatert = 100 * time.Millisecond
|
||||
go discoveryManager.Run()
|
||||
|
||||
c := make(map[string]sd_config.ServiceDiscoveryConfig)
|
||||
for _, v := range cfg.ScrapeConfigs {
|
||||
c[v.JobName] = v.ServiceDiscoveryConfig
|
||||
}
|
||||
discoveryManager.ApplyConfig(c)
|
||||
|
||||
<-discoveryManager.SyncCh()
|
||||
verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "string/0"}, "{__address__=\"foo:9090\"}", true)
|
||||
|
||||
sTwo := `
|
||||
scrape_configs:
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
`
|
||||
if err := yaml.UnmarshalStrict([]byte(sTwo), cfg); err != nil {
|
||||
t.Fatalf("Unable to load YAML config sTwo: %s", err)
|
||||
}
|
||||
c = make(map[string]sd_config.ServiceDiscoveryConfig)
|
||||
for _, v := range cfg.ScrapeConfigs {
|
||||
c[v.JobName] = v.ServiceDiscoveryConfig
|
||||
}
|
||||
discoveryManager.ApplyConfig(c)
|
||||
|
||||
<-discoveryManager.SyncCh()
|
||||
|
||||
pkey := poolKey{setName: "prometheus", provider: "string/0"}
|
||||
targetGroups, ok := discoveryManager.targets[pkey]
|
||||
if !ok {
|
||||
t.Fatalf("'%v' should be present in target groups", pkey)
|
||||
}
|
||||
group, ok := targetGroups[""]
|
||||
if !ok {
|
||||
t.Fatalf("missing '' key in target groups %v", targetGroups)
|
||||
}
|
||||
|
||||
if len(group.Targets) != 0 {
|
||||
t.Fatalf("Invalid number of targets: expected 0, got %d", len(group.Targets))
|
||||
}
|
||||
}
|
||||
|
||||
func TestIdenticalConfigurationsAreCoalesced(t *testing.T) {
|
||||
tmpFile, err := ioutil.TempFile("", "sd")
|
||||
if err != nil {
|
||||
|
@@ -926,7 +988,7 @@ func TestCoordinationWithReceiver(t *testing.T) {
 			expected: []expect{
 				{
 					tgs: map[string][]*targetgroup.Group{
-						"once1": []*targetgroup.Group{
+						"once1": {
 							{
 								Source:  "tg1",
 								Targets: []model.LabelSet{{"__instance__": "1"}},
@@ -936,13 +998,13 @@ func TestCoordinationWithReceiver(t *testing.T) {
 				},
 				{
 					tgs: map[string][]*targetgroup.Group{
-						"once1": []*targetgroup.Group{
+						"once1": {
 							{
 								Source:  "tg1",
 								Targets: []model.LabelSet{{"__instance__": "1"}},
 							},
 						},
-						"mock1": []*targetgroup.Group{
+						"mock1": {
 							{
 								Source:  "tg2",
 								Targets: []model.LabelSet{{"__instance__": "2"}},
@@ -979,7 +1041,7 @@ func TestCoordinationWithReceiver(t *testing.T) {
 			{
 				delay: 2 * updateDelay,
 				tgs: map[string][]*targetgroup.Group{
-					"mock1": []*targetgroup.Group{
+					"mock1": {
 						{
 							Source:  "tg1",
 							Targets: []model.LabelSet{{"__instance__": "1"}},
@@ -990,7 +1052,7 @@ func TestCoordinationWithReceiver(t *testing.T) {
 			{
 				delay: 4 * updateDelay,
 				tgs: map[string][]*targetgroup.Group{
-					"mock1": []*targetgroup.Group{
+					"mock1": {
 						{
 							Source:  "tg1",
 							Targets: []model.LabelSet{{"__instance__": "1"}},
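The only change in these hunks is dropping the repeated `[]*targetgroup.Group` from the map values, which Go's composite-literal elision allows (it is the simplification `gofmt -s` suggests). A standalone illustration:

package main

import "fmt"

type Group struct{ Source string }

func main() {
	// Element type fully spelled out:
	verbose := map[string][]*Group{
		"once1": []*Group{{Source: "tg1"}},
	}
	// Equivalent with the element type elided, as in the diff:
	concise := map[string][]*Group{
		"once1": {{Source: "tg1"}},
	}
	fmt.Println(verbose["once1"][0].Source == concise["once1"][0].Source) // true
}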
@@ -17,6 +17,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"io"
 	"io/ioutil"
 	"math/rand"
 	"net"
@@ -26,10 +27,11 @@ import (
 	"time"
 
 	"github.com/go-kit/kit/log"
-	"github.com/go-kit/kit/log/level"
-	"github.com/prometheus/client_golang/prometheus"
+	"github.com/pkg/errors"
 	config_util "github.com/prometheus/common/config"
 	"github.com/prometheus/common/model"
 
+	"github.com/prometheus/prometheus/discovery/refresh"
 	"github.com/prometheus/prometheus/discovery/targetgroup"
 	"github.com/prometheus/prometheus/util/strutil"
 )
@@ -54,29 +56,12 @@ const (
 	portMappingLabelPrefix = metaLabelPrefix + "port_mapping_label_"
 	// portDefinitionLabelPrefix is the prefix for the application portDefinitions labels.
 	portDefinitionLabelPrefix = metaLabelPrefix + "port_definition_label_"
-
-	// Constants for instrumentation.
-	namespace = "prometheus"
 )
 
-var (
-	refreshFailuresCount = prometheus.NewCounter(
-		prometheus.CounterOpts{
-			Namespace: namespace,
-			Name:      "sd_marathon_refresh_failures_total",
-			Help:      "The number of Marathon-SD refresh failures.",
-		})
-	refreshDuration = prometheus.NewSummary(
-		prometheus.SummaryOpts{
-			Namespace: namespace,
-			Name:      "sd_marathon_refresh_duration_seconds",
-			Help:      "The duration of a Marathon-SD refresh in seconds.",
-		})
-	// DefaultSDConfig is the default Marathon SD configuration.
-	DefaultSDConfig = SDConfig{
-		RefreshInterval: model.Duration(30 * time.Second),
-	}
-)
+// DefaultSDConfig is the default Marathon SD configuration.
+var DefaultSDConfig = SDConfig{
+	RefreshInterval: model.Duration(30 * time.Second),
+}
 
 // SDConfig is the configuration for services running on Marathon.
 type SDConfig struct {
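Moving `DefaultSDConfig` out of the metrics `var` block leaves the usual Prometheus SD pattern intact: `UnmarshalYAML` seeds the config with the defaults before decoding, so omitted fields keep their default values. A reduced sketch of that pattern, with the types simplified for the example:

package main

import (
	"fmt"

	"gopkg.in/yaml.v2"
)

type SDConfig struct {
	Servers         []string `yaml:"servers"`
	RefreshInterval int      `yaml:"refresh_interval,omitempty"` // seconds, simplified from model.Duration
}

var DefaultSDConfig = SDConfig{RefreshInterval: 30}

func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
	*c = DefaultSDConfig // start from defaults; decoding overwrites only the fields that are set
	type plain SDConfig  // alias avoids recursing back into this method
	return unmarshal((*plain)(c))
}

func main() {
	var c SDConfig
	if err := yaml.Unmarshal([]byte("servers: ['http://marathon:8080']"), &c); err != nil {
		panic(err)
	}
	fmt.Println(c.RefreshInterval) // 30, taken from DefaultSDConfig
}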
@@ -96,43 +81,33 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
 		return err
 	}
 	if len(c.Servers) == 0 {
-		return fmt.Errorf("marathon_sd: must contain at least one Marathon server")
+		return errors.New("marathon_sd: must contain at least one Marathon server")
 	}
 	if len(c.AuthToken) > 0 && len(c.AuthTokenFile) > 0 {
-		return fmt.Errorf("marathon_sd: at most one of auth_token & auth_token_file must be configured")
+		return errors.New("marathon_sd: at most one of auth_token & auth_token_file must be configured")
 	}
 	if c.HTTPClientConfig.BasicAuth != nil && (len(c.AuthToken) > 0 || len(c.AuthTokenFile) > 0) {
-		return fmt.Errorf("marathon_sd: at most one of basic_auth, auth_token & auth_token_file must be configured")
+		return errors.New("marathon_sd: at most one of basic_auth, auth_token & auth_token_file must be configured")
 	}
 	if (len(c.HTTPClientConfig.BearerToken) > 0 || len(c.HTTPClientConfig.BearerTokenFile) > 0) && (len(c.AuthToken) > 0 || len(c.AuthTokenFile) > 0) {
-		return fmt.Errorf("marathon_sd: at most one of bearer_token, bearer_token_file, auth_token & auth_token_file must be configured")
+		return errors.New("marathon_sd: at most one of bearer_token, bearer_token_file, auth_token & auth_token_file must be configured")
 	}
 	return c.HTTPClientConfig.Validate()
 }
 
-func init() {
-	prometheus.MustRegister(refreshFailuresCount)
-	prometheus.MustRegister(refreshDuration)
-}
-
 const appListPath string = "/v2/apps/?embed=apps.tasks"
 
 // Discovery provides service discovery based on a Marathon instance.
 type Discovery struct {
-	client          *http.Client
-	servers         []string
-	refreshInterval time.Duration
-	lastRefresh     map[string]*targetgroup.Group
-	appsClient      AppListClient
-	logger          log.Logger
+	*refresh.Discovery
+	client      *http.Client
+	servers     []string
+	lastRefresh map[string]*targetgroup.Group
+	appsClient  appListClient
 }
 
 // NewDiscovery returns a new Marathon Discovery.
 func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
 	if logger == nil {
 		logger = log.NewNopLogger()
 	}
 
 	rt, err := config_util.NewRoundTripperFromConfig(conf.HTTPClientConfig, "marathon_sd")
 	if err != nil {
 		return nil, err
@@ -147,13 +122,18 @@ func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
 		return nil, err
 	}
 
-	return &Discovery{
-		client:          &http.Client{Transport: rt},
-		servers:         conf.Servers,
-		refreshInterval: time.Duration(conf.RefreshInterval),
-		appsClient:      fetchApps,
-		logger:          logger,
-	}, nil
+	d := &Discovery{
+		client:     &http.Client{Transport: rt},
+		servers:    conf.Servers,
+		appsClient: fetchApps,
+	}
+	d.Discovery = refresh.NewDiscovery(
+		logger,
+		"marathon",
+		time.Duration(conf.RefreshInterval),
+		d.refresh,
+	)
+	return d, nil
 }
 
 type authTokenRoundTripper struct {
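Embedding `*refresh.Discovery` is what removes the need for a hand-written `Run` loop here: the embedded type's `Run` method is promoted, so `Discovery` still satisfies the Discoverer interface while only supplying a `refresh` callback. A toy illustration of the promotion mechanics, with names invented for the example:

package main

import "fmt"

type runner struct{ name string }

func (r *runner) Run() { fmt.Println("running", r.name) }

// marathonLike only carries data; its Run method comes from the embedded runner.
type marathonLike struct {
	*runner
	servers []string
}

func main() {
	d := &marathonLike{runner: &runner{name: "marathon"}, servers: []string{"http://m:8080"}}
	d.Run() // promoted from *runner: prints "running marathon"
}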
@@ -185,7 +165,7 @@ func newAuthTokenFileRoundTripper(tokenFile string, rt http.RoundTripper) (http.RoundTripper, error) {
 	// fail-fast if we can't read the file.
 	_, err := ioutil.ReadFile(tokenFile)
 	if err != nil {
-		return nil, fmt.Errorf("unable to read auth token file %s: %s", tokenFile, err)
+		return nil, errors.Wrapf(err, "unable to read auth token file %s", tokenFile)
 	}
 	return &authTokenFileRoundTripper{tokenFile, rt}, nil
 }
@@ -193,7 +173,7 @@ func newAuthTokenFileRoundTripper(tokenFile string, rt http.RoundTripper) (http.RoundTripper, error) {
 func (rt *authTokenFileRoundTripper) RoundTrip(request *http.Request) (*http.Response, error) {
 	b, err := ioutil.ReadFile(rt.authTokenFile)
 	if err != nil {
-		return nil, fmt.Errorf("unable to read auth token file %s: %s", rt.authTokenFile, err)
+		return nil, errors.Wrapf(err, "unable to read auth token file %s", rt.authTokenFile)
 	}
 	authToken := strings.TrimSpace(string(b))
 
@@ -204,33 +184,10 @@ func (rt *authTokenFileRoundTripper) RoundTrip(request *http.Request) (*http.Response, error) {
 	return rt.rt.RoundTrip(request)
 }
 
-// Run implements the Discoverer interface.
-func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
-	for {
-		select {
-		case <-ctx.Done():
-			return
-		case <-time.After(d.refreshInterval):
-			err := d.updateServices(ctx, ch)
-			if err != nil {
-				level.Error(d.logger).Log("msg", "Error while updating services", "err", err)
-			}
-		}
-	}
-}
-
-func (d *Discovery) updateServices(ctx context.Context, ch chan<- []*targetgroup.Group) (err error) {
-	t0 := time.Now()
-	defer func() {
-		refreshDuration.Observe(time.Since(t0).Seconds())
-		if err != nil {
-			refreshFailuresCount.Inc()
-		}
-	}()
-
-	targetMap, err := d.fetchTargetGroups()
+func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
+	targetMap, err := d.fetchTargetGroups(ctx)
 	if err != nil {
-		return err
+		return nil, err
 	}
 
+	all := make([]*targetgroup.Group, 0, len(targetMap))
@@ -240,154 +197,143 @@ func (d *Discovery) updateServices(ctx context.Context, ch chan<- []*targetgroup.Group) (err error) {
 
 	select {
 	case <-ctx.Done():
-		return ctx.Err()
-	case ch <- all:
+		return nil, ctx.Err()
+	default:
 	}
 
 	// Remove services which did disappear.
 	for source := range d.lastRefresh {
 		_, ok := targetMap[source]
 		if !ok {
-			select {
-			case <-ctx.Done():
-				return ctx.Err()
-			case ch <- []*targetgroup.Group{{Source: source}}:
-				level.Debug(d.logger).Log("msg", "Removing group", "source", source)
-			}
+			all = append(all, &targetgroup.Group{Source: source})
 		}
 	}
 
 	d.lastRefresh = targetMap
-	return nil
+	return all, nil
 }
 
-func (d *Discovery) fetchTargetGroups() (map[string]*targetgroup.Group, error) {
-	url := RandomAppsURL(d.servers)
-	apps, err := d.appsClient(d.client, url)
+func (d *Discovery) fetchTargetGroups(ctx context.Context) (map[string]*targetgroup.Group, error) {
+	url := randomAppsURL(d.servers)
+	apps, err := d.appsClient(ctx, d.client, url)
 	if err != nil {
 		return nil, err
 	}
 
-	groups := AppsToTargetGroups(apps)
+	groups := appsToTargetGroups(apps)
 	return groups, nil
 }
 
-// Task describes one instance of a service running on Marathon.
-type Task struct {
+// task describes one instance of a service running on Marathon.
+type task struct {
 	ID          string      `json:"id"`
 	Host        string      `json:"host"`
 	Ports       []uint32    `json:"ports"`
-	IPAddresses []IPAddress `json:"ipAddresses"`
+	IPAddresses []ipAddress `json:"ipAddresses"`
 }
 
-// IPAddress describes the address and protocol the container's network interface is bound to.
-type IPAddress struct {
+// ipAddress describes the address and protocol the container's network interface is bound to.
+type ipAddress struct {
 	Address string `json:"ipAddress"`
 	Proto   string `json:"protocol"`
 }
 
 // PortMapping describes in which port the process are binding inside the docker container.
-type PortMapping struct {
+type portMapping struct {
 	Labels        map[string]string `json:"labels"`
 	ContainerPort uint32            `json:"containerPort"`
 	HostPort      uint32            `json:"hostPort"`
 	ServicePort   uint32            `json:"servicePort"`
 }
 
 // DockerContainer describes a container which uses the docker runtime.
-type DockerContainer struct {
+type dockerContainer struct {
 	Image        string        `json:"image"`
-	PortMappings []PortMapping `json:"portMappings"`
+	PortMappings []portMapping `json:"portMappings"`
 }
 
 // Container describes the runtime an app in running in.
-type Container struct {
-	Docker       DockerContainer `json:"docker"`
-	PortMappings []PortMapping   `json:"portMappings"`
+type container struct {
+	Docker       dockerContainer `json:"docker"`
+	PortMappings []portMapping   `json:"portMappings"`
 }
 
 // PortDefinition describes which load balancer port you should access to access the service.
-type PortDefinition struct {
+type portDefinition struct {
 	Labels map[string]string `json:"labels"`
 	Port   uint32            `json:"port"`
 }
 
 // Network describes the name and type of network the container is attached to.
-type Network struct {
+type network struct {
 	Name string `json:"name"`
 	Mode string `json:"mode"`
 }
 
 // App describes a service running on Marathon.
-type App struct {
+type app struct {
 	ID           string `json:"id"`
-	Tasks        []Task `json:"tasks"`
+	Tasks        []task `json:"tasks"`
 	RunningTasks int    `json:"tasksRunning"`
 	Labels       map[string]string `json:"labels"`
-	Container       Container        `json:"container"`
-	PortDefinitions []PortDefinition `json:"portDefinitions"`
-	Networks        []Network        `json:"networks"`
+	Container       container        `json:"container"`
+	PortDefinitions []portDefinition `json:"portDefinitions"`
+	Networks        []network        `json:"networks"`
 	RequirePorts bool `json:"requirePorts"`
 }
 
 // isContainerNet checks if the app's first network is set to mode 'container'.
-func (app App) isContainerNet() bool {
+func (app app) isContainerNet() bool {
 	return len(app.Networks) > 0 && app.Networks[0].Mode == "container"
 }
 
-// AppList is a list of Marathon apps.
-type AppList struct {
-	Apps []App `json:"apps"`
+// appList is a list of Marathon apps.
+type appList struct {
+	Apps []app `json:"apps"`
 }
 
-// AppListClient defines a function that can be used to get an application list from marathon.
-type AppListClient func(client *http.Client, url string) (*AppList, error)
+// appListClient defines a function that can be used to get an application list from marathon.
+type appListClient func(ctx context.Context, client *http.Client, url string) (*appList, error)
 
 // fetchApps requests a list of applications from a marathon server.
-func fetchApps(client *http.Client, url string) (*AppList, error) {
+func fetchApps(ctx context.Context, client *http.Client, url string) (*appList, error) {
 	request, err := http.NewRequest("GET", url, nil)
 	if err != nil {
 		return nil, err
 	}
+	request = request.WithContext(ctx)
 
 	resp, err := client.Do(request)
 	if err != nil {
 		return nil, err
 	}
+	defer func() {
+		io.Copy(ioutil.Discard, resp.Body)
+		resp.Body.Close()
+	}()
 
 	if (resp.StatusCode < 200) || (resp.StatusCode >= 300) {
-		return nil, fmt.Errorf("Non 2xx status '%v' response during marathon service discovery", resp.StatusCode)
+		return nil, errors.Errorf("non 2xx status '%v' response during marathon service discovery", resp.StatusCode)
 	}
 
-	body, err := ioutil.ReadAll(resp.Body)
+	var apps appList
+	err = json.NewDecoder(resp.Body).Decode(&apps)
 	if err != nil {
-		return nil, err
+		return nil, errors.Wrapf(err, "%q", url)
 	}
 
-	apps, err := parseAppJSON(body)
-	if err != nil {
-		return nil, fmt.Errorf("%v in %s", err, url)
-	}
-	return apps, nil
+	return &apps, nil
 }
 
-func parseAppJSON(body []byte) (*AppList, error) {
-	apps := &AppList{}
-	err := json.Unmarshal(body, apps)
-	if err != nil {
-		return nil, err
-	}
-	return apps, nil
-}
-
-// RandomAppsURL randomly selects a server from an array and creates
+// randomAppsURL randomly selects a server from an array and creates
 // an URL pointing to the app list.
-func RandomAppsURL(servers []string) string {
+func randomAppsURL(servers []string) string {
 	// TODO: If possible update server list from Marathon at some point.
 	server := servers[rand.Intn(len(servers))]
 	return fmt.Sprintf("%s%s", server, appListPath)
 }
 
-// AppsToTargetGroups takes an array of Marathon apps and converts them into target groups.
-func AppsToTargetGroups(apps *AppList) map[string]*targetgroup.Group {
+// appsToTargetGroups takes an array of Marathon apps and converts them into target groups.
+func appsToTargetGroups(apps *appList) map[string]*targetgroup.Group {
 	tgroups := map[string]*targetgroup.Group{}
 	for _, a := range apps.Apps {
 		group := createTargetGroup(&a)
@@ -396,7 +342,7 @@ func AppsToTargetGroups(apps *AppList) map[string]*targetgroup.Group {
 	return tgroups
 }
 
-func createTargetGroup(app *App) *targetgroup.Group {
+func createTargetGroup(app *app) *targetgroup.Group {
 	var (
 		targets = targetsForApp(app)
 		appName = model.LabelValue(app.ID)
@@ -419,7 +365,7 @@ func createTargetGroup(app *App) *targetgroup.Group {
 	return tg
 }
 
-func targetsForApp(app *App) []model.LabelSet {
+func targetsForApp(app *app) []model.LabelSet {
 	targets := make([]model.LabelSet, 0, len(app.Tasks))
 
 	var ports []uint32
@@ -446,7 +392,11 @@ func targetsForApp(app *App) []model.LabelSet {
 
 		for i := 0; i < len(app.PortDefinitions); i++ {
 			labels[i] = app.PortDefinitions[i].Labels
-			ports[i] = app.PortDefinitions[i].Port
+			// When requirePorts is false, this port becomes the 'servicePort', not the listen port.
+			// In this case, the port needs to be taken from the task instead of the app.
+			if app.RequirePorts {
+				ports[i] = app.PortDefinitions[i].Port
+			}
 		}
 
 		prefix = portDefinitionLabelPrefix
@@ -467,6 +417,13 @@ func targetsForApp(app *App) []model.LabelSet {
 	// Iterate over the ports we gathered using one of the methods above.
 	for i, port := range ports {
 
+		// A zero port here means that either the portMapping has a zero port defined,
+		// or there is a portDefinition with requirePorts set to false. This means the port
+		// is auto-generated by Mesos and needs to be looked up in the task.
+		if port == 0 && len(t.Ports) == len(ports) {
+			port = t.Ports[i]
+		}
+
 		// Each port represents a possible Prometheus target.
 		targetAddress := targetEndpoint(&t, port, app.isContainerNet())
 		target := model.LabelSet{
@@ -492,7 +449,7 @@ func targetsForApp(app *App) []model.LabelSet {
 }
 
 // Generate a target endpoint string in host:port format.
-func targetEndpoint(task *Task, port uint32, containerNet bool) string {
+func targetEndpoint(task *task, port uint32, containerNet bool) string {
 
 	var host string
 
@@ -507,7 +464,7 @@ func targetEndpoint(task *Task, port uint32, containerNet bool) string {
 }
 
 // Get a list of ports and a list of labels from a PortMapping.
-func extractPortMapping(portMappings []PortMapping, containerNet bool) ([]uint32, []map[string]string) {
+func extractPortMapping(portMappings []portMapping, containerNet bool) ([]uint32, []map[string]string) {
 
 	ports := make([]uint32, len(portMappings))
 	labels := make([]map[string]string, len(portMappings))
 
@@ -520,8 +477,10 @@ func extractPortMapping(portMappings []PortMapping, containerNet bool) ([]uint32
 			// If the app is in a container network, connect directly to the container port.
 			ports[i] = portMappings[i].ContainerPort
 		} else {
-			// Otherwise, connect to the randomly-generated service port.
-			ports[i] = portMappings[i].ServicePort
+			// Otherwise, connect to the allocated host port for the container.
+			// Note that this host port is likely set to 0 in the app definition, which means it is
+			// automatically generated and needs to be extracted from the task's 'ports' array at a later stage.
+			ports[i] = portMappings[i].HostPort
 		}
 	}
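The port bookkeeping above is subtle: with `requirePorts: false`, the number in a `portDefinition` is only a service port, and the real listen port is the Mesos-assigned one in the task's `ports` array — which is why the loop leaves `ports[i]` at zero and resolves it per task later. A worked example of that resolution, using simplified stand-in types rather than the structs from this file:

package main

import "fmt"

type taskT struct{ Ports []uint32 }
type portDef struct{ Port uint32 }

func resolvePorts(defs []portDef, requirePorts bool, t taskT) []uint32 {
	ports := make([]uint32, len(defs))
	for i := range defs {
		if requirePorts {
			ports[i] = defs[i].Port // fixed host port, usable as-is
		}
	}
	for i, p := range ports {
		// Zero means auto-assigned by Mesos: look it up in the task.
		if p == 0 && len(t.Ports) == len(ports) {
			ports[i] = t.Ports[i]
		}
	}
	return ports
}

func main() {
	defs := []portDef{{Port: 10004}, {Port: 10005}}
	task := taskT{Ports: []uint32{31000, 31001}}
	fmt.Println(resolvePorts(defs, false, task)) // [31000 31001] from the task
	fmt.Println(resolvePorts(defs, true, task))  // [10004 10005] from the app
}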
(File diff suppressed because it is too large; not shown here.)
@@ -17,15 +17,15 @@ import (
 	"context"
 	"fmt"
 	"net"
-	"time"
 
 	"github.com/go-kit/kit/log"
-	"github.com/go-kit/kit/log/level"
 	"github.com/gophercloud/gophercloud"
 	"github.com/gophercloud/gophercloud/openstack"
 	"github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/hypervisors"
 	"github.com/gophercloud/gophercloud/pagination"
+	"github.com/pkg/errors"
 	"github.com/prometheus/common/model"
 
 	"github.com/prometheus/prometheus/discovery/targetgroup"
 )
 
@@ -42,74 +42,28 @@ type HypervisorDiscovery struct {
 	provider *gophercloud.ProviderClient
 	authOpts *gophercloud.AuthOptions
 	region   string
-	interval time.Duration
 	logger   log.Logger
 	port     int
 }
 
-// NewHypervisorDiscovery returns a new hypervisor discovery.
-func NewHypervisorDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions,
-	interval time.Duration, port int, region string, l log.Logger) *HypervisorDiscovery {
+// newHypervisorDiscovery returns a new hypervisor discovery.
+func newHypervisorDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions,
+	port int, region string, l log.Logger) *HypervisorDiscovery {
 	return &HypervisorDiscovery{provider: provider, authOpts: opts,
-		region: region, interval: interval, port: port, logger: l}
+		region: region, port: port, logger: l}
 }
 
-// Run implements the Discoverer interface.
-func (h *HypervisorDiscovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
-	// Get an initial set right away.
-	tg, err := h.refresh()
+func (h *HypervisorDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
+	h.provider.Context = ctx
+	err := openstack.Authenticate(h.provider, *h.authOpts)
 	if err != nil {
-		level.Error(h.logger).Log("msg", "Unable refresh target groups", "err", err.Error())
-	} else {
-		select {
-		case ch <- []*targetgroup.Group{tg}:
-		case <-ctx.Done():
-			return
-		}
-	}
-
-	ticker := time.NewTicker(h.interval)
-	defer ticker.Stop()
-
-	for {
-		select {
-		case <-ticker.C:
-			tg, err := h.refresh()
-			if err != nil {
-				level.Error(h.logger).Log("msg", "Unable refresh target groups", "err", err.Error())
-				continue
-			}
-
-			select {
-			case ch <- []*targetgroup.Group{tg}:
-			case <-ctx.Done():
-				return
-			}
-		case <-ctx.Done():
-			return
-		}
-	}
-}
-
-func (h *HypervisorDiscovery) refresh() (*targetgroup.Group, error) {
-	var err error
-	t0 := time.Now()
-	defer func() {
-		refreshDuration.Observe(time.Since(t0).Seconds())
-		if err != nil {
-			refreshFailuresCount.Inc()
-		}
-	}()
-
-	err = openstack.Authenticate(h.provider, *h.authOpts)
 	if err != nil {
-		return nil, fmt.Errorf("could not authenticate to OpenStack: %s", err)
+		return nil, errors.Wrap(err, "could not authenticate to OpenStack")
 	}
 	client, err := openstack.NewComputeV2(h.provider, gophercloud.EndpointOpts{
 		Region: h.region,
 	})
 	if err != nil {
-		return nil, fmt.Errorf("could not create OpenStack compute session: %s", err)
+		return nil, errors.Wrap(err, "could not create OpenStack compute session")
 	}
 
 	tg := &targetgroup.Group{
@@ -121,7 +75,7 @@ func (h *HypervisorDiscovery) refresh() (*targetgroup.Group, error) {
 	err = pagerHypervisors.EachPage(func(page pagination.Page) (bool, error) {
 		hypervisorList, err := hypervisors.ExtractHypervisors(page)
 		if err != nil {
-			return false, fmt.Errorf("could not extract hypervisors: %s", err)
+			return false, errors.Wrap(err, "could not extract hypervisors")
 		}
 		for _, hypervisor := range hypervisorList {
 			labels := model.LabelSet{}
@@ -140,5 +94,5 @@ func (h *HypervisorDiscovery) refresh() (*targetgroup.Group, error) {
 		return nil, err
 	}
 
-	return tg, nil
+	return []*targetgroup.Group{tg}, nil
 }
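Setting `h.provider.Context = ctx` is how cancellation reaches gophercloud: `ProviderClient` carries an optional `context.Context` that it attaches to the HTTP requests it issues, so a canceled discovery context aborts in-flight OpenStack calls. Roughly — this is a sketch of the idea, not the library's actual internals:

package main

import (
	"context"
	"fmt"
	"net/http"
)

// providerLike mimics the relevant part of gophercloud.ProviderClient.
type providerLike struct {
	Context context.Context
	client  *http.Client
}

func (p *providerLike) do(req *http.Request) (*http.Response, error) {
	if p.Context != nil {
		req = req.WithContext(p.Context) // every request inherits the discovery context
	}
	return p.client.Do(req)
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	cancel() // simulate the discovery loop shutting down
	p := &providerLike{Context: ctx, client: http.DefaultClient}
	req, _ := http.NewRequest("GET", "http://192.0.2.1/v2.1/os-hypervisors", nil)
	_, err := p.do(req)
	fmt.Println(err) // fails with context.Canceled, without waiting on the network
}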
@@ -14,6 +14,8 @@
 package openstack
 
 import (
+	"context"
+	"strings"
 	"testing"
 
 	"github.com/prometheus/common/model"
@@ -38,7 +40,7 @@ func (s *OpenstackSDHypervisorTestSuite) SetupTest(t *testing.T) {
 	s.Mock.HandleAuthSuccessfully()
 }
 
-func (s *OpenstackSDHypervisorTestSuite) openstackAuthSuccess() (Discovery, error) {
+func (s *OpenstackSDHypervisorTestSuite) openstackAuthSuccess() (refresher, error) {
 	conf := SDConfig{
 		IdentityEndpoint: s.Mock.Endpoint(),
 		Password:         "test",
@@ -47,7 +49,7 @@ func (s *OpenstackSDHypervisorTestSuite) openstackAuthSuccess() (Discovery, erro
 		Region: "RegionOne",
 		Role:   "hypervisor",
 	}
-	return NewDiscovery(&conf, nil)
+	return newRefresher(&conf, nil)
 }
 
 func TestOpenstackSDHypervisorRefresh(t *testing.T) {
@@ -56,7 +58,10 @@ func TestOpenstackSDHypervisorRefresh(t *testing.T) {
 	mock.SetupTest(t)
 
 	hypervisor, _ := mock.openstackAuthSuccess()
-	tg, err := hypervisor.refresh()
+	ctx := context.Background()
+	tgs, err := hypervisor.refresh(ctx)
+	testutil.Equals(t, 1, len(tgs))
+	tg := tgs[0]
 	testutil.Ok(t, err)
 	testutil.Assert(t, tg != nil, "")
 	testutil.Assert(t, tg.Targets != nil, "")
@@ -78,3 +83,17 @@ func TestOpenstackSDHypervisorRefresh(t *testing.T) {
 
 	mock.TearDownSuite()
 }
+
+func TestOpenstackSDHypervisorRefreshWithDoneContext(t *testing.T) {
+	mock := &OpenstackSDHypervisorTestSuite{}
+	mock.SetupTest(t)
+
+	hypervisor, _ := mock.openstackAuthSuccess()
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+	_, err := hypervisor.refresh(ctx)
+	testutil.NotOk(t, err, "")
+	testutil.Assert(t, strings.Contains(err.Error(), context.Canceled.Error()), "%q doesn't contain %q", err, context.Canceled)
+
+	mock.TearDownSuite()
+}
@@ -17,7 +17,6 @@ import (
 	"context"
 	"fmt"
 	"net"
-	"time"
 
 	"github.com/go-kit/kit/log"
 	"github.com/go-kit/kit/log/level"
@@ -26,6 +25,7 @@ import (
 	"github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/floatingips"
 	"github.com/gophercloud/gophercloud/openstack/compute/v2/servers"
 	"github.com/gophercloud/gophercloud/pagination"
+	"github.com/pkg/errors"
 	"github.com/prometheus/common/model"
 	"github.com/prometheus/prometheus/discovery/targetgroup"
 	"github.com/prometheus/prometheus/util/strutil"
@@ -34,13 +34,15 @@ import (
 const (
 	openstackLabelPrefix         = model.MetaLabelPrefix + "openstack_"
 	openstackLabelAddressPool    = openstackLabelPrefix + "address_pool"
-	openstackLabelInstanceFlavor = openstackLabelPrefix + "instance_flavor"
 	openstackLabelInstanceID     = openstackLabelPrefix + "instance_id"
 	openstackLabelInstanceName   = openstackLabelPrefix + "instance_name"
 	openstackLabelInstanceStatus = openstackLabelPrefix + "instance_status"
+	openstackLabelInstanceFlavor = openstackLabelPrefix + "instance_flavor"
-	openstackLabelPublicIP       = openstackLabelPrefix + "public_ip"
 	openstackLabelPrivateIP      = openstackLabelPrefix + "private_ip"
+	openstackLabelProjectID      = openstackLabelPrefix + "project_id"
+	openstackLabelPublicIP       = openstackLabelPrefix + "public_ip"
 	openstackLabelTagPrefix      = openstackLabelPrefix + "tag_"
+	openstackLabelUserID         = openstackLabelPrefix + "user_id"
 )
 
 // InstanceDiscovery discovers OpenStack instances.
@@ -48,57 +50,19 @@ type InstanceDiscovery struct {
 	provider *gophercloud.ProviderClient
 	authOpts *gophercloud.AuthOptions
 	region   string
-	interval time.Duration
 	logger   log.Logger
 	port     int
 	allTenants bool
 }
 
-// NewInstanceDiscovery returns a new instance discovery.
-func NewInstanceDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions,
-	interval time.Duration, port int, region string, allTenants bool, l log.Logger) *InstanceDiscovery {
+func newInstanceDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions,
+	port int, region string, allTenants bool, l log.Logger) *InstanceDiscovery {
 	if l == nil {
 		l = log.NewNopLogger()
 	}
 	return &InstanceDiscovery{provider: provider, authOpts: opts,
-		region: region, interval: interval, port: port, allTenants: allTenants, logger: l}
-}
-
-// Run implements the Discoverer interface.
-func (i *InstanceDiscovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
-	// Get an initial set right away.
-	tg, err := i.refresh()
-	if err != nil {
-		level.Error(i.logger).Log("msg", "Unable to refresh target groups", "err", err.Error())
-	} else {
-		select {
-		case ch <- []*targetgroup.Group{tg}:
-		case <-ctx.Done():
-			return
-		}
-	}
-
-	ticker := time.NewTicker(i.interval)
-	defer ticker.Stop()
-
-	for {
-		select {
-		case <-ticker.C:
-			tg, err := i.refresh()
-			if err != nil {
-				level.Error(i.logger).Log("msg", "Unable to refresh target groups", "err", err.Error())
-				continue
-			}
-
-			select {
-			case ch <- []*targetgroup.Group{tg}:
-			case <-ctx.Done():
-				return
-			}
-		case <-ctx.Done():
-			return
-		}
-	}
+		region: region, port: port, allTenants: allTenants, logger: l}
 }
 
 type floatingIPKey struct {
@@ -106,25 +70,17 @@ type floatingIPKey struct {
 	fixed string
 }
 
-func (i *InstanceDiscovery) refresh() (*targetgroup.Group, error) {
-	var err error
-	t0 := time.Now()
-	defer func() {
-		refreshDuration.Observe(time.Since(t0).Seconds())
-		if err != nil {
-			refreshFailuresCount.Inc()
-		}
-	}()
-
-	err = openstack.Authenticate(i.provider, *i.authOpts)
+func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
+	i.provider.Context = ctx
+	err := openstack.Authenticate(i.provider, *i.authOpts)
 	if err != nil {
-		return nil, fmt.Errorf("could not authenticate to OpenStack: %s", err)
+		return nil, errors.Wrap(err, "could not authenticate to OpenStack")
 	}
 	client, err := openstack.NewComputeV2(i.provider, gophercloud.EndpointOpts{
 		Region: i.region,
 	})
 	if err != nil {
-		return nil, fmt.Errorf("could not create OpenStack compute session: %s", err)
+		return nil, errors.Wrap(err, "could not create OpenStack compute session")
 	}
 
 	// OpenStack API reference
@@ -135,7 +91,7 @@ func (i *InstanceDiscovery) refresh() (*targetgroup.Group, error) {
 	err = pagerFIP.EachPage(func(page pagination.Page) (bool, error) {
 		result, err := floatingips.ExtractFloatingIPs(page)
 		if err != nil {
-			return false, fmt.Errorf("could not extract floatingips: %s", err)
+			return false, errors.Wrap(err, "could not extract floatingips")
 		}
 		for _, ip := range result {
 			// Skip not associated ips
@@ -161,9 +117,12 @@ func (i *InstanceDiscovery) refresh() (*targetgroup.Group, error) {
 		Source: fmt.Sprintf("OS_" + i.region),
 	}
 	err = pager.EachPage(func(page pagination.Page) (bool, error) {
+		if ctx.Err() != nil {
+			return false, errors.Wrap(ctx.Err(), "could not extract instances")
+		}
 		instanceList, err := servers.ExtractServers(page)
 		if err != nil {
-			return false, fmt.Errorf("could not extract instances: %s", err)
+			return false, errors.Wrap(err, "could not extract instances")
 		}
 
 		for _, s := range instanceList {
@@ -176,6 +135,8 @@ func (i *InstanceDiscovery) refresh() (*targetgroup.Group, error) {
 				openstackLabelInstanceID:     model.LabelValue(s.ID),
 				openstackLabelInstanceStatus: model.LabelValue(s.Status),
 				openstackLabelInstanceName:   model.LabelValue(s.Name),
+				openstackLabelProjectID:      model.LabelValue(s.TenantID),
+				openstackLabelUserID:         model.LabelValue(s.UserID),
 			}
 
 			id, ok := s.Flavor["id"].(string)
@@ -234,5 +195,5 @@ func (i *InstanceDiscovery) refresh() (*targetgroup.Group, error) {
 		return nil, err
 	}
 
-	return tg, nil
+	return []*targetgroup.Group{tg}, nil
 }
@@ -14,7 +14,9 @@
 package openstack
 
 import (
+	"context"
 	"fmt"
+	"strings"
 	"testing"
 
 	"github.com/prometheus/common/model"
@@ -40,7 +42,7 @@ func (s *OpenstackSDInstanceTestSuite) SetupTest(t *testing.T) {
 	s.Mock.HandleAuthSuccessfully()
 }
 
-func (s *OpenstackSDInstanceTestSuite) openstackAuthSuccess() (Discovery, error) {
+func (s *OpenstackSDInstanceTestSuite) openstackAuthSuccess() (refresher, error) {
 	conf := SDConfig{
 		IdentityEndpoint: s.Mock.Endpoint(),
 		Password:         "test",
@@ -50,7 +52,7 @@ func (s *OpenstackSDInstanceTestSuite) openstackAuthSuccess() (Discovery, error)
 		Role:       "instance",
 		AllTenants: true,
 	}
-	return NewDiscovery(&conf, nil)
+	return newRefresher(&conf, nil)
 }
 
 func TestOpenstackSDInstanceRefresh(t *testing.T) {
@@ -61,15 +63,19 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
 	instance, err := mock.openstackAuthSuccess()
 	testutil.Ok(t, err)
 
-	tg, err := instance.refresh()
+	ctx := context.Background()
+	tgs, err := instance.refresh(ctx)
 
 	testutil.Ok(t, err)
+	testutil.Equals(t, 1, len(tgs))
+
+	tg := tgs[0]
 	testutil.Assert(t, tg != nil, "")
 	testutil.Assert(t, tg.Targets != nil, "")
 	testutil.Equals(t, 4, len(tg.Targets))
 
 	for i, lbls := range []model.LabelSet{
-		model.LabelSet{
+		{
 			"__address__":                      model.LabelValue("10.0.0.32:0"),
 			"__meta_openstack_instance_flavor": model.LabelValue("1"),
 			"__meta_openstack_instance_id":     model.LabelValue("ef079b0c-e610-4dfb-b1aa-b49f07ac48e5"),
@@ -78,8 +84,10 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
 			"__meta_openstack_private_ip":   model.LabelValue("10.0.0.32"),
 			"__meta_openstack_public_ip":    model.LabelValue("10.10.10.2"),
 			"__meta_openstack_address_pool": model.LabelValue("private"),
+			"__meta_openstack_project_id":   model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"),
+			"__meta_openstack_user_id":      model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"),
 		},
-		model.LabelSet{
+		{
 			"__address__":                      model.LabelValue("10.0.0.31:0"),
 			"__meta_openstack_instance_flavor": model.LabelValue("1"),
 			"__meta_openstack_instance_id":     model.LabelValue("9e5476bd-a4ec-4653-93d6-72c93aa682ba"),
@@ -87,8 +95,10 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
 			"__meta_openstack_instance_name": model.LabelValue("derp"),
 			"__meta_openstack_private_ip":    model.LabelValue("10.0.0.31"),
 			"__meta_openstack_address_pool":  model.LabelValue("private"),
+			"__meta_openstack_project_id":    model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"),
+			"__meta_openstack_user_id":       model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"),
 		},
-		model.LabelSet{
+		{
 			"__address__":                      model.LabelValue("10.0.0.33:0"),
 			"__meta_openstack_instance_flavor": model.LabelValue("4"),
 			"__meta_openstack_instance_id":     model.LabelValue("9e5476bd-a4ec-4653-93d6-72c93aa682bb"),
@@ -97,8 +107,10 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
 			"__meta_openstack_private_ip":   model.LabelValue("10.0.0.33"),
 			"__meta_openstack_address_pool": model.LabelValue("private"),
 			"__meta_openstack_tag_env":      model.LabelValue("prod"),
+			"__meta_openstack_project_id":   model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"),
+			"__meta_openstack_user_id":      model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"),
 		},
-		model.LabelSet{
+		{
 			"__address__":                      model.LabelValue("10.0.0.34:0"),
 			"__meta_openstack_instance_flavor": model.LabelValue("4"),
 			"__meta_openstack_instance_id":     model.LabelValue("9e5476bd-a4ec-4653-93d6-72c93aa682bb"),
@@ -108,6 +120,8 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
 			"__meta_openstack_address_pool": model.LabelValue("private"),
 			"__meta_openstack_tag_env":      model.LabelValue("prod"),
 			"__meta_openstack_public_ip":    model.LabelValue("10.10.10.4"),
+			"__meta_openstack_project_id":   model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"),
+			"__meta_openstack_user_id":      model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"),
 		},
 	} {
 		t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) {
@@ -117,3 +131,17 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
 
 	mock.TearDownSuite()
 }
+
+func TestOpenstackSDInstanceRefreshWithDoneContext(t *testing.T) {
+	mock := &OpenstackSDHypervisorTestSuite{}
+	mock.SetupTest(t)
+
+	hypervisor, _ := mock.openstackAuthSuccess()
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+	_, err := hypervisor.refresh(ctx)
+	testutil.NotOk(t, err, "")
+	testutil.Assert(t, strings.Contains(err.Error(), context.Canceled.Error()), "%q doesn't contain %q", err, context.Canceled)
+
+	mock.TearDownSuite()
+}
@@ -247,7 +247,7 @@ func (m *SDMock) HandleHypervisorListSuccessfully() {
 		testHeader(m.t, r, "X-Auth-Token", tokenID)
 
 		w.Header().Add("Content-Type", "application/json")
-		fmt.Fprintf(w, hypervisorListBody)
+		fmt.Fprint(w, hypervisorListBody)
 	})
 }
 
@@ -544,7 +544,7 @@ func (m *SDMock) HandleServerListSuccessfully() {
 		testHeader(m.t, r, "X-Auth-Token", tokenID)
 
 		w.Header().Add("Content-Type", "application/json")
-		fmt.Fprintf(w, serverListBody)
+		fmt.Fprint(w, serverListBody)
 	})
 }
 
@@ -583,6 +583,6 @@ func (m *SDMock) HandleFloatingIPListSuccessfully() {
 		testHeader(m.t, r, "X-Auth-Token", tokenID)
 
 		w.Header().Add("Content-Type", "application/json")
-		fmt.Fprintf(w, listOutput)
+		fmt.Fprint(w, listOutput)
 	})
 }
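Swapping `fmt.Fprintf` for `fmt.Fprint` matters because the mock bodies are data, not format strings: any literal `%` inside the JSON would be parsed as a verb. A minimal demonstration:

package main

import (
	"fmt"
	"os"
)

func main() {
	body := `{"note": "100% cpu"}`
	fmt.Fprintf(os.Stdout, body) // "% c" is consumed as a verb: {"note": "100%!c(MISSING)pu"}
	fmt.Println()
	fmt.Fprint(os.Stdout, body) // written verbatim: {"note": "100% cpu"}
	fmt.Println()
}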
@@ -15,58 +15,49 @@ package openstack
 
 import (
 	"context"
-	"errors"
 	"fmt"
 	"net/http"
 	"time"
 
 	"github.com/go-kit/kit/log"
 	"github.com/gophercloud/gophercloud"
 	"github.com/gophercloud/gophercloud/openstack"
-	"github.com/mwitkow/go-conntrack"
-	"github.com/prometheus/client_golang/prometheus"
+	conntrack "github.com/mwitkow/go-conntrack"
+	"github.com/pkg/errors"
 	config_util "github.com/prometheus/common/config"
 	"github.com/prometheus/common/model"
 
+	"github.com/prometheus/prometheus/discovery/refresh"
 	"github.com/prometheus/prometheus/discovery/targetgroup"
 )
 
-var (
-	refreshFailuresCount = prometheus.NewCounter(
-		prometheus.CounterOpts{
-			Name: "prometheus_sd_openstack_refresh_failures_total",
-			Help: "The number of OpenStack-SD scrape failures.",
-		})
-	refreshDuration = prometheus.NewSummary(
-		prometheus.SummaryOpts{
-			Name: "prometheus_sd_openstack_refresh_duration_seconds",
-			Help: "The duration of an OpenStack-SD refresh in seconds.",
-		})
-	// DefaultSDConfig is the default OpenStack SD configuration.
-	DefaultSDConfig = SDConfig{
-		Port:            80,
-		RefreshInterval: model.Duration(60 * time.Second),
-	}
-)
+// DefaultSDConfig is the default OpenStack SD configuration.
+var DefaultSDConfig = SDConfig{
+	Port:            80,
+	RefreshInterval: model.Duration(60 * time.Second),
+}
 
 // SDConfig is the configuration for OpenStack based service discovery.
 type SDConfig struct {
-	IdentityEndpoint string                `yaml:"identity_endpoint"`
-	Username         string                `yaml:"username"`
-	UserID           string                `yaml:"userid"`
-	Password         config_util.Secret    `yaml:"password"`
-	ProjectName      string                `yaml:"project_name"`
-	ProjectID        string                `yaml:"project_id"`
-	DomainName       string                `yaml:"domain_name"`
-	DomainID         string                `yaml:"domain_id"`
-	Role             Role                  `yaml:"role"`
-	Region           string                `yaml:"region"`
-	RefreshInterval  model.Duration        `yaml:"refresh_interval,omitempty"`
-	Port             int                   `yaml:"port"`
-	AllTenants       bool                  `yaml:"all_tenants,omitempty"`
-	TLSConfig        config_util.TLSConfig `yaml:"tls_config,omitempty"`
+	IdentityEndpoint            string                `yaml:"identity_endpoint"`
+	Username                    string                `yaml:"username"`
+	UserID                      string                `yaml:"userid"`
+	Password                    config_util.Secret    `yaml:"password"`
+	ProjectName                 string                `yaml:"project_name"`
+	ProjectID                   string                `yaml:"project_id"`
+	DomainName                  string                `yaml:"domain_name"`
+	DomainID                    string                `yaml:"domain_id"`
+	ApplicationCredentialName   string                `yaml:"application_credential_name"`
+	ApplicationCredentialID     string                `yaml:"application_credential_id"`
+	ApplicationCredentialSecret config_util.Secret    `yaml:"application_credential_secret"`
+	Role                        Role                  `yaml:"role"`
+	Region                      string                `yaml:"region"`
+	RefreshInterval             model.Duration        `yaml:"refresh_interval,omitempty"`
+	Port                        int                   `yaml:"port"`
+	AllTenants                  bool                  `yaml:"all_tenants,omitempty"`
+	TLSConfig                   config_util.TLSConfig `yaml:"tls_config,omitempty"`
 }
 
-// OpenStackRole is role of the target in OpenStack.
+// Role is the role of the target in OpenStack.
 type Role string
 
 // The valid options for OpenStackRole.
@@ -88,7 +79,7 @@ func (c *Role) UnmarshalYAML(unmarshal func(interface{}) error) error {
 	case OpenStackRoleHypervisor, OpenStackRoleInstance:
 		return nil
 	default:
-		return fmt.Errorf("unknown OpenStack SD role %q", *c)
+		return errors.Errorf("unknown OpenStack SD role %q", *c)
 	}
 }
 
@@ -101,28 +92,34 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
 		return err
 	}
 	if c.Role == "" {
-		return fmt.Errorf("role missing (one of: instance, hypervisor)")
+		return errors.New("role missing (one of: instance, hypervisor)")
 	}
 	if c.Region == "" {
-		return fmt.Errorf("Openstack SD configuration requires a region")
+		return errors.New("openstack SD configuration requires a region")
 	}
 	return nil
 }
 
-func init() {
-	prometheus.MustRegister(refreshFailuresCount)
-	prometheus.MustRegister(refreshDuration)
+type refresher interface {
+	refresh(context.Context) ([]*targetgroup.Group, error)
 }
 
-// Discovery periodically performs OpenStack-SD requests. It implements
-// the Discoverer interface.
-type Discovery interface {
-	Run(ctx context.Context, ch chan<- []*targetgroup.Group)
-	refresh() (tg *targetgroup.Group, err error)
+// NewDiscovery returns a new OpenStack Discoverer which periodically refreshes its targets.
+func NewDiscovery(conf *SDConfig, l log.Logger) (*refresh.Discovery, error) {
+	r, err := newRefresher(conf, l)
+	if err != nil {
+		return nil, err
+	}
+	return refresh.NewDiscovery(
+		l,
+		"openstack",
+		time.Duration(conf.RefreshInterval),
+		r.refresh,
+	), nil
 }
 
-// NewDiscovery returns a new OpenStackDiscovery which periodically refreshes its targets.
-func NewDiscovery(conf *SDConfig, l log.Logger) (Discovery, error) {
+func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) {
 	var opts gophercloud.AuthOptions
 	if conf.IdentityEndpoint == "" {
 		var err error
@@ -132,14 +129,17 @@ func NewDiscovery(conf *SDConfig, l log.Logger) (Discovery, error) {
 		}
 	} else {
 		opts = gophercloud.AuthOptions{
-			IdentityEndpoint: conf.IdentityEndpoint,
-			Username:         conf.Username,
-			UserID:           conf.UserID,
-			Password:         string(conf.Password),
-			TenantName:       conf.ProjectName,
-			TenantID:         conf.ProjectID,
-			DomainName:       conf.DomainName,
-			DomainID:         conf.DomainID,
+			IdentityEndpoint:            conf.IdentityEndpoint,
+			Username:                    conf.Username,
+			UserID:                      conf.UserID,
+			Password:                    string(conf.Password),
+			TenantName:                  conf.ProjectName,
+			TenantID:                    conf.ProjectID,
+			DomainName:                  conf.DomainName,
+			DomainID:                    conf.DomainID,
+			ApplicationCredentialID:     conf.ApplicationCredentialID,
+			ApplicationCredentialName:   conf.ApplicationCredentialName,
+			ApplicationCredentialSecret: string(conf.ApplicationCredentialSecret),
 		}
 	}
 	client, err := openstack.NewClient(opts.IdentityEndpoint)
@@ -163,14 +163,9 @@ func NewDiscovery(conf *SDConfig, l log.Logger) (Discovery, error) {
 	}
 	switch conf.Role {
 	case OpenStackRoleHypervisor:
-		hypervisor := NewHypervisorDiscovery(client, &opts,
-			time.Duration(conf.RefreshInterval), conf.Port, conf.Region, l)
-		return hypervisor, nil
+		return newHypervisorDiscovery(client, &opts, conf.Port, conf.Region, l), nil
 	case OpenStackRoleInstance:
-		instance := NewInstanceDiscovery(client, &opts,
-			time.Duration(conf.RefreshInterval), conf.Port, conf.Region, conf.AllTenants, l)
-		return instance, nil
-	default:
-		return nil, errors.New("unknown OpenStack discovery role")
+		return newInstanceDiscovery(client, &opts, conf.Port, conf.Region, conf.AllTenants, l), nil
 	}
+	return nil, errors.New("unknown OpenStack discovery role")
 }
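The new `refresher` interface is the seam between the OpenStack roles and the generic refresh loop: `NewDiscovery` only needs something with a context-aware `refresh` method, so adding a role means adding one constructor case. A compressed sketch of that shape, with the types simplified to stand-ins:

package main

import (
	"context"
	"errors"
	"fmt"
)

type group struct{ Source string }

type refresher interface {
	refresh(context.Context) ([]*group, error)
}

type hypervisorRefresher struct{}

func (hypervisorRefresher) refresh(context.Context) ([]*group, error) {
	return []*group{{Source: "OS_hypervisors"}}, nil
}

type instanceRefresher struct{}

func (instanceRefresher) refresh(context.Context) ([]*group, error) {
	return []*group{{Source: "OS_instances"}}, nil
}

// newRefresher picks the role-specific implementation; the caller never
// sees anything but the refresher interface.
func newRefresher(role string) (refresher, error) {
	switch role {
	case "hypervisor":
		return hypervisorRefresher{}, nil
	case "instance":
		return instanceRefresher{}, nil
	}
	return nil, errors.New("unknown OpenStack discovery role")
}

func main() {
	r, _ := newRefresher("instance")
	tgs, _ := r.refresh(context.Background())
	fmt.Println(tgs[0].Source) // OS_instances
}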
discovery/refresh/refresh.go (new file, 118 lines)
@@ -0,0 +1,118 @@
// Copyright 2019 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package refresh

import (
	"context"
	"time"

	"github.com/go-kit/kit/log"
	"github.com/go-kit/kit/log/level"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/prometheus/prometheus/discovery/targetgroup"
)

var (
	failuresCount = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "prometheus_sd_refresh_failures_total",
			Help: "Number of refresh failures for the given SD mechanism.",
		},
		[]string{"mechanism"},
	)
	duration = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Name:       "prometheus_sd_refresh_duration_seconds",
			Help:       "The duration of a refresh in seconds for the given SD mechanism.",
			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
		},
		[]string{"mechanism"},
	)
)

func init() {
	prometheus.MustRegister(duration, failuresCount)
}

// Discovery implements the Discoverer interface.
type Discovery struct {
	logger   log.Logger
	interval time.Duration
	refreshf func(ctx context.Context) ([]*targetgroup.Group, error)

	failures prometheus.Counter
	duration prometheus.Observer
}

// NewDiscovery returns a Discoverer function that calls a refresh() function at every interval.
func NewDiscovery(l log.Logger, mech string, interval time.Duration, refreshf func(ctx context.Context) ([]*targetgroup.Group, error)) *Discovery {
	if l == nil {
		l = log.NewNopLogger()
	}
	return &Discovery{
		logger:   l,
		interval: interval,
		refreshf: refreshf,
		failures: failuresCount.WithLabelValues(mech),
		duration: duration.WithLabelValues(mech),
	}
}

// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
	// Get an initial set right away.
	tgs, err := d.refresh(ctx)
	if err != nil {
		level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error())
	} else {
		select {
		case ch <- tgs:
		case <-ctx.Done():
			return
		}
	}

	ticker := time.NewTicker(d.interval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			tgs, err := d.refresh(ctx)
			if err != nil {
				level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error())
				continue
			}

			select {
			case ch <- tgs:
			case <-ctx.Done():
				return
			}
		case <-ctx.Done():
			return
		}
	}
}

func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
	now := time.Now()
	defer d.duration.Observe(time.Since(now).Seconds())
	tgs, err := d.refreshf(ctx)
	if err != nil {
		d.failures.Inc()
	}
	return tgs, err
}

discovery/refresh/refresh_test.go (new file, 83 lines)

@@ -0,0 +1,83 @@
// Copyright 2019 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package refresh

import (
	"context"
	"fmt"
	"testing"
	"time"

	"github.com/prometheus/common/model"

	"github.com/prometheus/prometheus/discovery/targetgroup"
	"github.com/prometheus/prometheus/util/testutil"
)

func TestRefresh(t *testing.T) {
	tg1 := []*targetgroup.Group{
		{
			Source: "tg",
			Targets: []model.LabelSet{
				{
					model.LabelName("t1"): model.LabelValue("v1"),
				},
				{
					model.LabelName("t2"): model.LabelValue("v2"),
				},
			},
			Labels: model.LabelSet{
				model.LabelName("l1"): model.LabelValue("lv1"),
			},
		},
	}
	tg2 := []*targetgroup.Group{
		{
			Source: "tg",
		},
	}

	var i int
	refresh := func(ctx context.Context) ([]*targetgroup.Group, error) {
		i++
		switch i {
		case 1:
			return tg1, nil
		case 2:
			return tg2, nil
		}
		return nil, fmt.Errorf("some error")
	}
	interval := time.Millisecond
	d := NewDiscovery(nil, "test", interval, refresh)

	ch := make(chan []*targetgroup.Group)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	go d.Run(ctx, ch)

	tg := <-ch
	testutil.Equals(t, tg1, tg)

	tg = <-ch
	testutil.Equals(t, tg2, tg)

	tick := time.NewTicker(2 * interval)
	defer tick.Stop()
	select {
	case <-ch:
		t.Fatal("Unexpected target group")
	case <-tick.C:
	}
}
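From a provider's point of view, the whole contract of the new refresh package is the `refreshf` callback. A usage sketch — the refresh function here is a trivial stand-in, not a real SD mechanism:

package main

import (
	"context"
	"fmt"
	"time"

	"github.com/go-kit/kit/log"
	"github.com/prometheus/common/model"

	"github.com/prometheus/prometheus/discovery/refresh"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

func main() {
	// A trivial refresh function standing in for a real SD backend.
	refreshf := func(ctx context.Context) ([]*targetgroup.Group, error) {
		return []*targetgroup.Group{{
			Source:  "demo",
			Targets: []model.LabelSet{{model.AddressLabel: "localhost:9090"}},
		}}, nil
	}

	d := refresh.NewDiscovery(log.NewNopLogger(), "demo", 5*time.Second, refreshf)

	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	ch := make(chan []*targetgroup.Group)
	go d.Run(ctx, ch)

	tgs := <-ch // the initial set arrives immediately, before the first tick
	fmt.Println(tgs[0].Source, tgs[0].Targets)
}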
@ -17,6 +17,7 @@ import (
|
|||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
@ -24,12 +25,12 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
"github.com/go-kit/kit/log/level"
|
||||
"github.com/mwitkow/go-conntrack"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
conntrack "github.com/mwitkow/go-conntrack"
|
||||
"github.com/pkg/errors"
|
||||
config_util "github.com/prometheus/common/config"
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
config_util "github.com/prometheus/common/config"
|
||||
"github.com/prometheus/prometheus/discovery/refresh"
|
||||
"github.com/prometheus/prometheus/discovery/targetgroup"
|
||||
)
|
||||
|
||||
|
@ -41,27 +42,14 @@ const (
|
|||
tritonLabelMachineBrand = tritonLabel + "machine_brand"
|
||||
tritonLabelMachineImage = tritonLabel + "machine_image"
|
||||
tritonLabelServerID = tritonLabel + "server_id"
|
||||
namespace = "prometheus"
|
||||
)
|
||||
|
||||
var (
|
||||
refreshFailuresCount = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_sd_triton_refresh_failures_total",
|
||||
Help: "The number of Triton-SD scrape failures.",
|
||||
})
|
||||
refreshDuration = prometheus.NewSummary(
|
||||
prometheus.SummaryOpts{
|
||||
Name: "prometheus_sd_triton_refresh_duration_seconds",
|
||||
Help: "The duration of a Triton-SD refresh in seconds.",
|
||||
})
|
||||
// DefaultSDConfig is the default Triton SD configuration.
|
||||
DefaultSDConfig = SDConfig{
|
||||
Port: 9163,
|
||||
RefreshInterval: model.Duration(60 * time.Second),
|
||||
Version: 1,
|
||||
}
|
||||
)
|
||||
// DefaultSDConfig is the default Triton SD configuration.
var DefaultSDConfig = SDConfig{
    Port:            9163,
    RefreshInterval: model.Duration(60 * time.Second),
    Version:         1,
}

// SDConfig is the configuration for Triton based service discovery.
type SDConfig struct {

@@ -84,27 +72,22 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
        return err
    }
    if c.Account == "" {
        return fmt.Errorf("Triton SD configuration requires an account")
        return errors.New("triton SD configuration requires an account")
    }
    if c.DNSSuffix == "" {
        return fmt.Errorf("Triton SD configuration requires a dns_suffix")
        return errors.New("triton SD configuration requires a dns_suffix")
    }
    if c.Endpoint == "" {
        return fmt.Errorf("Triton SD configuration requires an endpoint")
        return errors.New("triton SD configuration requires an endpoint")
    }
    if c.RefreshInterval <= 0 {
        return fmt.Errorf("Triton SD configuration requires RefreshInterval to be a positive integer")
        return errors.New("triton SD configuration requires RefreshInterval to be a positive integer")
    }
    return nil
}

func init() {
    prometheus.MustRegister(refreshFailuresCount)
    prometheus.MustRegister(refreshDuration)
}

// DiscoveryResponse models a JSON response from the Triton discovery.
type DiscoveryResponse struct {
type discoveryResponse struct {
    Containers []struct {
        Groups     []string `json:"groups"`
        ServerUUID string   `json:"server_uuid"`

@@ -118,18 +101,14 @@ type DiscoveryResponse struct {
// Discovery periodically performs Triton-SD requests. It implements
// the Discoverer interface.
type Discovery struct {
    *refresh.Discovery
    client   *http.Client
    interval time.Duration
    logger   log.Logger
    sdConfig *SDConfig
}

// New returns a new Discovery which periodically refreshes its targets.
func New(logger log.Logger, conf *SDConfig) (*Discovery, error) {
    if logger == nil {
        logger = log.NewNopLogger()
    }

    tls, err := config_util.NewTLSConfig(&conf.TLSConfig)
    if err != nil {
        return nil, err

@@ -144,79 +123,55 @@ func New(logger log.Logger, conf *SDConfig) (*Discovery, error) {
    }
    client := &http.Client{Transport: transport}

    return &Discovery{
    d := &Discovery{
        client:   client,
        interval: time.Duration(conf.RefreshInterval),
        logger:   logger,
        sdConfig: conf,
    }, nil
}
    d.Discovery = refresh.NewDiscovery(
        logger,
        "triton",
        time.Duration(conf.RefreshInterval),
        d.refresh,
    )
    return d, nil
}

// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
    defer close(ch)

    ticker := time.NewTicker(d.interval)
    defer ticker.Stop()

    // Get an initial set right away.
    tg, err := d.refresh()
    if err != nil {
        level.Error(d.logger).Log("msg", "Refreshing targets failed", "err", err)
    } else {
        ch <- []*targetgroup.Group{tg}
    }

    for {
        select {
        case <-ticker.C:
            tg, err := d.refresh()
            if err != nil {
                level.Error(d.logger).Log("msg", "Refreshing targets failed", "err", err)
            } else {
                ch <- []*targetgroup.Group{tg}
            }
        case <-ctx.Done():
            return
        }
    }
}

func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
    t0 := time.Now()
    defer func() {
        refreshDuration.Observe(time.Since(t0).Seconds())
        if err != nil {
            refreshFailuresCount.Inc()
        }
    }()

func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
    var endpoint = fmt.Sprintf("https://%s:%d/v%d/discover", d.sdConfig.Endpoint, d.sdConfig.Port, d.sdConfig.Version)
    if len(d.sdConfig.Groups) > 0 {
        groups := url.QueryEscape(strings.Join(d.sdConfig.Groups, ","))
        endpoint = fmt.Sprintf("%s?groups=%s", endpoint, groups)
    }

    tg = &targetgroup.Group{
    tg := &targetgroup.Group{
        Source: endpoint,
    }

    resp, err := d.client.Get(endpoint)
    req, err := http.NewRequest("GET", endpoint, nil)
    if err != nil {
        return tg, fmt.Errorf("an error occurred when requesting targets from the discovery endpoint. %s", err)
        return nil, err
    }
    req = req.WithContext(ctx)
    resp, err := d.client.Do(req)
    if err != nil {
        return nil, errors.Wrap(err, "an error occurred when requesting targets from the discovery endpoint")
    }

    defer resp.Body.Close()
    defer func() {
        io.Copy(ioutil.Discard, resp.Body)
        resp.Body.Close()
    }()

    data, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return tg, fmt.Errorf("an error occurred when reading the response body. %s", err)
        return nil, errors.Wrap(err, "an error occurred when reading the response body")
    }

    dr := DiscoveryResponse{}
    dr := discoveryResponse{}
    err = json.Unmarshal(data, &dr)
    if err != nil {
        return tg, fmt.Errorf("an error occurred unmarshaling the disovery response json. %s", err)
        return nil, errors.Wrap(err, "an error occurred unmarshaling the discovery response json")
    }

    for _, container := range dr.Containers {

@@ -238,5 +193,5 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
        tg.Targets = append(tg.Targets, labels)
    }

    return tg, nil
    return []*targetgroup.Group{tg}, nil
}
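With the gutter noise stripped away, the new refresh-based API is straightforward to drive. The sketch below is illustrative and not part of the commit: it assumes the package import path `github.com/prometheus/prometheus/discovery/triton` and uses placeholder account and endpoint values.

```go
package main

import (
    "context"
    "fmt"

    "github.com/go-kit/kit/log"
    "github.com/prometheus/prometheus/discovery/targetgroup"
    "github.com/prometheus/prometheus/discovery/triton"
)

func main() {
    // Illustrative values; a real SDConfig normally comes from the YAML
    // configuration via UnmarshalYAML.
    conf := triton.DefaultSDConfig
    conf.Account = "acme"
    conf.DNSSuffix = "triton.example.com"
    conf.Endpoint = "cmon.triton.example.com"

    d, err := triton.New(log.NewNopLogger(), &conf)
    if err != nil {
        panic(err)
    }

    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    // Run is inherited from the embedded *refresh.Discovery; it invokes
    // d.refresh on every interval and sends the result on ch. This loop
    // runs until the context is cancelled.
    ch := make(chan []*targetgroup.Group)
    go d.Run(ctx, ch)

    for tgs := range ch {
        for _, tg := range tgs {
            fmt.Println(tg.Source, len(tg.Targets))
        }
    }
}
```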
@@ -23,11 +23,10 @@ import (
    "strconv"
    "strings"
    "testing"
    "time"

    "github.com/prometheus/common/config"
    "github.com/prometheus/common/model"
    "github.com/prometheus/prometheus/discovery/targetgroup"

    "github.com/prometheus/prometheus/util/testutil"
)

@@ -67,8 +66,12 @@ var (
    }
)

func newTritonDiscovery(c SDConfig) (*Discovery, error) {
    return New(nil, &c)
}

func TestTritonSDNew(t *testing.T) {
    td, err := New(nil, &conf)
    td, err := newTritonDiscovery(conf)
    testutil.Ok(t, err)
    testutil.Assert(t, td != nil, "")
    testutil.Assert(t, td.client != nil, "")

@@ -81,13 +84,13 @@ func TestTritonSDNew(t *testing.T) {
}

func TestTritonSDNewBadConfig(t *testing.T) {
    td, err := New(nil, &badconf)
    td, err := newTritonDiscovery(badconf)
    testutil.NotOk(t, err, "")
    testutil.Assert(t, td == nil, "")
}

func TestTritonSDNewGroupsConfig(t *testing.T) {
    td, err := New(nil, &groupsconf)
    td, err := newTritonDiscovery(groupsconf)
    testutil.Ok(t, err)
    testutil.Assert(t, td != nil, "")
    testutil.Assert(t, td.client != nil, "")

@@ -100,33 +103,6 @@ func TestTritonSDNewGroupsConfig(t *testing.T) {
    testutil.Equals(t, groupsconf.Port, td.sdConfig.Port)
}

func TestTritonSDRun(t *testing.T) {
    var (
        td, err     = New(nil, &conf)
        ch          = make(chan []*targetgroup.Group)
        ctx, cancel = context.WithCancel(context.Background())
    )

    testutil.Ok(t, err)
    testutil.Assert(t, td != nil, "")

    wait := make(chan struct{})
    go func() {
        td.Run(ctx, ch)
        close(wait)
    }()

    select {
    case <-time.After(60 * time.Millisecond):
        // Expected.
    case tgs := <-ch:
        t.Fatalf("Unexpected target groups in triton discovery: %s", tgs)
    }

    cancel()
    <-wait
}

func TestTritonSDRefreshNoTargets(t *testing.T) {
    tgts := testTritonSDRefresh(t, "{\"containers\":[]}")
    testutil.Assert(t, tgts == nil, "")

@@ -160,48 +136,55 @@ func TestTritonSDRefreshMultipleTargets(t *testing.T) {

func TestTritonSDRefreshNoServer(t *testing.T) {
    var (
        td, err = New(nil, &conf)
        td, _ = newTritonDiscovery(conf)
    )
    testutil.Ok(t, err)
    testutil.Assert(t, td != nil, "")

    tg, rerr := td.refresh()
    testutil.NotOk(t, rerr, "")
    testutil.Equals(t, strings.Contains(rerr.Error(), "an error occurred when requesting targets from the discovery endpoint."), true)
    testutil.Assert(t, tg != nil, "")
    testutil.Assert(t, tg.Targets == nil, "")
    _, err := td.refresh(context.Background())
    testutil.NotOk(t, err, "")
    testutil.Equals(t, strings.Contains(err.Error(), "an error occurred when requesting targets from the discovery endpoint"), true)
}

func TestTritonSDRefreshCancelled(t *testing.T) {
    var (
        td, _ = newTritonDiscovery(conf)
    )

    ctx, cancel := context.WithCancel(context.Background())
    cancel()
    _, err := td.refresh(ctx)
    testutil.NotOk(t, err, "")
    testutil.Equals(t, strings.Contains(err.Error(), context.Canceled.Error()), true)
}

func testTritonSDRefresh(t *testing.T, dstr string) []model.LabelSet {
    var (
        td, err = New(nil, &conf)
        s = httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        td, _ = newTritonDiscovery(conf)
        s     = httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
            fmt.Fprintln(w, dstr)
        }))
    )

    defer s.Close()

    u, uperr := url.Parse(s.URL)
    testutil.Ok(t, uperr)
    u, err := url.Parse(s.URL)
    testutil.Ok(t, err)
    testutil.Assert(t, u != nil, "")

    host, strport, sherr := net.SplitHostPort(u.Host)
    testutil.Ok(t, sherr)
    host, strport, err := net.SplitHostPort(u.Host)
    testutil.Ok(t, err)
    testutil.Assert(t, host != "", "")
    testutil.Assert(t, strport != "", "")

    port, atoierr := strconv.Atoi(strport)
    testutil.Ok(t, atoierr)
    port, err := strconv.Atoi(strport)
    testutil.Ok(t, err)
    testutil.Assert(t, port != 0, "")

    td.sdConfig.Port = port

    tgs, err := td.refresh(context.Background())
    testutil.Ok(t, err)
    testutil.Assert(t, td != nil, "")

    tg, err := td.refresh()
    testutil.Ok(t, err)
    testutil.Equals(t, 1, len(tgs))
    tg := tgs[0]
    testutil.Assert(t, tg != nil, "")

    return tg.Targets
@@ -23,6 +23,7 @@ import (
    "time"

    "github.com/go-kit/kit/log"
    "github.com/pkg/errors"
    "github.com/prometheus/common/model"
    "github.com/samuel/go-zookeeper/zk"

@@ -58,14 +59,14 @@ func (c *ServersetSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
        return err
    }
    if len(c.Servers) == 0 {
        return fmt.Errorf("serverset SD config must contain at least one Zookeeper server")
        return errors.New("serverset SD config must contain at least one Zookeeper server")
    }
    if len(c.Paths) == 0 {
        return fmt.Errorf("serverset SD config must contain at least one path")
        return errors.New("serverset SD config must contain at least one path")
    }
    for _, path := range c.Paths {
        if !strings.HasPrefix(path, "/") {
            return fmt.Errorf("serverset SD config paths must begin with '/': %s", path)
            return errors.Errorf("serverset SD config paths must begin with '/': %s", path)
        }
    }
    return nil

@@ -87,14 +88,14 @@ func (c *NerveSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
        return err
    }
    if len(c.Servers) == 0 {
        return fmt.Errorf("nerve SD config must contain at least one Zookeeper server")
        return errors.New("nerve SD config must contain at least one Zookeeper server")
    }
    if len(c.Paths) == 0 {
        return fmt.Errorf("nerve SD config must contain at least one path")
        return errors.New("nerve SD config must contain at least one path")
    }
    for _, path := range c.Paths {
        if !strings.HasPrefix(path, "/") {
            return fmt.Errorf("nerve SD config paths must begin with '/': %s", path)
            return errors.Errorf("nerve SD config paths must begin with '/': %s", path)
        }
    }
    return nil

@@ -223,7 +224,7 @@ func parseServersetMember(data []byte, path string) (model.LabelSet, error) {
    member := serversetMember{}

    if err := json.Unmarshal(data, &member); err != nil {
        return nil, fmt.Errorf("error unmarshaling serverset member %q: %s", path, err)
        return nil, errors.Wrapf(err, "error unmarshaling serverset member %q", path)
    }

    labels := model.LabelSet{}

@@ -265,7 +266,7 @@ func parseNerveMember(data []byte, path string) (model.LabelSet, error) {
    member := nerveMember{}
    err := json.Unmarshal(data, &member)
    if err != nil {
        return nil, fmt.Errorf("error unmarshaling nerve member %q: %s", path, err)
        return nil, errors.Wrapf(err, "error unmarshaling nerve member %q", path)
    }

    labels := model.LabelSet{}
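The switch from `fmt.Errorf` to `github.com/pkg/errors` throughout these files keeps the underlying error available to callers instead of flattening it into a string. A minimal standalone illustration (not part of the diff; the path value is a placeholder):

```go
package main

import (
    "fmt"

    "github.com/pkg/errors"
)

func main() {
    base := fmt.Errorf("connection refused")

    // errors.Wrapf annotates the error while keeping the cause retrievable,
    // unlike fmt.Errorf("...: %s", err), which discards the original value.
    wrapped := errors.Wrapf(base, "error unmarshaling nerve member %q", "/some/path")

    fmt.Println(wrapped)                        // annotated message plus the cause
    fmt.Println(errors.Cause(wrapped) == base)  // true
}
```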
@@ -22,7 +22,7 @@ An example rules file with an alert would be:
groups:
- name: example
  rules:
  - alert: HighErrorRate
  - alert: HighRequestLatency
    expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5
    for: 10m
    labels:

@@ -42,9 +42,11 @@ The `annotations` clause specifies a set of informational labels that can be use

#### Templating

Label and annotation values can be templated using [console templates](https://prometheus.io/docs/visualization/consoles).
The `$labels` variable holds the label key/value pairs of an alert instance
and `$value` holds the evaluated value of an alert instance.
Label and annotation values can be templated using [console
templates](https://prometheus.io/docs/visualization/consoles). The `$labels`
variable holds the label key/value pairs of an alert instance. The configured
external labels can be accessed via the `$externalLabels` variable. The
`$value` variable holds the evaluated value of an alert instance.

    # To insert a firing element's label values:
    {{ $labels.<labelname> }}
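The remaining template variables follow the same syntax; for instance (the `cluster` external label here is a hypothetical example):

    # To insert the alert's evaluated value and an external label:
    {{ $value }}
    {{ $externalLabels.cluster }}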
@@ -24,7 +24,7 @@ This will also reload any configured rule files.

To specify which configuration file to load, use the `--config.file` flag.

The file is written in [YAML format](http://en.wikipedia.org/wiki/YAML),
The file is written in [YAML format](https://en.wikipedia.org/wiki/YAML),
defined by the scheme described below.
Brackets indicate that a parameter is optional. For non-list parameters the
value is set to the specified default.

@@ -135,6 +135,16 @@ job_name: <job_name>
# when a time series does not have a given label yet and are ignored otherwise.
[ honor_labels: <boolean> | default = false ]

# honor_timestamps controls whether Prometheus respects the timestamps present
# in scraped data.
#
# If honor_timestamps is set to "true", the timestamps of the metrics exposed
# by the target will be used.
#
# If honor_timestamps is set to "false", the timestamps of the metrics exposed
# by the target will be ignored.
[ honor_timestamps: <boolean> | default = true ]

# Configures the protocol scheme used for requests.
[ scheme: <scheme> | default = http ]

@@ -246,7 +256,7 @@ A `tls_config` allows configuring TLS connections.
[ key_file: <filename> ]

# ServerName extension to indicate the name of the server.
# http://tools.ietf.org/html/rfc4366#section-3.1
# https://tools.ietf.org/html/rfc4366#section-3.1
[ server_name: <string> ]

# Disable validation of the server certificate.

@@ -264,9 +274,12 @@ The following meta labels are available on targets during relabeling:
* `__meta_azure_machine_name`: the machine name
* `__meta_azure_machine_os_type`: the machine operating system
* `__meta_azure_machine_private_ip`: the machine's private IP
* `__meta_azure_machine_public_ip`: the machine's public IP if it exists
* `__meta_azure_machine_resource_group`: the machine's resource group
* `__meta_azure_machine_tag_<tagname>`: each tag value of the machine
* `__meta_azure_machine_scale_set`: the name of the scale set which the vm is part of (this value is only set if you are using a [scale set](https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/))
* `__meta_azure_subscription_id`: the subscription ID
* `__meta_azure_tenant_id`: the tenant ID

See below for the configuration options for Azure discovery:

@@ -274,14 +287,18 @@ See below for the configuration options for Azure discovery:
# The information to access the Azure API.
# The Azure environment.
[ environment: <string> | default = AzurePublicCloud ]
# The subscription ID.

# The authentication method, either OAuth or ManagedIdentity.
# See https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/overview
[ authentication_method: <string> | default = OAuth]
# The subscription ID. Always required.
subscription_id: <string>
# The tenant ID.
tenant_id: <string>
# The client ID.
client_id: <string>
# The client secret.
client_secret: <secret>
# Optional tenant ID. Only required with authentication_method OAuth.
[ tenant_id: <string> ]
# Optional client ID. Only required with authentication_method OAuth.
[ client_id: <string> ]
# Optional client secret. Only required with authentication_method OAuth.
[ client_secret: <secret> ]

# Refresh interval to re-read the instance list.
[ refresh_interval: <duration> | default = 300s ]

@@ -300,6 +317,7 @@ The following meta labels are available on targets during [relabeling](#relabel_

* `__meta_consul_address`: the address of the target
* `__meta_consul_dc`: the datacenter name for the target
* `__meta_consul_tagged_address_<key>`: each node tagged address key value of the target
* `__meta_consul_metadata_<key>`: each node metadata key value of the target
* `__meta_consul_node`: the node name defined for the target
* `__meta_consul_service_address`: the service address of the target

@@ -330,8 +348,9 @@ services:
# See https://www.consul.io/api/catalog.html#list-nodes-for-service to know more
# about the possible filters that can be used.

# An optional tag used to filter nodes for a given service.
[ tag: <string> ]
# An optional list of tags used to filter nodes for a given service. Services must contain all tags in the list.
tags:
  [ - <string> ]

# Node metadata used to filter nodes for a given service.
[ node_meta:

@@ -340,7 +359,7 @@ services:
# The string by which Consul tags are joined into the tag label.
[ tag_separator: <string> | default = , ]

# Allow stale Consul results (see https://www.consul.io/api/index.html#consistency-modes). Will reduce load on Consul.
# Allow stale Consul results (see https://www.consul.io/api/features/consistency.html). Will reduce load on Consul.
[ allow_stale: <bool> ]

# The time after which the provided names are refreshed.

@@ -407,6 +426,7 @@ The following meta labels are available on targets during [relabeling](#relabel_
* `__meta_ec2_owner_id`: the ID of the AWS account that owns the EC2 instance
* `__meta_ec2_platform`: the Operating System platform, set to 'windows' on Windows servers, absent otherwise
* `__meta_ec2_primary_subnet_id`: the subnet ID of the primary network interface, if available
* `__meta_ec2_private_dns_name`: the private DNS name of the instance, if available
* `__meta_ec2_private_ip`: the private IP address of the instance, if present
* `__meta_ec2_public_dns_name`: the public DNS name of the instance, if available
* `__meta_ec2_public_ip`: the public IP address of the instance, if available

@@ -484,14 +504,16 @@ interface.

The following meta labels are available on targets during [relabeling](#relabel_config):

* `__meta_openstack_address_pool`: the pool of the private IP.
* `__meta_openstack_instance_flavor`: the flavor of the OpenStack instance.
* `__meta_openstack_instance_id`: the OpenStack instance ID.
* `__meta_openstack_instance_name`: the OpenStack instance name.
* `__meta_openstack_instance_status`: the status of the OpenStack instance.
* `__meta_openstack_instance_flavor`: the flavor of the OpenStack instance.
* `__meta_openstack_public_ip`: the public IP of the OpenStack instance.
* `__meta_openstack_private_ip`: the private IP of the OpenStack instance.
* `__meta_openstack_address_pool`: the pool of the private IP.
* `__meta_openstack_project_id`: the project (tenant) owning this instance.
* `__meta_openstack_public_ip`: the public IP of the OpenStack instance.
* `__meta_openstack_tag_<tagkey>`: each tag value of the instance.
* `__meta_openstack_user_id`: the user account owning the tenant.

See below for the configuration options for OpenStack discovery:

@@ -532,6 +554,17 @@ region: <string>
[ project_name: <string> ]
[ project_id: <string> ]

# The application_credential_id or application_credential_name fields are
# required if using an application credential to authenticate. Some providers
# allow you to create an application credential to authenticate rather than a
# password.
[ application_credential_name: <string> ]
[ application_credential_id: <string> ]

# The application_credential_secret field is required if using an application
# credential to authenticate.
[ application_credential_secret: <secret> ]

# Whether the service discovery should list all instances for all projects.
# It is only relevant for the 'instance' role and usually requires admin permissions.
[ all_tenants: <boolean> | default: false ]

@@ -659,7 +692,7 @@ service account and place the credential file in one of the expected locations.
### `<kubernetes_sd_config>`

Kubernetes SD configurations allow retrieving scrape targets from
[Kubernetes'](http://kubernetes.io/) REST API and always staying synchronized with
[Kubernetes'](https://kubernetes.io/) REST API and always staying synchronized with
the cluster state.

One of the following `role` types can be configured to discover targets:

@@ -676,7 +709,9 @@ Available meta labels:

* `__meta_kubernetes_node_name`: The name of the node object.
* `__meta_kubernetes_node_label_<labelname>`: Each label from the node object.
* `__meta_kubernetes_node_labelpresent_<labelname>`: `true` for each label from the node object.
* `__meta_kubernetes_node_annotation_<annotationname>`: Each annotation from the node object.
* `__meta_kubernetes_node_annotationpresent_<annotationname>`: `true` for each annotation from the node object.
* `__meta_kubernetes_node_address_<address_type>`: The first address for each node address type, if it exists.

In addition, the `instance` label for the node will be set to the node name

@@ -692,9 +727,13 @@ service port.
Available meta labels:

* `__meta_kubernetes_namespace`: The namespace of the service object.
* `__meta_kubernetes_service_annotation_<annotationname>`: Each annotation from the service object.
* `__meta_kubernetes_service_annotationpresent_<annotationname>`: "true" for each annotation of the service object.
* `__meta_kubernetes_service_cluster_ip`: The cluster IP address of the service. (Does not apply to services of type ExternalName)
* `__meta_kubernetes_service_external_name`: The DNS name of the service. (Applies to services of type ExternalName)
* `__meta_kubernetes_service_label_<labelname>`: Each label from the service object.
* `__meta_kubernetes_service_labelpresent_<labelname>`: `true` for each label of the service object.
* `__meta_kubernetes_service_name`: The name of the service object.
* `__meta_kubernetes_service_label_<labelname>`: The label of the service object.
* `__meta_kubernetes_service_annotation_<annotationname>`: The annotation of the service object.
* `__meta_kubernetes_service_port_name`: Name of the service port for the target.
* `__meta_kubernetes_service_port_number`: Number of the service port for the target.
* `__meta_kubernetes_service_port_protocol`: Protocol of the service port for the target.
@@ -710,8 +749,11 @@ Available meta labels:
* `__meta_kubernetes_namespace`: The namespace of the pod object.
* `__meta_kubernetes_pod_name`: The name of the pod object.
* `__meta_kubernetes_pod_ip`: The pod IP of the pod object.
* `__meta_kubernetes_pod_label_<labelname>`: The label of the pod object.
* `__meta_kubernetes_pod_annotation_<annotationname>`: The annotation of the pod object.
* `__meta_kubernetes_pod_label_<labelname>`: Each label from the pod object.
* `__meta_kubernetes_pod_labelpresent_<labelname>`: `true` for each label from the pod object.
* `__meta_kubernetes_pod_annotation_<annotationname>`: Each annotation from the pod object.
* `__meta_kubernetes_pod_annotationpresent_<annotationname>`: `true` for each annotation from the pod object.
* `__meta_kubernetes_pod_container_init`: `true` if the container is an [InitContainer](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/)
* `__meta_kubernetes_pod_container_name`: Name of the container the target address points to.
* `__meta_kubernetes_pod_container_port_name`: Name of the container port.
* `__meta_kubernetes_pod_container_port_number`: Number of the container port.

@@ -737,6 +779,8 @@ Available meta labels:
* `__meta_kubernetes_endpoints_name`: The names of the endpoints object.
* For all targets discovered directly from the endpoints list (those not additionally inferred
  from underlying pods), the following labels are attached:
  * `__meta_kubernetes_endpoint_hostname`: Hostname of the endpoint.
  * `__meta_kubernetes_endpoint_node_name`: Name of the node hosting the endpoint.
  * `__meta_kubernetes_endpoint_ready`: Set to `true` or `false` for the endpoint's ready state.
  * `__meta_kubernetes_endpoint_port_name`: Name of the endpoint port.
  * `__meta_kubernetes_endpoint_port_protocol`: Protocol of the endpoint port.

@@ -755,8 +799,10 @@ Available meta labels:

* `__meta_kubernetes_namespace`: The namespace of the ingress object.
* `__meta_kubernetes_ingress_name`: The name of the ingress object.
* `__meta_kubernetes_ingress_label_<labelname>`: The label of the ingress object.
* `__meta_kubernetes_ingress_annotation_<annotationname>`: The annotation of the ingress object.
* `__meta_kubernetes_ingress_label_<labelname>`: Each label from the ingress object.
* `__meta_kubernetes_ingress_labelpresent_<labelname>`: `true` for each label from the ingress object.
* `__meta_kubernetes_ingress_annotation_<annotationname>`: Each annotation from the ingress object.
* `__meta_kubernetes_ingress_annotationpresent_<annotationname>`: `true` for each annotation from the ingress object.
* `__meta_kubernetes_ingress_scheme`: Protocol scheme of ingress, `https` if TLS
  config is set. Defaults to `http`.
* `__meta_kubernetes_ingress_path`: Path from ingress spec. Defaults to `/`.

@@ -791,6 +837,9 @@ basic_auth:
# Optional bearer token file authentication information.
[ bearer_token_file: <filename> ]

# Optional proxy URL.
[ proxy_url: <string> ]

# TLS configuration.
tls_config:
  [ <tls_config> ]

@@ -914,7 +963,7 @@ Serverset SD configurations allow retrieving scrape targets from [Serversets]
(https://github.com/twitter/finagle/tree/master/finagle-serversets) which are
stored in [Zookeeper](https://zookeeper.apache.org/). Serversets are commonly
used by [Finagle](https://twitter.github.io/finagle/) and
[Aurora](http://aurora.apache.org/).
[Aurora](https://aurora.apache.org/).

The following meta labels are available on targets during relabeling:

@@ -974,7 +1023,7 @@ groups:
# The port to use for discovery and metric scraping.
[ port: <int> | default = 9163 ]

# The interval which should should be used for refreshing target containers.
# The interval which should be used for refreshing target containers.
[ refresh_interval: <duration> | default = 60s ]

# The Triton discovery API version.

@@ -1020,7 +1069,8 @@ Additional labels prefixed with `__meta_` may be available during the
relabeling phase. They are set by the service discovery mechanism that provided
the target and vary between mechanisms.

Labels starting with `__` will be removed from the label set after relabeling is completed.
Labels starting with `__` will be removed from the label set after target
relabeling is completed.

If a relabeling step needs to store a label value only temporarily (as the
input to a subsequent relabeling step), use the `__tmp` label name prefix. This

@@ -1242,12 +1292,12 @@ queue_config:
  [ capacity: <int> | default = 10000 ]
  # Maximum number of shards, i.e. amount of concurrency.
  [ max_shards: <int> | default = 1000 ]
  # Minimum number of shards, i.e. amount of concurrency.
  [ min_shards: <int> | default = 1 ]
  # Maximum number of samples per send.
  [ max_samples_per_send: <int> | default = 100]
  # Maximum time a sample will wait in buffer.
  [ batch_send_deadline: <duration> | default = 5s ]
  # Maximum number of times to retry a batch on recoverable errors.
  [ max_retries: <int> | default = 3 ]
  # Initial retry delay. Gets doubled for every retry.
  [ min_backoff: <duration> | default = 30ms ]
  # Maximum retry delay.
@@ -9,7 +9,7 @@ Prometheus supports templating in the annotations and labels of alerts,
as well as in served console pages. Templates have the ability to run
queries against the local database, iterate over data, use conditionals,
format data, etc. The Prometheus templating language is based on the [Go
templating](http://golang.org/pkg/text/template/) system.
templating](https://golang.org/pkg/text/template/) system.

## Simple alert field templates

@@ -60,7 +60,7 @@ formatting of results, and linking to the [expression browser](https://prometheu

```go
{{ with printf "node_memory_MemTotal{job='node',instance='%s'}" .Params.instance | query }}
  {{ . | first | value | humanize1024}}B
  {{ . | first | value | humanize1024 }}B
{{ end }}
```

@@ -80,7 +80,7 @@ If accessed as `console.html?instance=hostname`, `.Params.instance` will evaluat
  <td>Transmitted</td>
  <td>{{ with printf "rate(node_network_transmit_bytes{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device | query }}{{ . | first | value | humanize }}B/s{{end}}</td>
</tr>{{ end }}
<table>
</table>
```

Here we iterate over all network devices and display the network traffic for each.
@@ -9,7 +9,7 @@ Prometheus supports templating in the annotations and labels of alerts,
as well as in served console pages. Templates have the ability to run
queries against the local database, iterate over data, use conditionals,
format data, etc. The Prometheus templating language is based on the [Go
templating](http://golang.org/pkg/text/template/) system.
templating](https://golang.org/pkg/text/template/) system.

## Data Structures

@@ -31,7 +31,7 @@ The metric name of the sample is encoded in a special `__name__` label in the `L
## Functions

In addition to the [default
functions](http://golang.org/pkg/text/template/#hdr-Functions) provided by Go
functions](https://golang.org/pkg/text/template/#hdr-Functions) provided by Go
templating, Prometheus provides functions for easier processing of query
results in templates.

@@ -53,9 +53,10 @@ If functions are used in a pipeline, the pipeline value is passed as the last ar

| Name | Arguments | Returns | Notes |
| ------------- | --------------| --------| --------- |
| humanize | number | string | Converts a number to a more readable format, using [metric prefixes](http://en.wikipedia.org/wiki/Metric_prefix).
| humanize | number | string | Converts a number to a more readable format, using [metric prefixes](https://en.wikipedia.org/wiki/Metric_prefix).
| humanize1024 | number | string | Like `humanize`, but uses 1024 as the base rather than 1000. |
| humanizeDuration | number | string | Converts a duration in seconds to a more readable format. |
| humanizePercentage | number | string | Converts a ratio value to a fraction of 100. |
| humanizeTimestamp | number | string | Converts a Unix timestamp in seconds to a more readable format. |

Humanizing functions are intended to produce reasonable output for consumption

@@ -66,11 +67,11 @@ versions.

| Name | Arguments | Returns | Notes |
| ------------- | ------------- | ------- | ----------- |
| title | string | string | [strings.Title](http://golang.org/pkg/strings/#Title), capitalises first character of each word.|
| toUpper | string | string | [strings.ToUpper](http://golang.org/pkg/strings/#ToUpper), converts all characters to upper case.|
| toLower | string | string | [strings.ToLower](http://golang.org/pkg/strings/#ToLower), converts all characters to lower case.|
| match | pattern, text | boolean | [regexp.MatchString](http://golang.org/pkg/regexp/#MatchString) Tests for an unanchored regexp match. |
| reReplaceAll | pattern, replacement, text | string | [Regexp.ReplaceAllString](http://golang.org/pkg/regexp/#Regexp.ReplaceAllString) Regexp substitution, unanchored. |
| title | string | string | [strings.Title](https://golang.org/pkg/strings/#Title), capitalises first character of each word.|
| toUpper | string | string | [strings.ToUpper](https://golang.org/pkg/strings/#ToUpper), converts all characters to upper case.|
| toLower | string | string | [strings.ToLower](https://golang.org/pkg/strings/#ToLower), converts all characters to lower case.|
| match | pattern, text | boolean | [regexp.MatchString](https://golang.org/pkg/regexp/#MatchString) Tests for an unanchored regexp match. |
| reReplaceAll | pattern, replacement, text | string | [Regexp.ReplaceAllString](https://golang.org/pkg/regexp/#Regexp.ReplaceAllString) Regexp substitution, unanchored. |
| graphLink | expr | string | Returns path to graph view in the [expression browser](https://prometheus.io/docs/visualization/browser/) for the expression. |
| tableLink | expr | string | Returns path to tabular ("Console") view in the [expression browser](https://prometheus.io/docs/visualization/browser/) for the expression. |

@@ -89,8 +90,10 @@ parameterize templates, and have a few other differences.

### Alert field templates

`.Value` and `.Labels` contain the alert value and labels. They are also exposed
as the `$value` and `$labels` variables for convenience.
`.Value`, `.Labels`, and `.ExternalLabels` contain the alert value, the alert
labels, and the globally configured external labels, respectively. They are
also exposed as the `$value`, `$labels`, and `$externalLabels` variables for
convenience.

### Console templates

@@ -98,13 +101,15 @@ Consoles are exposed on `/consoles/`, and sourced from the directory pointed to
by the `-web.console.templates` flag.

Console templates are rendered with
[html/template](http://golang.org/pkg/html/template/), which provides
[html/template](https://golang.org/pkg/html/template/), which provides
auto-escaping. To bypass the auto-escaping, use the `safe*` functions.

URL parameters are available as a map in `.Params`. To access multiple URL
parameters by the same name, `.RawParams` is a map of the list values for each
parameter. The URL path is available in `.Path`, excluding the `/consoles/`
prefix.
prefix. The globally configured external labels are available as
`.ExternalLabels`. There are also convenience variables for all four:
`$rawParams`, `$params`, `$path`, and `$externalLabels`.

Consoles also have access to all the templates defined with `{{define
"templateName"}}...{{end}}` found in `*.lib` files in the directory pointed to
@@ -18,7 +18,7 @@ You can use `promtool` to test your rules.

## Test file format

```yaml
# This is a list of rule files to consider for testing.
# This is a list of rule files to consider for testing. Globs are supported.
rule_files:
  [ - <file_name> ]
@@ -222,8 +222,7 @@ groups:
      expr: avg(rate(rpc_durations_seconds_count[5m])) by (job, service)
```

To make Prometheus pick up this new rule, add a `rule_files` statement to the
`global` configuration section in your `prometheus.yml`. The config should now
To make Prometheus pick up this new rule, add a `rule_files` statement in your `prometheus.yml`. The config should now
look like this:

```yaml
@@ -2,12 +2,12 @@
# todo: internal
---

# Prometheus 2.0
# Prometheus

Welcome to the documentation of the Prometheus server.

The documentation is available alongside all the project documentation at
[prometheus.io](https://prometheus.io/docs/prometheus/2.0/).
[prometheus.io](https://prometheus.io/docs/prometheus/latest/).

## Content
@@ -20,7 +20,7 @@ the respective repository.

All Prometheus services are available as Docker images on
[Quay.io](https://quay.io/repository/prometheus/prometheus) or
[Docker Hub[(https://hub.docker.com/u/prom/).
[Docker Hub](https://hub.docker.com/u/prom/).

Running Prometheus on Docker is as simple as `docker run -p 9090:9090
prom/prometheus`. This starts Prometheus with a sample
@@ -18,12 +18,16 @@ and one of the following HTTP response codes:

- `400 Bad Request` when parameters are missing or incorrect.
- `422 Unprocessable Entity` when an expression can't be executed
  ([RFC4918](http://tools.ietf.org/html/rfc4918#page-78)).
  ([RFC4918](https://tools.ietf.org/html/rfc4918#page-78)).
- `503 Service Unavailable` when queries time out or abort.

Other non-`2xx` codes may be returned for errors occurring before the API
endpoint is reached.

An array of warnings may be returned if there are errors that do
not inhibit the request execution. All of the data that was successfully
collected will be returned in the data field.

The JSON response envelope format is as follows:

```

@@ -34,7 +38,11 @@ The JSON response envelope format is as follows:
  // Only set if status is "error". The data field may still hold
  // additional data.
  "errorType": "<string>",
  "error": "<string>"
  "error": "<string>",

  // Only if there were warnings while executing the request.
  // There will still be data in the data field.
  "warnings": ["<string>"]
}
```

@@ -66,6 +74,7 @@ The following endpoint evaluates an instant query at a single point in time:

```
GET /api/v1/query
POST /api/v1/query
```

URL query parameters:

@@ -77,6 +86,10 @@ URL query parameters:

The current server time is used if the `time` parameter is omitted.

You can URL-encode these parameters directly in the request body by using the `POST` method and
`Content-Type: application/x-www-form-urlencoded` header. This is useful when specifying a large
query that may breach server-side URL character limits.
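For illustration, here is one way to issue such a form-encoded `POST` from Go's standard library; the server address and the query are placeholder values, not part of the documented API:

```go
package main

import (
    "fmt"
    "io/ioutil"
    "net/http"
    "net/url"
)

func main() {
    form := url.Values{}
    form.Set("query", "up") // the PromQL expression to evaluate

    // PostForm sends Content-Type: application/x-www-form-urlencoded,
    // so the query text never appears in the URL.
    resp, err := http.PostForm("http://localhost:9090/api/v1/query", form)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    body, _ := ioutil.ReadAll(resp.Body)
    fmt.Println(string(body)) // the JSON response envelope described above
}
```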
The `data` section of the query result has the following format:

```

@@ -127,6 +140,7 @@ The following endpoint evaluates an expression query over a range of time:

```
GET /api/v1/query_range
POST /api/v1/query_range
```

URL query parameters:

@@ -138,6 +152,10 @@ URL query parameters:
- `timeout=<duration>`: Evaluation timeout. Optional. Defaults to and
  is capped by the value of the `-query.timeout` flag.

You can URL-encode these parameters directly in the request body by using the `POST` method and
`Content-Type: application/x-www-form-urlencoded` header. This is useful when specifying a large
query that may breach server-side URL character limits.

The `data` section of the query result has the following format:

```

@@ -197,6 +215,7 @@ The following endpoint returns the list of time series that match a certain labe

```
GET /api/v1/series
POST /api/v1/series
```

URL query parameters:

@@ -206,6 +225,10 @@ URL query parameters:
- `start=<rfc3339 | unix_timestamp>`: Start timestamp.
- `end=<rfc3339 | unix_timestamp>`: End timestamp.

You can URL-encode these parameters directly in the request body by using the `POST` method and
`Content-Type: application/x-www-form-urlencoded` header. This is useful when specifying a large
or dynamic number of series selectors that may breach server-side URL character limits.

The `data` section of the query result consists of a list of objects that
contain the label name/value pairs which identify each series.

@@ -213,7 +236,7 @@ The following example returns all series that match either of the selectors
`up` or `process_start_time_seconds{job="prometheus"}`:

```json
$ curl -g 'http://localhost:9090/api/v1/series?match[]=up&match[]=process_start_time_seconds{job="prometheus"}'
$ curl -g 'http://localhost:9090/api/v1/series?' --data-urlencode 'match[]=up' --data-urlencode 'match[]=process_start_time_seconds{job="prometheus"}'
{
  "status" : "success",
  "data" : [

@@ -236,6 +259,49 @@ $ curl -g 'http://localhost:9090/api/v1/series?match[]=up&match[]=process_start_
}
```

### Getting label names

The following endpoint returns a list of label names:

```
GET /api/v1/labels
POST /api/v1/labels
```

The `data` section of the JSON response is a list of string label names.

Here is an example.

```json
$ curl 'localhost:9090/api/v1/labels'
{
  "status": "success",
  "data": [
    "__name__",
    "call",
    "code",
    "config",
    "dialer_name",
    "endpoint",
    "event",
    "goversion",
    "handler",
    "instance",
    "interval",
    "job",
    "le",
    "listener_name",
    "name",
    "quantile",
    "reason",
    "role",
    "scrape_job",
    "slice",
    "version"
  ]
}
```

### Querying label values

The following endpoint returns a list of label values for a provided label name:

@@ -244,7 +310,7 @@ The following endpoint returns a list of label values for a provided label name:
GET /api/v1/label/<label_name>/values
```

The `data` section of the JSON response is a list of string label names.
The `data` section of the JSON response is a list of string label values.

This example queries for all label values for the `job` label:

@@ -399,7 +465,7 @@ $ curl http://localhost:9090/api/v1/rules
        "severity": "page"
      },
      "state": "firing",
      "value": 1
      "value": "1e+00"
    }
  ],
  "annotations": {

@@ -456,7 +522,7 @@ $ curl http://localhost:9090/api/v1/alerts
        "alertname": "my-alert"
      },
      "state": "firing",
      "value": 1
      "value": "1e+00"
    }
  ]
},

@@ -642,9 +708,14 @@ Snapshot creates a snapshot of all current data into `snapshots/<datetime>-<rand
It will optionally skip snapshotting data that is only present in the head block, and which has not yet been compacted to disk.

```
POST /api/v1/admin/tsdb/snapshot?skip_head=<bool>
POST /api/v1/admin/tsdb/snapshot
PUT /api/v1/admin/tsdb/snapshot
```

URL query parameters:

- `skip_head=<bool>`: Skip data present in the head block. Optional.

```json
$ curl -XPOST http://localhost:9090/api/v1/admin/tsdb/snapshot
{

@@ -654,10 +725,9 @@ $ curl -XPOST http://localhost:9090/api/v1/admin/tsdb/snapshot
  }
}
```

The snapshot now exists at `<data-dir>/snapshots/20171210T211224Z-2be650b6d019eb54`

*New in v2.1*
*New in v2.1 and supports PUT from v2.9*

### Delete Series
DeleteSeries deletes data for a selection of series in a time range. The actual data still exists on disk and is cleaned up in future compactions or can be explicitly cleaned up by hitting the Clean Tombstones endpoint.

@@ -666,6 +736,7 @@ If successful, a `204` is returned.

```
POST /api/v1/admin/tsdb/delete_series
PUT /api/v1/admin/tsdb/delete_series
```

URL query parameters:

@@ -682,7 +753,7 @@ Example:
$ curl -X POST \
  -g 'http://localhost:9090/api/v1/admin/tsdb/delete_series?match[]=up&match[]=process_start_time_seconds{job="prometheus"}'
```
*New in v2.1*
*New in v2.1 and supports PUT from v2.9*

### Clean Tombstones
CleanTombstones removes the deleted data from disk and cleans up the existing tombstones. This can be used after deleting series to free up space.

@@ -691,6 +762,7 @@ If successful, a `204` is returned.

```
POST /api/v1/admin/tsdb/clean_tombstones
PUT /api/v1/admin/tsdb/clean_tombstones
```

This takes no parameters or body.

@@ -699,4 +771,4 @@ This takes no parameters or body.
$ curl -XPOST http://localhost:9090/api/v1/admin/tsdb/clean_tombstones
```

*New in v2.1*
*New in v2.1 and supports PUT from v2.9*
@@ -6,10 +6,11 @@ sort_rank: 1

# Querying Prometheus

Prometheus provides a functional expression language that lets the user select
and aggregate time series data in real time. The result of an expression can
either be shown as a graph, viewed as tabular data in Prometheus's expression
browser, or consumed by external systems via the [HTTP API](api.md).
Prometheus provides a functional query language called PromQL (Prometheus Query
Language) that lets the user select and aggregate time series data in real
time. The result of an expression can either be shown as a graph, viewed as
tabular data in Prometheus's expression browser, or consumed by external
systems via the [HTTP API](api.md).

## Examples

@@ -87,8 +88,8 @@ against regular expressions. The following label matching operators exist:

* `=`: Select labels that are exactly equal to the provided string.
* `!=`: Select labels that are not equal to the provided string.
* `=~`: Select labels that regex-match the provided string (or substring).
* `!~`: Select labels that do not regex-match the provided string (or substring).
* `=~`: Select labels that regex-match the provided string.
* `!~`: Select labels that do not regex-match the provided string.

For example, this selects all `http_requests_total` time series for `staging`,
`testing`, and `development` environments and HTTP methods other than `GET`.

@@ -169,6 +170,14 @@ The same works for range vectors. This returns the 5-minutes rate that

    rate(http_requests_total[5m] offset 1w)

## Subquery

Subquery allows you to run an instant query for a given range and resolution. The result of a subquery is a range vector.

Syntax: `<instant_query> '[' <range> ':' [<resolution>] ']' [ offset <duration> ]`

* `<resolution>` is optional. Default is the global evaluation interval.
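For example, the following (illustrative) subquery returns the maximum of the 5-minute rate of `http_requests_total` over the last 30 minutes, at the default resolution:

    max_over_time(rate(http_requests_total[5m])[30m:])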

## Operators

Prometheus supports many binary and aggregation operators. These are described