Merge pull request #5681 from prometheus/beorn7/mixin

Merge master into mixin
commit 498d31e178
Björn Rabenstein 2019-06-19 23:17:41 +02:00, committed by GitHub (GPG key ID: 4AEE18F83AFDEB23)
2393 changed files with 444593 additions and 93496 deletions

.circleci/config.yml

@@ -6,17 +6,28 @@ executors:
   # should also be updated.
   golang:
     docker:
-      - image: circleci/golang:1.11
+      - image: circleci/golang:1.12

 jobs:
   test:
     executor: golang
+    resource_class: large
     steps:
       - checkout
       - run: make promu
-      - run: make check_license style unused staticcheck build check_assets
+      - run:
+          command: make check_license style unused lint build check_assets
+          environment:
+            # Run garbage collection more aggressively to avoid getting OOMed during the lint phase.
+            GOGC: "20"
+      - run:
+          command: |
+            curl -s -L https://github.com/protocolbuffers/protobuf/releases/download/v3.5.1/protoc-3.5.1-linux-x86_64.zip > /tmp/protoc.zip
+            unzip -d /tmp /tmp/protoc.zip
+            chmod +x /tmp/bin/protoc
+            echo 'export PATH=/tmp/bin:$PATH' >> $BASH_ENV
+            source $BASH_ENV
+            make proto
       - run: git diff --exit-code
       - store_artifacts:
           path: prometheus
@@ -46,11 +57,11 @@ jobs:
     steps:
       - checkout
-      - setup_remote_docker
+      - setup_remote_docker:
+          version: 18.06.0-ce
+      - run: docker run --privileged linuxkit/binfmt:v0.6
       - attach_workspace:
          at: .
-      - run: ln -s .build/linux-amd64/prometheus prometheus
-      - run: ln -s .build/linux-amd64/promtool promtool
       - run: make docker
       - run: make docker DOCKER_REPO=quay.io/prometheus
       - run: docker images
@@ -58,16 +69,17 @@ jobs:
       - run: docker login -u $QUAY_LOGIN -p $QUAY_PASSWORD quay.io
       - run: make docker-publish
       - run: make docker-publish DOCKER_REPO=quay.io/prometheus
+      - run: make docker-manifest
+      - run: make docker-manifest DOCKER_REPO=quay.io/prometheus

   docker_hub_release_tags:
     executor: golang
     steps:
       - checkout
-      - setup_remote_docker
-      - run: mkdir -v -p ${HOME}/bin
-      - run: curl -L 'https://github.com/aktau/github-release/releases/download/v0.7.2/linux-amd64-github-release.tar.bz2' | tar xvjf - --strip-components 3 -C ${HOME}/bin
-      - run: echo 'export PATH=${HOME}/bin:${PATH}' >> ${BASH_ENV}
+      - setup_remote_docker:
+          version: 18.06.0-ce
+      - run: docker run --privileged linuxkit/binfmt:v0.6
       - attach_workspace:
          at: .
       - run: make promu
@@ -77,19 +89,29 @@ jobs:
       - store_artifacts:
          path: .tarballs
          destination: releases
-      - run: ln -s .build/linux-amd64/prometheus prometheus
-      - run: ln -s .build/linux-amd64/promtool promtool
       - run: make docker DOCKER_IMAGE_TAG=$CIRCLE_TAG
       - run: make docker DOCKER_IMAGE_TAG=$CIRCLE_TAG DOCKER_REPO=quay.io/prometheus
       - run: docker login -u $DOCKER_LOGIN -p $DOCKER_PASSWORD
       - run: docker login -u $QUAY_LOGIN -p $QUAY_PASSWORD quay.io
+      - run: make docker-publish DOCKER_IMAGE_TAG="$CIRCLE_TAG"
+      - run: make docker-publish DOCKER_IMAGE_TAG="$CIRCLE_TAG" DOCKER_REPO=quay.io/prometheus
+      - run: make docker-manifest DOCKER_IMAGE_TAG="$CIRCLE_TAG"
+      - run: make docker-manifest DOCKER_IMAGE_TAG="$CIRCLE_TAG" DOCKER_REPO=quay.io/prometheus
       - run: |
          if [[ "$CIRCLE_TAG" =~ ^v[0-9]+(\.[0-9]+){2}$ ]]; then
            make docker-tag-latest DOCKER_IMAGE_TAG="$CIRCLE_TAG"
            make docker-tag-latest DOCKER_IMAGE_TAG="$CIRCLE_TAG" DOCKER_REPO=quay.io/prometheus
+           make docker-publish DOCKER_IMAGE_TAG="latest"
+           make docker-publish DOCKER_IMAGE_TAG="latest" DOCKER_REPO=quay.io/prometheus
+           make docker-manifest DOCKER_IMAGE_TAG="latest"
+           make docker-manifest DOCKER_IMAGE_TAG="latest" DOCKER_REPO=quay.io/prometheus
          fi
-      - run: make docker-publish
-      - run: make docker-publish DOCKER_REPO=quay.io/prometheus
+
+  makefile_sync:
+    executor: golang
+    steps:
+      - checkout
+      - run: ./scripts/sync_makefiles.sh

 workflows:
   version: 2
@@ -104,6 +126,7 @@ workflows:
         tags:
           only: /.*/
     - docker_hub_master:
+        context: org-context
         requires:
         - test
         - build
@@ -111,6 +134,7 @@ workflows:
         branches:
           only: master
     - docker_hub_release_tags:
+        context: org-context
         requires:
         - test
         - build
@@ -119,3 +143,14 @@ workflows:
           only: /^v[0-9]+(\.[0-9]+){2}(-.+|[^-.]*)$/
         branches:
           ignore: /.*/
+  nightly:
+    triggers:
+    - schedule:
+        cron: "0 0 * * *"
+        filters:
+          branches:
+            only:
+            - master
+    jobs:
+    - makefile_sync:
+        context: org-context
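A change of this size to `.circleci/config.yml` can be sanity-checked locally before pushing, assuming the CircleCI CLI is installed:

```sh
# Validate the CircleCI config without triggering a build.
# Prints "Config file at .circleci/config.yml is valid." on success.
circleci config validate .circleci/config.yml
```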

.dockerignore

@@ -3,3 +3,5 @@ data/
 .tarballs/

 !.build/linux-amd64/
+!.build/linux-armv7/
+!.build/linux-arm64/

.github/lock.yml (new file)

@@ -0,0 +1,35 @@
+# Configuration for Lock Threads - https://github.com/dessant/lock-threads
+
+# Number of days of inactivity before a closed issue or pull request is locked
+daysUntilLock: 180
+
+# Skip issues and pull requests created before a given timestamp. Timestamp must
+# follow ISO 8601 (`YYYY-MM-DD`). Set to `false` to disable
+skipCreatedBefore: false
+
+# Issues and pull requests with these labels will be ignored. Set to `[]` to disable
+exemptLabels: []
+
+# Label to add before locking, such as `outdated`. Set to `false` to disable
+lockLabel: false
+
+# Comment to post before locking. Set to `false` to disable
+lockComment: false
+
+# Assign `resolved` as the reason for locking. Set to `false` to disable
+setLockReason: false
+
+# Limit to only `issues` or `pulls`
+only: issues
+
+# Optionally, specify configuration settings just for `issues` or `pulls`
+# issues:
+#   exemptLabels:
+#     - help-wanted
+#   lockLabel: outdated
+
+# pulls:
+#   daysUntilLock: 30
+
+# Repository to extend settings from
+# _extends: repo

.gitignore

@@ -1,9 +1,7 @@
 *#
 .#*
-*-stamp
 /*.yaml
 /*.yml
-/*.rules
 *.exe

 /prometheus
@@ -12,12 +10,9 @@ benchmark.txt
 /data
 /cmd/prometheus/data
 /cmd/prometheus/debug
-/.build
-/.release
-/.tarballs
-!/circle.yml
 !/.travis.yml
 !/.promu.yml
+!/.golangci.yml
 /documentation/examples/remote_storage/remote_storage_adapter/remote_storage_adapter
 /documentation/examples/remote_storage/example_write_adapter/example_writer_adapter

.golangci.yml (new file)

@@ -0,0 +1,13 @@
+run:
+  modules-download-mode: vendor
+  deadline: 5m
+
+issues:
+  exclude-rules:
+  - path: _test.go
+    linters:
+    - errcheck
+
+linters-settings:
+  errcheck:
+    exclude: scripts/errcheck_excludes.txt

.promu.yml

@@ -1,7 +1,7 @@
 go:
     # Whenever the Go version is updated here, .travis.yml and
     # .circle/config.yml should also be updated.
-    version: 1.11
+    version: 1.12
 repository:
     path: github.com/prometheus/prometheus
 build:
@@ -12,11 +12,11 @@ build:
         path: ./cmd/promtool
     flags: -mod=vendor -a -tags netgo
     ldflags: |
-        -X {{repoPath}}/vendor/github.com/prometheus/common/version.Version={{.Version}}
-        -X {{repoPath}}/vendor/github.com/prometheus/common/version.Revision={{.Revision}}
-        -X {{repoPath}}/vendor/github.com/prometheus/common/version.Branch={{.Branch}}
-        -X {{repoPath}}/vendor/github.com/prometheus/common/version.BuildUser={{user}}@{{host}}
-        -X {{repoPath}}/vendor/github.com/prometheus/common/version.BuildDate={{date "20060102-15:04:05"}}
+        -X github.com/prometheus/common/version.Version={{.Version}}
+        -X github.com/prometheus/common/version.Revision={{.Revision}}
+        -X github.com/prometheus/common/version.Branch={{.Branch}}
+        -X github.com/prometheus/common/version.BuildUser={{user}}@{{host}}
+        -X github.com/prometheus/common/version.BuildDate={{date "20060102-15:04:05"}}
 tarball:
     files:
         - consoles
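The `ldflags` change above reflects the switch to Go modules: in module-aware builds, vendored packages are addressed by their canonical import path, so the `-X` flags no longer go through `vendor/`. A minimal sketch of the `-X` mechanism itself (package and variable names here are illustrative, not Prometheus code):

```go
// version.go — build with:
//   go build -ldflags "-X main.Version=2.10.0" version.go
package main

import "fmt"

// Version is a plain package-level string; the linker's -X flag
// overwrites its value at build time. promu uses the same mechanism
// to inject Version, Revision, Branch, BuildUser and BuildDate.
var Version = "unknown"

func main() {
	fmt.Println("version:", Version)
}
```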

.travis.yml

@@ -1,14 +1,18 @@
-sudo: false
 language: go

 # Whenever the Go version is updated here, .circleci/config.yml and .promu.yml
 # should also be updated.
 go:
-- 1.11.x
+- 1.12.x

 go_import_path: github.com/prometheus/prometheus

+# This ensures that the local cache is filled before running the CI.
+# travis_retry retries the command 3 times if it fails as we've experienced
+# random issues on Travis.
+before_install:
+- travis_retry make deps
+
 script:
-- make check_license style unused test staticcheck check_assets
+- make check_license style unused test lint check_assets
 - git diff --exit-code

CHANGELOG.md

@@ -1,3 +1,185 @@
+## 2.10.0 / 2019-05-25
+
+* [CHANGE/BUGFIX] API: Encode alert values as string to correctly represent Inf/NaN. #5582
+* [FEATURE] Template expansion: Make external labels available as `$externalLabels` in alert and console template expansion. #5463
+* [FEATURE] TSDB: Add `prometheus_tsdb_wal_segment_current` metric for the WAL segment index that TSDB is currently writing to. tsdb#601
+* [FEATURE] Scrape: Add `scrape_series_added` per-scrape metric. #5546
+* [ENHANCEMENT] Discovery/kubernetes: Add labels `__meta_kubernetes_endpoint_node_name` and `__meta_kubernetes_endpoint_hostname`. #5571
+* [ENHANCEMENT] Discovery/azure: Add label `__meta_azure_machine_public_ip`. #5475
+* [ENHANCEMENT] TSDB: Simplify mergedPostings.Seek, resulting in better performance if there are many posting lists. tsdb#595
+* [ENHANCEMENT] Log filesystem type on startup. #5558
+* [ENHANCEMENT] Cmd/promtool: Use POST requests for Query and QueryRange. client_golang#557
+* [ENHANCEMENT] Web: Sort alerts by group name. #5448
+* [ENHANCEMENT] Console templates: Add convenience variables `$rawParams`, `$params`, `$path`. #5463
+* [BUGFIX] TSDB: Don't panic when running out of disk space and recover nicely from the condition. tsdb#582
+* [BUGFIX] TSDB: Correctly handle empty labels. tsdb#594
+* [BUGFIX] TSDB: Don't crash on an unknown tombstone reference. tsdb#604
+* [BUGFIX] Storage/remote: Remove queue-manager specific metrics if queue no longer exists. #5445 #5485 #5555
+* [BUGFIX] PromQL: Correctly display `{__name__="a"}`. #5552
+* [BUGFIX] Discovery/kubernetes: Use `service` rather than `ingress` as the name for the service workqueue. #5520
+* [BUGFIX] Discovery/azure: Don't panic on a VM with a public IP. #5587
+* [BUGFIX] Discovery/triton: Always read HTTP body to completion. #5596
+* [BUGFIX] Web: Fixed Content-Type for js and css instead of using `/etc/mime.types`. #5551
+
+## 2.9.2 / 2019-04-24
+
+* [BUGFIX] Make sure subquery range is taken into account for selection #5467
+* [BUGFIX] Exhaust every request body before closing it #5166
+* [BUGFIX] Cmd/promtool: return errors from rule evaluations #5483
+* [BUGFIX] Remote Storage: string interner should not panic in release #5487
+* [BUGFIX] Fix memory allocation regression in mergedPostings.Seek tsdb#586
+
+## 2.9.1 / 2019-04-16
+
+* [BUGFIX] Discovery/kubernetes: fix missing label sanitization #5462
+* [BUGFIX] Remote_write: Prevent reshard concurrent with calling stop #5460
+
+## 2.9.0 / 2019-04-15
+
+This release uses Go 1.12, which includes a change in how memory is released
+to Linux. This will cause RSS to be reported as higher, however this is harmless
+and the memory is available to the kernel when it needs it.
+
+* [CHANGE/ENHANCEMENT] Update Consul to support catalog.ServiceMultipleTags. #5151
+* [FEATURE] Add honor_timestamps scrape option. #5304
+* [ENHANCEMENT] Discovery/kubernetes: add present labels for labels/annotations. #5443
+* [ENHANCEMENT] OpenStack SD: Add ProjectID and UserID meta labels. #5431
+* [ENHANCEMENT] Add GODEBUG and retention to the runtime page. #5324 #5322
+* [ENHANCEMENT] Add support for POSTing to /series endpoint. #5422
+* [ENHANCEMENT] Support PUT methods for Lifecycle and Admin APIs. #5376
+* [ENHANCEMENT] Scrape: Add global jitter for HA server. #5181
+* [ENHANCEMENT] Check for cancellation on every step of a range evaluation. #5131
+* [ENHANCEMENT] String interning for labels & values in the remote_write path. #5316
+* [ENHANCEMENT] Don't lose the scrape cache on a failed scrape. #5414
+* [ENHANCEMENT] Reload cert files from disk automatically. common#173
+* [ENHANCEMENT] Use fixed length millisecond timestamp format for logs. common#172
+* [ENHANCEMENT] Performance improvements for postings. tsdb#509 tsdb#572
+* [BUGFIX] Remote Write: fix checkpoint reading. #5429
+* [BUGFIX] Check if label value is valid when unmarshaling external labels from YAML. #5316
+* [BUGFIX] Promparse: sort all labels when parsing. #5372
+* [BUGFIX] Reload rules: copy state on both name and labels. #5368
+* [BUGFIX] Exponentiation operator to drop metric name in result of operation. #5329
+* [BUGFIX] Config: resolve more file paths. #5284
+* [BUGFIX] Promtool: resolve relative paths in alert test files. #5336
+* [BUGFIX] Set TLSHandshakeTimeout in HTTP transport. common#179
+* [BUGFIX] Use fsync to be more resilient to machine crashes. tsdb#573 tsdb#578
+* [BUGFIX] Keep series that are still in WAL in checkpoints. tsdb#577
+* [BUGFIX] Fix output sample values for scalar-to-vector comparison operations. #5454
+
+## 2.8.1 / 2019-03-28
+
+* [BUGFIX] Display the job labels in `/targets` which was removed accidentally. #5406
+
+## 2.8.0 / 2019-03-12
+
+This release uses Write-Ahead Logging (WAL) for the remote_write API. This currently causes a slight increase in memory usage, which will be addressed in future releases.
+
+* [CHANGE] Default time retention is used only when no size based retention is specified. These are flags where time retention is specified by the flag `--storage.tsdb.retention` and size retention by `--storage.tsdb.retention.size`. #5216
+* [CHANGE] `prometheus_tsdb_storage_blocks_bytes_total` is now `prometheus_tsdb_storage_blocks_bytes`. prometheus/tsdb#506
+* [FEATURE] [EXPERIMENTAL] Time overlapping blocks are now allowed; vertical compaction and vertical query merge. It is an optional feature which is controlled by the `--storage.tsdb.allow-overlapping-blocks` flag, disabled by default. prometheus/tsdb#370
+* [ENHANCEMENT] Use the WAL for remote_write API. #4588
+* [ENHANCEMENT] Query performance improvements. prometheus/tsdb#531
+* [ENHANCEMENT] UI enhancements with upgrade to Bootstrap 4. #5226
+* [ENHANCEMENT] Reduce time that Alertmanagers are in flux when reloaded. #5126
+* [ENHANCEMENT] Limit number of metrics displayed on UI to 10000. #5139
+* [ENHANCEMENT] (1) Remember All/Unhealthy choice on target-overview when reloading page. (2) Resize text-input area on Graph page on mouseclick. #5201
+* [ENHANCEMENT] In `histogram_quantile` merge buckets with equivalent le values. #5158.
+* [ENHANCEMENT] Show list of offending labels in the error message in many-to-many scenarios. #5189
+* [ENHANCEMENT] Show `Storage Retention` criteria in effect on `/status` page. #5322
+* [BUGFIX] Fix sorting of rule groups. #5260
+* [BUGFIX] Fix support for password_file and bearer_token_file in Kubernetes SD. #5211
+* [BUGFIX] Scrape: catch errors when creating HTTP clients #5182. Adds new metrics:
+  * `prometheus_target_scrape_pools_total`
+  * `prometheus_target_scrape_pools_failed_total`
+  * `prometheus_target_scrape_pool_reloads_total`
+  * `prometheus_target_scrape_pool_reloads_failed_total`
+* [BUGFIX] Fix panic when aggregator param is not a literal. #5290
+
+## 2.7.2 / 2019-03-02
+
+* [BUGFIX] `prometheus_rule_group_last_evaluation_timestamp_seconds` is now a unix timestamp. #5186
+
+## 2.7.1 / 2019-01-31
+
+This release has a fix for a Stored DOM XSS vulnerability that can be triggered when using the query history functionality. Thanks to Dor Tumarkin from Checkmarx for reporting it.
+
+* [BUGFIX/SECURITY] Fix a Stored DOM XSS vulnerability with query history. #5163
+* [BUGFIX] `prometheus_rule_group_last_duration_seconds` now reports seconds instead of nanoseconds. #5153
+* [BUGFIX] Make sure the targets are consistently sorted in the targets page. #5161
+
+## 2.7.0 / 2019-01-28
+
+We're rolling back the Dockerfile changes introduced in 2.6.0. If you made changes to your docker deployment in 2.6.0, you will need to roll them back. This release also adds experimental support for disk size based retention. To accommodate that we are deprecating the flag `storage.tsdb.retention` in favour of `storage.tsdb.retention.time`. We print a warning if the flag is in use, but it will function without breaking until Prometheus 3.0.
+
+* [CHANGE] Rollback Dockerfile to version at 2.5.0. Rollback of the breaking change introduced in 2.6.0. #5122
+* [FEATURE] Add subqueries to PromQL. #4831
+* [FEATURE] [EXPERIMENTAL] Add support for disk size based retention. Note that we don't consider the WAL size which could be significant and the time based retention policy also applies. #5109 prometheus/tsdb#343
+* [FEATURE] Add CORS origin flag. #5011
+* [ENHANCEMENT] Consul SD: Add tagged address to the discovery metadata. #5001
+* [ENHANCEMENT] Kubernetes SD: Add service external IP and external name to the discovery metadata. #4940
+* [ENHANCEMENT] Azure SD: Add support for Managed Identity authentication. #4590
+* [ENHANCEMENT] Azure SD: Add tenant and subscription IDs to the discovery metadata. #4969
+* [ENHANCEMENT] OpenStack SD: Add support for application credentials based authentication. #4968
+* [ENHANCEMENT] Add metric for number of rule groups loaded. #5090
+* [BUGFIX] Avoid duplicate tests for alert unit tests. #4964
+* [BUGFIX] Don't depend on given order when comparing samples in alert unit testing. #5049
+* [BUGFIX] Make sure the retention period doesn't overflow. #5112
+* [BUGFIX] Make sure the blocks don't get very large. #5112
+* [BUGFIX] Don't generate blocks with no samples. prometheus/tsdb#374
+* [BUGFIX] Reintroduce metric for WAL corruptions. prometheus/tsdb#473
+
+## 2.6.1 / 2019-01-15
+
+* [BUGFIX] Azure SD: Fix discovery getting stuck sometimes. #5088
+* [BUGFIX] Marathon SD: Use `Tasks.Ports` when `RequirePorts` is `false`. #5026
+* [BUGFIX] Promtool: Fix "out-of-order sample" errors when testing rules. #5069
+
+## 2.6.0 / 2018-12-17
+
+* [CHANGE] Remove default flags from the container's entrypoint, run Prometheus from `/etc/prometheus` and symlink the storage directory to `/etc/prometheus/data`. #4976
+* [CHANGE] Promtool: Remove the `update` command. #3839
+* [FEATURE] Add JSON log format via the `--log.format` flag. #4876
+* [FEATURE] API: Add /api/v1/labels endpoint to get all label names. #4835
+* [FEATURE] Web: Allow setting the page's title via the `--web.ui-title` flag. #4841
+* [ENHANCEMENT] Add `prometheus_tsdb_lowest_timestamp_seconds`, `prometheus_tsdb_head_min_time_seconds` and `prometheus_tsdb_head_max_time_seconds` metrics. #4888
+* [ENHANCEMENT] Add `rule_group_last_evaluation_timestamp_seconds` metric. #4852
+* [ENHANCEMENT] Add `prometheus_template_text_expansion_failures_total` and `prometheus_template_text_expansions_total` metrics. #4747
+* [ENHANCEMENT] Set consistent User-Agent header in outgoing requests. #4891
+* [ENHANCEMENT] Azure SD: Error out at load time when authentication parameters are missing. #4907
+* [ENHANCEMENT] EC2 SD: Add the machine's private DNS name to the discovery metadata. #4693
+* [ENHANCEMENT] EC2 SD: Add the operating system's platform to the discovery metadata. #4663
+* [ENHANCEMENT] Kubernetes SD: Add the pod's phase to the discovery metadata. #4824
+* [ENHANCEMENT] Kubernetes SD: Log Kubernetes messages. #4931
+* [ENHANCEMENT] Promtool: Collect CPU and trace profiles. #4897
+* [ENHANCEMENT] Promtool: Support writing output as JSON. #4848
+* [ENHANCEMENT] Remote Read: Return available data if remote read fails partially. #4832
+* [ENHANCEMENT] Remote Write: Improve queue performance. #4772
+* [ENHANCEMENT] Remote Write: Add min_shards parameter to set the minimum number of shards. #4924
+* [ENHANCEMENT] TSDB: Improve WAL reading. #4953
+* [ENHANCEMENT] TSDB: Memory improvements. #4953
+* [ENHANCEMENT] Web: Log stack traces on panic. #4221
+* [ENHANCEMENT] Web UI: Add copy to clipboard button for configuration. #4410
+* [ENHANCEMENT] Web UI: Support console queries at specific times. #4764
+* [ENHANCEMENT] Web UI: group targets by job then instance. #4898 #4806
+* [BUGFIX] Deduplicate handler labels for HTTP metrics. #4732
+* [BUGFIX] Fix leaked queriers causing shutdowns to hang. #4922
+* [BUGFIX] Fix configuration loading panics on nil pointer slice elements. #4942
+* [BUGFIX] API: Correctly skip mismatching targets on /api/v1/targets/metadata. #4905
+* [BUGFIX] API: Better rounding for incoming query timestamps. #4941
+* [BUGFIX] Azure SD: Fix panic. #4867
+* [BUGFIX] Console templates: Fix hover when the metric has a null value. #4906
+* [BUGFIX] Discovery: Remove all targets when the scrape configuration gets empty. #4819
+* [BUGFIX] Marathon SD: Fix leaked connections. #4915
+* [BUGFIX] Marathon SD: Use 'hostPort' member of portMapping to construct target endpoints. #4887
+* [BUGFIX] PromQL: Fix a goroutine leak in the lexer/parser. #4858
+* [BUGFIX] Scrape: Pass through content-type for non-compressed output. #4912
+* [BUGFIX] Scrape: Fix deadlock in the scrape's manager. #4894
+* [BUGFIX] Scrape: Scrape targets at fixed intervals even after Prometheus restarts. #4926
+* [BUGFIX] TSDB: Support restored snapshots including the head properly. #4953
+* [BUGFIX] TSDB: Repair WAL when the last record in a segment is torn. #4953
+* [BUGFIX] TSDB: Fix unclosed file readers on Windows systems. #4997
+* [BUGFIX] Web: Avoid proxy to connect to the local gRPC server. #4572
+
 ## 2.5.0 / 2018-11-06

 * [CHANGE] Group targets by scrape config instead of job name. #4806 #4526
@@ -60,7 +242,7 @@ This release includes multiple bugfixes and features. Further, the WAL implement
 * [ENHANCEMENT] Send EndsAt along with the alert to Alertmanager #4550
 * [ENHANCEMENT] Limit the samples returned by remote read endpoint #4532
 * [ENHANCEMENT] Limit the data read in through remote read #4239
-* [ENHANCEMENT] Coalesce identical SD configuations #3912
+* [ENHANCEMENT] Coalesce identical SD configurations #3912
 * [ENHANCEMENT] `promtool`: Add new commands for debugging and querying #4247 #4308 #4346 #4454
 * [ENHANCEMENT] Update console examples for node_exporter v0.16.0 #4208
 * [ENHANCEMENT] Optimize PromQL aggregations #4248
@@ -90,13 +272,13 @@ This release includes multiple bugfixes and features. Further, the WAL implement
 * [BUGFIX] discovery/kubernetes/ingress: fix scheme discovery #4329
 * [BUGFIX] Fix race in zookeeper sd #4355
 * [BUGFIX] Better timeout handling in promql #4291 #4300
-* [BUGFIX] Propogate errors when selecting series from the tsdb #4136
+* [BUGFIX] Propagate errors when selecting series from the tsdb #4136

 ## 2.3.1 / 2018-06-19

 * [BUGFIX] Avoid infinite loop on duplicate NaN values. #4275
 * [BUGFIX] Fix nil pointer deference when using various API endpoints #4282
-* [BUGFIX] config: set target group source index during unmarshalling #4245
+* [BUGFIX] config: set target group source index during unmarshaling #4245
 * [BUGFIX] discovery/file: fix logging #4178
 * [BUGFIX] kubernetes_sd: fix namespace filtering #4285
 * [BUGFIX] web: restore old path prefix behavior #4273
@@ -110,7 +292,7 @@ This release includes multiple bugfixes and features. Further, the WAL implement
 * [FEATURE] Add security headers to HTTP server responses
 * [FEATURE] Pass query hints via remote read API
 * [FEATURE] Basic auth passwords can now be configured via file across all configuration
-* [ENHANCEMENT] Optimise PromQL and API serialization for memory usage and allocations
+* [ENHANCEMENT] Optimize PromQL and API serialization for memory usage and allocations
 * [ENHANCEMENT] Limit number of dropped targets in web UI
 * [ENHANCEMENT] Consul and EC2 service discovery allow using server-side filtering for performance improvement
 * [ENHANCEMENT] Add advanced filtering configuration to EC2 service discovery
@@ -133,7 +315,7 @@ This release includes multiple bugfixes and features. Further, the WAL implement
 * [BUGFIX] Fix data loss in TSDB on compaction
 * [BUGFIX] Correctly stop timer in remote-write path
-* [BUGFIX] Fix deadlock triggerd by loading targets page
+* [BUGFIX] Fix deadlock triggered by loading targets page
 * [BUGFIX] Fix incorrect buffering of samples on range selection queries
 * [BUGFIX] Handle large index files on windows properly
@@ -526,7 +708,7 @@ This is a breaking change to the Kubernetes service discovery.
 * [ENHANCEMENT] Message on empty Alerts page.
 * [ENHANCEMENT] Various internal code refactorings and clean-ups.
 * [ENHANCEMENT] Various improvements in the build system.
-* [BUGFIX] Catch errors when unmarshalling delta/doubleDelta encoded chunks.
+* [BUGFIX] Catch errors when unmarshaling delta/doubleDelta encoded chunks.
 * [BUGFIX] Fix data race in lexer and lexer test.
 * [BUGFIX] Trim stray whitespace from bearer token file.
 * [BUGFIX] Avoid divide-by-zero panic on query_range?step=0.
@@ -1118,7 +1300,7 @@ All changes:
   from embedding into the binary. Those files are only used for debugging,
   and then you can use -web.use-local-assets. By including fewer files, the
   RAM usage during compilation is much more manageable.
-* [ENHANCEMENT] Help link points to http://prometheus.github.io now.
+* [ENHANCEMENT] Help link points to https://prometheus.github.io now.
 * [FEATURE] Consoles for haproxy and cloudwatch.
 * [BUGFIX] Several fixes to graphs in consoles.
 * [CLEANUP] Removed a file size check that did not check anything.
@@ -1211,4 +1393,4 @@ All changes:
 * [BUGFIX] Built from Go 1.2.1, which has internal fixes to race conditions in garbage collection handling.
 * [ENHANCEMENT] Internal storage interface refactoring that allows building e.g. the `rule_checker` tool without LevelDB dynamic library dependencies.
 * [ENHANCEMENT] Cleanups around shutdown handling.
-* [PERFORMANCE] Preparations for better memory reuse during marshalling / unmarshalling.
+* [PERFORMANCE] Preparations for better memory reuse during marshaling / unmarshaling.

CONTRIBUTING.md

@@ -17,7 +17,7 @@ Prometheus uses GitHub to manage reviews of pull requests.
   Comments](https://code.google.com/p/go-wiki/wiki/CodeReviewComments)
   and the _Formatting and style_ section of Peter Bourgon's [Go: Best
   Practices for Production
-  Environments](http://peter.bourgon.org/go-in-production/#formatting-and-style).
+  Environments](https://peter.bourgon.org/go-in-production/#formatting-and-style).

 * Be sure to sign off on the [DCO](https://github.com/probot/dco#how-it-works)
@@ -40,7 +40,9 @@ go build ./cmd/prometheus/
 make test # Make sure all the tests pass before you commit and push :)
 ```

+We use [`golangci-lint`](https://github.com/golangci/golangci-lint) for linting the code. If it reports an issue and you think that the warning needs to be disregarded or is a false-positive, you can add a special comment `//nolint:linter1[,linter2,...]` before the offending line. Use this sparingly though, fixing the code to comply with the linter's recommendation is in general the preferred course of action.
+
-All our issues are regularly tagged so that you can also filter down the issues involving the components you want to work on. For our labelling policy refer [the wiki page](https://github.com/prometheus/prometheus/wiki/Label-Names-and-Descriptions).
+All our issues are regularly tagged so that you can also filter down the issues involving the components you want to work on. For our labeling policy refer [the wiki page](https://github.com/prometheus/prometheus/wiki/Label-Names-and-Descriptions).

 ## Pull Request Checklist
@@ -54,7 +56,7 @@ All our issues are regularly tagged so that you can also filter down the issues
 ## Dependency management

-The Prometheus project uses [Go modules](https://golang.org/cmd/go/#hdr-Modules__module_versions__and_more) to manage dependencies on external packages. This requires a working Go environment with version 1.11 or greater installed.
+The Prometheus project uses [Go modules](https://golang.org/cmd/go/#hdr-Modules__module_versions__and_more) to manage dependencies on external packages. This requires a working Go environment with version 1.12 or greater installed.

 All dependencies are vendored in the `vendor/` directory.
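The new `CONTRIBUTING.md` paragraph describes `//nolint` comments; a hypothetical example of silencing `errcheck` on a single line (the file path and rationale are made up for illustration):

```go
package main

import "os"

func main() {
	f, err := os.Open("/etc/hostname")
	if err != nil {
		return
	}
	// errcheck would normally flag the unchecked error returned by
	// Close; the directive suppresses it for this line only.
	defer f.Close() //nolint:errcheck
}
```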

Dockerfile

@@ -1,8 +1,12 @@
-FROM quay.io/prometheus/busybox:latest
+ARG ARCH="amd64"
+ARG OS="linux"
+FROM quay.io/prometheus/busybox-${OS}-${ARCH}:latest
 LABEL maintainer="The Prometheus Authors <prometheus-developers@googlegroups.com>"

-COPY prometheus                             /bin/prometheus
-COPY promtool                               /bin/promtool
+ARG ARCH="amd64"
+ARG OS="linux"
+COPY .build/${OS}-${ARCH}/prometheus        /bin/prometheus
+COPY .build/${OS}-${ARCH}/promtool          /bin/promtool
 COPY documentation/examples/prometheus.yml  /etc/prometheus/prometheus.yml
 COPY console_libraries/                     /usr/share/prometheus/console_libraries/
 COPY consoles/                              /usr/share/prometheus/consoles/
@@ -15,8 +19,8 @@ USER       nobody
 EXPOSE     9090
 VOLUME     [ "/prometheus" ]
 WORKDIR    /prometheus
-ENTRYPOINT [ "/bin/prometheus", \
-             "--storage.tsdb.path=/prometheus", \
-             "--web.console.libraries=/etc/prometheus/console_libraries", \
-             "--web.console.templates=/etc/prometheus/consoles", \
-             "--config.file=/etc/prometheus/prometheus.yml" ]
+ENTRYPOINT [ "/bin/prometheus" ]
+CMD        [ "--config.file=/etc/prometheus/prometheus.yml", \
+             "--storage.tsdb.path=/prometheus", \
+             "--web.console.libraries=/usr/share/prometheus/console_libraries", \
+             "--web.console.templates=/usr/share/prometheus/consoles" ]

MAINTAINERS.md

@@ -1,6 +1,6 @@
 Maintainers of this repository with their focus areas:

 * Brian Brazil <brian.brazil@robustperception.io> @brian-brazil: Console templates; semantics of PromQL, service discovery, and relabeling.
-* Fabian Reinartz <fabian.reinartz@coreos.com> @fabxc: PromQL parsing and evaluation; implementation of retrieval, alert notification, and service discovery.
+* Fabian Reinartz <freinartz@google.com> @fabxc: PromQL parsing and evaluation; implementation of retrieval, alert notification, and service discovery.
 * Julius Volz <julius.volz@gmail.com> @juliusv: Remote storage integrations; web UI.

Makefile

@@ -11,24 +11,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+# Needs to be defined before including Makefile.common to auto-generate targets
+DOCKER_ARCHS ?= amd64 armv7 arm64
+
 include Makefile.common

-STATICCHECK_IGNORE = \
-  github.com/prometheus/prometheus/discovery/kubernetes/kubernetes.go:SA1019 \
-  github.com/prometheus/prometheus/discovery/kubernetes/node.go:SA1019 \
-  github.com/prometheus/prometheus/documentation/examples/remote_storage/remote_storage_adapter/main.go:SA1019 \
-  github.com/prometheus/prometheus/pkg/textparse/promlex.l.go:SA4006 \
-  github.com/prometheus/prometheus/pkg/textparse/openmetricslex.l.go:SA4006 \
-  github.com/prometheus/prometheus/pkg/pool/pool.go:SA6002 \
-  github.com/prometheus/prometheus/promql/engine.go:SA6002 \
-  github.com/prometheus/prometheus/prompb/rpc.pb.gw.go:SA1019
-
 DOCKER_IMAGE_NAME ?= prometheus

-# Go modules needs the bzr binary because of the dependency on launchpad.net/gocheck.
-$(eval $(call PRECHECK_COMMAND_template,bzr))
-PRECHECK_OPTIONS_bzr = version
-
 .PHONY: assets
 assets:
 	@echo ">> writing assets"

Makefile.common

@@ -29,12 +29,15 @@ GO            ?= go
 GOFMT         ?= $(GO)fmt
 FIRST_GOPATH  := $(firstword $(subst :, ,$(shell $(GO) env GOPATH)))
 GOOPTS        ?=
+GOHOSTOS      ?= $(shell $(GO) env GOHOSTOS)
+GOHOSTARCH    ?= $(shell $(GO) env GOHOSTARCH)

 GO_VERSION        ?= $(shell $(GO) version)
 GO_VERSION_NUMBER ?= $(word 3, $(GO_VERSION))
 PRE_GO_111        ?= $(shell echo $(GO_VERSION_NUMBER) | grep -E 'go1\.(10|[0-9])\.')

-unexport GOVENDOR
+GOVENDOR :=
+GO111MODULE :=
 ifeq (, $(PRE_GO_111))
 	ifneq (,$(wildcard go.mod))
 		# Enforce Go modules support just in case the directory is inside GOPATH (and for Travis CI).
@@ -55,32 +58,58 @@ $(warning Some recipes may not work as expected as the current Go runtime is '$(
 		# This repository isn't using Go modules (yet).
 		GOVENDOR := $(FIRST_GOPATH)/bin/govendor
 	endif
-
-	unexport GO111MODULE
 endif

 PROMU := $(FIRST_GOPATH)/bin/promu
-STATICCHECK := $(FIRST_GOPATH)/bin/staticcheck
 pkgs = ./...

-GO_VERSION ?= $(shell $(GO) version)
-GO_BUILD_PLATFORM ?= $(subst /,-,$(lastword $(GO_VERSION)))
+ifeq (arm, $(GOHOSTARCH))
+	GOHOSTARM ?= $(shell GOARM= $(GO) env GOARM)
+	GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH)v$(GOHOSTARM)
+else
+	GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH)
+endif

-PROMU_VERSION ?= 0.2.0
+PROMU_VERSION ?= 0.4.0
 PROMU_URL     := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz
+
+GOLANGCI_LINT :=
+GOLANGCI_LINT_OPTS ?=
+GOLANGCI_LINT_VERSION ?= v1.16.0
+# golangci-lint only supports linux, darwin and windows platforms on i386/amd64.
+# windows isn't included here because of the path separator being different.
+ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))
+	ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386))
+		GOLANGCI_LINT := $(FIRST_GOPATH)/bin/golangci-lint
+	endif
+endif

 PREFIX                  ?= $(shell pwd)
 BIN_DIR                 ?= $(shell pwd)
 DOCKER_IMAGE_TAG        ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD))
+DOCKERFILE_PATH         ?= ./
 DOCKER_REPO             ?= prom

-.PHONY: all
-all: precheck style staticcheck unused build test
+DOCKER_ARCHS            ?= amd64
+
+BUILD_DOCKER_ARCHS = $(addprefix common-docker-,$(DOCKER_ARCHS))
+PUBLISH_DOCKER_ARCHS = $(addprefix common-docker-publish-,$(DOCKER_ARCHS))
+TAG_DOCKER_ARCHS = $(addprefix common-docker-tag-latest-,$(DOCKER_ARCHS))
+
+ifeq ($(GOHOSTARCH),amd64)
+	ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux freebsd darwin windows))
+		# Only supported on amd64
+		test-flags := -race
+	endif
+endif

 # This rule is used to forward a target like "build" to "common-build". This
 # allows a new "build" target to be defined in a Makefile which includes this
 # one and override "common-build" without override warnings.
 %: common-% ;

+.PHONY: common-all
+common-all: precheck style check_license lint unused build test
+
 .PHONY: common-style
 common-style:
 	@echo ">> checking code style"
@@ -102,6 +131,15 @@ common-check_license:
 		exit 1; \
 	fi

+.PHONY: common-deps
+common-deps:
+	@echo ">> getting dependencies"
+ifdef GO111MODULE
+	GO111MODULE=$(GO111MODULE) $(GO) mod download
+else
+	$(GO) get $(GOOPTS) -t ./...
+endif
+
 .PHONY: common-test-short
 common-test-short:
 	@echo ">> running short tests"
@@ -110,26 +148,35 @@ common-test-short:
 .PHONY: common-test
 common-test:
 	@echo ">> running all tests"
-	GO111MODULE=$(GO111MODULE) $(GO) test -race $(GOOPTS) $(pkgs)
+	GO111MODULE=$(GO111MODULE) $(GO) test $(test-flags) $(GOOPTS) $(pkgs)

 .PHONY: common-format
 common-format:
 	@echo ">> formatting code"
-	GO111MODULE=$(GO111MODULE) $(GO) fmt $(GOOPTS) $(pkgs)
+	GO111MODULE=$(GO111MODULE) $(GO) fmt $(pkgs)

 .PHONY: common-vet
 common-vet:
 	@echo ">> vetting code"
 	GO111MODULE=$(GO111MODULE) $(GO) vet $(GOOPTS) $(pkgs)

-.PHONY: common-staticcheck
-common-staticcheck: $(STATICCHECK)
-	@echo ">> running staticcheck"
+.PHONY: common-lint
+common-lint: $(GOLANGCI_LINT)
+ifdef GOLANGCI_LINT
+	@echo ">> running golangci-lint"
 ifdef GO111MODULE
-	GO111MODULE=$(GO111MODULE) $(STATICCHECK) -ignore "$(STATICCHECK_IGNORE)" -checks "SA*" $(pkgs)
+# 'go list' needs to be executed before staticcheck to prepopulate the modules cache.
+# Otherwise staticcheck might fail randomly for some reason not yet explained.
+	GO111MODULE=$(GO111MODULE) $(GO) list -e -compiled -test=true -export=false -deps=true -find=false -tags= -- ./... > /dev/null
+	GO111MODULE=$(GO111MODULE) $(GOLANGCI_LINT) run $(GOLANGCI_LINT_OPTS) $(pkgs)
 else
-	$(STATICCHECK) -ignore "$(STATICCHECK_IGNORE)" $(pkgs)
+	$(GOLANGCI_LINT) run $(pkgs)
 endif
+endif
+
+# For backward-compatibility.
+.PHONY: common-staticcheck
+common-staticcheck: lint

 .PHONY: common-unused
 common-unused: $(GOVENDOR)
@@ -140,8 +187,9 @@ else
 ifdef GO111MODULE
 	@echo ">> running check for unused/missing packages in go.mod"
 	GO111MODULE=$(GO111MODULE) $(GO) mod tidy
+ifeq (,$(wildcard vendor))
 	@git diff --exit-code -- go.sum go.mod
-ifneq (,$(wildcard vendor))
+else
 	@echo ">> running check for unused packages in vendor/"
 	GO111MODULE=$(GO111MODULE) $(GO) mod vendor
 	@git diff --exit-code -- go.sum go.mod vendor/
@@ -159,45 +207,50 @@ common-tarball: promu
 	@echo ">> building release tarball"
 	$(PROMU) tarball --prefix $(PREFIX) $(BIN_DIR)

-.PHONY: common-docker
-common-docker:
-	docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" .
+.PHONY: common-docker $(BUILD_DOCKER_ARCHS)
+common-docker: $(BUILD_DOCKER_ARCHS)
+$(BUILD_DOCKER_ARCHS): common-docker-%:
+	docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" \
+		--build-arg ARCH="$*" \
+		--build-arg OS="linux" \
+		$(DOCKERFILE_PATH)

-.PHONY: common-docker-publish
-common-docker-publish:
-	docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)"
+.PHONY: common-docker-publish $(PUBLISH_DOCKER_ARCHS)
+common-docker-publish: $(PUBLISH_DOCKER_ARCHS)
+$(PUBLISH_DOCKER_ARCHS): common-docker-publish-%:
+	docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)"

-.PHONY: common-docker-tag-latest
-common-docker-tag-latest:
-	docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):latest"
+.PHONY: common-docker-tag-latest $(TAG_DOCKER_ARCHS)
+common-docker-tag-latest: $(TAG_DOCKER_ARCHS)
+$(TAG_DOCKER_ARCHS): common-docker-tag-latest-%:
+	docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest"
+
+.PHONY: common-docker-manifest
+common-docker-manifest:
+	DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(DOCKER_IMAGE_TAG))
+	DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)"

 .PHONY: promu
 promu: $(PROMU)

 $(PROMU):
-	curl -s -L $(PROMU_URL) | tar -xvz -C /tmp
-	mkdir -v -p $(FIRST_GOPATH)/bin
-	cp -v /tmp/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(PROMU)
+	$(eval PROMU_TMP := $(shell mktemp -d))
+	curl -s -L $(PROMU_URL) | tar -xvzf - -C $(PROMU_TMP)
+	mkdir -p $(FIRST_GOPATH)/bin
+	cp $(PROMU_TMP)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(FIRST_GOPATH)/bin/promu
+	rm -r $(PROMU_TMP)

 .PHONY: proto
 proto:
 	@echo ">> generating code from proto files"
 	@./scripts/genproto.sh

-.PHONY: $(STATICCHECK)
-$(STATICCHECK):
-ifdef GO111MODULE
-# Get staticcheck from a temporary directory to avoid modifying the local go.{mod,sum}.
-# See https://github.com/golang/go/issues/27643.
-# For now, we are using the next branch of staticcheck because master isn't compatible yet with Go modules.
-	tmpModule=$$(mktemp -d 2>&1) && \
-	mkdir -p $${tmpModule}/staticcheck && \
-	cd "$${tmpModule}"/staticcheck && \
-	GO111MODULE=on $(GO) mod init example.com/staticcheck && \
-	GO111MODULE=on GOOS= GOARCH= $(GO) get -u honnef.co/go/tools/cmd/staticcheck@next && \
-	rm -rf $${tmpModule};
-else
-	GOOS= GOARCH= GO111MODULE=off $(GO) get -u honnef.co/go/tools/cmd/staticcheck
+ifdef GOLANGCI_LINT
+$(GOLANGCI_LINT):
+	mkdir -p $(FIRST_GOPATH)/bin
+	curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/$(GOLANGCI_LINT_VERSION)/install.sh \
+		| sed -e '/install -d/d' \
+		| sh -s -- -b $(FIRST_GOPATH)/bin $(GOLANGCI_LINT_VERSION)
 endif

 ifdef GOVENDOR
@@ -212,9 +265,8 @@ precheck::
 define PRECHECK_COMMAND_template =
 precheck:: $(1)_precheck

 PRECHECK_COMMAND_$(1) ?= $(1) $$(strip $$(PRECHECK_OPTIONS_$(1)))
-.PHONE: $(1)_precheck
+.PHONY: $(1)_precheck
 $(1)_precheck:
 	@if ! $$(PRECHECK_COMMAND_$(1)) 1>/dev/null 2>&1; then \
 		echo "Execution of '$$(PRECHECK_COMMAND_$(1))' command failed. Is $(1) installed?"; \

NOTICE

@@ -2,13 +2,13 @@ The Prometheus systems and service monitoring server
 Copyright 2012-2015 The Prometheus Authors

 This product includes software developed at
-SoundCloud Ltd. (http://soundcloud.com/).
+SoundCloud Ltd. (https://soundcloud.com/).

 The following components are included in this product:

 Bootstrap
-http://getbootstrap.com
+https://getbootstrap.com
 Copyright 2011-2014 Twitter, Inc.
 Licensed under the MIT License
@@ -52,7 +52,7 @@ Copyright jQuery Foundation and other contributors
 Licensed under the MIT License

 Protocol Buffers for Go with Gadgets
-http://github.com/gogo/protobuf/
+https://github.com/gogo/protobuf/
 Copyright (c) 2013, The GoGo Authors.
 See source code for license details.
@@ -67,7 +67,7 @@ Copyright 2013 Matt T. Proud
 Licensed under the Apache License, Version 2.0

 DNS library in Go
-http://miek.nl/posts/2014/Aug/16/go-dns-package/
+https://miek.nl/2014/august/16/go-dns-package/
 Copyright 2009 The Go Authors, 2011 Miek Gieben
 See https://github.com/miekg/dns/blob/master/LICENSE for license details.

README.md

@@ -1,5 +1,6 @@
-# Prometheus [![Build Status](https://travis-ci.org/prometheus/prometheus.svg)][travis]
+# Prometheus
+
+[![Build Status](https://travis-ci.org/prometheus/prometheus.svg)][travis]
 [![CircleCI](https://circleci.com/gh/prometheus/prometheus/tree/master.svg?style=shield)][circleci]
 [![Docker Repository on Quay](https://quay.io/repository/prometheus/prometheus/status)][quay]
 [![Docker Pulls](https://img.shields.io/docker/pulls/prom/prometheus.svg?maxAge=604800)][hub]
@@ -57,7 +58,7 @@ Prometheus will now be reachable at http://localhost:9090/.
 ### Building from source

 To build Prometheus from the source code yourself you need to have a working
-Go environment with [version 1.11 or greater installed](http://golang.org/doc/install).
+Go environment with [version 1.12 or greater installed](https://golang.org/doc/install).

 You can directly use the `go` tool to download and install the `prometheus`
 and `promtool` binaries into your `GOPATH`:
@@ -86,7 +87,7 @@ The Makefile provides several targets:

 ## More information

-* The source code is periodically indexed: [Prometheus Core](http://godoc.org/github.com/prometheus/prometheus).
+* The source code is periodically indexed: [Prometheus Core](https://godoc.org/github.com/prometheus/prometheus).
 * You will find a Travis CI configuration in `.travis.yml`.
 * See the [Community page](https://prometheus.io/community) for how to reach the Prometheus developers and users on various communication channels.

RELEASE.md

@@ -1,28 +1,35 @@
 # Releases

-This page describes the release process and the currently planned schedule for upcoming releases as well as the respective release schepherds. Release shepards are chosen on a voluntary basis.
+This page describes the release process and the currently planned schedule for upcoming releases as well as the respective release shepherd. Release shepherds are chosen on a voluntary basis.

 ## Release schedule

 Release cadence of first pre-releases being cut is 6 weeks.

-| release series | date of first pre-release (year-month-day) | release shepard                             |
+| release series | date of first pre-release (year-month-day) | release shepherd                            |
 |----------------|--------------------------------------------|---------------------------------------------|
 | v2.4           | 2018-09-06                                 | Goutham Veeramachaneni (GitHub: @gouthamve) |
 | v2.5           | 2018-10-24                                 | Frederic Branczyk (GitHub: @brancz)         |
 | v2.6           | 2018-12-05                                 | Simon Pasquier (GitHub: @simonpasquier)     |
-| v2.7           | 2019-01-16                                 | **searching for volunteer**                 |
+| v2.7           | 2019-01-16                                 | Goutham Veeramachaneni (GitHub: @gouthamve) |
+| v2.8           | 2019-02-27                                 | Ganesh Vernekar (GitHub: @codesome)         |
+| v2.9           | 2019-04-10                                 | Brian Brazil (GitHub: @brian-brazil)        |
+| v2.10          | 2019-05-22                                 | Björn Rabenstein (GitHub: @beorn7)          |
+| v2.11          | 2019-07-03                                 | Frederic Branczyk (GitHub: @brancz)         |
+| v2.12          | 2019-08-14                                 | Julius Volz (GitHub: @juliusv)              |
+| v2.13          | 2019-09-25                                 | Krasi Georgiev (GitHub: @krasi-georgiev)    |
+| v2.14          | 2019-11-06                                 | **searching for volunteer**                 |

 If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.

-## Release shepard responsibilities
+## Release shepherd responsibilities

-The release shepard is responsible for the entire release series of a minor release, meaning all pre- and patch releases of a minor release. The process starts with the initial pre-release.
+The release shepherd is responsible for the entire release series of a minor release, meaning all pre- and patch releases of a minor release. The process formally starts with the initial pre-release, but some preparations should be done a few days in advance.

-* The first pre-release is scheduled according to the above schedule.
-* With the pre-release the release shepard is responsible for running and monitoring a benchmark run of the pre-release for 3 days, after which, if successful, the pre-release is promoted to a stable release.
-* Once a pre-release has been released, the `master` branch of the repository is frozen for any feature work, only critical bug fix work concerning the minor release is merged.
-* Pre-releases are done from `master`, after pre-releases are promoted to the stable release a `release-major.minor` branch is created.
+* We aim to keep the master branch in a working state at all times. In principle, it should be possible to cut a release from master at any time. In practice, things might not work out as nicely. A few days before the pre-release is scheduled, the shepherd should check the state of master. Following their best judgement, the shepherd should try to expedite bug fixes that are still in progress but should make it into the release. On the other hand, the shepherd may hold back merging last-minute invasive and risky changes that are better suited for the next minor release.
+* On the date listed in the table above, the release shepherd cuts the first pre-release (using the suffix `-rc.0`) and creates a new branch called `release-<major>.<minor>` starting at the commit tagged for the pre-release. In general, a pre-release is considered a release candidate (that's what `rc` stands for) and should therefore not contain any known bugs that are planned to be fixed in the final release.
+* With the pre-release, the release shepherd is responsible for running and monitoring a benchmark run of the pre-release for 3 days, after which, if successful, the pre-release is promoted to a stable release.
+* If regressions or critical bugs are detected, they need to get fixed before cutting a new pre-release (called `-rc.1`, `-rc.2`, etc.).

 See the next section for details on cutting an individual release.

@@ -32,13 +39,13 @@ These instructions are currently valid for the Prometheus server, i.e. the [prom
 ### Branch management and versioning strategy

-We use [Semantic Versioning](http://semver.org/).
+We use [Semantic Versioning](https://semver.org/).

 We maintain a separate branch for each minor release, named `release-<major>.<minor>`, e.g. `release-1.1`, `release-2.0`.

-The usual flow is to merge new features and changes into the master branch and to merge bug fixes into the latest release branch. Bug fixes are then merged into master from the latest release branch. The master branch should always contain all commits from the latest release branch. Whether merging master back into a release branch makes more sense is left up to the shepard's judgement.
+The usual flow is to merge new features and changes into the master branch and to merge bug fixes into the latest release branch. Bug fixes are then merged into master from the latest release branch. The master branch should always contain all commits from the latest release branch. As long as master hasn't deviated from the release branch, new commits can also go to master, followed by merging master back into the release branch.

-If a bug fix got accidentally merged into master, cherry-pick commits have to be created in the latest release branch, which then have to be merged back into master. Try to avoid that situation.
+If a bug fix got accidentally merged into master after non-bug-fix changes in master, the bug-fix commits have to be cherry-picked into the release branch, which then have to be merged back into master. Try to avoid that situation.

 Maintaining the release branches for older minor releases happens on a best effort basis.

@@ -88,13 +95,13 @@ If the release has happened in the latest release branch, merge the changes into
 To update the docs, a PR needs to be created to `prometheus/docs`. See [this PR](https://github.com/prometheus/docs/pull/952/files) for inspiration.

-Once the binaries have been uploaded, announce the release on `prometheus-users@googlegroups.com`. Start the subject with `[ANN]`. Check out previous announcement mails for inspiration.
+Once the binaries have been uploaded, announce the release on `prometheus-announce@googlegroups.com`. (Please do not use `prometheus-users@googlegroups.com` for announcements anymore.) Check out previous announcement mails for inspiration.

 ### Pre-releases

 The following changes to the above procedures apply:

-* In line with [Semantic Versioning](http://semver.org/), append something like `-rc.0` to the version (with the corresponding changes to the tag name, the release name etc.).
+* In line with [Semantic Versioning](https://semver.org/), append something like `-rc.0` to the version (with the corresponding changes to the tag name, the release name etc.).
 * Tick the _This is a pre-release_ box when drafting the release in the Github UI.
 * Still update `CHANGELOG.md`, but when you cut the final release later, merge all the changes from the pre-releases into the one final update.

View file

@@ -1 +1 @@
-2.5.0
+2.10.0

View file

@@ -26,6 +26,7 @@ import (
 	"os"
 	"os/signal"
 	"path/filepath"
+	"regexp"
 	"runtime"
 	"strings"
 	"sync"
@@ -34,22 +35,23 @@ import (
 	"github.com/go-kit/kit/log"
 	"github.com/go-kit/kit/log/level"
-	"github.com/oklog/oklog/pkg/group"
+	conntrack "github.com/mwitkow/go-conntrack"
+	"github.com/oklog/run"
 	"github.com/pkg/errors"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/common/model"
-	"github.com/prometheus/common/version"
-	prom_runtime "github.com/prometheus/prometheus/pkg/runtime"
-	"gopkg.in/alecthomas/kingpin.v2"
-	k8s_runtime "k8s.io/apimachinery/pkg/util/runtime"
-	"github.com/mwitkow/go-conntrack"
 	"github.com/prometheus/common/promlog"
+	"github.com/prometheus/common/version"
+	kingpin "gopkg.in/alecthomas/kingpin.v2"
+	"k8s.io/klog"

 	promlogflag "github.com/prometheus/common/promlog/flag"
 	"github.com/prometheus/prometheus/config"
 	"github.com/prometheus/prometheus/discovery"
 	sd_config "github.com/prometheus/prometheus/discovery/config"
 	"github.com/prometheus/prometheus/notifier"
+	"github.com/prometheus/prometheus/pkg/relabel"
+	prom_runtime "github.com/prometheus/prometheus/pkg/runtime"
 	"github.com/prometheus/prometheus/promql"
 	"github.com/prometheus/prometheus/rules"
 	"github.com/prometheus/prometheus/scrape"
@@ -69,10 +71,19 @@ var (
 		Name: "prometheus_config_last_reload_success_timestamp_seconds",
 		Help: "Timestamp of the last successful configuration reload.",
 	})
+
+	defaultRetentionString   = "15d"
+	defaultRetentionDuration model.Duration
 )

 func init() {
 	prometheus.MustRegister(version.NewCollector("prometheus"))
+
+	var err error
+	defaultRetentionDuration, err = model.ParseDuration(defaultRetentionString)
+	if err != nil {
+		panic(err)
+	}
 }
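The `init` hook above parses the human-readable default ("15d") into a typed `model.Duration` exactly once at startup, panicking only if the hard-coded literal is invalid. A minimal standalone sketch of that pattern (illustrative `main` package, not part of this diff):

```go
package main

import (
	"fmt"

	"github.com/prometheus/common/model"
)

var defaultRetention model.Duration

func init() {
	var err error
	// Parse once at startup; a failure here can only mean the literal was
	// edited into something invalid, so failing fast is the right behavior.
	defaultRetention, err = model.ParseDuration("15d")
	if err != nil {
		panic(err)
	}
}

func main() {
	fmt.Println(defaultRetention) // 15d
}
```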
 func main() {
@@ -81,6 +92,11 @@ func main() {
 		runtime.SetMutexProfileFraction(20)
 	}

+	var (
+		oldFlagRetentionDuration model.Duration
+		newFlagRetentionDuration model.Duration
+	)
+
 	cfg := struct {
 		configFile string
@@ -100,12 +116,14 @@ func main() {
 		RemoteFlushDeadline model.Duration

 		prometheusURL   string
+		corsRegexString string

-		logLevel promlog.AllowedLevel
+		promlogConfig promlog.Config
 	}{
 		notifier: notifier.Options{
 			Registerer: prometheus.DefaultRegisterer,
 		},
+		promlogConfig: promlog.Config{},
 	}
 	a := kingpin.New(filepath.Base(os.Args[0]), "The Prometheus monitoring server")
@@ -150,6 +168,12 @@ func main() {
 	a.Flag("web.console.libraries", "Path to the console library directory.").
 		Default("console_libraries").StringVar(&cfg.web.ConsoleLibrariesPath)

+	a.Flag("web.page-title", "Document title of Prometheus instance.").
+		Default("Prometheus Time Series Collection and Processing Server").StringVar(&cfg.web.PageTitle)
+
+	a.Flag("web.cors.origin", `Regex for CORS origin. It is fully anchored. Example: 'https?://(domain1|domain2)\.com'`).
+		Default(".*").StringVar(&cfg.corsRegexString)
+
 	a.Flag("storage.tsdb.path", "Base path for metrics storage.").
 		Default("data/").StringVar(&cfg.localStoragePath)
@@ -157,15 +181,28 @@ func main() {
 		Hidden().Default("2h").SetValue(&cfg.tsdb.MinBlockDuration)

 	a.Flag("storage.tsdb.max-block-duration",
-		"Maximum duration compacted blocks may span. For use in testing. (Defaults to 10% of the retention period).").
+		"Maximum duration compacted blocks may span. For use in testing. (Defaults to 10% of the retention period.)").
 		Hidden().PlaceHolder("<duration>").SetValue(&cfg.tsdb.MaxBlockDuration)

-	a.Flag("storage.tsdb.retention", "How long to retain samples in storage.").
-		Default("15d").SetValue(&cfg.tsdb.Retention)
+	a.Flag("storage.tsdb.wal-segment-size",
+		"Size at which to split the tsdb WAL segment files. Example: 100MB").
+		Hidden().PlaceHolder("<bytes>").BytesVar(&cfg.tsdb.WALSegmentSize)
+
+	a.Flag("storage.tsdb.retention", "[DEPRECATED] How long to retain samples in storage. This flag has been deprecated, use \"storage.tsdb.retention.time\" instead.").
+		SetValue(&oldFlagRetentionDuration)
+
+	a.Flag("storage.tsdb.retention.time", "How long to retain samples in storage. When this flag is set it overrides \"storage.tsdb.retention\". If neither this flag nor \"storage.tsdb.retention\" nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+".").
+		SetValue(&newFlagRetentionDuration)
+
+	a.Flag("storage.tsdb.retention.size", "[EXPERIMENTAL] Maximum number of bytes that can be stored for blocks. Units supported: KB, MB, GB, TB, PB. This flag is experimental and can be changed in future releases.").
+		BytesVar(&cfg.tsdb.MaxBytes)

 	a.Flag("storage.tsdb.no-lockfile", "Do not create lockfile in data directory.").
 		Default("false").BoolVar(&cfg.tsdb.NoLockfile)

+	a.Flag("storage.tsdb.allow-overlapping-blocks", "[EXPERIMENTAL] Allow overlapping blocks, which in turn enables vertical compaction and vertical query merge.").
+		Default("false").BoolVar(&cfg.tsdb.AllowOverlappingBlocks)
+
 	a.Flag("storage.remote.flush-deadline", "How long to wait flushing sample on shutdown or config reload.").
 		Default("1m").PlaceHolder("<duration>").SetValue(&cfg.RemoteFlushDeadline)
@@ -175,10 +212,10 @@ func main() {
 	a.Flag("storage.remote.read-concurrent-limit", "Maximum number of concurrent remote read calls. 0 means no limit.").
 		Default("10").IntVar(&cfg.web.RemoteReadConcurrencyLimit)

-	a.Flag("rules.alert.for-outage-tolerance", "Max time to tolerate prometheus outage for restoring 'for' state of alert.").
+	a.Flag("rules.alert.for-outage-tolerance", "Max time to tolerate prometheus outage for restoring \"for\" state of alert.").
 		Default("1h").SetValue(&cfg.outageTolerance)

-	a.Flag("rules.alert.for-grace-period", "Minimum duration between alert and restored 'for' state. This is maintained only for alerts with configured 'for' time greater than grace period.").
+	a.Flag("rules.alert.for-grace-period", "Minimum duration between alert and restored \"for\" state. This is maintained only for alerts with configured \"for\" time greater than grace period.").
 		Default("10m").SetValue(&cfg.forGracePeriod)

 	a.Flag("rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager.").
@@ -190,7 +227,7 @@ func main() {
 	a.Flag("alertmanager.timeout", "Timeout for sending alerts to Alertmanager.").
 		Default("10s").SetValue(&cfg.notifierTimeout)

-	a.Flag("query.lookback-delta", "The delta difference allowed for retrieving metrics during expression evaluations.").
+	a.Flag("query.lookback-delta", "The maximum lookback duration for retrieving metrics during expression evaluations.").
 		Default("5m").SetValue(&cfg.lookbackDelta)

 	a.Flag("query.timeout", "Maximum time a query may take before being aborted.").
@@ -198,10 +235,11 @@ func main() {
 	a.Flag("query.max-concurrency", "Maximum number of queries executed concurrently.").
 		Default("20").IntVar(&cfg.queryConcurrency)

-	a.Flag("query.max-samples", "Maximum number of samples a single query can load into memory. Note that queries will fail if they would load more samples than this into memory, so this also limits the number of samples a query can return.").
+	a.Flag("query.max-samples", "Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return.").
 		Default("50000000").IntVar(&cfg.queryMaxSamples)

-	promlogflag.AddFlags(a, &cfg.logLevel)
+	promlogflag.AddFlags(a, &cfg.promlogConfig)
 	_, err := a.Parse(os.Args[1:])
 	if err != nil {
@@ -210,12 +248,20 @@ func main() {
 		os.Exit(2)
 	}

+	logger := promlog.New(&cfg.promlogConfig)
+
 	cfg.web.ExternalURL, err = computeExternalURL(cfg.prometheusURL, cfg.web.ListenAddress)
 	if err != nil {
 		fmt.Fprintln(os.Stderr, errors.Wrapf(err, "parse external URL %q", cfg.prometheusURL))
 		os.Exit(2)
 	}

+	cfg.web.CORSOrigin, err = compileCORSRegexString(cfg.corsRegexString)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, errors.Wrapf(err, "could not compile CORS regex string %q", cfg.corsRegexString))
+		os.Exit(2)
+	}
+
 	cfg.web.ReadTimeout = time.Duration(cfg.webTimeout)
 	// Default -web.route-prefix to path of -web.external-url.
 	if cfg.web.RoutePrefix == "" {
@@ -224,22 +270,54 @@ func main() {
 	// RoutePrefix must always be at least '/'.
 	cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/")

+	{ // Time retention settings.
+		if oldFlagRetentionDuration != 0 {
+			level.Warn(logger).Log("deprecation_notice", "'storage.tsdb.retention' flag is deprecated use 'storage.tsdb.retention.time' instead.")
+			cfg.tsdb.RetentionDuration = oldFlagRetentionDuration
+		}
+
+		// When the new flag is set it takes precedence.
+		if newFlagRetentionDuration != 0 {
+			cfg.tsdb.RetentionDuration = newFlagRetentionDuration
+		}
+
+		if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 {
+			cfg.tsdb.RetentionDuration = defaultRetentionDuration
+			level.Info(logger).Log("msg", "no time or size retention was set so using the default time retention", "duration", defaultRetentionDuration)
+		}
+
+		// Check for overflows. This limits our max retention to 100y.
+		if cfg.tsdb.RetentionDuration < 0 {
+			y, err := model.ParseDuration("100y")
+			if err != nil {
+				panic(err)
+			}
+			cfg.tsdb.RetentionDuration = y
+			level.Warn(logger).Log("msg", "time retention value is too high. Limiting to: "+y.String())
+		}
+	}
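Restated outside the diff, the precedence implemented above is: the new `storage.tsdb.retention.time` flag wins over the deprecated one, and the 15d default applies only when neither a time nor a size retention was given. A hedged sketch with a hypothetical helper name (`resolveRetention` is not a function in the Prometheus codebase):

```go
package main

import (
	"fmt"

	"github.com/prometheus/common/model"
)

// resolveRetention mirrors the rules above; illustrative only.
func resolveRetention(oldFlag, newFlag, def model.Duration, maxBytes int64) model.Duration {
	d := oldFlag // deprecated storage.tsdb.retention, still honored
	if newFlag != 0 {
		d = newFlag // storage.tsdb.retention.time takes precedence
	}
	if d == 0 && maxBytes == 0 {
		d = def // neither time nor size retention set: fall back to the default
	}
	return d
}

func main() {
	def, _ := model.ParseDuration("15d")
	old, _ := model.ParseDuration("30d")
	fmt.Println(resolveRetention(old, 0, def, 0)) // 30d: deprecated flag still honored
	fmt.Println(resolveRetention(0, 0, def, 0))   // 15d: default kicks in
}
```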
+	{ // Max block size settings.
 		if cfg.tsdb.MaxBlockDuration == 0 {
-			cfg.tsdb.MaxBlockDuration = cfg.tsdb.Retention / 10
+			maxBlockDuration, err := model.ParseDuration("31d")
+			if err != nil {
+				panic(err)
+			}
+			// When the time retention is set and not too big use to define the max block duration.
+			if cfg.tsdb.RetentionDuration != 0 && cfg.tsdb.RetentionDuration/10 < maxBlockDuration {
+				maxBlockDuration = cfg.tsdb.RetentionDuration / 10
+			}
+			cfg.tsdb.MaxBlockDuration = maxBlockDuration
 		}
+	}
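The block-size rule above caps the maximum block duration at 31d and otherwise uses 10% of the configured time retention. A compact standalone sketch of the same computation (illustrative names):

```go
package main

import (
	"fmt"

	"github.com/prometheus/common/model"
)

// maxBlockDuration restates the logic above for illustration.
func maxBlockDuration(retention model.Duration) model.Duration {
	cap31d, _ := model.ParseDuration("31d") // hard cap when retention is unset or huge
	if retention != 0 && retention/10 < cap31d {
		return retention / 10 // 10% of the configured time retention
	}
	return cap31d
}

func main() {
	r, _ := model.ParseDuration("15d")
	fmt.Println(maxBlockDuration(r)) // 1d12h, i.e. 10% of 15d
}
```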
 	promql.LookbackDelta = time.Duration(cfg.lookbackDelta)
+	promql.SetDefaultEvaluationInterval(time.Duration(config.DefaultGlobalConfig.EvaluationInterval))

-	logger := promlog.New(cfg.logLevel)
+	// Above level 6, the k8s client would log bearer tokens in clear-text.
+	klog.ClampLevel(6)
+	klog.SetLogger(log.With(logger, "component", "k8s_client_runtime"))

-	// XXX(fabxc): Kubernetes does background logging which we can only customize by modifying
-	// a global variable.
-	// Ultimately, here is the best place to set it.
-	k8s_runtime.ErrorHandlers = []func(error){
-		func(err error) {
-			level.Error(log.With(logger, "component", "k8s_client_runtime")).Log("err", err)
-		},
-	}
 	level.Info(logger).Log("msg", "Starting Prometheus", "version", version.Info())
 	level.Info(logger).Log("build_context", version.BuildContext())
@@ -249,7 +327,7 @@ func main() {
 	var (
 		localStorage  = &tsdb.ReadyStorage{}
-		remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), localStorage.StartTime, time.Duration(cfg.RemoteFlushDeadline))
+		remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, cfg.localStoragePath, time.Duration(cfg.RemoteFlushDeadline))
 		fanoutStorage = storage.NewFanout(logger, localStorage, remoteStorage)
 	)
@@ -257,7 +335,7 @@ func main() {
 		ctxWeb, cancelWeb = context.WithCancel(context.Background())
 		ctxRule           = context.Background()

-		notifier = notifier.NewManager(&cfg.notifier, log.With(logger, "component", "notifier"))
+		notifierManager = notifier.NewManager(&cfg.notifier, log.With(logger, "component", "notifier"))

 		ctxScrape, cancelScrape = context.WithCancel(context.Background())
 		discoveryManagerScrape  = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), discovery.Name("scrape"))
@@ -280,7 +358,7 @@ func main() {
 			Appendable:  fanoutStorage,
 			TSDB:        localStorage,
 			QueryFunc:   rules.EngineQueryFunc(queryEngine, fanoutStorage),
-			NotifyFunc:  sendAlerts(notifier, cfg.web.ExternalURL.String()),
+			NotifyFunc:  sendAlerts(notifierManager, cfg.web.ExternalURL.String()),
 			Context:     ctxRule,
 			ExternalURL: cfg.web.ExternalURL,
 			Registerer:  prometheus.DefaultRegisterer,
@@ -297,7 +375,8 @@ func main() {
 	cfg.web.QueryEngine = queryEngine
 	cfg.web.ScrapeManager = scrapeManager
 	cfg.web.RuleManager = ruleManager
-	cfg.web.Notifier = notifier
+	cfg.web.Notifier = notifierManager
+	cfg.web.TSDBCfg = cfg.tsdb

 	cfg.web.Version = &web.PrometheusVersion{
 		Version: version.Version,
@@ -333,7 +412,6 @@ func main() {
 		webHandler.ApplyConfig,
 		// The Scrape and notifier managers need to reload before the Discovery manager as
 		// they need to read the most updated config when receiving the new targets list.
-		notifier.ApplyConfig,
 		scrapeManager.ApplyConfig,
 		func(cfg *config.Config) error {
 			c := make(map[string]sd_config.ServiceDiscoveryConfig)
@@ -342,6 +420,7 @@ func main() {
 			}
 			return discoveryManagerScrape.ApplyConfig(c)
 		},
+		notifierManager.ApplyConfig,
 		func(cfg *config.Config) error {
 			c := make(map[string]sd_config.ServiceDiscoveryConfig)
 			for _, v := range cfg.AlertingConfig.AlertmanagerConfigs {
@@ -355,17 +434,21 @@ func main() {
 			return discoveryManagerNotify.ApplyConfig(c)
 		},
 		func(cfg *config.Config) error {
-			// Get all rule files matching the configuration oaths.
+			// Get all rule files matching the configuration paths.
 			var files []string
 			for _, pat := range cfg.RuleFiles {
 				fs, err := filepath.Glob(pat)
 				if err != nil {
 					// The only error can be a bad pattern.
-					return fmt.Errorf("error retrieving rule files for %s: %s", pat, err)
+					return errors.Wrapf(err, "error retrieving rule files for %s", pat)
 				}
 				files = append(files, fs...)
 			}
-			return ruleManager.Update(time.Duration(cfg.GlobalConfig.EvaluationInterval), files)
+			return ruleManager.Update(
+				time.Duration(cfg.GlobalConfig.EvaluationInterval),
+				files,
+				cfg.GlobalConfig.ExternalLabels,
+			)
 		},
 	}
@@ -392,7 +475,7 @@ func main() {
 		})
 	}

-	var g group.Group
+	var g run.Group
 	{
 		// Termination handler.
 		term := make(chan os.Signal, 1)
@@ -522,7 +605,7 @@ func main() {
 				}

 				if err := reloadConfig(cfg.configFile, logger, reloaders...); err != nil {
-					return fmt.Errorf("error loading config from %q: %s", cfg.configFile, err)
+					return errors.Wrapf(err, "error loading config from %q", cfg.configFile)
 				}

 				reloadReady.Close()
@@ -560,6 +643,11 @@ func main() {
 		g.Add(
 			func() error {
 				level.Info(logger).Log("msg", "Starting TSDB ...")
+				if cfg.tsdb.WALSegmentSize != 0 {
+					if cfg.tsdb.WALSegmentSize < 10*1024*1024 || cfg.tsdb.WALSegmentSize > 256*1024*1024 {
+						return errors.New("flag 'storage.tsdb.wal-segment-size' must be set between 10MB and 256MB")
+					}
+				}
 				db, err := tsdb.Open(
 					cfg.localStoragePath,
 					log.With(logger, "component", "tsdb"),
@@ -567,9 +655,19 @@ func main() {
 					&cfg.tsdb,
 				)
 				if err != nil {
-					return fmt.Errorf("opening storage failed: %s", err)
+					return errors.Wrapf(err, "opening storage failed")
 				}
+				level.Info(logger).Log("fs_type", prom_runtime.Statfs(cfg.localStoragePath))
 				level.Info(logger).Log("msg", "TSDB started")
+				level.Debug(logger).Log("msg", "TSDB options",
+					"MinBlockDuration", cfg.tsdb.MinBlockDuration,
+					"MaxBlockDuration", cfg.tsdb.MaxBlockDuration,
+					"MaxBytes", cfg.tsdb.MaxBytes,
+					"NoLockfile", cfg.tsdb.NoLockfile,
+					"RetentionDuration", cfg.tsdb.RetentionDuration,
+					"WALSegmentSize", cfg.tsdb.WALSegmentSize,
+					"AllowOverlappingBlocks", cfg.tsdb.AllowOverlappingBlocks,
+				)

 				startTimeMargin := int64(2 * time.Duration(cfg.tsdb.MinBlockDuration).Seconds() * 1000)
 				localStorage.Set(db, startTimeMargin)
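The new WAL flag is validated eagerly, before the TSDB is opened; anything outside 10MB–256MB aborts startup. The same check, isolated over plain byte counts (a sketch, not the exact tsdb types):

```go
package main

import (
	"errors"
	"fmt"
)

// checkWALSegmentSize restates the bounds check above; zero means the flag
// was not set and the tsdb default applies.
func checkWALSegmentSize(size int64) error {
	if size == 0 {
		return nil
	}
	if size < 10*1024*1024 || size > 256*1024*1024 {
		return errors.New("flag 'storage.tsdb.wal-segment-size' must be set between 10MB and 256MB")
	}
	return nil
}

func main() {
	fmt.Println(checkWALSegmentSize(9 * 1024 * 1024))  // error: below 10MB
	fmt.Println(checkWALSegmentSize(12 * 1024 * 1024)) // <nil>
}
```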
@@ -590,7 +688,7 @@ func main() {
 		g.Add(
 			func() error {
 				if err := webHandler.Run(ctxWeb); err != nil {
-					return fmt.Errorf("error starting web server: %s", err)
+					return errors.Wrapf(err, "error starting web server")
 				}
 				return nil
 			},
@@ -612,12 +710,12 @@ func main() {
 				// so we wait until the config is fully loaded.
 				<-reloadReady.C

-				notifier.Run(discoveryManagerNotify.SyncCh())
+				notifierManager.Run(discoveryManagerNotify.SyncCh())
 				level.Info(logger).Log("msg", "Notifier manager stopped")
 				return nil
 			},
 			func(err error) {
-				notifier.Stop()
+				notifierManager.Stop()
 			},
 		)
 	}
@@ -642,7 +740,7 @@ func reloadConfig(filename string, logger log.Logger, rls ...func(*config.Config
 	conf, err := config.LoadFile(filename)
 	if err != nil {
-		return fmt.Errorf("couldn't load configuration (--config.file=%q): %v", filename, err)
+		return errors.Wrapf(err, "couldn't load configuration (--config.file=%q)", filename)
 	}

 	failed := false
@@ -653,8 +751,10 @@ func reloadConfig(filename string, logger log.Logger, rls ...func(*config.Config
 		}
 	}
 	if failed {
-		return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename)
+		return errors.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename)
 	}
+
+	promql.SetDefaultEvaluationInterval(time.Duration(conf.GlobalConfig.EvaluationInterval))
 	level.Info(logger).Log("msg", "Completed loading of configuration file", "filename", filename)
 	return nil
 }
@@ -664,6 +764,15 @@ func startsOrEndsWithQuote(s string) bool {
 		strings.HasSuffix(s, "\"") || strings.HasSuffix(s, "'")
 }

+// compileCORSRegexString compiles given string and adds anchors
+func compileCORSRegexString(s string) (*regexp.Regexp, error) {
+	r, err := relabel.NewRegexp(s)
+	if err != nil {
+		return nil, err
+	}
+	return r.Regexp, nil
+}
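`relabel.NewRegexp` is what gives the `web.cors.origin` flag its "fully anchored" behavior: the relabel package wraps the pattern in anchors before compiling, so an origin regex cannot match as a substring. A small demonstration under that assumption:

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/pkg/relabel"
)

func main() {
	re, err := relabel.NewRegexp(`https?://(domain1|domain2)\.com`)
	if err != nil {
		panic(err)
	}
	// The embedded *regexp.Regexp is anchored, so partial matches are rejected.
	fmt.Println(re.Regexp.MatchString("https://domain1.com"))      // true
	fmt.Println(re.Regexp.MatchString("https://domain1.com/path")) // false
}
```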
 // computeExternalURL computes a sanitized external URL from a raw input. It infers unset
 // URL parts from the OS and the given listen address.
 func computeExternalURL(u, listenAddr string) (*url.URL, error) {
@@ -680,7 +789,7 @@ func computeExternalURL(u, listenAddr string) (*url.URL, error) {
 	}

 	if startsOrEndsWithQuote(u) {
-		return nil, fmt.Errorf("URL must not begin or end with quotes")
+		return nil, errors.New("URL must not begin or end with quotes")
 	}

 	eu, err := url.Parse(u)
@@ -697,8 +806,12 @@ func computeExternalURL(u, listenAddr string) (*url.URL, error) {
 	return eu, nil
 }
+type sender interface {
+	Send(alerts ...*notifier.Alert)
+}
+
 // sendAlerts implements the rules.NotifyFunc for a Notifier.
-func sendAlerts(n *notifier.Manager, externalURL string) rules.NotifyFunc {
+func sendAlerts(s sender, externalURL string) rules.NotifyFunc {
 	return func(ctx context.Context, expr string, alerts ...*rules.Alert) {
 		var res []*notifier.Alert
@@ -718,7 +831,7 @@ func sendAlerts(n *notifier.Manager, externalURL string) rules.NotifyFunc {
 		}

 		if len(alerts) > 0 {
-			n.Send(res...)
+			s.Send(res...)
 		}
 	}
 }

View file

@@ -14,6 +14,7 @@
 package main

 import (
+	"context"
 	"flag"
 	"fmt"
 	"net/http"
@@ -24,6 +25,9 @@ import (
 	"testing"
 	"time"

+	"github.com/prometheus/prometheus/notifier"
+	"github.com/prometheus/prometheus/pkg/labels"
+	"github.com/prometheus/prometheus/rules"
 	"github.com/prometheus/prometheus/util/testutil"
 )
@@ -159,6 +163,10 @@ func TestComputeExternalURL(t *testing.T) {
 // Let's provide an invalid configuration file and verify the exit status indicates the error.
 func TestFailedStartupExitCode(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping test in short mode.")
+	}
+
 	fakeInputFile := "fake-input-file"
 	expectedExitStatus := 1
@@ -173,3 +181,106 @@ func TestFailedStartupExitCode(t *testing.T) {
 		t.Errorf("unable to retrieve the exit status for prometheus: %v", err)
 	}
 }
+type senderFunc func(alerts ...*notifier.Alert)
+
+func (s senderFunc) Send(alerts ...*notifier.Alert) {
+	s(alerts...)
+}
+
+func TestSendAlerts(t *testing.T) {
+	testCases := []struct {
+		in  []*rules.Alert
+		exp []*notifier.Alert
+	}{
+		{
+			in: []*rules.Alert{
+				{
+					Labels:      []labels.Label{{Name: "l1", Value: "v1"}},
+					Annotations: []labels.Label{{Name: "a2", Value: "v2"}},
+					ActiveAt:    time.Unix(1, 0),
+					FiredAt:     time.Unix(2, 0),
+					ValidUntil:  time.Unix(3, 0),
+				},
+			},
+			exp: []*notifier.Alert{
+				{
+					Labels:       []labels.Label{{Name: "l1", Value: "v1"}},
+					Annotations:  []labels.Label{{Name: "a2", Value: "v2"}},
+					StartsAt:     time.Unix(2, 0),
+					EndsAt:       time.Unix(3, 0),
+					GeneratorURL: "http://localhost:9090/graph?g0.expr=up&g0.tab=1",
+				},
+			},
+		},
+		{
+			in: []*rules.Alert{
+				{
+					Labels:      []labels.Label{{Name: "l1", Value: "v1"}},
+					Annotations: []labels.Label{{Name: "a2", Value: "v2"}},
+					ActiveAt:    time.Unix(1, 0),
+					FiredAt:     time.Unix(2, 0),
+					ResolvedAt:  time.Unix(4, 0),
+				},
+			},
+			exp: []*notifier.Alert{
+				{
+					Labels:       []labels.Label{{Name: "l1", Value: "v1"}},
+					Annotations:  []labels.Label{{Name: "a2", Value: "v2"}},
+					StartsAt:     time.Unix(2, 0),
+					EndsAt:       time.Unix(4, 0),
+					GeneratorURL: "http://localhost:9090/graph?g0.expr=up&g0.tab=1",
+				},
+			},
+		},
+		{
+			in: []*rules.Alert{},
+		},
+	}
+
+	for i, tc := range testCases {
+		tc := tc
+		t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
+			senderFunc := senderFunc(func(alerts ...*notifier.Alert) {
+				if len(tc.in) == 0 {
+					t.Fatalf("sender called with 0 alert")
+				}
+				testutil.Equals(t, tc.exp, alerts)
+			})
+			sendAlerts(senderFunc, "http://localhost:9090")(context.TODO(), "up", tc.in...)
+		})
+	}
+}
+
+func TestWALSegmentSizeBounds(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping test in short mode.")
+	}
+
+	for size, expectedExitStatus := range map[string]int{"9MB": 1, "257MB": 1, "10": 2, "1GB": 1, "12MB": 0} {
+		prom := exec.Command(promPath, "--storage.tsdb.wal-segment-size="+size, "--config.file="+promConfig)
+		err := prom.Start()
+		testutil.Ok(t, err)
+
+		if expectedExitStatus == 0 {
+			done := make(chan error, 1)
+			go func() { done <- prom.Wait() }()
+			select {
+			case err := <-done:
+				t.Errorf("prometheus should be still running: %v", err)
+			case <-time.After(5 * time.Second):
+				prom.Process.Signal(os.Interrupt)
+			}
+			continue
+		}
+
+		err = prom.Wait()
+		testutil.NotOk(t, err, "")
+		if exitError, ok := err.(*exec.ExitError); ok {
+			status := exitError.Sys().(syscall.WaitStatus)
+			testutil.Equals(t, expectedExitStatus, status.ExitStatus())
+		} else {
+			t.Errorf("unable to retrieve the exit status for prometheus: %v", err)
		}
+	}
+}

View file

@@ -16,18 +16,13 @@ package main
 import (
 	"archive/tar"
 	"compress/gzip"
-	"fmt"
 	"os"
+
+	"github.com/pkg/errors"
 )

 const filePerm = 0644

-type archiver interface {
-	write(filename string, b []byte) error
-	close() error
-	filename() string
-}
-
 type tarGzFileWriter struct {
 	tarWriter *tar.Writer
 	gzWriter  *gzip.Writer
@@ -37,7 +32,7 @@ type tarGzFileWriter struct {
 func newTarGzFileWriter(archiveName string) (*tarGzFileWriter, error) {
 	file, err := os.Create(archiveName)
 	if err != nil {
-		return nil, fmt.Errorf("error creating archive %q: %s", archiveName, err)
+		return nil, errors.Wrapf(err, "error creating archive %q", archiveName)
 	}
 	gzw := gzip.NewWriter(file)
 	tw := tar.NewWriter(gzw)
@@ -72,7 +67,3 @@ func (w *tarGzFileWriter) write(filename string, b []byte) error {
 	}
 	return nil
 }
-
-func (w *tarGzFileWriter) filename() string {
-	return w.file.Name()
-}
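With the `archiver` interface gone, callers use the concrete `tarGzFileWriter` directly. A sketch of the create/write/close flow (same package, since the type is unexported; file names are illustrative):

```go
// writeExampleArchive is an illustrative helper, not part of the diff.
func writeExampleArchive() error {
	w, err := newTarGzFileWriter("debug.tar.gz")
	if err != nil {
		return err
	}
	// Each write adds one file entry to the gzipped tarball.
	if err := w.write("metrics.txt", []byte("# sample contents\n")); err != nil {
		return err
	}
	// close flushes the tar and gzip layers and the underlying file.
	return w.close()
}
```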

View file

@@ -14,112 +14,56 @@
 package main

 import (
-	"bytes"
 	"fmt"
+	"io/ioutil"
 	"net/http"
-	"os"

-	"github.com/google/pprof/profile"
+	"github.com/pkg/errors"
 )

 type debugWriterConfig struct {
-	serverURL      string
-	tarballName    string
-	pathToFileName map[string]string
-	postProcess    func(b []byte) ([]byte, error)
+	serverURL      string
+	tarballName    string
+	endPointGroups []endpointsGroup
 }

-type debugWriter struct {
-	archiver
-	httpClient
-	requestToFile map[*http.Request]string
-	postProcess   func(b []byte) ([]byte, error)
-}
-
-func newDebugWriter(cfg debugWriterConfig) (*debugWriter, error) {
-	client, err := newPrometheusHTTPClient(cfg.serverURL)
-	if err != nil {
-		return nil, err
-	}
+func debugWrite(cfg debugWriterConfig) error {
 	archiver, err := newTarGzFileWriter(cfg.tarballName)
 	if err != nil {
-		return nil, err
+		return errors.Wrap(err, "error creating a new archiver")
 	}
-	reqs := make(map[*http.Request]string)
-	for path, filename := range cfg.pathToFileName {
-		req, err := http.NewRequest(http.MethodGet, client.urlJoin(path), nil)
-		if err != nil {
-			return nil, err
-		}
-		reqs[req] = filename
-	}
-	return &debugWriter{
-		archiver,
-		client,
-		reqs,
-		cfg.postProcess,
-	}, nil
-}

-func (w *debugWriter) Write() int {
-	for req, filename := range w.requestToFile {
-		_, body, err := w.do(req)
-		if err != nil {
-			fmt.Fprintln(os.Stderr, "error executing HTTP request:", err)
-			return 1
-		}
-		buf, err := w.postProcess(body)
-		if err != nil {
-			fmt.Fprintln(os.Stderr, "error post-processing HTTP response body:", err)
-			return 1
-		}
-		if err := w.archiver.write(filename, buf); err != nil {
-			fmt.Fprintln(os.Stderr, "error writing into archive:", err)
-			return 1
+	for _, endPointGroup := range cfg.endPointGroups {
+		for url, filename := range endPointGroup.urlToFilename {
+			url := cfg.serverURL + url
+			fmt.Println("collecting:", url)
+			res, err := http.Get(url)
+			if err != nil {
+				return errors.Wrap(err, "error executing HTTP request")
+			}
+			body, err := ioutil.ReadAll(res.Body)
+			res.Body.Close()
+			if err != nil {
+				return errors.Wrap(err, "error reading the response body")
+			}
+
+			if endPointGroup.postProcess != nil {
+				body, err = endPointGroup.postProcess(body)
+				if err != nil {
+					return errors.Wrap(err, "error post-processing HTTP response body")
+				}
+			}
+			if err := archiver.write(filename, body); err != nil {
+				return errors.Wrap(err, "error writing into the archive")
+			}
 		}
 	}

-	if err := w.close(); err != nil {
-		fmt.Fprintln(os.Stderr, "error closing archiver:", err)
-		return 1
+	if err := archiver.close(); err != nil {
+		return errors.Wrap(err, "error closing archive writer")
 	}

-	fmt.Printf("Compiling debug information complete, all files written in %q.\n", w.filename())
-	return 0
-}
-
-func validate(b []byte) (*profile.Profile, error) {
-	p, err := profile.Parse(bytes.NewReader(b))
-	if err != nil {
-		return nil, err
-	}
-	return p, nil
-}
-
-var pprofPostProcess = func(b []byte) ([]byte, error) {
-	p, err := validate(b)
-	if err != nil {
-		return nil, err
-	}
-	var buf bytes.Buffer
-	if err := p.WriteUncompressed(&buf); err != nil {
-		return nil, err
-	}
-	fmt.Println(p.String())
-	return buf.Bytes(), nil
-}
-
-var metricsPostProcess = func(b []byte) ([]byte, error) {
-	fmt.Println(string(b))
-	return b, nil
-}
-
-var allPostProcess = func(b []byte) ([]byte, error) {
-	_, err := validate(b)
-	if err != nil {
-		return metricsPostProcess(b)
-	}
-	return pprofPostProcess(b)
-}
+	fmt.Printf("Compiling debug information complete, all files written in %q.\n", cfg.tarballName)
+	return nil
+}

View file

@@ -1,58 +0,0 @@
-// Copyright 2015 The Prometheus Authors
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import (
-	"context"
-	"fmt"
-	"net/http"
-	"time"
-
-	"github.com/prometheus/client_golang/api"
-)
-
-const defaultTimeout = 2 * time.Minute
-
-type httpClient interface {
-	do(req *http.Request) (*http.Response, []byte, error)
-	urlJoin(path string) string
-}
-
-type prometheusHTTPClient struct {
-	requestTimeout time.Duration
-	httpClient     api.Client
-}
-
-func newPrometheusHTTPClient(serverURL string) (*prometheusHTTPClient, error) {
-	hc, err := api.NewClient(api.Config{
-		Address: serverURL,
-	})
-	if err != nil {
-		return nil, fmt.Errorf("error creating HTTP client: %s", err)
-	}
-	return &prometheusHTTPClient{
-		requestTimeout: defaultTimeout,
-		httpClient:     hc,
-	}, nil
-}
-
-func (c *prometheusHTTPClient) do(req *http.Request) (*http.Response, []byte, error) {
-	ctx, cancel := context.WithTimeout(context.Background(), c.requestTimeout)
-	defer cancel()
-	return c.httpClient.Do(ctx, req)
-}
-
-func (c *prometheusHTTPClient) urlJoin(path string) string {
-	return c.httpClient.URL(path, nil).String()
-}

View file

@@ -1,56 +0,0 @@
-// Copyright 2015 The Prometheus Authors
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package main
-
-import "testing"
-
-func TestURLJoin(t *testing.T) {
-	testCases := []struct {
-		inputHost string
-		inputPath string
-		expected  string
-	}{
-		{"http://host", "path", "http://host/path"},
-		{"http://host", "path/", "http://host/path"},
-		{"http://host", "/path", "http://host/path"},
-		{"http://host", "/path/", "http://host/path"},
-		{"http://host/", "path", "http://host/path"},
-		{"http://host/", "path/", "http://host/path"},
-		{"http://host/", "/path", "http://host/path"},
-		{"http://host/", "/path/", "http://host/path"},
-		{"https://host", "path", "https://host/path"},
-		{"https://host", "path/", "https://host/path"},
-		{"https://host", "/path", "https://host/path"},
-		{"https://host", "/path/", "https://host/path"},
-		{"https://host/", "path", "https://host/path"},
-		{"https://host/", "path/", "https://host/path"},
-		{"https://host/", "/path", "https://host/path"},
-		{"https://host/", "/path/", "https://host/path"},
-	}
-	for i, c := range testCases {
-		client, err := newPrometheusHTTPClient(c.inputHost)
-		if err != nil {
-			panic(err)
-		}
-		actual := client.urlJoin(c.inputPath)
-		if actual != c.expected {
-			t.Errorf("Error on case %d: %v(actual) != %v(expected)", i, actual, c.expected)
-		}
-		t.Logf("Case %d: %v(actual) == %v(expected)", i, actual, c.expected)
-	}
-}

View file

@@ -14,6 +14,7 @@
 package main

 import (
+	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
@@ -25,13 +26,15 @@ import (
 	"strings"
 	"time"

-	"gopkg.in/alecthomas/kingpin.v2"
+	"github.com/google/pprof/profile"
+	"github.com/pkg/errors"
 	"github.com/prometheus/client_golang/api"
-	"github.com/prometheus/client_golang/api/prometheus/v1"
+	v1 "github.com/prometheus/client_golang/api/prometheus/v1"
 	config_util "github.com/prometheus/common/config"
 	"github.com/prometheus/common/model"
 	"github.com/prometheus/common/version"
+	kingpin "gopkg.in/alecthomas/kingpin.v2"

 	"github.com/prometheus/prometheus/config"
 	"github.com/prometheus/prometheus/pkg/rulefmt"
 	"github.com/prometheus/prometheus/util/promlint"
@@ -199,10 +202,10 @@ func checkConfig(filename string) ([]string, error) {
 		// If an explicit file was given, error if it is not accessible.
 		if !strings.Contains(rf, "*") {
 			if len(rfs) == 0 {
-				return nil, fmt.Errorf("%q does not point to an existing file", rf)
+				return nil, errors.Errorf("%q does not point to an existing file", rf)
 			}
 			if err := checkFileExists(rfs[0]); err != nil {
-				return nil, fmt.Errorf("error checking rule file %q: %s", rfs[0], err)
+				return nil, errors.Wrapf(err, "error checking rule file %q", rfs[0])
 			}
 		}
 		ruleFiles = append(ruleFiles, rfs...)
@@ -210,7 +213,7 @@ func checkConfig(filename string) ([]string, error) {
 	for _, scfg := range cfg.ScrapeConfigs {
 		if err := checkFileExists(scfg.HTTPClientConfig.BearerTokenFile); err != nil {
-			return nil, fmt.Errorf("error checking bearer token file %q: %s", scfg.HTTPClientConfig.BearerTokenFile, err)
+			return nil, errors.Wrapf(err, "error checking bearer token file %q", scfg.HTTPClientConfig.BearerTokenFile)
 		}

 		if err := checkTLSConfig(scfg.HTTPClientConfig.TLSConfig); err != nil {
@@ -218,7 +221,7 @@ func checkConfig(filename string) ([]string, error) {
 		}

 		for _, kd := range scfg.ServiceDiscoveryConfig.KubernetesSDConfigs {
-			if err := checkTLSConfig(kd.TLSConfig); err != nil {
+			if err := checkTLSConfig(kd.HTTPClientConfig.TLSConfig); err != nil {
 				return nil, err
 			}
 		}
@@ -244,17 +247,17 @@ func checkConfig(filename string) ([]string, error) {
 func checkTLSConfig(tlsConfig config_util.TLSConfig) error {
 	if err := checkFileExists(tlsConfig.CertFile); err != nil {
-		return fmt.Errorf("error checking client cert file %q: %s", tlsConfig.CertFile, err)
+		return errors.Wrapf(err, "error checking client cert file %q", tlsConfig.CertFile)
 	}
 	if err := checkFileExists(tlsConfig.KeyFile); err != nil {
-		return fmt.Errorf("error checking client key file %q: %s", tlsConfig.KeyFile, err)
+		return errors.Wrapf(err, "error checking client key file %q", tlsConfig.KeyFile)
 	}

 	if len(tlsConfig.CertFile) > 0 && len(tlsConfig.KeyFile) == 0 {
-		return fmt.Errorf("client cert file %q specified without client key file", tlsConfig.CertFile)
+		return errors.Errorf("client cert file %q specified without client key file", tlsConfig.CertFile)
 	}
 	if len(tlsConfig.KeyFile) > 0 && len(tlsConfig.CertFile) == 0 {
-		return fmt.Errorf("client key file %q specified without client cert file", tlsConfig.KeyFile)
+		return errors.Errorf("client key file %q specified without client cert file", tlsConfig.KeyFile)
 	}

 	return nil
@@ -507,64 +510,88 @@ func parseTime(s string) (time.Time, error) {
 	if t, err := time.Parse(time.RFC3339Nano, s); err == nil {
 		return t, nil
 	}
-	return time.Time{}, fmt.Errorf("cannot parse %q to a valid timestamp", s)
+	return time.Time{}, errors.Errorf("cannot parse %q to a valid timestamp", s)
 }
+type endpointsGroup struct {
+	urlToFilename map[string]string
+	postProcess   func(b []byte) ([]byte, error)
+}
+
+var (
+	pprofEndpoints = []endpointsGroup{
+		{
+			urlToFilename: map[string]string{
+				"/debug/pprof/profile?seconds=30": "cpu.pb",
+				"/debug/pprof/block":              "block.pb",
+				"/debug/pprof/goroutine":          "goroutine.pb",
+				"/debug/pprof/heap":               "heap.pb",
+				"/debug/pprof/mutex":              "mutex.pb",
+				"/debug/pprof/threadcreate":       "threadcreate.pb",
+			},
+			postProcess: func(b []byte) ([]byte, error) {
+				p, err := profile.Parse(bytes.NewReader(b))
+				if err != nil {
+					return nil, err
+				}
+				var buf bytes.Buffer
+				if err := p.WriteUncompressed(&buf); err != nil {
+					return nil, errors.Wrap(err, "writing the profile to the buffer")
+				}
+				return buf.Bytes(), nil
+			},
+		},
+		{
+			urlToFilename: map[string]string{
+				"/debug/pprof/trace?seconds=30": "trace.pb",
+			},
+		},
+	}
+	metricsEndpoints = []endpointsGroup{
+		{
+			urlToFilename: map[string]string{
+				"/metrics": "metrics.txt",
+			},
+		},
+	}
+	allEndpoints = append(pprofEndpoints, metricsEndpoints...)
+)
-func debugPprof(url string) int {
-	w, err := newDebugWriter(debugWriterConfig{
-		serverURL:   url,
-		tarballName: "debug.tar.gz",
-		pathToFileName: map[string]string{
-			"/debug/pprof/block":        "block.pb",
-			"/debug/pprof/goroutine":    "goroutine.pb",
-			"/debug/pprof/heap":         "heap.pb",
-			"/debug/pprof/mutex":        "mutex.pb",
-			"/debug/pprof/threadcreate": "threadcreate.pb",
-		},
-		postProcess: pprofPostProcess,
-	})
-	if err != nil {
-		fmt.Fprintln(os.Stderr, "error creating debug writer:", err)
-		return 1
-	}
-	return w.Write()
-}
+func debugPprof(url string) int {
+	if err := debugWrite(debugWriterConfig{
+		serverURL:      url,
+		tarballName:    "debug.tar.gz",
+		endPointGroups: pprofEndpoints,
+	}); err != nil {
+		fmt.Fprintln(os.Stderr, "error completing debug command:", err)
+		return 1
+	}
+	return 0
+}

-func debugMetrics(url string) int {
-	w, err := newDebugWriter(debugWriterConfig{
-		serverURL:   url,
-		tarballName: "debug.tar.gz",
-		pathToFileName: map[string]string{
-			"/metrics": "metrics.txt",
-		},
-		postProcess: metricsPostProcess,
-	})
-	if err != nil {
-		fmt.Fprintln(os.Stderr, "error creating debug writer:", err)
-		return 1
-	}
-	return w.Write()
-}
+func debugMetrics(url string) int {
+	if err := debugWrite(debugWriterConfig{
+		serverURL:      url,
+		tarballName:    "debug.tar.gz",
+		endPointGroups: metricsEndpoints,
+	}); err != nil {
+		fmt.Fprintln(os.Stderr, "error completing debug command:", err)
+		return 1
+	}
+	return 0
+}

-func debugAll(url string) int {
-	w, err := newDebugWriter(debugWriterConfig{
-		serverURL:   url,
-		tarballName: "debug.tar.gz",
-		pathToFileName: map[string]string{
-			"/debug/pprof/block":        "block.pb",
-			"/debug/pprof/goroutine":    "goroutine.pb",
-			"/debug/pprof/heap":         "heap.pb",
-			"/debug/pprof/mutex":        "mutex.pb",
-			"/debug/pprof/threadcreate": "threadcreate.pb",
-			"/metrics":                  "metrics.txt",
-		},
-		postProcess: allPostProcess,
-	})
-	if err != nil {
-		fmt.Fprintln(os.Stderr, "error creating debug writer:", err)
-		return 1
-	}
-	return w.Write()
-}
+func debugAll(url string) int {
+	if err := debugWrite(debugWriterConfig{
+		serverURL:      url,
+		tarballName:    "debug.tar.gz",
+		endPointGroups: allEndpoints,
+	}); err != nil {
+		fmt.Fprintln(os.Stderr, "error completing debug command:", err)
+		return 1
+	}
+	return 0
+}
 type printer interface {
@@ -583,7 +610,7 @@ func (p *promqlPrinter) printSeries(val []model.LabelSet) {
 		fmt.Println(v)
 	}
 }
-func (j *promqlPrinter) printLabelValues(val model.LabelValues) {
+func (p *promqlPrinter) printLabelValues(val model.LabelValues) {
 	for _, v := range val {
 		fmt.Println(v)
 	}
@@ -592,11 +619,14 @@ func (j *promqlPrinter) printLabelValues(val model.LabelValues) {
 type jsonPrinter struct{}
 func (j *jsonPrinter) printValue(v model.Value) {
+	//nolint:errcheck
 	json.NewEncoder(os.Stdout).Encode(v)
 }
 func (j *jsonPrinter) printSeries(v []model.LabelSet) {
+	//nolint:errcheck
 	json.NewEncoder(os.Stdout).Encode(v)
 }
 func (j *jsonPrinter) printLabelValues(v model.LabelValues) {
+	//nolint:errcheck
 	json.NewEncoder(os.Stdout).Encode(v)
 }

View file

@@ -17,26 +17,27 @@ import (
 	"fmt"
 	"net/http"
 	"net/http/httptest"
-	"net/url"
 	"testing"
 	"time"
 )

 func TestQueryRange(t *testing.T) {
-	s, getURL := mockServer(200, `{"status": "success", "data": {"resultType": "matrix", "result": []}}`)
+	s, getRequest := mockServer(200, `{"status": "success", "data": {"resultType": "matrix", "result": []}}`)
 	defer s.Close()

 	p := &promqlPrinter{}
 	exitCode := QueryRange(s.URL, "up", "0", "300", 0, p)
 	expectedPath := "/api/v1/query_range"
-	if getURL().Path != expectedPath {
-		t.Errorf("unexpected URL path %s (wanted %s)", getURL().Path, expectedPath)
+	gotPath := getRequest().URL.Path
+	if gotPath != expectedPath {
+		t.Errorf("unexpected URL path %s (wanted %s)", gotPath, expectedPath)
 	}
-	actual := getURL().Query().Get("query")
+	form := getRequest().Form
+	actual := form.Get("query")
 	if actual != "up" {
 		t.Errorf("unexpected value %s for query", actual)
 	}
-	actual = getURL().Query().Get("step")
+	actual = form.Get("step")
 	if actual != "1.000" {
 		t.Errorf("unexpected value %s for step", actual)
 	}
@@ -45,14 +46,16 @@ func TestQueryRange(t *testing.T) {
 	}

 	exitCode = QueryRange(s.URL, "up", "0", "300", 10*time.Millisecond, p)
-	if getURL().Path != expectedPath {
-		t.Errorf("unexpected URL path %s (wanted %s)", getURL().Path, expectedPath)
+	gotPath = getRequest().URL.Path
+	if gotPath != expectedPath {
+		t.Errorf("unexpected URL path %s (wanted %s)", gotPath, expectedPath)
 	}
-	actual = getURL().Query().Get("query")
+	form = getRequest().Form
+	actual = form.Get("query")
 	if actual != "up" {
 		t.Errorf("unexpected value %s for query", actual)
 	}
-	actual = getURL().Query().Get("step")
+	actual = form.Get("step")
 	if actual != "0.010" {
 		t.Errorf("unexpected value %s for step", actual)
 	}
@@ -61,16 +64,17 @@ func TestQueryRange(t *testing.T) {
 	}
 }

-func mockServer(code int, body string) (*httptest.Server, func() *url.URL) {
-	var u *url.URL
+func mockServer(code int, body string) (*httptest.Server, func() *http.Request) {
+	var req *http.Request
 	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-		u = r.URL
+		r.ParseForm()
+		req = r
 		w.WriteHeader(code)
 		fmt.Fprintln(w, body)
 	}))
-	f := func() *url.URL {
-		return u
+	f := func() *http.Request {
+		return req
 	}
 	return server, f
 }
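The switch from capturing `*url.URL` to capturing the whole `*http.Request` matters because query parameters may arrive in a POST body; after `r.ParseForm()`, `r.Form` holds parameters from both the URL and the body, while `r.URL.Query()` alone would miss the latter. A standalone illustration:

```go
package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"net/url"
)

func main() {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		r.ParseForm() // merges URL query and POST form body into r.Form
		fmt.Println("from URL query:", r.URL.Query().Get("query")) // empty for POST bodies
		fmt.Println("from form:", r.Form.Get("query"))             // "up"
	}))
	defer srv.Close()

	resp, err := http.PostForm(srv.URL, url.Values{"query": {"up"}})
	if err != nil {
		panic(err)
	}
	resp.Body.Close()
}
```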

View file

@@ -18,13 +18,16 @@ import (
 	"fmt"
 	"io/ioutil"
 	"os"
+	"path/filepath"
 	"reflect"
 	"sort"
 	"strconv"
 	"strings"
 	"time"

-	"gopkg.in/yaml.v2"
+	"github.com/go-kit/kit/log"
+	"github.com/pkg/errors"
+	yaml "gopkg.in/yaml.v2"

 	"github.com/prometheus/prometheus/pkg/labels"
 	"github.com/prometheus/prometheus/promql"
@@ -67,6 +70,9 @@ func ruleUnitTest(filename string) []error {
 	if err := yaml.UnmarshalStrict(b, &unitTestInp); err != nil {
 		return []error{err}
 	}
+	if err := resolveAndGlobFilepaths(filepath.Dir(filename), &unitTestInp); err != nil {
+		return []error{err}
+	}

 	if unitTestInp.EvaluationInterval == 0 {
 		unitTestInp.EvaluationInterval = 1 * time.Minute
@@ -84,7 +90,7 @@ func ruleUnitTest(filename string) []error {
 	groupOrderMap := make(map[string]int)
 	for i, gn := range unitTestInp.GroupEvalOrder {
 		if _, ok := groupOrderMap[gn]; ok {
-			return []error{fmt.Errorf("Group name repeated in evaluation order: %s", gn)}
+			return []error{errors.Errorf("group name repeated in evaluation order: %s", gn)}
 		}
 		groupOrderMap[gn] = i
 	}
@@ -124,6 +130,27 @@ func (utf *unitTestFile) maxEvalTime() time.Duration {
 	return maxd
 }
+// resolveAndGlobFilepaths joins all relative paths in a configuration
+// with a given base directory and replaces all globs with matching files.
+func resolveAndGlobFilepaths(baseDir string, utf *unitTestFile) error {
+	for i, rf := range utf.RuleFiles {
+		if rf != "" && !filepath.IsAbs(rf) {
+			utf.RuleFiles[i] = filepath.Join(baseDir, rf)
+		}
+	}
+
+	var globbedFiles []string
+	for _, rf := range utf.RuleFiles {
+		m, err := filepath.Glob(rf)
+		if err != nil {
+			return err
+		}
+		globbedFiles = append(globbedFiles, m...)
+	}
+	utf.RuleFiles = globbedFiles
+	return nil
+}
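`resolveAndGlobFilepaths` makes relative `rule_files` entries portable: they are resolved against the test file's directory first, then expanded. The core stdlib behavior it relies on, in isolation (hypothetical paths):

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	baseDir := "testdata"                      // hypothetical directory of the test file
	pattern := filepath.Join(baseDir, "*.yml") // e.g. "alerts.yml" -> "testdata/alerts.yml"
	files, err := filepath.Glob(pattern)
	if err != nil {
		panic(err) // Glob only errors on a malformed pattern
	}
	fmt.Println(files) // all matching rule files, or an empty slice
}
```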
 // testGroup is a group of input series and tests associated with it.
 type testGroup struct {
 	Interval time.Duration `yaml:"interval"`
@@ -135,27 +162,23 @@ type testGroup struct {
 // test performs the unit tests.
 func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, groupOrderMap map[string]int, ruleFiles ...string) []error {
 	// Setup testing suite.
-	suite, err := promql.NewTest(nil, tg.seriesLoadingString())
+	suite, err := promql.NewLazyLoader(nil, tg.seriesLoadingString())
 	if err != nil {
 		return []error{err}
 	}
 	defer suite.Close()
-	err = suite.Run()
-	if err != nil {
-		return []error{err}
-	}

 	// Load the rule files.
 	opts := &rules.ManagerOptions{
 		QueryFunc:  rules.EngineQueryFunc(suite.QueryEngine(), suite.Storage()),
 		Appendable: suite.Storage(),
 		Context:    context.Background(),
 		NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {},
-		Logger:     &dummyLogger{},
+		Logger:     log.NewNopLogger(),
 	}
 	m := rules.NewManager(opts)
-	groupsMap, ers := m.LoadGroups(tg.Interval, ruleFiles...)
+	// TODO(beorn7): Provide a way to pass in external labels.
+	groupsMap, ers := m.LoadGroups(tg.Interval, nil, ruleFiles...)
 	if ers != nil {
 		return ers
 	}
@@ -165,14 +188,14 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou
 	// All this preparation is so that we can test alerts as we evaluate the rules.
 	// This avoids storing them in memory, as the number of evals might be high.

-	// All the `eval_time` for which we have unit tests.
-	var alertEvalTimes []time.Duration
+	// All the `eval_time` for which we have unit tests for alerts.
+	alertEvalTimesMap := map[time.Duration]struct{}{}
 	// Map of all the eval_time+alertname combination present in the unit tests.
 	alertsInTest := make(map[time.Duration]map[string]struct{})
 	// Map of all the unit tests for given eval_time.
 	alertTests := make(map[time.Duration][]alertTestCase)
 	for _, alert := range tg.AlertRuleTests {
-		alertEvalTimes = append(alertEvalTimes, alert.EvalTime)
+		alertEvalTimesMap[alert.EvalTime] = struct{}{}

 		if _, ok := alertsInTest[alert.EvalTime]; !ok {
 			alertsInTest[alert.EvalTime] = make(map[string]struct{})
@@ -181,6 +204,10 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou
 		alertTests[alert.EvalTime] = append(alertTests[alert.EvalTime], alert)
 	}

+	alertEvalTimes := make([]time.Duration, 0, len(alertEvalTimesMap))
+	for k := range alertEvalTimesMap {
+		alertEvalTimes = append(alertEvalTimes, k)
+	}
 	sort.Slice(alertEvalTimes, func(i, j int) bool {
 		return alertEvalTimes[i] < alertEvalTimes[j]
 	})
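Collecting the eval times into a set before sorting is what changed here: the old slice could contain duplicates when several alert tests share an `eval_time`. The deduplicate-then-sort idiom on its own:

```go
package main

import (
	"fmt"
	"sort"
	"time"
)

func main() {
	// A map keyed by duration acts as a set, dropping repeated eval times.
	set := map[time.Duration]struct{}{}
	for _, d := range []time.Duration{5 * time.Minute, time.Minute, 5 * time.Minute} {
		set[d] = struct{}{}
	}
	times := make([]time.Duration, 0, len(set))
	for d := range set {
		times = append(times, d)
	}
	sort.Slice(times, func(i, j int) bool { return times[i] < times[j] })
	fmt.Println(times) // [1m0s 5m0s]
}
```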
@@ -191,8 +218,23 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou
 	var errs []error
 	for ts := mint; ts.Before(maxt); ts = ts.Add(evalInterval) {
 		// Collects the alerts asked for unit testing.
+		suite.WithSamplesTill(ts, func(err error) {
+			if err != nil {
+				errs = append(errs, err)
+				return
+			}
 			for _, g := range groups {
 				g.Eval(suite.Context(), ts)
+				for _, r := range g.Rules() {
+					if r.LastError() != nil {
+						errs = append(errs, errors.Errorf(" rule: %s, time: %s, err: %v",
+							r.Name(), ts.Sub(time.Unix(0, 0)), r.LastError()))
+					}
+				}
 			}
+		})
+		if len(errs) > 0 {
+			return errs
 		}

 		for {
@@ -253,14 +295,14 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou
 				}

 				if gotAlerts.Len() != expAlerts.Len() {
-					errs = append(errs, fmt.Errorf(" alertname:%s, time:%s, \n exp:%#v, \n got:%#v",
+					errs = append(errs, errors.Errorf(" alertname:%s, time:%s, \n exp:%#v, \n got:%#v",
 						testcase.Alertname, testcase.EvalTime.String(), expAlerts.String(), gotAlerts.String()))
 				} else {
 					sort.Sort(gotAlerts)
 					sort.Sort(expAlerts)

 					if !reflect.DeepEqual(expAlerts, gotAlerts) {
-						errs = append(errs, fmt.Errorf(" alertname:%s, time:%s, \n exp:%#v, \n got:%#v",
+						errs = append(errs, errors.Errorf(" alertname:%s, time:%s, \n exp:%#v, \n got:%#v",
 							testcase.Alertname, testcase.EvalTime.String(), expAlerts.String(), gotAlerts.String()))
 					}
 				}
@@ -276,7 +318,7 @@ Outer:
 		got, err := query(suite.Context(), testCase.Expr, mint.Add(testCase.EvalTime),
 			suite.QueryEngine(), suite.Queryable())
 		if err != nil {
-			errs = append(errs, fmt.Errorf(" expr:'%s', time:%s, err:%s", testCase.Expr,
+			errs = append(errs, errors.Errorf(" expr:'%s', time:%s, err:%s", testCase.Expr,
 				testCase.EvalTime.String(), err.Error()))
 			continue
 		}
@@ -293,7 +335,7 @@ Outer:
 		for _, s := range testCase.ExpSamples {
 			lb, err := promql.ParseMetric(s.Labels)
 			if err != nil {
-				errs = append(errs, fmt.Errorf(" expr:'%s', time:%s, err:%s", testCase.Expr,
+				errs = append(errs, errors.Errorf(" expr:'%s', time:%s, err:%s", testCase.Expr,
 					testCase.EvalTime.String(), err.Error()))
 				continue Outer
 			}
@@ -303,8 +345,14 @@ Outer:
 			})
 		}

+		sort.Slice(expSamples, func(i, j int) bool {
+			return labels.Compare(expSamples[i].Labels, expSamples[j].Labels) <= 0
+		})
+		sort.Slice(gotSamples, func(i, j int) bool {
+			return labels.Compare(gotSamples[i].Labels, gotSamples[j].Labels) <= 0
+		})
 		if !reflect.DeepEqual(expSamples, gotSamples) {
-			errs = append(errs, fmt.Errorf(" expr:'%s', time:%s, \n exp:%#v, \n got:%#v", testCase.Expr,
+			errs = append(errs, errors.Errorf(" expr:'%s', time:%s, \n exp:%#v, \n got:%#v", testCase.Expr,
 				testCase.EvalTime.String(), parsedSamplesString(expSamples), parsedSamplesString(gotSamples)))
 		}
 	}
@@ -383,7 +431,7 @@ func query(ctx context.Context, qs string, t time.Time, engine *promql.Engine, q
 			Metric: labels.Labels{},
 		}}, nil
 	default:
-		return nil, fmt.Errorf("rule result is not a vector or scalar")
+		return nil, errors.New("rule result is not a vector or scalar")
 	}
 }
@@ -468,9 +516,3 @@ func parsedSamplesString(pss []parsedSample) string {
 func (ps *parsedSample) String() string {
 	return ps.Labels.String() + " " + strconv.FormatFloat(ps.Value, 'E', -1, 64)
 }
-
-type dummyLogger struct{}
-
-func (l *dummyLogger) Log(keyvals ...interface{}) error {
-	return nil
-}
@ -22,15 +22,18 @@ import (
"strings" "strings"
"time" "time"
"github.com/pkg/errors"
config_util "github.com/prometheus/common/config" config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
yaml "gopkg.in/yaml.v2"
sd_config "github.com/prometheus/prometheus/discovery/config" sd_config "github.com/prometheus/prometheus/discovery/config"
"gopkg.in/yaml.v2" "github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/pkg/relabel"
) )
var ( var (
patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`) patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`)
relabelTarget = regexp.MustCompile(`^(?:(?:[a-zA-Z_]|\$(?:\{\w+\}|\w+))+\w*)+$`)
) )
// Load parses the YAML input s into a Config. // Load parses the YAML input s into a Config.
@ -57,7 +60,7 @@ func LoadFile(filename string) (*Config, error) {
} }
cfg, err := Load(string(content)) cfg, err := Load(string(content))
if err != nil { if err != nil {
return nil, fmt.Errorf("parsing YAML file %s: %v", filename, err) return nil, errors.Wrapf(err, "parsing YAML file %s", filename)
} }
resolveFilepaths(filepath.Dir(filename), cfg) resolveFilepaths(filepath.Dir(filename), cfg)
return cfg, nil return cfg, nil
@ -84,6 +87,7 @@ var (
MetricsPath: "/metrics", MetricsPath: "/metrics",
Scheme: "http", Scheme: "http",
HonorLabels: false, HonorLabels: false,
HonorTimestamps: true,
} }
// DefaultAlertmanagerConfig is the default alertmanager configuration. // DefaultAlertmanagerConfig is the default alertmanager configuration.
@ -92,14 +96,6 @@ var (
Timeout: model.Duration(10 * time.Second), Timeout: model.Duration(10 * time.Second),
} }
// DefaultRelabelConfig is the default Relabel configuration.
DefaultRelabelConfig = RelabelConfig{
Action: RelabelReplace,
Separator: ";",
Regex: MustNewRegexp("(.*)"),
Replacement: "$1",
}
// DefaultRemoteWriteConfig is the default remote write configuration. // DefaultRemoteWriteConfig is the default remote write configuration.
DefaultRemoteWriteConfig = RemoteWriteConfig{ DefaultRemoteWriteConfig = RemoteWriteConfig{
RemoteTimeout: model.Duration(30 * time.Second), RemoteTimeout: model.Duration(30 * time.Second),
@ -111,15 +107,16 @@ var (
// With a maximum of 1000 shards, assuming an average of 100ms remote write // With a maximum of 1000 shards, assuming an average of 100ms remote write
// time and 100 samples per batch, we will be able to push 1M samples/s. // time and 100 samples per batch, we will be able to push 1M samples/s.
MaxShards: 1000, MaxShards: 1000,
MinShards: 1,
MaxSamplesPerSend: 100, MaxSamplesPerSend: 100,
// By default, buffer 100 batches, which at 100ms per batch is 10s. At // Each shard will have a max of 10 samples pending in its channel, plus the pending
// 1000 shards, this will buffer 10M samples total. // samples that have been enqueued. Theoretically we should only ever have about 110 samples
Capacity: 100 * 100, // per shard pending. At 1000 shards that's 110k.
Capacity: 10,
BatchSendDeadline: model.Duration(5 * time.Second), BatchSendDeadline: model.Duration(5 * time.Second),
// Max number of times to retry a batch on recoverable errors. // Backoff times for retrying a batch of samples on recoverable errors.
MaxRetries: 3,
MinBackoff: model.Duration(30 * time.Millisecond), MinBackoff: model.Duration(30 * time.Millisecond),
MaxBackoff: model.Duration(100 * time.Millisecond), MaxBackoff: model.Duration(100 * time.Millisecond),
} }
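The new comments do the queue throughput math implicitly; here is the arithmetic spelled out as a runnable sketch (constants copied from the defaults above; the 100ms send time is the comment's stated assumption):

package main

import "fmt"

func main() {
	const (
		maxShards         = 1000
		maxSamplesPerSend = 100 // samples per batch
		sendMillis        = 100 // assumed average remote write time per batch
		capacity          = 10  // per-shard channel capacity after this change
	)

	// Batches per second per shard, times samples per batch, times shards.
	samplesPerSec := (1000 / sendMillis) * maxSamplesPerSend * maxShards
	fmt.Println(samplesPerSec) // 1000000, i.e. the ~1M samples/s from the comment

	// Pending samples: channel capacity plus roughly one in-flight batch per shard.
	perShardPending := capacity + maxSamplesPerSend
	fmt.Println(perShardPending, perShardPending*maxShards) // 110 110000
}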
@ -158,30 +155,34 @@ func resolveFilepaths(baseDir string, cfg *Config) {
cfg.RuleFiles[i] = join(rf) cfg.RuleFiles[i] = join(rf)
} }
tlsPaths := func(cfg *config_util.TLSConfig) {
cfg.CAFile = join(cfg.CAFile)
cfg.CertFile = join(cfg.CertFile)
cfg.KeyFile = join(cfg.KeyFile)
}
clientPaths := func(scfg *config_util.HTTPClientConfig) { clientPaths := func(scfg *config_util.HTTPClientConfig) {
if scfg.BasicAuth != nil {
scfg.BasicAuth.PasswordFile = join(scfg.BasicAuth.PasswordFile)
}
scfg.BearerTokenFile = join(scfg.BearerTokenFile) scfg.BearerTokenFile = join(scfg.BearerTokenFile)
scfg.TLSConfig.CAFile = join(scfg.TLSConfig.CAFile) tlsPaths(&scfg.TLSConfig)
scfg.TLSConfig.CertFile = join(scfg.TLSConfig.CertFile)
scfg.TLSConfig.KeyFile = join(scfg.TLSConfig.KeyFile)
} }
sdPaths := func(cfg *sd_config.ServiceDiscoveryConfig) { sdPaths := func(cfg *sd_config.ServiceDiscoveryConfig) {
for _, kcfg := range cfg.KubernetesSDConfigs { for _, kcfg := range cfg.KubernetesSDConfigs {
kcfg.BearerTokenFile = join(kcfg.BearerTokenFile) clientPaths(&kcfg.HTTPClientConfig)
kcfg.TLSConfig.CAFile = join(kcfg.TLSConfig.CAFile)
kcfg.TLSConfig.CertFile = join(kcfg.TLSConfig.CertFile)
kcfg.TLSConfig.KeyFile = join(kcfg.TLSConfig.KeyFile)
} }
for _, mcfg := range cfg.MarathonSDConfigs { for _, mcfg := range cfg.MarathonSDConfigs {
mcfg.AuthTokenFile = join(mcfg.AuthTokenFile) mcfg.AuthTokenFile = join(mcfg.AuthTokenFile)
mcfg.HTTPClientConfig.BearerTokenFile = join(mcfg.HTTPClientConfig.BearerTokenFile) clientPaths(&mcfg.HTTPClientConfig)
mcfg.HTTPClientConfig.TLSConfig.CAFile = join(mcfg.HTTPClientConfig.TLSConfig.CAFile)
mcfg.HTTPClientConfig.TLSConfig.CertFile = join(mcfg.HTTPClientConfig.TLSConfig.CertFile)
mcfg.HTTPClientConfig.TLSConfig.KeyFile = join(mcfg.HTTPClientConfig.TLSConfig.KeyFile)
} }
for _, consulcfg := range cfg.ConsulSDConfigs { for _, consulcfg := range cfg.ConsulSDConfigs {
consulcfg.TLSConfig.CAFile = join(consulcfg.TLSConfig.CAFile) tlsPaths(&consulcfg.TLSConfig)
consulcfg.TLSConfig.CertFile = join(consulcfg.TLSConfig.CertFile) }
consulcfg.TLSConfig.KeyFile = join(consulcfg.TLSConfig.KeyFile) for _, cfg := range cfg.OpenstackSDConfigs {
tlsPaths(&cfg.TLSConfig)
}
for _, cfg := range cfg.TritonSDConfigs {
tlsPaths(&cfg.TLSConfig)
} }
for _, filecfg := range cfg.FileSDConfigs { for _, filecfg := range cfg.FileSDConfigs {
for i, fn := range filecfg.Files { for i, fn := range filecfg.Files {
@ -198,6 +199,12 @@ func resolveFilepaths(baseDir string, cfg *Config) {
clientPaths(&cfg.HTTPClientConfig) clientPaths(&cfg.HTTPClientConfig)
sdPaths(&cfg.ServiceDiscoveryConfig) sdPaths(&cfg.ServiceDiscoveryConfig)
} }
for _, cfg := range cfg.RemoteReadConfigs {
clientPaths(&cfg.HTTPClientConfig)
}
for _, cfg := range cfg.RemoteWriteConfigs {
clientPaths(&cfg.HTTPClientConfig)
}
} }
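The refactored closures above all funnel through the join helper defined earlier in resolveFilepaths (outside this hunk). Under the assumption that it simply anchors relative paths at the config file's directory, a standalone sketch of that behavior:

package main

import (
	"fmt"
	"path/filepath"
)

// resolve mimics what join is assumed to do: leave empty and absolute
// paths untouched, and make relative ones absolute against baseDir.
func resolve(baseDir, fp string) string {
	if len(fp) > 0 && !filepath.IsAbs(fp) {
		return filepath.Join(baseDir, fp)
	}
	return fp
}

func main() {
	fmt.Println(resolve("/etc/prometheus", "valid_cert_file")) // /etc/prometheus/valid_cert_file
	fmt.Println(resolve("/etc/prometheus", "/abs/cert.pem"))   // unchanged
	fmt.Println(resolve("/etc/prometheus", ""))                // empty stays empty
}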
func (c Config) String() string { func (c Config) String() string {
@ -227,19 +234,22 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
for _, rf := range c.RuleFiles { for _, rf := range c.RuleFiles {
if !patRulePath.MatchString(rf) { if !patRulePath.MatchString(rf) {
return fmt.Errorf("invalid rule file path %q", rf) return errors.Errorf("invalid rule file path %q", rf)
} }
} }
// Do global overrides and validate unique names. // Do global overrides and validate unique names.
jobNames := map[string]struct{}{} jobNames := map[string]struct{}{}
for _, scfg := range c.ScrapeConfigs { for _, scfg := range c.ScrapeConfigs {
if scfg == nil {
return errors.New("empty or null scrape config section")
}
// First set the correct scrape interval, then check that the timeout // First set the correct scrape interval, then check that the timeout
// (inferred or explicit) is not greater than that. // (inferred or explicit) is not greater than that.
if scfg.ScrapeInterval == 0 { if scfg.ScrapeInterval == 0 {
scfg.ScrapeInterval = c.GlobalConfig.ScrapeInterval scfg.ScrapeInterval = c.GlobalConfig.ScrapeInterval
} }
if scfg.ScrapeTimeout > scfg.ScrapeInterval { if scfg.ScrapeTimeout > scfg.ScrapeInterval {
return fmt.Errorf("scrape timeout greater than scrape interval for scrape config with job name %q", scfg.JobName) return errors.Errorf("scrape timeout greater than scrape interval for scrape config with job name %q", scfg.JobName)
} }
if scfg.ScrapeTimeout == 0 { if scfg.ScrapeTimeout == 0 {
if c.GlobalConfig.ScrapeTimeout > scfg.ScrapeInterval { if c.GlobalConfig.ScrapeTimeout > scfg.ScrapeInterval {
@ -250,10 +260,20 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error {
} }
if _, ok := jobNames[scfg.JobName]; ok { if _, ok := jobNames[scfg.JobName]; ok {
return fmt.Errorf("found multiple scrape configs with job name %q", scfg.JobName) return errors.Errorf("found multiple scrape configs with job name %q", scfg.JobName)
} }
jobNames[scfg.JobName] = struct{}{} jobNames[scfg.JobName] = struct{}{}
} }
for _, rwcfg := range c.RemoteWriteConfigs {
if rwcfg == nil {
return errors.New("empty or null remote write config section")
}
}
for _, rrcfg := range c.RemoteReadConfigs {
if rrcfg == nil {
return errors.New("empty or null remote read config section")
}
}
return nil return nil
} }
@ -267,7 +287,7 @@ type GlobalConfig struct {
// How frequently to evaluate rules by default. // How frequently to evaluate rules by default.
EvaluationInterval model.Duration `yaml:"evaluation_interval,omitempty"` EvaluationInterval model.Duration `yaml:"evaluation_interval,omitempty"`
// The labels to add to any timeseries that this Prometheus instance scrapes. // The labels to add to any timeseries that this Prometheus instance scrapes.
ExternalLabels model.LabelSet `yaml:"external_labels,omitempty"` ExternalLabels labels.Labels `yaml:"external_labels,omitempty"`
} }
// UnmarshalYAML implements the yaml.Unmarshaler interface. // UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -280,13 +300,22 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err return err
} }
for _, l := range gc.ExternalLabels {
if !model.LabelName(l.Name).IsValid() {
return errors.Errorf("%q is not a valid label name", l.Name)
}
if !model.LabelValue(l.Value).IsValid() {
return errors.Errorf("%q is not a valid label value", l.Value)
}
}
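The new validation loop leans on the IsValid checks from github.com/prometheus/common/model; a quick standalone illustration of what they accept and reject:

package main

import (
	"fmt"

	"github.com/prometheus/common/model"
)

func main() {
	// Label names must look like [a-zA-Z_][a-zA-Z0-9_]*, so a colon is rejected.
	fmt.Println(model.LabelName("monitor").IsValid())     // true
	fmt.Println(model.LabelName("not:allowed").IsValid()) // false

	// Label values only need to be valid UTF-8; a raw \xff byte is not.
	fmt.Println(model.LabelValue("codelab").IsValid()) // true
	fmt.Println(model.LabelValue("\xff").IsValid())    // false
}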
// First set the correct scrape interval, then check that the timeout // First set the correct scrape interval, then check that the timeout
// (inferred or explicit) is not greater than that. // (inferred or explicit) is not greater than that.
if gc.ScrapeInterval == 0 { if gc.ScrapeInterval == 0 {
gc.ScrapeInterval = DefaultGlobalConfig.ScrapeInterval gc.ScrapeInterval = DefaultGlobalConfig.ScrapeInterval
} }
if gc.ScrapeTimeout > gc.ScrapeInterval { if gc.ScrapeTimeout > gc.ScrapeInterval {
return fmt.Errorf("global scrape timeout greater than scrape interval") return errors.New("global scrape timeout greater than scrape interval")
} }
if gc.ScrapeTimeout == 0 { if gc.ScrapeTimeout == 0 {
if DefaultGlobalConfig.ScrapeTimeout > gc.ScrapeInterval { if DefaultGlobalConfig.ScrapeTimeout > gc.ScrapeInterval {
@ -316,6 +345,8 @@ type ScrapeConfig struct {
JobName string `yaml:"job_name"` JobName string `yaml:"job_name"`
// Indicator whether the scraped metrics should remain unmodified. // Indicator whether the scraped metrics should remain unmodified.
HonorLabels bool `yaml:"honor_labels,omitempty"` HonorLabels bool `yaml:"honor_labels,omitempty"`
// Indicator whether the scraped timestamps should be respected.
HonorTimestamps bool `yaml:"honor_timestamps"`
// A set of query parameters with which the target is scraped. // A set of query parameters with which the target is scraped.
Params url.Values `yaml:"params,omitempty"` Params url.Values `yaml:"params,omitempty"`
// How frequently to scrape the targets of this scrape config. // How frequently to scrape the targets of this scrape config.
@ -336,9 +367,9 @@ type ScrapeConfig struct {
HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"` HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"`
// List of target relabel configurations. // List of target relabel configurations.
RelabelConfigs []*RelabelConfig `yaml:"relabel_configs,omitempty"` RelabelConfigs []*relabel.Config `yaml:"relabel_configs,omitempty"`
// List of metric relabel configurations. // List of metric relabel configurations.
MetricRelabelConfigs []*RelabelConfig `yaml:"metric_relabel_configs,omitempty"` MetricRelabelConfigs []*relabel.Config `yaml:"metric_relabel_configs,omitempty"`
} }
// UnmarshalYAML implements the yaml.Unmarshaler interface. // UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -350,7 +381,7 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err return err
} }
if len(c.JobName) == 0 { if len(c.JobName) == 0 {
return fmt.Errorf("job_name is empty") return errors.New("job_name is empty")
} }
// The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer. // The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer.
@ -360,6 +391,13 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err return err
} }
// The UnmarshalYAML method of ServiceDiscoveryConfig is not being called because it's not a pointer.
// We cannot make it a pointer as the parser panics for inlined pointer structs.
// Thus we just do its validation here.
if err := c.ServiceDiscoveryConfig.Validate(); err != nil {
return err
}
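The comment above describes a yaml.v2 quirk: an inlined (non-pointer) struct never gets its own UnmarshalYAML called, so validation has to happen in the enclosing type. A minimal sketch of the pattern with made-up types (inner stands in for ServiceDiscoveryConfig):

package main

import (
	"errors"
	"fmt"

	yaml "gopkg.in/yaml.v2"
)

type inner struct {
	Addr string `yaml:"addr"`
}

// validate stands in for ServiceDiscoveryConfig.Validate.
func (i *inner) validate() error {
	if i.Addr == "" {
		return errors.New("addr must not be empty")
	}
	return nil
}

type outer struct {
	Name  string `yaml:"name"`
	Inner inner  `yaml:",inline"` // not a pointer, so inner's own unmarshal hook is bypassed
}

func (o *outer) UnmarshalYAML(unmarshal func(interface{}) error) error {
	// The "type plain" alias drops UnmarshalYAML to avoid infinite recursion,
	// the same trick the config types above use.
	type plain outer
	if err := unmarshal((*plain)(o)); err != nil {
		return err
	}
	// The inlined struct's hook never ran, so validate it by hand here.
	return o.Inner.validate()
}

func main() {
	var ok outer
	fmt.Println(yaml.Unmarshal([]byte("name: x\naddr: localhost:9090"), &ok)) // <nil>

	var bad outer
	fmt.Println(yaml.Unmarshal([]byte("name: x"), &bad)) // addr must not be empty
}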
// Check for users putting URLs in target groups. // Check for users putting URLs in target groups.
if len(c.RelabelConfigs) == 0 { if len(c.RelabelConfigs) == 0 {
for _, tg := range c.ServiceDiscoveryConfig.StaticConfigs { for _, tg := range c.ServiceDiscoveryConfig.StaticConfigs {
@ -371,6 +409,17 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
} }
} }
for _, rlcfg := range c.RelabelConfigs {
if rlcfg == nil {
return errors.New("empty or null target relabeling rule in scrape config")
}
}
for _, rlcfg := range c.MetricRelabelConfigs {
if rlcfg == nil {
return errors.New("empty or null metric relabeling rule in scrape config")
}
}
// Add index to the static config target groups for unique identification // Add index to the static config target groups for unique identification
// within scrape pool. // within scrape pool.
for i, tg := range c.ServiceDiscoveryConfig.StaticConfigs { for i, tg := range c.ServiceDiscoveryConfig.StaticConfigs {
@ -382,7 +431,7 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
// AlertingConfig configures alerting and alertmanager related configs. // AlertingConfig configures alerting and alertmanager related configs.
type AlertingConfig struct { type AlertingConfig struct {
AlertRelabelConfigs []*RelabelConfig `yaml:"alert_relabel_configs,omitempty"` AlertRelabelConfigs []*relabel.Config `yaml:"alert_relabel_configs,omitempty"`
AlertmanagerConfigs []*AlertmanagerConfig `yaml:"alertmanagers,omitempty"` AlertmanagerConfigs []*AlertmanagerConfig `yaml:"alertmanagers,omitempty"`
} }
@ -392,7 +441,16 @@ func (c *AlertingConfig) UnmarshalYAML(unmarshal func(interface{}) error) error
// by the default due to the YAML parser behavior for empty blocks. // by the default due to the YAML parser behavior for empty blocks.
*c = AlertingConfig{} *c = AlertingConfig{}
type plain AlertingConfig type plain AlertingConfig
return unmarshal((*plain)(c)) if err := unmarshal((*plain)(c)); err != nil {
return err
}
for _, rlcfg := range c.AlertRelabelConfigs {
if rlcfg == nil {
return errors.New("empty or null alert relabeling rule")
}
}
return nil
} }
// AlertmanagerConfig configures how Alertmanagers can be discovered and communicated with. // AlertmanagerConfig configures how Alertmanagers can be discovered and communicated with.
@ -411,7 +469,7 @@ type AlertmanagerConfig struct {
Timeout model.Duration `yaml:"timeout,omitempty"` Timeout model.Duration `yaml:"timeout,omitempty"`
// List of Alertmanager relabel configurations. // List of Alertmanager relabel configurations.
RelabelConfigs []*RelabelConfig `yaml:"relabel_configs,omitempty"` RelabelConfigs []*relabel.Config `yaml:"relabel_configs,omitempty"`
} }
// UnmarshalYAML implements the yaml.Unmarshaler interface. // UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -429,6 +487,13 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er
return err return err
} }
// The UnmarshalYAML method of ServiceDiscoveryConfig is not being called because it's not a pointer.
// We cannot make it a pointer as the parser panics for inlined pointer structs.
// Thus we just do its validation here.
if err := c.ServiceDiscoveryConfig.Validate(); err != nil {
return err
}
// Check for users putting URLs in target groups. // Check for users putting URLs in target groups.
if len(c.RelabelConfigs) == 0 { if len(c.RelabelConfigs) == 0 {
for _, tg := range c.ServiceDiscoveryConfig.StaticConfigs { for _, tg := range c.ServiceDiscoveryConfig.StaticConfigs {
@ -440,6 +505,12 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er
} }
} }
for _, rlcfg := range c.RelabelConfigs {
if rlcfg == nil {
return errors.New("empty or null Alertmanager target relabeling rule")
}
}
// Add index to the static config target groups for unique identification // Add index to the static config target groups for unique identification
// within scrape pool. // within scrape pool.
for i, tg := range c.ServiceDiscoveryConfig.StaticConfigs { for i, tg := range c.ServiceDiscoveryConfig.StaticConfigs {
@ -453,7 +524,7 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er
func CheckTargetAddress(address model.LabelValue) error { func CheckTargetAddress(address model.LabelValue) error {
// For now check for a URL, we may want to expand this later. // For now check for a URL, we may want to expand this later.
if strings.Contains(string(address), "/") { if strings.Contains(string(address), "/") {
return fmt.Errorf("%q is not a valid hostname", address) return errors.Errorf("%q is not a valid hostname", address)
} }
return nil return nil
} }
@ -470,151 +541,11 @@ type FileSDConfig struct {
RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
} }
// RelabelAction is the action to be performed on relabeling.
type RelabelAction string
const (
// RelabelReplace performs a regex replacement.
RelabelReplace RelabelAction = "replace"
// RelabelKeep drops targets for which the input does not match the regex.
RelabelKeep RelabelAction = "keep"
// RelabelDrop drops targets for which the input does match the regex.
RelabelDrop RelabelAction = "drop"
// RelabelHashMod sets a label to the modulus of a hash of labels.
RelabelHashMod RelabelAction = "hashmod"
// RelabelLabelMap copies labels to other labelnames based on a regex.
RelabelLabelMap RelabelAction = "labelmap"
// RelabelLabelDrop drops any label matching the regex.
RelabelLabelDrop RelabelAction = "labeldrop"
// RelabelLabelKeep drops any label not matching the regex.
RelabelLabelKeep RelabelAction = "labelkeep"
)
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (a *RelabelAction) UnmarshalYAML(unmarshal func(interface{}) error) error {
var s string
if err := unmarshal(&s); err != nil {
return err
}
switch act := RelabelAction(strings.ToLower(s)); act {
case RelabelReplace, RelabelKeep, RelabelDrop, RelabelHashMod, RelabelLabelMap, RelabelLabelDrop, RelabelLabelKeep:
*a = act
return nil
}
return fmt.Errorf("unknown relabel action %q", s)
}
// RelabelConfig is the configuration for relabeling of target label sets.
type RelabelConfig struct {
// A list of labels from which values are taken and concatenated
// with the configured separator in order.
SourceLabels model.LabelNames `yaml:"source_labels,flow,omitempty"`
// Separator is the string between concatenated values from the source labels.
Separator string `yaml:"separator,omitempty"`
// Regex against which the concatenation is matched.
Regex Regexp `yaml:"regex,omitempty"`
// Modulus to take of the hash of concatenated values from the source labels.
Modulus uint64 `yaml:"modulus,omitempty"`
// TargetLabel is the label to which the resulting string is written in a replacement.
// Regexp interpolation is allowed for the replace action.
TargetLabel string `yaml:"target_label,omitempty"`
// Replacement is the regex replacement pattern to be used.
Replacement string `yaml:"replacement,omitempty"`
// Action is the action to be performed for the relabeling.
Action RelabelAction `yaml:"action,omitempty"`
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (c *RelabelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
*c = DefaultRelabelConfig
type plain RelabelConfig
if err := unmarshal((*plain)(c)); err != nil {
return err
}
if c.Regex.Regexp == nil {
c.Regex = MustNewRegexp("")
}
if c.Modulus == 0 && c.Action == RelabelHashMod {
return fmt.Errorf("relabel configuration for hashmod requires non-zero modulus")
}
if (c.Action == RelabelReplace || c.Action == RelabelHashMod) && c.TargetLabel == "" {
return fmt.Errorf("relabel configuration for %s action requires 'target_label' value", c.Action)
}
if c.Action == RelabelReplace && !relabelTarget.MatchString(c.TargetLabel) {
return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action)
}
if c.Action == RelabelLabelMap && !relabelTarget.MatchString(c.Replacement) {
return fmt.Errorf("%q is invalid 'replacement' for %s action", c.Replacement, c.Action)
}
if c.Action == RelabelHashMod && !model.LabelName(c.TargetLabel).IsValid() {
return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action)
}
if c.Action == RelabelLabelDrop || c.Action == RelabelLabelKeep {
if c.SourceLabels != nil ||
c.TargetLabel != DefaultRelabelConfig.TargetLabel ||
c.Modulus != DefaultRelabelConfig.Modulus ||
c.Separator != DefaultRelabelConfig.Separator ||
c.Replacement != DefaultRelabelConfig.Replacement {
return fmt.Errorf("%s action requires only 'regex', and no other fields", c.Action)
}
}
return nil
}
// Regexp encapsulates a regexp.Regexp and makes it YAML marshallable.
type Regexp struct {
*regexp.Regexp
original string
}
// NewRegexp creates a new anchored Regexp and returns an error if the
// passed-in regular expression does not compile.
func NewRegexp(s string) (Regexp, error) {
regex, err := regexp.Compile("^(?:" + s + ")$")
return Regexp{
Regexp: regex,
original: s,
}, err
}
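NewRegexp (which this change relocates to pkg/relabel) wraps every pattern so that it must match the whole input, not just a substring; a quick demonstration of that anchoring using the stdlib directly:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	s := "expensive.*"
	// Same construction as NewRegexp: ^(?:...)$ forces a full-string match.
	re := regexp.MustCompile("^(?:" + s + ")$")

	fmt.Println(re.MatchString("expensive_metric_total")) // true: the whole string matches
	fmt.Println(re.MatchString("not_expensive_metric"))   // false: anchoring kills the substring match
}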
// MustNewRegexp works like NewRegexp, but panics if the regular expression does not compile.
func MustNewRegexp(s string) Regexp {
re, err := NewRegexp(s)
if err != nil {
panic(err)
}
return re
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
func (re *Regexp) UnmarshalYAML(unmarshal func(interface{}) error) error {
var s string
if err := unmarshal(&s); err != nil {
return err
}
r, err := NewRegexp(s)
if err != nil {
return err
}
*re = r
return nil
}
// MarshalYAML implements the yaml.Marshaler interface.
func (re Regexp) MarshalYAML() (interface{}, error) {
if re.original != "" {
return re.original, nil
}
return nil, nil
}
// RemoteWriteConfig is the configuration for writing to remote storage. // RemoteWriteConfig is the configuration for writing to remote storage.
type RemoteWriteConfig struct { type RemoteWriteConfig struct {
URL *config_util.URL `yaml:"url"` URL *config_util.URL `yaml:"url"`
RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"` RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"`
WriteRelabelConfigs []*RelabelConfig `yaml:"write_relabel_configs,omitempty"` WriteRelabelConfigs []*relabel.Config `yaml:"write_relabel_configs,omitempty"`
// We cannot do proper Go type embedding below as the parser will then parse // We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types. // values arbitrarily into the overflow maps of further-down types.
@ -630,7 +561,12 @@ func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
return err return err
} }
if c.URL == nil { if c.URL == nil {
return fmt.Errorf("url for remote_write is empty") return errors.New("url for remote_write is empty")
}
for _, rlcfg := range c.WriteRelabelConfigs {
if rlcfg == nil {
return errors.New("empty or null relabeling rule in remote write config")
}
} }
// The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer. // The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer.
@ -648,15 +584,15 @@ type QueueConfig struct {
// Max number of shards, i.e. amount of concurrency. // Max number of shards, i.e. amount of concurrency.
MaxShards int `yaml:"max_shards,omitempty"` MaxShards int `yaml:"max_shards,omitempty"`
// Min number of shards, i.e. amount of concurrency.
MinShards int `yaml:"min_shards,omitempty"`
// Maximum number of samples per send. // Maximum number of samples per send.
MaxSamplesPerSend int `yaml:"max_samples_per_send,omitempty"` MaxSamplesPerSend int `yaml:"max_samples_per_send,omitempty"`
// Maximum time sample will wait in buffer. // Maximum time sample will wait in buffer.
BatchSendDeadline model.Duration `yaml:"batch_send_deadline,omitempty"` BatchSendDeadline model.Duration `yaml:"batch_send_deadline,omitempty"`
// Max number of times to retry a batch on recoverable errors.
MaxRetries int `yaml:"max_retries,omitempty"`
// On recoverable errors, backoff exponentially. // On recoverable errors, backoff exponentially.
MinBackoff model.Duration `yaml:"min_backoff,omitempty"` MinBackoff model.Duration `yaml:"min_backoff,omitempty"`
MaxBackoff model.Duration `yaml:"max_backoff,omitempty"` MaxBackoff model.Duration `yaml:"max_backoff,omitempty"`
@ -684,7 +620,7 @@ func (c *RemoteReadConfig) UnmarshalYAML(unmarshal func(interface{}) error) erro
return err return err
} }
if c.URL == nil { if c.URL == nil {
return fmt.Errorf("url for remote_read is empty") return errors.New("url for remote_read is empty")
} }
// The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer. // The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer.
// We cannot make it a pointer as the parser panics for inlined pointer structs. // We cannot make it a pointer as the parser panics for inlined pointer structs.
@ -23,7 +23,13 @@ import (
"testing" "testing"
"time" "time"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/discovery/azure" "github.com/prometheus/prometheus/discovery/azure"
sd_config "github.com/prometheus/prometheus/discovery/config"
"github.com/prometheus/prometheus/discovery/consul" "github.com/prometheus/prometheus/discovery/consul"
"github.com/prometheus/prometheus/discovery/dns" "github.com/prometheus/prometheus/discovery/dns"
"github.com/prometheus/prometheus/discovery/ec2" "github.com/prometheus/prometheus/discovery/ec2"
@ -34,12 +40,9 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/discovery/triton" "github.com/prometheus/prometheus/discovery/triton"
"github.com/prometheus/prometheus/discovery/zookeeper" "github.com/prometheus/prometheus/discovery/zookeeper"
"github.com/prometheus/prometheus/pkg/labels"
config_util "github.com/prometheus/common/config" "github.com/prometheus/prometheus/pkg/relabel"
"github.com/prometheus/common/model"
sd_config "github.com/prometheus/prometheus/discovery/config"
"github.com/prometheus/prometheus/util/testutil" "github.com/prometheus/prometheus/util/testutil"
"gopkg.in/yaml.v2"
) )
func mustParseURL(u string) *config_util.URL { func mustParseURL(u string) *config_util.URL {
@ -56,9 +59,9 @@ var expectedConf = &Config{
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
EvaluationInterval: model.Duration(30 * time.Second), EvaluationInterval: model.Duration(30 * time.Second),
ExternalLabels: model.LabelSet{ ExternalLabels: labels.Labels{
"monitor": "codelab", {Name: "foo", Value: "bar"},
"foo": "bar", {Name: "monitor", Value: "codelab"},
}, },
}, },
@ -71,13 +74,13 @@ var expectedConf = &Config{
{ {
URL: mustParseURL("http://remote1/push"), URL: mustParseURL("http://remote1/push"),
RemoteTimeout: model.Duration(30 * time.Second), RemoteTimeout: model.Duration(30 * time.Second),
WriteRelabelConfigs: []*RelabelConfig{ WriteRelabelConfigs: []*relabel.Config{
{ {
SourceLabels: model.LabelNames{"__name__"}, SourceLabels: model.LabelNames{"__name__"},
Separator: ";", Separator: ";",
Regex: MustNewRegexp("expensive.*"), Regex: relabel.MustNewRegexp("expensive.*"),
Replacement: "$1", Replacement: "$1",
Action: RelabelDrop, Action: relabel.Drop,
}, },
}, },
QueueConfig: DefaultQueueConfig, QueueConfig: DefaultQueueConfig,
@ -86,6 +89,12 @@ var expectedConf = &Config{
URL: mustParseURL("http://remote2/push"), URL: mustParseURL("http://remote2/push"),
RemoteTimeout: model.Duration(30 * time.Second), RemoteTimeout: model.Duration(30 * time.Second),
QueueConfig: DefaultQueueConfig, QueueConfig: DefaultQueueConfig,
HTTPClientConfig: config_util.HTTPClientConfig{
TLSConfig: config_util.TLSConfig{
CertFile: filepath.FromSlash("testdata/valid_cert_file"),
KeyFile: filepath.FromSlash("testdata/valid_key_file"),
},
},
}, },
}, },
@ -100,6 +109,12 @@ var expectedConf = &Config{
RemoteTimeout: model.Duration(1 * time.Minute), RemoteTimeout: model.Duration(1 * time.Minute),
ReadRecent: false, ReadRecent: false,
RequiredMatchers: model.LabelSet{"job": "special"}, RequiredMatchers: model.LabelSet{"job": "special"},
HTTPClientConfig: config_util.HTTPClientConfig{
TLSConfig: config_util.TLSConfig{
CertFile: filepath.FromSlash("testdata/valid_cert_file"),
KeyFile: filepath.FromSlash("testdata/valid_key_file"),
},
},
}, },
}, },
@ -108,6 +123,7 @@ var expectedConf = &Config{
JobName: "prometheus", JobName: "prometheus",
HonorLabels: true, HonorLabels: true,
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second), ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
@ -145,33 +161,33 @@ var expectedConf = &Config{
}, },
}, },
RelabelConfigs: []*RelabelConfig{ RelabelConfigs: []*relabel.Config{
{ {
SourceLabels: model.LabelNames{"job", "__meta_dns_name"}, SourceLabels: model.LabelNames{"job", "__meta_dns_name"},
TargetLabel: "job", TargetLabel: "job",
Separator: ";", Separator: ";",
Regex: MustNewRegexp("(.*)some-[regex]"), Regex: relabel.MustNewRegexp("(.*)some-[regex]"),
Replacement: "foo-${1}", Replacement: "foo-${1}",
Action: RelabelReplace, Action: relabel.Replace,
}, { }, {
SourceLabels: model.LabelNames{"abc"}, SourceLabels: model.LabelNames{"abc"},
TargetLabel: "cde", TargetLabel: "cde",
Separator: ";", Separator: ";",
Regex: DefaultRelabelConfig.Regex, Regex: relabel.DefaultRelabelConfig.Regex,
Replacement: DefaultRelabelConfig.Replacement, Replacement: relabel.DefaultRelabelConfig.Replacement,
Action: RelabelReplace, Action: relabel.Replace,
}, { }, {
TargetLabel: "abc", TargetLabel: "abc",
Separator: ";", Separator: ";",
Regex: DefaultRelabelConfig.Regex, Regex: relabel.DefaultRelabelConfig.Regex,
Replacement: "static", Replacement: "static",
Action: RelabelReplace, Action: relabel.Replace,
}, { }, {
TargetLabel: "abc", TargetLabel: "abc",
Separator: ";", Separator: ";",
Regex: MustNewRegexp(""), Regex: relabel.MustNewRegexp(""),
Replacement: "static", Replacement: "static",
Action: RelabelReplace, Action: relabel.Replace,
}, },
}, },
}, },
@ -179,6 +195,7 @@ var expectedConf = &Config{
JobName: "service-x", JobName: "service-x",
HonorTimestamps: true,
ScrapeInterval: model.Duration(50 * time.Second), ScrapeInterval: model.Duration(50 * time.Second),
ScrapeTimeout: model.Duration(5 * time.Second), ScrapeTimeout: model.Duration(5 * time.Second),
SampleLimit: 1000, SampleLimit: 1000,
@ -212,62 +229,63 @@ var expectedConf = &Config{
}, },
}, },
RelabelConfigs: []*RelabelConfig{ RelabelConfigs: []*relabel.Config{
{ {
SourceLabels: model.LabelNames{"job"}, SourceLabels: model.LabelNames{"job"},
Regex: MustNewRegexp("(.*)some-[regex]"), Regex: relabel.MustNewRegexp("(.*)some-[regex]"),
Separator: ";", Separator: ";",
Replacement: DefaultRelabelConfig.Replacement, Replacement: relabel.DefaultRelabelConfig.Replacement,
Action: RelabelDrop, Action: relabel.Drop,
}, },
{ {
SourceLabels: model.LabelNames{"__address__"}, SourceLabels: model.LabelNames{"__address__"},
TargetLabel: "__tmp_hash", TargetLabel: "__tmp_hash",
Regex: DefaultRelabelConfig.Regex, Regex: relabel.DefaultRelabelConfig.Regex,
Replacement: DefaultRelabelConfig.Replacement, Replacement: relabel.DefaultRelabelConfig.Replacement,
Modulus: 8, Modulus: 8,
Separator: ";", Separator: ";",
Action: RelabelHashMod, Action: relabel.HashMod,
}, },
{ {
SourceLabels: model.LabelNames{"__tmp_hash"}, SourceLabels: model.LabelNames{"__tmp_hash"},
Regex: MustNewRegexp("1"), Regex: relabel.MustNewRegexp("1"),
Separator: ";", Separator: ";",
Replacement: DefaultRelabelConfig.Replacement, Replacement: relabel.DefaultRelabelConfig.Replacement,
Action: RelabelKeep, Action: relabel.Keep,
}, },
{ {
Regex: MustNewRegexp("1"), Regex: relabel.MustNewRegexp("1"),
Separator: ";", Separator: ";",
Replacement: DefaultRelabelConfig.Replacement, Replacement: relabel.DefaultRelabelConfig.Replacement,
Action: RelabelLabelMap, Action: relabel.LabelMap,
}, },
{ {
Regex: MustNewRegexp("d"), Regex: relabel.MustNewRegexp("d"),
Separator: ";", Separator: ";",
Replacement: DefaultRelabelConfig.Replacement, Replacement: relabel.DefaultRelabelConfig.Replacement,
Action: RelabelLabelDrop, Action: relabel.LabelDrop,
}, },
{ {
Regex: MustNewRegexp("k"), Regex: relabel.MustNewRegexp("k"),
Separator: ";", Separator: ";",
Replacement: DefaultRelabelConfig.Replacement, Replacement: relabel.DefaultRelabelConfig.Replacement,
Action: RelabelLabelKeep, Action: relabel.LabelKeep,
}, },
}, },
MetricRelabelConfigs: []*RelabelConfig{ MetricRelabelConfigs: []*relabel.Config{
{ {
SourceLabels: model.LabelNames{"__name__"}, SourceLabels: model.LabelNames{"__name__"},
Regex: MustNewRegexp("expensive_metric.*"), Regex: relabel.MustNewRegexp("expensive_metric.*"),
Separator: ";", Separator: ";",
Replacement: DefaultRelabelConfig.Replacement, Replacement: relabel.DefaultRelabelConfig.Replacement,
Action: RelabelDrop, Action: relabel.Drop,
}, },
}, },
}, },
{ {
JobName: "service-y", JobName: "service-y",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second), ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
@ -280,7 +298,7 @@ var expectedConf = &Config{
Server: "localhost:1234", Server: "localhost:1234",
Token: "mysecret", Token: "mysecret",
Services: []string{"nginx", "cache", "mysql"}, Services: []string{"nginx", "cache", "mysql"},
ServiceTag: "canary", ServiceTags: []string{"canary", "v1"},
NodeMeta: map[string]string{"rack": "123"}, NodeMeta: map[string]string{"rack": "123"},
TagSeparator: consul.DefaultSDConfig.TagSeparator, TagSeparator: consul.DefaultSDConfig.TagSeparator,
Scheme: "https", Scheme: "https",
@ -296,20 +314,21 @@ var expectedConf = &Config{
}, },
}, },
RelabelConfigs: []*RelabelConfig{ RelabelConfigs: []*relabel.Config{
{ {
SourceLabels: model.LabelNames{"__meta_sd_consul_tags"}, SourceLabels: model.LabelNames{"__meta_sd_consul_tags"},
Regex: MustNewRegexp("label:([^=]+)=([^,]+)"), Regex: relabel.MustNewRegexp("label:([^=]+)=([^,]+)"),
Separator: ",", Separator: ",",
TargetLabel: "${1}", TargetLabel: "${1}",
Replacement: "${2}", Replacement: "${2}",
Action: RelabelReplace, Action: relabel.Replace,
}, },
}, },
}, },
{ {
JobName: "service-z", JobName: "service-z",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second), ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: model.Duration(10 * time.Second), ScrapeTimeout: model.Duration(10 * time.Second),
@ -328,6 +347,7 @@ var expectedConf = &Config{
{ {
JobName: "service-kubernetes", JobName: "service-kubernetes",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second), ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
@ -339,10 +359,16 @@ var expectedConf = &Config{
{ {
APIServer: kubernetesSDHostURL(), APIServer: kubernetesSDHostURL(),
Role: kubernetes.RoleEndpoint, Role: kubernetes.RoleEndpoint,
HTTPClientConfig: config_util.HTTPClientConfig{
BasicAuth: &config_util.BasicAuth{ BasicAuth: &config_util.BasicAuth{
Username: "myusername", Username: "myusername",
Password: "mysecret", Password: "mysecret",
}, },
TLSConfig: config_util.TLSConfig{
CertFile: filepath.FromSlash("testdata/valid_cert_file"),
KeyFile: filepath.FromSlash("testdata/valid_key_file"),
},
},
NamespaceDiscovery: kubernetes.NamespaceDiscovery{}, NamespaceDiscovery: kubernetes.NamespaceDiscovery{},
}, },
}, },
@ -351,11 +377,18 @@ var expectedConf = &Config{
{ {
JobName: "service-kubernetes-namespaces", JobName: "service-kubernetes-namespaces",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second), ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
MetricsPath: DefaultScrapeConfig.MetricsPath, MetricsPath: DefaultScrapeConfig.MetricsPath,
Scheme: DefaultScrapeConfig.Scheme, Scheme: DefaultScrapeConfig.Scheme,
HTTPClientConfig: config_util.HTTPClientConfig{
BasicAuth: &config_util.BasicAuth{
Username: "myusername",
PasswordFile: filepath.FromSlash("testdata/valid_password_file"),
},
},
ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{
KubernetesSDConfigs: []*kubernetes.SDConfig{ KubernetesSDConfigs: []*kubernetes.SDConfig{
@ -374,6 +407,7 @@ var expectedConf = &Config{
{ {
JobName: "service-marathon", JobName: "service-marathon",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second), ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
@ -401,6 +435,7 @@ var expectedConf = &Config{
{ {
JobName: "service-ec2", JobName: "service-ec2",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second), ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
@ -433,6 +468,7 @@ var expectedConf = &Config{
{ {
JobName: "service-azure", JobName: "service-azure",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second), ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
@ -447,6 +483,7 @@ var expectedConf = &Config{
TenantID: "BBBB222B-B2B2-2B22-B222-2BB2222BB2B2", TenantID: "BBBB222B-B2B2-2B22-B222-2BB2222BB2B2",
ClientID: "333333CC-3C33-3333-CCC3-33C3CCCCC33C", ClientID: "333333CC-3C33-3333-CCC3-33C3CCCCC33C",
ClientSecret: "mysecret", ClientSecret: "mysecret",
AuthenticationMethod: "OAuth",
RefreshInterval: model.Duration(5 * time.Minute), RefreshInterval: model.Duration(5 * time.Minute),
Port: 9100, Port: 9100,
}, },
@ -456,6 +493,7 @@ var expectedConf = &Config{
{ {
JobName: "service-nerve", JobName: "service-nerve",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second), ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
@ -475,6 +513,7 @@ var expectedConf = &Config{
{ {
JobName: "0123service-xxx", JobName: "0123service-xxx",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second), ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
@ -492,9 +531,31 @@ var expectedConf = &Config{
}, },
}, },
}, },
{
JobName: "badfederation",
HonorTimestamps: false,
ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
MetricsPath: "/federate",
Scheme: DefaultScrapeConfig.Scheme,
ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{
StaticConfigs: []*targetgroup.Group{
{
Targets: []model.LabelSet{
{model.AddressLabel: "localhost:9090"},
},
Source: "0",
},
},
},
},
{ {
JobName: "測試", JobName: "測試",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second), ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
@ -515,6 +576,7 @@ var expectedConf = &Config{
{ {
JobName: "service-triton", JobName: "service-triton",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second), ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
@ -542,6 +604,7 @@ var expectedConf = &Config{
{ {
JobName: "service-openstack", JobName: "service-openstack",
HonorTimestamps: true,
ScrapeInterval: model.Duration(15 * time.Second), ScrapeInterval: model.Duration(15 * time.Second),
ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout,
@ -556,9 +619,9 @@ var expectedConf = &Config{
Port: 80, Port: 80,
RefreshInterval: model.Duration(60 * time.Second), RefreshInterval: model.Duration(60 * time.Second),
TLSConfig: config_util.TLSConfig{ TLSConfig: config_util.TLSConfig{
CAFile: "valid_ca_file", CAFile: "testdata/valid_ca_file",
CertFile: "valid_cert_file", CertFile: "testdata/valid_cert_file",
KeyFile: "valid_key_file", KeyFile: "testdata/valid_key_file",
}, },
}, },
}, },
@ -598,10 +661,10 @@ func TestLoadConfig(t *testing.T) {
testutil.Ok(t, err) testutil.Ok(t, err)
expectedConf.original = c.original expectedConf.original = c.original
testutil.Equals(t, expectedConf, c) assert.Equal(t, expectedConf, c)
} }
// YAML marshalling must not reveal authentication credentials. // YAML marshaling must not reveal authentication credentials.
func TestElideSecrets(t *testing.T) { func TestElideSecrets(t *testing.T) {
c, err := LoadFile("testdata/conf.good.yml") c, err := LoadFile("testdata/conf.good.yml")
testutil.Ok(t, err) testutil.Ok(t, err)
@ -627,6 +690,11 @@ func TestLoadConfigRuleFilesAbsolutePath(t *testing.T) {
testutil.Equals(t, ruleFilesExpectedConf, c) testutil.Equals(t, ruleFilesExpectedConf, c)
} }
func TestKubernetesEmptyAPIServer(t *testing.T) {
_, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml")
testutil.Ok(t, err)
}
var expectedErrors = []struct { var expectedErrors = []struct {
filename string filename string
errMsg string errMsg string
@ -646,6 +714,9 @@ var expectedErrors = []struct {
}, { }, {
filename: "labelname2.bad.yml", filename: "labelname2.bad.yml",
errMsg: `"not:allowed" is not a valid label name`, errMsg: `"not:allowed" is not a valid label name`,
}, {
filename: "labelvalue.bad.yml",
errMsg: `"\xff" is not a valid label value`,
}, { }, {
filename: "regex.bad.yml", filename: "regex.bad.yml",
errMsg: "error parsing regexp", errMsg: "error parsing regexp",
@ -697,6 +768,9 @@ var expectedErrors = []struct {
}, { }, {
filename: "bearertoken_basicauth.bad.yml", filename: "bearertoken_basicauth.bad.yml",
errMsg: "at most one of basic_auth, bearer_token & bearer_token_file must be configured", errMsg: "at most one of basic_auth, bearer_token & bearer_token_file must be configured",
}, {
filename: "kubernetes_http_config_without_api_server.bad.yml",
errMsg: "to use custom HTTP client configuration please provide the 'api_server' URL explicitly",
}, { }, {
filename: "kubernetes_bearertoken.bad.yml", filename: "kubernetes_bearertoken.bad.yml",
errMsg: "at most one of bearer_token & bearer_token_file must be configured", errMsg: "at most one of bearer_token & bearer_token_file must be configured",
@ -751,6 +825,62 @@ var expectedErrors = []struct {
filename: "section_key_dup.bad.yml", filename: "section_key_dup.bad.yml",
errMsg: "field scrape_configs already set in type config.plain", errMsg: "field scrape_configs already set in type config.plain",
}, },
{
filename: "azure_client_id_missing.bad.yml",
errMsg: "azure SD configuration requires a client_id",
},
{
filename: "azure_client_secret_missing.bad.yml",
errMsg: "azure SD configuration requires a client_secret",
},
{
filename: "azure_subscription_id_missing.bad.yml",
errMsg: "azure SD configuration requires a subscription_id",
},
{
filename: "azure_tenant_id_missing.bad.yml",
errMsg: "azure SD configuration requires a tenant_id",
},
{
filename: "azure_authentication_method.bad.yml",
errMsg: "unknown authentication_type \"invalid\". Supported types are \"OAuth\" or \"ManagedIdentity\"",
},
{
filename: "empty_scrape_config.bad.yml",
errMsg: "empty or null scrape config section",
},
{
filename: "empty_rw_config.bad.yml",
errMsg: "empty or null remote write config section",
},
{
filename: "empty_rr_config.bad.yml",
errMsg: "empty or null remote read config section",
},
{
filename: "empty_target_relabel_config.bad.yml",
errMsg: "empty or null target relabeling rule",
},
{
filename: "empty_metric_relabel_config.bad.yml",
errMsg: "empty or null metric relabeling rule",
},
{
filename: "empty_alert_relabel_config.bad.yml",
errMsg: "empty or null alert relabeling rule",
},
{
filename: "empty_alertmanager_relabel_config.bad.yml",
errMsg: "empty or null Alertmanager target relabeling rule",
},
{
filename: "empty_rw_relabel_config.bad.yml",
errMsg: "empty or null relabeling rule in remote write config",
},
{
filename: "empty_static_config.bad.yml",
errMsg: "empty or null section in static_configs",
},
} }
func TestBadConfigs(t *testing.T) { func TestBadConfigs(t *testing.T) {
@ -793,33 +923,6 @@ func TestEmptyGlobalBlock(t *testing.T) {
testutil.Equals(t, exp, *c) testutil.Equals(t, exp, *c)
} }
func TestTargetLabelValidity(t *testing.T) {
tests := []struct {
str string
valid bool
}{
{"-label", false},
{"label", true},
{"label${1}", true},
{"${1}label", true},
{"${1}", true},
{"${1}label", true},
{"${", false},
{"$", false},
{"${}", false},
{"foo${", false},
{"$1", true},
{"asd$2asd", true},
{"-foo${1}bar-", false},
{"_${1}_", true},
{"foo${bar}foo", true},
}
for _, test := range tests {
testutil.Assert(t, relabelTarget.Match([]byte(test.str)) == test.valid,
"Expected %q to be %v", test.str, test.valid)
}
}
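The deleted test doubles as documentation for the target-label pattern, which moves to pkg/relabel along with the rest of the relabeling code. A standalone check against the same expression (copied from the relabelTarget variable removed at the top of this file):

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Ordinary label characters plus $1 / ${1} capture-group references.
	relabelTarget := regexp.MustCompile(`^(?:(?:[a-zA-Z_]|\$(?:\{\w+\}|\w+))+\w*)+$`)

	for _, s := range []string{"label${1}", "$1", "-label", "${}"} {
		fmt.Printf("%-12q valid=%v\n", s, relabelTarget.MatchString(s))
	}
	// label${1} and $1 pass; -label and ${} fail, matching the table above.
}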
func kubernetesSDHostURL() config_util.URL { func kubernetesSDHostURL() config_util.URL {
tURL, _ := url.Parse("https://localhost:1234") tURL, _ := url.Parse("https://localhost:1234")
return config_util.URL{URL: tURL} return config_util.URL{URL: tURL}
@ -0,0 +1,4 @@
scrape_configs:
- azure_sd_configs:
  - authentication_method: invalid
    subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11
@ -0,0 +1,7 @@
scrape_configs:
- job_name: azure
  azure_sd_configs:
  - subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11
    tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2
    client_id:
    client_secret: mysecret
@ -0,0 +1,7 @@
scrape_configs:
- job_name: azure
  azure_sd_configs:
  - subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11
    tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2
    client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C
    client_secret:
@ -0,0 +1,7 @@
scrape_configs:
- job_name: azure
  azure_sd_configs:
  - subscription_id:
    tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2
    client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C
    client_secret: mysecret
@ -0,0 +1,7 @@
scrape_configs:
- job_name: azure
  azure_sd_configs:
  - subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11
    tenant_id:
    client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C
    client_secret: mysecret
@ -19,6 +19,9 @@ remote_write:
regex: expensive.* regex: expensive.*
action: drop action: drop
- url: http://remote2/push - url: http://remote2/push
tls_config:
cert_file: valid_cert_file
key_file: valid_key_file
remote_read: remote_read:
- url: http://remote1/read - url: http://remote1/read
@ -27,6 +30,9 @@ remote_read:
read_recent: false read_recent: false
required_matchers: required_matchers:
job: special job: special
tls_config:
cert_file: valid_cert_file
key_file: valid_key_file
scrape_configs: scrape_configs:
- job_name: prometheus - job_name: prometheus
@ -122,7 +128,7 @@ scrape_configs:
- server: 'localhost:1234' - server: 'localhost:1234'
token: mysecret token: mysecret
services: ['nginx', 'cache', 'mysql'] services: ['nginx', 'cache', 'mysql']
tag: "canary" tags: ["canary", "v1"]
node_meta: node_meta:
rack: "123" rack: "123"
allow_stale: true allow_stale: true
@ -153,6 +159,9 @@ scrape_configs:
kubernetes_sd_configs: kubernetes_sd_configs:
- role: endpoints - role: endpoints
api_server: 'https://localhost:1234' api_server: 'https://localhost:1234'
tls_config:
cert_file: valid_cert_file
key_file: valid_key_file
basic_auth: basic_auth:
username: 'myusername' username: 'myusername'
@ -167,6 +176,11 @@ scrape_configs:
names: names:
- default - default
basic_auth:
username: 'myusername'
password_file: valid_password_file
- job_name: service-marathon - job_name: service-marathon
marathon_sd_configs: marathon_sd_configs:
- servers: - servers:
@ -196,6 +210,7 @@ scrape_configs:
- job_name: service-azure - job_name: service-azure
azure_sd_configs: azure_sd_configs:
- environment: AzurePublicCloud - environment: AzurePublicCloud
authentication_method: OAuth
subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11 subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11
tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2 tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2
client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C
@ -215,6 +230,13 @@ scrape_configs:
- targets: - targets:
- localhost:9090 - localhost:9090
- job_name: badfederation
honor_timestamps: false
metrics_path: /federate
static_configs:
- targets:
- localhost:9090
- job_name: 測試 - job_name: 測試
metrics_path: /metrics metrics_path: /metrics
static_configs: static_configs:
@ -230,8 +252,8 @@ scrape_configs:
refresh_interval: 1m refresh_interval: 1m
version: 1 version: 1
tls_config: tls_config:
cert_file: testdata/valid_cert_file cert_file: valid_cert_file
key_file: testdata/valid_key_file key_file: valid_key_file
- job_name: service-openstack - job_name: service-openstack
openstack_sd_configs: openstack_sd_configs:
@ -0,0 +1,3 @@
alerting:
  alert_relabel_configs:
  -
@ -0,0 +1,4 @@
alerting:
  alertmanagers:
  - relabel_configs:
    -
@ -0,0 +1,4 @@
scrape_configs:
- job_name: "test"
  metric_relabel_configs:
  -
@ -0,0 +1,2 @@
remote_read:
-
@ -0,0 +1,2 @@
remote_write:
-
@ -0,0 +1,4 @@
remote_write:
- url: "foo"
  write_relabel_configs:
  -
@ -0,0 +1,2 @@
scrape_configs:
-
@ -0,0 +1,4 @@
scrape_configs:
- job_name: "test"
  static_configs:
  -
@ -0,0 +1,4 @@
scrape_configs:
- job_name: "test"
  relabel_configs:
  -
@ -0,0 +1,4 @@
scrape_configs:
- job_name: prometheus
  kubernetes_sd_configs:
  - role: endpoints
@ -0,0 +1,5 @@
scrape_configs:
- job_name: prometheus
  kubernetes_sd_configs:
  - role: pod
    bearer_token: 1234
config/testdata/labelvalue.bad.yml
@ -0,0 +1,3 @@
global:
  external_labels:
    name: !!binary "/w=="
@ -2,33 +2,37 @@
{{/* Navbar, should be passed . */}} {{/* Navbar, should be passed . */}}
{{ define "navbar" }} {{ define "navbar" }}
<nav class="navbar navbar-inverse navbar-static-top"> <nav class="navbar fixed-top navbar-expand-sm navbar-dark bg-dark">
<div class="container-fluid"> <div class="container-fluid">
<!-- Brand and toggle get grouped for better mobile display --> <!-- Brand and toggle get grouped for better mobile display -->
<div class="navbar-header"> <div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> <button type="button" class="navbar-toggler" data-toggle="collapse" data-target="#bs-example-navbar-collapse-1" aria-expanded="false" aria-controls="navbar-nav" aria-label="toggle navigation">
<span class="sr-only">Toggle navigation</span> <span class="navbar-toggler-icon"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button> </button>
<a class="navbar-brand" href="{{ pathPrefix }}/">Prometheus</a> <a class="navbar-brand" href="{{ pathPrefix }}/">Prometheus</a>
</div> </div>
<div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1"> <div class="collapse navbar-collapse" id="bs-example-navbar-collapse-1">
<ul class="nav navbar-nav"> <ul class="nav navbar-nav">
<li><a href="{{ pathPrefix }}/alerts">Alerts</a></li> <li class="nav-item"><a class="nav-link" href="{{ pathPrefix }}/alerts">Alerts</a></li>
<li><a href="https://www.pagerduty.com/">PagerDuty</a></li> <li class="nav-item"><a class="nav-link" href="https://www.pagerduty.com/">PagerDuty</a></li>
</div>
</ul> </ul>
</div> </div>
</div>
</nav> </nav>
{{ end }} {{ end }}
{{/* LHS menu, should be passed . */}} {{/* LHS menu, should be passed . */}}
{{ define "menu" }} {{ define "menu" }}
<div class="prom_lhs_menu"> <div class="prom_lhs_menu row">
<ul> <nav class="col-md-2 md-block bg-dark sidebar prom_lhs_menu_nav">
<div class="sidebar-sticky">
<ul class="nav flex-column">
{{ template "_menuItem" (args . "index.html.example" "Overview") }} {{ template "_menuItem" (args . "index.html.example" "Overview") }}
{{ if query "up{job='node'}" }} {{ if query "up{job='node'}" }}
@ -36,15 +40,15 @@
{{ if match "^node" .Path }} {{ if match "^node" .Path }}
{{ if .Params.instance }} {{ if .Params.instance }}
<ul> <ul>
<li {{ if eq .Path "node-overview.html" }}class="prom_lhs_menu_selected"{{ end }}> <li {{ if eq .Path "node-overview.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
<a href="node-overview.html?instance={{ .Params.instance }}">{{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</a> <a class="nav-link" href="node-overview.html?instance={{ .Params.instance }}">{{ reReplaceAll "(.*?://)([^:/]+?)(:\\d+)?/.*" "$2" .Params.instance }}</a>
</li> </li>
<ul> <ul>
<li {{ if eq .Path "node-cpu.html" }}class="prom_lhs_menu_selected"{{ end }}> <li {{ if eq .Path "node-cpu.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
<a href="node-cpu.html?instance={{ .Params.instance }}">CPU</a> <a class="nav-link" href="node-cpu.html?instance={{ .Params.instance }}">CPU</a>
</li> </li>
<li {{ if eq .Path "node-disk.html" }}class="prom_lhs_menu_selected"{{ end }}> <li {{ if eq .Path "node-disk.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
<a href="node-disk.html?instance={{ .Params.instance }}">Disk</a> <a class="nav-link" href="node-disk.html?instance={{ .Params.instance }}">Disk</a>
</li> </li>
</ul> </ul>
</ul> </ul>
@ -57,8 +61,8 @@
{{ if match "^prometheus" .Path }} {{ if match "^prometheus" .Path }}
{{ if .Params.instance }} {{ if .Params.instance }}
<ul> <ul>
<li {{ if eq .Path "prometheus-overview.html" }}class="prom_lhs_menu_selected"{{ end }}> <li {{ if eq .Path "prometheus-overview.html" }}class="prom_lhs_menu_selected nav-item"{{ end }}>
<a href="prometheus-overview.html?instance={{ .Params.instance }}">{{.Params.instance }}</a> <a class="nav-link" href="prometheus-overview.html?instance={{ .Params.instance }}">{{.Params.instance }}</a>
</li> </li>
</ul> </ul>
{{ end }} {{ end }}
@ -67,10 +71,12 @@
</ul> </ul>
</div> </div>
</nav>
</div>
{{ end }} {{ end }}
{{/* Helper, pass (args . path name) */}} {{/* Helper, pass (args . path name) */}}
{{ define "_menuItem" }} {{ define "_menuItem" }}
<li {{ if eq .arg0.Path .arg1 }} class="prom_lhs_menu_selected" {{ end }}><a href="{{ .arg1 }}">{{ .arg2 }}</a></li> <li {{ if eq .arg0.Path .arg1 }} class="prom_lhs_menu_selected nav-item" {{ end }}><a class="nav-link" href="{{ .arg1 }}">{{ .arg2 }}</a></li>
{{ end }} {{ end }}

View file

@ -2,13 +2,15 @@
{{/* Load Prometheus console library JS/CSS. Should go in <head> */}} {{/* Load Prometheus console library JS/CSS. Should go in <head> */}}
{{ define "prom_console_head" }} {{ define "prom_console_head" }}
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/static/vendor/rickshaw/rickshaw.min.css"> <link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/static/vendor/rickshaw/rickshaw.min.css">
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/static/vendor/bootstrap-3.3.1/css/bootstrap.min.css"> <link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/static/vendor/bootstrap-4.3.1/css/bootstrap.min.css">
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/static/css/prom_console.css"> <link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/static/css/prom_console.css">
<link type="text/css" rel="stylesheet" href="{{ pathPrefix }}/static/vendor/bootstrap4-glyphicons/css/bootstrap-glyphicons.min.css">
<script src="{{ pathPrefix }}/static/vendor/rickshaw/vendor/d3.v3.js"></script> <script src="{{ pathPrefix }}/static/vendor/rickshaw/vendor/d3.v3.js"></script>
<script src="{{ pathPrefix }}/static/vendor/rickshaw/vendor/d3.layout.min.js"></script> <script src="{{ pathPrefix }}/static/vendor/rickshaw/vendor/d3.layout.min.js"></script>
<script src="{{ pathPrefix }}/static/vendor/rickshaw/rickshaw.min.js"></script> <script src="{{ pathPrefix }}/static/vendor/rickshaw/rickshaw.min.js"></script>
<script src="{{ pathPrefix }}/static/vendor/js/jquery.min.js"></script> <script src="{{ pathPrefix }}/static/vendor/js/jquery-3.3.1.min.js"></script>
<script src="{{ pathPrefix }}/static/vendor/bootstrap-3.3.1/js/bootstrap.min.js"></script> <script src="{{ pathPrefix }}/static/vendor/js/popper.min.js"></script>
<script src="{{ pathPrefix }}/static/vendor/bootstrap-4.3.1/js/bootstrap.min.js"></script>
<script> <script>
var PATH_PREFIX = "{{ pathPrefix }}"; var PATH_PREFIX = "{{ pathPrefix }}";
@ -17,13 +19,15 @@ var PATH_PREFIX = "{{ pathPrefix }}";
{{ end }} {{ end }}
{{/* Top of all pages. */}} {{/* Top of all pages. */}}
{{ define "head" }} {{ define "head" -}}
<html> <!doctype html>
<html lang="en">
<head> <head>
{{ template "prom_console_head" }} {{ template "prom_console_head" }}
</head> </head>
<body> <body>
{{ template "navbar" . }} {{ template "navbar" . }}
{{ template "menu" . }} {{ template "menu" . }}
{{ end }} {{ end }}
@ -32,6 +36,7 @@ var PATH_PREFIX = "{{ pathPrefix }}";
{{ define "humanizeNoSmallPrefix" }}{{ if and (lt . 1.0) (gt . -1.0) }}{{ printf "%.3g" . }}{{ else }}{{ humanize . }}{{ end }}{{ end }} {{ define "humanizeNoSmallPrefix" }}{{ if and (lt . 1.0) (gt . -1.0) }}{{ printf "%.3g" . }}{{ else }}{{ humanize . }}{{ end }}{{ end }}
{{ define "humanize1024" }}{{ humanize1024 . }}{{ end }} {{ define "humanize1024" }}{{ humanize1024 . }}{{ end }}
{{ define "humanizeDuration" }}{{ humanizeDuration . }}{{ end }} {{ define "humanizeDuration" }}{{ humanizeDuration . }}{{ end }}
{{ define "humanizePercentage" }}{{ humanizePercentage . }}{{ end }}
{{ define "humanizeTimestamp" }}{{ humanizeTimestamp . }}{{ end }} {{ define "humanizeTimestamp" }}{{ humanizeTimestamp . }}{{ end }}
{{ define "printf.1f" }}{{ printf "%.1f" . }}{{ end }} {{ define "printf.1f" }}{{ printf "%.1f" . }}{{ end }}
{{ define "printf.3g" }}{{ printf "%.3g" . }}{{ end }} {{ define "printf.3g" }}{{ printf "%.3g" . }}{{ end }}
@ -50,7 +55,7 @@ renderTemplate is the name of the template to use to render the value.
{{ define "prom_right_table_head" }} {{ define "prom_right_table_head" }}
<div class="prom_console_rhs"> <div class="prom_console_rhs">
<table class="table table-bordered table-hover table-condensed"> <table class="table table-bordered table-hover table-sm">
{{ end }} {{ end }}
{{ define "prom_right_table_tail" }} {{ define "prom_right_table_tail" }}
</table> </table>
@ -76,7 +81,7 @@ renderTemplate is the name of the template to use to render the value.
{{ define "prom_content_head" }} {{ define "prom_content_head" }}
<div class="prom_console_content"> <div class="prom_console_content">
<div class="container"> <div class="container-fluid">
{{ template "prom_graph_timecontrol" . }} {{ template "prom_graph_timecontrol" . }}
{{ end }} {{ end }}
{{ define "prom_content_tail" }} {{ define "prom_content_tail" }}
@ -87,34 +92,32 @@ renderTemplate is the name of the template to use to render the value.
{{ define "prom_graph_timecontrol" }} {{ define "prom_graph_timecontrol" }}
<div class="prom_graph_timecontrol"> <div class="prom_graph_timecontrol">
<div class="prom_graph_timecontrol_inner"> <div class="prom_graph_timecontrol_inner">
<div class="prom_graph_timecontrol_group"> <div class="prom_graph_timecontrol_group ">
<button class="btn btn-default pull-left" type="button" id="prom_graph_duration_shrink" title="Shrink the time range."> <button class="btn btn-light pull-left" type="button" id="prom_graph_duration_shrink" title="Shrink the time range.">
<i class="glyphicon glyphicon-minus"></i> <i class="glyphicon glyphicon-minus"></i>
</button> </button><!-- Comments between elements to remove spaces
<input class="input pull-left" size="3" title="Time range of graph" type="text" id="prom_graph_duration"> --><input class="input pull-left align-middle" size="3" title="Time range of graph" type="text" id="prom_graph_duration"><!--
<button class="btn btn-default pull-left" type="button" id="prom_graph_duration_grow" title="Grow the time range."> --><button class="btn btn-light pull-left" type="button" id="prom_graph_duration_grow" title="Grow the time range.">
<i class="glyphicon glyphicon-plus"></i> <i class="glyphicon glyphicon-plus"></i>
</button> </button>
</div> </div>
<div class="prom_graph_timecontrol_group ">
<div class="prom_graph_timecontrol_group"> <button class="btn btn-light pull-left" type="button" id="prom_graph_time_back" title="Rewind the end time.">
<button class="btn btn-default pull-left" type="button" id="prom_graph_time_back" title="Rewind the end time.">
<i class="glyphicon glyphicon-backward"></i> <i class="glyphicon glyphicon-backward"></i>
</button> </button><!--
<input class="input pull-left" title="End time of graph" placeholder="Until" type="text" id="prom_graph_time_end" size="16" value=""> --><input class="input pull-left align-middle" title="End time of graph" placeholder="Until" type="text" id="prom_graph_time_end" size="16" value=""><!--
<button class="btn btn-default pull-left" type="button" id="prom_graph_time_forward" title="Advance the end time."> --><button class="btn btn-light pull-left" type="button" id="prom_graph_time_forward" title="Advance the end time.">
<i class="glyphicon glyphicon-forward"></i> <i class="glyphicon glyphicon-forward"></i>
</button> </button>
</div> </div>
<div class="prom_graph_timecontrol_group ">
<div class="prom_graph_timecontrol_group">
<div class="btn-group dropup prom_graph_timecontrol_refresh pull-left"> <div class="btn-group dropup prom_graph_timecontrol_refresh pull-left">
<button type="button" class="btn btn-default pull-left" id="prom_graph_refresh_button" title="Refresh."> <button type="button" class="btn btn-light pull-left" id="prom_graph_refresh_button" title="Refresh.">
<i class="glyphicon glyphicon-repeat"></i> <i class="glyphicon glyphicon-repeat"></i>
<span class="icon-repeat"></span> <span class="icon-repeat"></span>
(<span id="prom_graph_refresh_button_value">Off</span>) (<span id="prom_graph_refresh_button_value">Off</span>)
</button> </button>
<button type="button" class="btn btn-default pull-left dropdown-toggle" data-toggle="dropdown" title="Set autorefresh." aria-expanded="false"> <button type="button" class="btn btn-light pull-left dropdown-toggle" data-toggle="dropdown" title="Set autorefresh."aria-haspopup="true" aria-expanded="false">
<span class="caret"></span>&nbsp; <span class="caret"></span>&nbsp;
</button> </button>
<ul class="dropdown-menu" id="prom_graph_refresh_intervals" role="menu"> <ul class="dropdown-menu" id="prom_graph_refresh_intervals" role="menu">

View file

@ -8,7 +8,7 @@
<p>These are example consoles for Prometheus.</p> <p>These are example consoles for Prometheus.</p>
<p>These consoles expect exporters to have the following job labels:</p> <p>These consoles expect exporters to have the following job labels:</p>
<table class="table table-condensed table-striped table-bordered" style="width: 0%"> <table class="table table-sm table-striped table-bordered" style="width: 0%">
<tr> <tr>
<th>Exporter</th> <th>Exporter</th>
<th>Job label</th> <th>Job label</th>

View file

@ -60,6 +60,7 @@
{{ template "prom_right_table_tail" }} {{ template "prom_right_table_tail" }}
{{ template "prom_content_head" . }} {{ template "prom_content_head" . }}
<div class="prom_content_div">
<h1>Prometheus Overview - {{ .Params.instance }}</h1> <h1>Prometheus Overview - {{ .Params.instance }}</h1>
<h3>Ingested Samples</h3> <h3>Ingested Samples</h3>
@ -89,7 +90,7 @@
yUnits: "/s", yUnits: "/s",
}) })
</script> </script>
</div>
{{ template "prom_content_tail" . }} {{ template "prom_content_tail" . }}
{{ template "tail" }} {{ template "tail" }}

View file

@ -10,7 +10,7 @@
{{ template "prom_content_head" . }} {{ template "prom_content_head" . }}
<h1>Prometheus</h1> <h1>Prometheus</h1>
<table class="table table-condensed table-striped table-bordered" style="width: 0%"> <table class="table table-sm table-striped table-bordered" style="width: 0%">
<tr> <tr>
<th>Prometheus</th> <th>Prometheus</th>
<th>Up</th> <th>Up</th>
@ -27,6 +27,7 @@
{{ else }} {{ else }}
<tr><td colspan=4>No devices found.</td></tr> <tr><td colspan=4>No devices found.</td></tr>
{{ end }} {{ end }}
</table>
{{ template "prom_content_tail" . }} {{ template "prom_content_tail" . }}

View file

@ -1,4 +1,4 @@
### Service Discovery # Service Discovery
This directory contains the service discovery (SD) component of Prometheus. This directory contains the service discovery (SD) component of Prometheus.
@ -15,7 +15,7 @@ what makes a good SD and covers some of the common implementation issues.
The first question to ask is whether it makes sense to add this particular The first question to ask is whether it makes sense to add this particular
SD? An SD mechanism should be reasonably well established, and at a minimum in SD? An SD mechanism should be reasonably well established, and at a minimum in
use across multiple organisations. It should allow the discovery of machines use across multiple organizations. It should allow the discovery of machines
and/or services running somewhere. When exactly an SD is popular enough to and/or services running somewhere. When exactly an SD is popular enough to
justify being added to Prometheus natively is an open question. justify being added to Prometheus natively is an open question.
@ -59,7 +59,7 @@ label with the host:port of the target (preferably an IP address to avoid DNS
lookups). No other labelnames should be exposed. lookups). No other labelnames should be exposed.
It is very common for initial pull requests for new SDs to include hardcoded It is very common for initial pull requests for new SDs to include hardcoded
assumptions that make sense for the the author's setup. SD should be generic, assumptions that make sense for the author's setup. SD should be generic,
any customisation should be handled via relabelling. There should be basically any customization should be handled via relabeling. There should be basically
no business logic, filtering, or transformations of the data from the SD beyond no business logic, filtering, or transformations of the data from the SD beyond
that which is needed to fit it into the metadata data model. that which is needed to fit it into the metadata data model.
@ -131,23 +131,23 @@ the Prometheus server will be able to see them.
### The SD interface ### The SD interface
A Service Discovery (SD) mechanism has to discover targets and provide them to Prometheus. We expect similar targets to be grouped together, in the form of a [`TargetGroup`](https://godoc.org/github.com/prometheus/prometheus/config#TargetGroup). The SD mechanism sends the targets down to Prometheus as a list of `TargetGroup`s. A Service Discovery (SD) mechanism has to discover targets and provide them to Prometheus. We expect similar targets to be grouped together, in the form of a [target group](https://godoc.org/github.com/prometheus/prometheus/discovery/targetgroup#Group). The SD mechanism sends the targets down to Prometheus as a list of target groups.
An SD mechanism has to implement the `Discoverer` interface: An SD mechanism has to implement the `Discoverer` interface:
```go ```go
type Discoverer interface { type Discoverer interface {
Run(ctx context.Context, up chan<- []*config.TargetGroup) Run(ctx context.Context, up chan<- []*targetgroup.Group)
} }
``` ```
Prometheus will call the `Run()` method on a provider to initialise the discovery mechanism. The mechanism will then send *all* the `TargetGroup`s into the channel. Prometheus will call the `Run()` method on a provider to initialize the discovery mechanism. The mechanism will then send *all* the target groups into the channel.
Now the mechanism will watch for changes. For each update it can send all `TargetGroup`s, or only changed and new `TargetGroup`s, down the channel. `Manager` will handle Now the mechanism will watch for changes. For each update it can send all target groups, or only changed and new target groups, down the channel. `Manager` will handle
both cases. both cases.
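
To make this contract concrete, here is a minimal sketch of a `Discoverer` under the interface above; the job label, addresses, and `file1` source are made up for illustration. It sends its full set of target groups once and then just waits for cancellation, where a real SD would keep watching its source and resend changed groups.

```go
package main

import (
	"context"

	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

// staticDiscoverer implements the Discoverer interface with a fixed,
// made-up set of targets.
type staticDiscoverer struct{}

func (staticDiscoverer) Run(ctx context.Context, up chan<- []*targetgroup.Group) {
	groups := []*targetgroup.Group{
		{
			Targets: []model.LabelSet{
				{model.AddressLabel: "10.11.122.11:6001"},
				{model.AddressLabel: "10.11.122.12:6002"},
			},
			Labels: model.LabelSet{"job": "postgres"},
			Source: "file1",
		},
	}
	// Send *all* groups once on startup, respecting cancellation.
	select {
	case up <- groups:
	case <-ctx.Done():
		return
	}
	// A real implementation would watch for changes here and resend
	// changed or new groups; this sketch just waits to be stopped.
	<-ctx.Done()
}
```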
For example, if a discovery mechanism retrieves the following groups: For example, if a discovery mechanism retrieves the following groups:
``` ```
[]config.TargetGroup{ []targetgroup.Group{
{ {
Targets: []model.LabelSet{ Targets: []model.LabelSet{
{ {
@ -187,11 +187,11 @@ For example if we had a discovery mechanism and it retrieves the following group
} }
``` ```
Here there are two `TargetGroup`s: one group with source `file1` and another with `file2`. The grouping is implementation-specific and could even be one target per group, but one has to make sure that every target group sent by an SD instance has a `Source` that is unique across all the `TargetGroup`s of that SD instance. Here there are two target groups: one group with source `file1` and another with `file2`. The grouping is implementation-specific and could even be one target per group, but one has to make sure that every target group sent by an SD instance has a `Source` that is unique across all the target groups of that SD instance.
In this case, both the `TargetGroup`s are sent down the channel the first time `Run()` is called. Now, for an update, we need to send the whole _changed_ `TargetGroup` down the channel, i.e., if the target with `hostname: demo-postgres-2` goes away, we send: In this case, both the target groups are sent down the channel the first time `Run()` is called. Now, for an update, we need to send the whole _changed_ target group down the channel, i.e., if the target with `hostname: demo-postgres-2` goes away, we send:
``` ```
&config.TargetGroup{ &targetgroup.Group{
Targets: []model.LabelSet{ Targets: []model.LabelSet{
{ {
"__instance__": "10.11.122.11:6001", "__instance__": "10.11.122.11:6001",
@ -209,7 +209,7 @@ down the channel.
If all the targets in a group go away, we need to send the target groups with empty `Targets` down the channel, i.e., if all targets with `job: postgres` go away, we send: If all the targets in a group go away, we need to send the target groups with empty `Targets` down the channel, i.e., if all targets with `job: postgres` go away, we send:
``` ```
&config.TargetGroup{ &targetgroup.Group{
Targets: nil, Targets: nil,
"Source": "file2", "Source": "file2",
} }

View file

@ -19,55 +19,50 @@ import (
"net" "net"
"net/http" "net/http"
"strings" "strings"
"sync"
"time" "time"
"github.com/Azure/azure-sdk-for-go/arm/compute" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute"
"github.com/Azure/azure-sdk-for-go/arm/network" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-10-01/network"
"github.com/Azure/go-autorest/autorest" "github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/adal" "github.com/Azure/go-autorest/autorest/adal"
"github.com/Azure/go-autorest/autorest/azure" "github.com/Azure/go-autorest/autorest/azure"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus" "github.com/pkg/errors"
config_util "github.com/prometheus/common/config" config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil" "github.com/prometheus/prometheus/util/strutil"
) )
const ( const (
azureLabel = model.MetaLabelPrefix + "azure_" azureLabel = model.MetaLabelPrefix + "azure_"
azureLabelSubscriptionID = azureLabel + "subscription_id"
azureLabelTenantID = azureLabel + "tenant_id"
azureLabelMachineID = azureLabel + "machine_id" azureLabelMachineID = azureLabel + "machine_id"
azureLabelMachineResourceGroup = azureLabel + "machine_resource_group" azureLabelMachineResourceGroup = azureLabel + "machine_resource_group"
azureLabelMachineName = azureLabel + "machine_name" azureLabelMachineName = azureLabel + "machine_name"
azureLabelMachineOSType = azureLabel + "machine_os_type" azureLabelMachineOSType = azureLabel + "machine_os_type"
azureLabelMachineLocation = azureLabel + "machine_location" azureLabelMachineLocation = azureLabel + "machine_location"
azureLabelMachinePrivateIP = azureLabel + "machine_private_ip" azureLabelMachinePrivateIP = azureLabel + "machine_private_ip"
azureLabelMachinePublicIP = azureLabel + "machine_public_ip"
azureLabelMachineTag = azureLabel + "machine_tag_" azureLabelMachineTag = azureLabel + "machine_tag_"
azureLabelMachineScaleSet = azureLabel + "machine_scale_set" azureLabelMachineScaleSet = azureLabel + "machine_scale_set"
authMethodOAuth = "OAuth"
authMethodManagedIdentity = "ManagedIdentity"
) )
var ( // DefaultSDConfig is the default Azure SD configuration.
azureSDRefreshFailuresCount = prometheus.NewCounter( var DefaultSDConfig = SDConfig{
prometheus.CounterOpts{
Name: "prometheus_sd_azure_refresh_failures_total",
Help: "Number of Azure-SD refresh failures.",
})
azureSDRefreshDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Name: "prometheus_sd_azure_refresh_duration_seconds",
Help: "The duration of a Azure-SD refresh in seconds.",
})
// DefaultSDConfig is the default Azure SD configuration.
DefaultSDConfig = SDConfig{
Port: 80, Port: 80,
RefreshInterval: model.Duration(5 * time.Minute), RefreshInterval: model.Duration(5 * time.Minute),
Environment: azure.PublicCloud.Name, Environment: azure.PublicCloud.Name,
} AuthenticationMethod: authMethodOAuth,
) }
// SDConfig is the configuration for Azure based service discovery. // SDConfig is the configuration for Azure based service discovery.
type SDConfig struct { type SDConfig struct {
@ -78,6 +73,14 @@ type SDConfig struct {
ClientID string `yaml:"client_id,omitempty"` ClientID string `yaml:"client_id,omitempty"`
ClientSecret config_util.Secret `yaml:"client_secret,omitempty"` ClientSecret config_util.Secret `yaml:"client_secret,omitempty"`
RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
AuthenticationMethod string `yaml:"authentication_method,omitempty"`
}
func validateAuthParam(param, name string) error {
if len(param) == 0 {
return errors.Errorf("azure SD configuration requires a %s", name)
}
return nil
} }
// UnmarshalYAML implements the yaml.Unmarshaler interface. // UnmarshalYAML implements the yaml.Unmarshaler interface.
@ -88,24 +91,35 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
if err != nil { if err != nil {
return err return err
} }
if c.SubscriptionID == "" {
return fmt.Errorf("Azure SD configuration requires a subscription_id") if err = validateAuthParam(c.SubscriptionID, "subscription_id"); err != nil {
return err
} }
if c.AuthenticationMethod == authMethodOAuth {
if err = validateAuthParam(c.TenantID, "tenant_id"); err != nil {
return err
}
if err = validateAuthParam(c.ClientID, "client_id"); err != nil {
return err
}
if err = validateAuthParam(string(c.ClientSecret), "client_secret"); err != nil {
return err
}
}
if c.AuthenticationMethod != authMethodOAuth && c.AuthenticationMethod != authMethodManagedIdentity {
return errors.Errorf("unknown authentication_type %q. Supported types are %q or %q", c.AuthenticationMethod, authMethodOAuth, authMethodManagedIdentity)
}
return nil return nil
} }
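
As a usage illustration (not part of this change), the following sketch decodes a config through the validation above, assuming the usual pattern where `UnmarshalYAML` starts from `DefaultSDConfig`; the subscription ID is a placeholder value.

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/discovery/azure"
	yaml "gopkg.in/yaml.v2"
)

func main() {
	// With ManagedIdentity, the OAuth fields (tenant_id, client_id,
	// client_secret) may be omitted; only subscription_id is required.
	// The subscription ID below is a made-up placeholder.
	raw := []byte(`
subscription_id: "11111111-2222-3333-4444-555555555555"
authentication_method: "ManagedIdentity"
`)
	var cfg azure.SDConfig
	if err := yaml.UnmarshalStrict(raw, &cfg); err != nil {
		// Dropping subscription_id above would instead fail with
		// "azure SD configuration requires a subscription_id".
		fmt.Println("config error:", err)
		return
	}
	fmt.Println("port:", cfg.Port, "refresh:", cfg.RefreshInterval)
}
```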
func init() {
prometheus.MustRegister(azureSDRefreshDuration)
prometheus.MustRegister(azureSDRefreshFailuresCount)
}
// Discovery periodically performs Azure-SD requests. It implements
// the Discoverer interface.
type Discovery struct { type Discovery struct {
cfg *SDConfig *refresh.Discovery
interval time.Duration
port int
logger log.Logger logger log.Logger
cfg *SDConfig
port int
} }
// NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets. // NewDiscovery returns a new AzureDiscovery which periodically refreshes its targets.
@ -113,42 +127,18 @@ func NewDiscovery(cfg *SDConfig, logger log.Logger) *Discovery {
if logger == nil { if logger == nil {
logger = log.NewNopLogger() logger = log.NewNopLogger()
} }
return &Discovery{ d := &Discovery{
cfg: cfg, cfg: cfg,
interval: time.Duration(cfg.RefreshInterval),
port: cfg.Port, port: cfg.Port,
logger: logger, logger: logger,
} }
} d.Discovery = refresh.NewDiscovery(
logger,
// Run implements the Discoverer interface. "azure",
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { time.Duration(cfg.RefreshInterval),
ticker := time.NewTicker(d.interval) d.refresh,
defer ticker.Stop() )
return d
for {
select {
case <-ctx.Done():
return
default:
}
tg, err := d.refresh()
if err != nil {
level.Error(d.logger).Log("msg", "Unable to refresh during Azure discovery", "err", err)
} else {
select {
case <-ctx.Done():
case ch <- []*targetgroup.Group{tg}:
}
}
select {
case <-ticker.C:
case <-ctx.Done():
return
}
}
} }
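
For context, a minimal sketch of the `refresh` package contract relied on here: a logger, a mechanism name for the generic refresh metrics, a polling interval, and a callback that returns the full set of target groups on every tick. The callback body and the one-group result below are placeholders.

```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/go-kit/kit/log"
	"github.com/prometheus/prometheus/discovery/refresh"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

func main() {
	d := refresh.NewDiscovery(
		log.NewNopLogger(),
		"azure", // name used in the generic refresh duration/failure metrics
		5*time.Minute,
		func(ctx context.Context) ([]*targetgroup.Group, error) {
			// A real callback queries the SD backend; this placeholder
			// returns one empty, made-up group.
			return []*targetgroup.Group{{Source: "azure"}}, nil
		},
	)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	ch := make(chan []*targetgroup.Group)
	go d.Run(ctx, ch) // refresh.Discovery implements the Discoverer interface
	fmt.Println(<-ch) // the result of the initial refresh
}
```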
// azureClient represents multiple Azure Resource Manager providers. // azureClient represents multiple Azure Resource Manager providers.
@ -170,14 +160,31 @@ func createAzureClient(cfg SDConfig) (azureClient, error) {
resourceManagerEndpoint := env.ResourceManagerEndpoint resourceManagerEndpoint := env.ResourceManagerEndpoint
var c azureClient var c azureClient
var spt *adal.ServicePrincipalToken
switch cfg.AuthenticationMethod {
case authMethodManagedIdentity:
msiEndpoint, err := adal.GetMSIVMEndpoint()
if err != nil {
return azureClient{}, err
}
spt, err = adal.NewServicePrincipalTokenFromMSI(msiEndpoint, resourceManagerEndpoint)
if err != nil {
return azureClient{}, err
}
case authMethodOAuth:
oauthConfig, err := adal.NewOAuthConfig(activeDirectoryEndpoint, cfg.TenantID) oauthConfig, err := adal.NewOAuthConfig(activeDirectoryEndpoint, cfg.TenantID)
if err != nil { if err != nil {
return azureClient{}, err return azureClient{}, err
} }
spt, err := adal.NewServicePrincipalToken(*oauthConfig, cfg.ClientID, string(cfg.ClientSecret), resourceManagerEndpoint)
spt, err = adal.NewServicePrincipalToken(*oauthConfig, cfg.ClientID, string(cfg.ClientSecret), resourceManagerEndpoint)
if err != nil { if err != nil {
return azureClient{}, err return azureClient{}, err
} }
}
bearerAuthorizer := autorest.NewBearerAuthorizer(spt) bearerAuthorizer := autorest.NewBearerAuthorizer(spt)
@ -211,18 +218,18 @@ type virtualMachine struct {
OsType string OsType string
ScaleSet string ScaleSet string
Tags map[string]*string Tags map[string]*string
NetworkProfile compute.NetworkProfile NetworkInterfaces []string
} }
// Create a new azureResource object from an ID string. // Create a new azureResource object from an ID string.
func newAzureResourceFromID(id string, logger log.Logger) (azureResource, error) { func newAzureResourceFromID(id string, logger log.Logger) (azureResource, error) {
// Resource IDs have the following format. // Resource IDs have the following format.
// /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME // /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME
// or if embeded resource then // or if embedded resource then
// /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME/TYPE/NAME // /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME/TYPE/NAME
s := strings.Split(id, "/") s := strings.Split(id, "/")
if len(s) != 9 && len(s) != 11 { if len(s) != 9 && len(s) != 11 {
err := fmt.Errorf("invalid ID '%s'. Refusing to create azureResource", id) err := errors.Errorf("invalid ID '%s'. Refusing to create azureResource", id)
level.Error(logger).Log("err", err) level.Error(logger).Log("err", err)
return azureResource{}, err return azureResource{}, err
} }
@ -233,39 +240,31 @@ func newAzureResourceFromID(id string, logger log.Logger) (azureResource, error)
}, nil }, nil
} }
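
For illustration, an in-package test sketch of this parser; the resource ID is invented but has the nine `/`-separated segments the function expects (the `Name` and `ResourceGroup` fields are assumed from how `azureResource` is used elsewhere in this file).

```go
package azure

import (
	"testing"

	"github.com/go-kit/kit/log"
)

func TestNewAzureResourceFromIDSketch(t *testing.T) {
	// Splitting on "/" yields 9 segments (the leading "/" contributes
	// an empty first element), so this made-up ID parses successfully.
	id := "/subscriptions/SUB/resourceGroups/RG/providers/Microsoft.Compute/virtualMachines/vm0"
	r, err := newAzureResourceFromID(id, log.NewNopLogger())
	if err != nil {
		t.Fatal(err)
	}
	if r.Name == "" || r.ResourceGroup == "" {
		t.Errorf("unexpected azureResource: %+v", r)
	}

	// An ID with the wrong number of segments is refused with an
	// "invalid ID" error.
	if _, err := newAzureResourceFromID("/not/a/resource", log.NewNopLogger()); err == nil {
		t.Error("expected an error for a malformed ID")
	}
}
```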
func (d *Discovery) refresh() (tg *targetgroup.Group, err error) { func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
defer level.Debug(d.logger).Log("msg", "Azure discovery completed") defer level.Debug(d.logger).Log("msg", "Azure discovery completed")
t0 := time.Now()
defer func() {
azureSDRefreshDuration.Observe(time.Since(t0).Seconds())
if err != nil {
azureSDRefreshFailuresCount.Inc()
}
}()
tg = &targetgroup.Group{}
client, err := createAzureClient(*d.cfg) client, err := createAzureClient(*d.cfg)
if err != nil { if err != nil {
return tg, fmt.Errorf("could not create Azure client: %s", err) return nil, errors.Wrap(err, "could not create Azure client")
} }
machines, err := client.getVMs() machines, err := client.getVMs(ctx)
if err != nil { if err != nil {
return tg, fmt.Errorf("could not get virtual machines: %s", err) return nil, errors.Wrap(err, "could not get virtual machines")
} }
level.Debug(d.logger).Log("msg", "Found virtual machines during Azure discovery.", "count", len(machines)) level.Debug(d.logger).Log("msg", "Found virtual machines during Azure discovery.", "count", len(machines))
// Load the vms managed by scale sets. // Load the vms managed by scale sets.
scaleSets, err := client.getScaleSets() scaleSets, err := client.getScaleSets(ctx)
if err != nil { if err != nil {
return tg, fmt.Errorf("could not get virtual machine scale sets: %s", err) return nil, errors.Wrap(err, "could not get virtual machine scale sets")
} }
for _, scaleSet := range scaleSets { for _, scaleSet := range scaleSets {
scaleSetVms, err := client.getScaleSetVMs(scaleSet) scaleSetVms, err := client.getScaleSetVMs(ctx, scaleSet)
if err != nil { if err != nil {
return tg, fmt.Errorf("could not get virtual machine scale set vms: %s", err) return nil, errors.Wrap(err, "could not get virtual machine scale set vms")
} }
machines = append(machines, scaleSetVms...) machines = append(machines, scaleSetVms...)
} }
@ -277,9 +276,12 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
err error err error
} }
var wg sync.WaitGroup
wg.Add(len(machines))
ch := make(chan target, len(machines)) ch := make(chan target, len(machines))
for i, vm := range machines { for i, vm := range machines {
go func(i int, vm virtualMachine) { go func(i int, vm virtualMachine) {
defer wg.Done()
r, err := newAzureResourceFromID(vm.ID, d.logger) r, err := newAzureResourceFromID(vm.ID, d.logger)
if err != nil { if err != nil {
ch <- target{labelSet: nil, err: err} ch <- target{labelSet: nil, err: err}
@ -287,6 +289,8 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
} }
labels := model.LabelSet{ labels := model.LabelSet{
azureLabelSubscriptionID: model.LabelValue(d.cfg.SubscriptionID),
azureLabelTenantID: model.LabelValue(d.cfg.TenantID),
azureLabelMachineID: model.LabelValue(vm.ID), azureLabelMachineID: model.LabelValue(vm.ID),
azureLabelMachineName: model.LabelValue(vm.Name), azureLabelMachineName: model.LabelValue(vm.Name),
azureLabelMachineOSType: model.LabelValue(vm.OsType), azureLabelMachineOSType: model.LabelValue(vm.OsType),
@ -306,37 +310,44 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
} }
// Get the IP address information via separate call to the network provider. // Get the IP address information via separate call to the network provider.
for _, nic := range *vm.NetworkProfile.NetworkInterfaces { for _, nicID := range vm.NetworkInterfaces {
networkInterface, err := client.getNetworkInterfaceByID(*nic.ID) networkInterface, err := client.getNetworkInterfaceByID(ctx, nicID)
if err != nil { if err != nil {
level.Error(d.logger).Log("msg", "Unable to get network interface", "name", *nic.ID, "err", err) level.Error(d.logger).Log("msg", "Unable to get network interface", "name", nicID, "err", err)
ch <- target{labelSet: nil, err: err} ch <- target{labelSet: nil, err: err}
// Get out of this routine because we cannot continue without a network interface. // Get out of this routine because we cannot continue without a network interface.
return return
} }
if networkInterface.InterfacePropertiesFormat == nil {
continue
}
// Unfortunately Azure does not return information on whether a VM is deallocated. // Unfortunately Azure does not return information on whether a VM is deallocated.
// This information is available via another API call however the Go SDK does not // This information is available via another API call however the Go SDK does not
// yet support this. On deallocated machines, this value happens to be nil so it // yet support this. On deallocated machines, this value happens to be nil so it
// is a cheap and easy way to determine if a machine is allocated or not. // is a cheap and easy way to determine if a machine is allocated or not.
if networkInterface.Properties.Primary == nil { if networkInterface.Primary == nil {
level.Debug(d.logger).Log("msg", "Skipping deallocated virtual machine", "machine", vm.Name) level.Debug(d.logger).Log("msg", "Skipping deallocated virtual machine", "machine", vm.Name)
ch <- target{}
return return
} }
if *networkInterface.Properties.Primary { if *networkInterface.Primary {
for _, ip := range *networkInterface.Properties.IPConfigurations { for _, ip := range *networkInterface.IPConfigurations {
if ip.Properties.PrivateIPAddress != nil { if ip.PublicIPAddress != nil && ip.PublicIPAddress.PublicIPAddressPropertiesFormat != nil {
labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.Properties.PrivateIPAddress) labels[azureLabelMachinePublicIP] = model.LabelValue(*ip.PublicIPAddress.IPAddress)
address := net.JoinHostPort(*ip.Properties.PrivateIPAddress, fmt.Sprintf("%d", d.port)) }
if ip.PrivateIPAddress != nil {
labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.PrivateIPAddress)
address := net.JoinHostPort(*ip.PrivateIPAddress, fmt.Sprintf("%d", d.port))
labels[model.AddressLabel] = model.LabelValue(address) labels[model.AddressLabel] = model.LabelValue(address)
ch <- target{labelSet: labels, err: nil} ch <- target{labelSet: labels, err: nil}
return return
} }
// If we made it here, we don't have a private IP which should be impossible. // If we made it here, we don't have a private IP which should be impossible.
// Return an empty target and error to ensure an all or nothing situation. // Return an empty target and error to ensure an all or nothing situation.
err = fmt.Errorf("unable to find a private IP for VM %s", vm.Name) err = errors.Errorf("unable to find a private IP for VM %s", vm.Name)
ch <- target{labelSet: nil, err: err} ch <- target{labelSet: nil, err: err}
return return
} }
@ -345,90 +356,78 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
}(i, vm) }(i, vm)
} }
for range machines { wg.Wait()
tgt := <-ch close(ch)
var tg targetgroup.Group
for tgt := range ch {
if tgt.err != nil { if tgt.err != nil {
return nil, fmt.Errorf("unable to complete Azure service discovery: %s", err) return nil, errors.Wrap(err, "unable to complete Azure service discovery")
} }
if tgt.labelSet != nil { if tgt.labelSet != nil {
tg.Targets = append(tg.Targets, tgt.labelSet) tg.Targets = append(tg.Targets, tgt.labelSet)
} }
} }
return tg, nil return []*targetgroup.Group{&tg}, nil
} }
func (client *azureClient) getVMs() ([]virtualMachine, error) { func (client *azureClient) getVMs(ctx context.Context) ([]virtualMachine, error) {
var vms []virtualMachine var vms []virtualMachine
result, err := client.vm.ListAll() result, err := client.vm.ListAll(ctx)
if err != nil { if err != nil {
return vms, fmt.Errorf("could not list virtual machines: %s", err) return nil, errors.Wrap(err, "could not list virtual machines")
} }
for result.NotDone() {
for _, vm := range *result.Value { for _, vm := range result.Values() {
vms = append(vms, mapFromVM(vm)) vms = append(vms, mapFromVM(vm))
} }
err = result.NextWithContext(ctx)
// If we still have results, keep going until we have no more.
for result.NextLink != nil {
result, err = client.vm.ListAllNextResults(result)
if err != nil { if err != nil {
return vms, fmt.Errorf("could not list virtual machines: %s", err) return nil, errors.Wrap(err, "could not list virtual machines")
}
for _, vm := range *result.Value {
vms = append(vms, mapFromVM(vm))
} }
} }
return vms, nil return vms, nil
} }
func (client *azureClient) getScaleSets() ([]compute.VirtualMachineScaleSet, error) { func (client *azureClient) getScaleSets(ctx context.Context) ([]compute.VirtualMachineScaleSet, error) {
var scaleSets []compute.VirtualMachineScaleSet var scaleSets []compute.VirtualMachineScaleSet
result, err := client.vmss.ListAll() result, err := client.vmss.ListAll(ctx)
if err != nil { if err != nil {
return scaleSets, fmt.Errorf("could not list virtual machine scale sets: %s", err) return nil, errors.Wrap(err, "could not list virtual machine scale sets")
} }
scaleSets = append(scaleSets, *result.Value...) for result.NotDone() {
scaleSets = append(scaleSets, result.Values()...)
for result.NextLink != nil { err = result.NextWithContext(ctx)
result, err = client.vmss.ListAllNextResults(result)
if err != nil { if err != nil {
return scaleSets, fmt.Errorf("could not list virtual machine scale sets: %s", err) return nil, errors.Wrap(err, "could not list virtual machine scale sets")
} }
scaleSets = append(scaleSets, *result.Value...)
} }
return scaleSets, nil return scaleSets, nil
} }
func (client *azureClient) getScaleSetVMs(scaleSet compute.VirtualMachineScaleSet) ([]virtualMachine, error) { func (client *azureClient) getScaleSetVMs(ctx context.Context, scaleSet compute.VirtualMachineScaleSet) ([]virtualMachine, error) {
var vms []virtualMachine var vms []virtualMachine
//TODO do we really need to fetch the resourcegroup this way? //TODO do we really need to fetch the resourcegroup this way?
r, err := newAzureResourceFromID(*scaleSet.ID, nil) r, err := newAzureResourceFromID(*scaleSet.ID, nil)
if err != nil { if err != nil {
return vms, fmt.Errorf("could not parse scale set ID: %s", err) return nil, errors.Wrap(err, "could not parse scale set ID")
} }
result, err := client.vmssvm.List(r.ResourceGroup, *(scaleSet.Name), "", "", "") result, err := client.vmssvm.List(ctx, r.ResourceGroup, *(scaleSet.Name), "", "", "")
if err != nil { if err != nil {
return vms, fmt.Errorf("could not list virtual machine scale set vms: %s", err) return nil, errors.Wrap(err, "could not list virtual machine scale set vms")
} }
for result.NotDone() {
for _, vm := range *result.Value { for _, vm := range result.Values() {
vms = append(vms, mapFromVMScaleSetVM(vm, *scaleSet.Name)) vms = append(vms, mapFromVMScaleSetVM(vm, *scaleSet.Name))
} }
err = result.NextWithContext(ctx)
for result.NextLink != nil {
result, err = client.vmssvm.ListNextResults(result)
if err != nil { if err != nil {
return vms, fmt.Errorf("could not list virtual machine scale set vms: %s", err) return nil, errors.Wrap(err, "could not list virtual machine scale set vms")
}
for _, vm := range *result.Value {
vms = append(vms, mapFromVMScaleSetVM(vm, *scaleSet.Name))
} }
} }
@ -436,11 +435,18 @@ func (client *azureClient) getScaleSetVMs(scaleSet compute.VirtualMachineScaleSe
} }
func mapFromVM(vm compute.VirtualMachine) virtualMachine { func mapFromVM(vm compute.VirtualMachine) virtualMachine {
osType := string(vm.Properties.StorageProfile.OsDisk.OsType) osType := string(vm.StorageProfile.OsDisk.OsType)
tags := map[string]*string{} tags := map[string]*string{}
networkInterfaces := []string{}
if vm.Tags != nil { if vm.Tags != nil {
tags = *(vm.Tags) tags = vm.Tags
}
if vm.NetworkProfile != nil {
for _, vmNIC := range *(vm.NetworkProfile.NetworkInterfaces) {
networkInterfaces = append(networkInterfaces, *vmNIC.ID)
}
} }
return virtualMachine{ return virtualMachine{
@ -451,16 +457,23 @@ func mapFromVM(vm compute.VirtualMachine) virtualMachine {
OsType: osType, OsType: osType,
ScaleSet: "", ScaleSet: "",
Tags: tags, Tags: tags,
NetworkProfile: *(vm.Properties.NetworkProfile), NetworkInterfaces: networkInterfaces,
} }
} }
func mapFromVMScaleSetVM(vm compute.VirtualMachineScaleSetVM, scaleSetName string) virtualMachine { func mapFromVMScaleSetVM(vm compute.VirtualMachineScaleSetVM, scaleSetName string) virtualMachine {
osType := string(vm.Properties.StorageProfile.OsDisk.OsType) osType := string(vm.StorageProfile.OsDisk.OsType)
tags := map[string]*string{} tags := map[string]*string{}
networkInterfaces := []string{}
if vm.Tags != nil { if vm.Tags != nil {
tags = *(vm.Tags) tags = vm.Tags
}
if vm.NetworkProfile != nil {
for _, vmNIC := range *(vm.NetworkProfile.NetworkInterfaces) {
networkInterfaces = append(networkInterfaces, *vmNIC.ID)
}
} }
return virtualMachine{ return virtualMachine{
@ -471,14 +484,14 @@ func mapFromVMScaleSetVM(vm compute.VirtualMachineScaleSetVM, scaleSetName strin
OsType: osType, OsType: osType,
ScaleSet: scaleSetName, ScaleSet: scaleSetName,
Tags: tags, Tags: tags,
NetworkProfile: *(vm.Properties.NetworkProfile), NetworkInterfaces: networkInterfaces,
} }
} }
func (client *azureClient) getNetworkInterfaceByID(networkInterfaceID string) (network.Interface, error) { func (client *azureClient) getNetworkInterfaceByID(ctx context.Context, networkInterfaceID string) (*network.Interface, error) {
result := network.Interface{} result := network.Interface{}
queryParameters := map[string]interface{}{ queryParameters := map[string]interface{}{
"api-version": client.nic.APIVersion, "api-version": "2018-10-01",
} }
preparer := autorest.CreatePreparer( preparer := autorest.CreatePreparer(
@ -486,21 +499,20 @@ func (client *azureClient) getNetworkInterfaceByID(networkInterfaceID string) (n
autorest.WithBaseURL(client.nic.BaseURI), autorest.WithBaseURL(client.nic.BaseURI),
autorest.WithPath(networkInterfaceID), autorest.WithPath(networkInterfaceID),
autorest.WithQueryParameters(queryParameters)) autorest.WithQueryParameters(queryParameters))
req, err := preparer.Prepare(&http.Request{}) req, err := preparer.Prepare((&http.Request{}).WithContext(ctx))
if err != nil { if err != nil {
return result, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", nil, "Failure preparing request") return nil, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", nil, "Failure preparing request")
} }
resp, err := client.nic.GetSender(req) resp, err := client.nic.GetSender(req)
if err != nil { if err != nil {
result.Response = autorest.Response{Response: resp} return nil, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", resp, "Failure sending request")
return result, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", resp, "Failure sending request")
} }
result, err = client.nic.GetResponder(resp) result, err = client.nic.GetResponder(resp)
if err != nil { if err != nil {
err = autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", resp, "Failure responding to request") return nil, autorest.NewErrorWithError(err, "network.InterfacesClient", "Get", resp, "Failure responding to request")
} }
return result, nil return &result, nil
} }

View file

@ -17,7 +17,7 @@ import (
"reflect" "reflect"
"testing" "testing"
"github.com/Azure/azure-sdk-for-go/arm/compute" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute"
) )
func TestMapFromVMWithEmptyTags(t *testing.T) { func TestMapFromVMWithEmptyTags(t *testing.T) {
@ -25,7 +25,9 @@ func TestMapFromVMWithEmptyTags(t *testing.T) {
name := "name" name := "name"
vmType := "type" vmType := "type"
location := "westeurope" location := "westeurope"
networkProfile := compute.NetworkProfile{} networkProfile := compute.NetworkProfile{
NetworkInterfaces: &[]compute.NetworkInterfaceReference{},
}
properties := &compute.VirtualMachineProperties{ properties := &compute.VirtualMachineProperties{
StorageProfile: &compute.StorageProfile{ StorageProfile: &compute.StorageProfile{
OsDisk: &compute.OSDisk{ OsDisk: &compute.OSDisk{
@ -41,7 +43,7 @@ func TestMapFromVMWithEmptyTags(t *testing.T) {
Type: &vmType, Type: &vmType,
Location: &location, Location: &location,
Tags: nil, Tags: nil,
Properties: properties, VirtualMachineProperties: properties,
} }
expectedVM := virtualMachine{ expectedVM := virtualMachine{
@ -51,7 +53,7 @@ func TestMapFromVMWithEmptyTags(t *testing.T) {
Location: location, Location: location,
OsType: "Linux", OsType: "Linux",
Tags: map[string]*string{}, Tags: map[string]*string{},
NetworkProfile: networkProfile, NetworkInterfaces: []string{},
} }
actualVM := mapFromVM(testVM) actualVM := mapFromVM(testVM)
@ -69,7 +71,9 @@ func TestMapFromVMWithTags(t *testing.T) {
tags := map[string]*string{ tags := map[string]*string{
"prometheus": new(string), "prometheus": new(string),
} }
networkProfile := compute.NetworkProfile{} networkProfile := compute.NetworkProfile{
NetworkInterfaces: &[]compute.NetworkInterfaceReference{},
}
properties := &compute.VirtualMachineProperties{ properties := &compute.VirtualMachineProperties{
StorageProfile: &compute.StorageProfile{ StorageProfile: &compute.StorageProfile{
OsDisk: &compute.OSDisk{ OsDisk: &compute.OSDisk{
@ -84,8 +88,8 @@ func TestMapFromVMWithTags(t *testing.T) {
Name: &name, Name: &name,
Type: &vmType, Type: &vmType,
Location: &location, Location: &location,
Tags: &tags, Tags: tags,
Properties: properties, VirtualMachineProperties: properties,
} }
expectedVM := virtualMachine{ expectedVM := virtualMachine{
@ -95,7 +99,7 @@ func TestMapFromVMWithTags(t *testing.T) {
Location: location, Location: location,
OsType: "Linux", OsType: "Linux",
Tags: tags, Tags: tags,
NetworkProfile: networkProfile, NetworkInterfaces: []string{},
} }
actualVM := mapFromVM(testVM) actualVM := mapFromVM(testVM)
@ -110,7 +114,9 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) {
name := "name" name := "name"
vmType := "type" vmType := "type"
location := "westeurope" location := "westeurope"
networkProfile := compute.NetworkProfile{} networkProfile := compute.NetworkProfile{
NetworkInterfaces: &[]compute.NetworkInterfaceReference{},
}
properties := &compute.VirtualMachineScaleSetVMProperties{ properties := &compute.VirtualMachineScaleSetVMProperties{
StorageProfile: &compute.StorageProfile{ StorageProfile: &compute.StorageProfile{
OsDisk: &compute.OSDisk{ OsDisk: &compute.OSDisk{
@ -126,7 +132,7 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) {
Type: &vmType, Type: &vmType,
Location: &location, Location: &location,
Tags: nil, Tags: nil,
Properties: properties, VirtualMachineScaleSetVMProperties: properties,
} }
scaleSet := "testSet" scaleSet := "testSet"
@ -137,7 +143,7 @@ func TestMapFromVMScaleSetVMWithEmptyTags(t *testing.T) {
Location: location, Location: location,
OsType: "Linux", OsType: "Linux",
Tags: map[string]*string{}, Tags: map[string]*string{},
NetworkProfile: networkProfile, NetworkInterfaces: []string{},
ScaleSet: scaleSet, ScaleSet: scaleSet,
} }
@ -156,7 +162,9 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
tags := map[string]*string{ tags := map[string]*string{
"prometheus": new(string), "prometheus": new(string),
} }
networkProfile := compute.NetworkProfile{} networkProfile := compute.NetworkProfile{
NetworkInterfaces: &[]compute.NetworkInterfaceReference{},
}
properties := &compute.VirtualMachineScaleSetVMProperties{ properties := &compute.VirtualMachineScaleSetVMProperties{
StorageProfile: &compute.StorageProfile{ StorageProfile: &compute.StorageProfile{
OsDisk: &compute.OSDisk{ OsDisk: &compute.OSDisk{
@ -171,8 +179,8 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
Name: &name, Name: &name,
Type: &vmType, Type: &vmType,
Location: &location, Location: &location,
Tags: &tags, Tags: tags,
Properties: properties, VirtualMachineScaleSetVMProperties: properties,
} }
scaleSet := "testSet" scaleSet := "testSet"
@ -183,7 +191,7 @@ func TestMapFromVMScaleSetVMWithTags(t *testing.T) {
Location: location, Location: location,
OsType: "Linux", OsType: "Linux",
Tags: tags, Tags: tags,
NetworkProfile: networkProfile, NetworkInterfaces: []string{},
ScaleSet: scaleSet, ScaleSet: scaleSet,
} }

View file

@ -14,6 +14,8 @@
package config package config
import ( import (
"github.com/pkg/errors"
"github.com/prometheus/prometheus/discovery/azure" "github.com/prometheus/prometheus/discovery/azure"
"github.com/prometheus/prometheus/discovery/consul" "github.com/prometheus/prometheus/discovery/consul"
"github.com/prometheus/prometheus/discovery/dns" "github.com/prometheus/prometheus/discovery/dns"
@ -58,8 +60,67 @@ type ServiceDiscoveryConfig struct {
TritonSDConfigs []*triton.SDConfig `yaml:"triton_sd_configs,omitempty"` TritonSDConfigs []*triton.SDConfig `yaml:"triton_sd_configs,omitempty"`
} }
// UnmarshalYAML implements the yaml.Unmarshaler interface. // Validate validates the ServiceDiscoveryConfig.
func (c *ServiceDiscoveryConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { func (c *ServiceDiscoveryConfig) Validate() error {
type plain ServiceDiscoveryConfig for _, cfg := range c.AzureSDConfigs {
return unmarshal((*plain)(c)) if cfg == nil {
return errors.New("empty or null section in azure_sd_configs")
}
}
for _, cfg := range c.ConsulSDConfigs {
if cfg == nil {
return errors.New("empty or null section in consul_sd_configs")
}
}
for _, cfg := range c.DNSSDConfigs {
if cfg == nil {
return errors.New("empty or null section in dns_sd_configs")
}
}
for _, cfg := range c.EC2SDConfigs {
if cfg == nil {
return errors.New("empty or null section in ec2_sd_configs")
}
}
for _, cfg := range c.FileSDConfigs {
if cfg == nil {
return errors.New("empty or null section in file_sd_configs")
}
}
for _, cfg := range c.GCESDConfigs {
if cfg == nil {
return errors.New("empty or null section in gce_sd_configs")
}
}
for _, cfg := range c.KubernetesSDConfigs {
if cfg == nil {
return errors.New("empty or null section in kubernetes_sd_configs")
}
}
for _, cfg := range c.MarathonSDConfigs {
if cfg == nil {
return errors.New("empty or null section in marathon_sd_configs")
}
}
for _, cfg := range c.NerveSDConfigs {
if cfg == nil {
return errors.New("empty or null section in nerve_sd_configs")
}
}
for _, cfg := range c.OpenstackSDConfigs {
if cfg == nil {
return errors.New("empty or null section in openstack_sd_configs")
}
}
for _, cfg := range c.ServersetSDConfigs {
if cfg == nil {
return errors.New("empty or null section in serverset_sd_configs")
}
}
for _, cfg := range c.StaticConfigs {
if cfg == nil {
return errors.New("empty or null section in static_configs")
}
}
return nil
} }
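
As a usage illustration, the empty-section testdata files earlier in this diff decode to nil entries, which is exactly what `Validate` rejects. A minimal sketch follows; the `sd_config` import path and the `StaticConfigs` element type are assumed from this package.

```go
package main

import (
	"fmt"

	sd_config "github.com/prometheus/prometheus/discovery/config"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

func main() {
	// "static_configs:\n-" in YAML decodes to a list with one nil entry.
	cfg := sd_config.ServiceDiscoveryConfig{
		StaticConfigs: []*targetgroup.Group{nil},
	}
	if err := cfg.Validate(); err != nil {
		fmt.Println(err) // empty or null section in static_configs
	}
}
```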

View file

@ -25,10 +25,12 @@ import (
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/go-kit/kit/log/level"
consul "github.com/hashicorp/consul/api" consul "github.com/hashicorp/consul/api"
"github.com/mwitkow/go-conntrack" conntrack "github.com/mwitkow/go-conntrack"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
config_util "github.com/prometheus/common/config" config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil" "github.com/prometheus/prometheus/util/strutil"
) )
@ -55,6 +57,8 @@ const (
servicePortLabel = model.MetaLabelPrefix + "consul_service_port" servicePortLabel = model.MetaLabelPrefix + "consul_service_port"
// datacenterLabel is the name of the label containing the datacenter ID. // datacenterLabel is the name of the label containing the datacenter ID.
datacenterLabel = model.MetaLabelPrefix + "consul_dc" datacenterLabel = model.MetaLabelPrefix + "consul_dc"
// taggedAddressesLabel is the prefix for the labels mapping to a target's tagged addresses.
taggedAddressesLabel = model.MetaLabelPrefix + "consul_tagged_address_"
// serviceIDLabel is the name of the label containing the service ID. // serviceIDLabel is the name of the label containing the service ID.
serviceIDLabel = model.MetaLabelPrefix + "consul_service_id" serviceIDLabel = model.MetaLabelPrefix + "consul_service_id"
@ -74,6 +78,7 @@ var (
Namespace: namespace, Namespace: namespace,
Name: "sd_consul_rpc_duration_seconds", Name: "sd_consul_rpc_duration_seconds",
Help: "The duration of a Consul RPC call in seconds.", Help: "The duration of a Consul RPC call in seconds.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
}, },
[]string{"endpoint", "call"}, []string{"endpoint", "call"},
) )
@ -111,9 +116,8 @@ type SDConfig struct {
// The list of services for which targets are discovered. // The list of services for which targets are discovered.
// Defaults to all services if empty. // Defaults to all services if empty.
Services []string `yaml:"services,omitempty"` Services []string `yaml:"services,omitempty"`
// An optional tag used to filter instances inside a service. A single tag is supported // A list of tags used to filter instances inside a service. Services must contain all tags in the list.
// here to match the Consul API. ServiceTags []string `yaml:"tags,omitempty"`
ServiceTag string `yaml:"tag,omitempty"`
// Desired node metadata. // Desired node metadata.
NodeMeta map[string]string `yaml:"node_meta,omitempty"` NodeMeta map[string]string `yaml:"node_meta,omitempty"`
@ -129,7 +133,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err return err
} }
if strings.TrimSpace(c.Server) == "" { if strings.TrimSpace(c.Server) == "" {
return fmt.Errorf("Consul SD configuration requires a server address") return errors.New("consul SD configuration requires a server address")
} }
return nil return nil
} }
@ -150,7 +154,7 @@ type Discovery struct {
clientDatacenter string clientDatacenter string
tagSeparator string tagSeparator string
watchedServices []string // Set of services which will be discovered. watchedServices []string // Set of services which will be discovered.
watchedTag string // A tag used to filter instances of a service. watchedTags []string // Tags used to filter instances of a service.
watchedNodeMeta map[string]string watchedNodeMeta map[string]string
allowStale bool allowStale bool
refreshInterval time.Duration refreshInterval time.Duration
@ -200,7 +204,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) {
client: client, client: client,
tagSeparator: conf.TagSeparator, tagSeparator: conf.TagSeparator,
watchedServices: conf.Services, watchedServices: conf.Services,
watchedTag: conf.ServiceTag, watchedTags: conf.ServiceTags,
watchedNodeMeta: conf.NodeMeta, watchedNodeMeta: conf.NodeMeta,
allowStale: conf.AllowStale, allowStale: conf.AllowStale,
refreshInterval: time.Duration(conf.RefreshInterval), refreshInterval: time.Duration(conf.RefreshInterval),
@ -236,16 +240,20 @@ func (d *Discovery) shouldWatchFromName(name string) bool {
// *all* services. Details in https://github.com/prometheus/prometheus/pull/3814 // *all* services. Details in https://github.com/prometheus/prometheus/pull/3814
func (d *Discovery) shouldWatchFromTags(tags []string) bool { func (d *Discovery) shouldWatchFromTags(tags []string) bool {
// If there's no fixed set of watched tags, we watch everything. // If there's no fixed set of watched tags, we watch everything.
if d.watchedTag == "" { if len(d.watchedTags) == 0 {
return true return true
} }
tagOuter:
for _, wtag := range d.watchedTags {
for _, tag := range tags { for _, tag := range tags {
if d.watchedTag == tag { if wtag == tag {
return true continue tagOuter
} }
} }
return false return false
}
return true
} }
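
To make the semantics of the new multi-tag filter concrete, here is a standalone sketch of the same all-tags-must-match check; the function name and sample tags are invented for illustration.

```go
package main

import "fmt"

// containsAll mirrors shouldWatchFromTags: every watched tag must be
// present on the service, and an empty watch list matches everything.
func containsAll(watched, tags []string) bool {
	if len(watched) == 0 {
		return true
	}
outer:
	for _, w := range watched {
		for _, t := range tags {
			if t == w {
				continue outer // this watched tag is present; check the next
			}
		}
		return false // a watched tag is missing
	}
	return true
}

func main() {
	fmt.Println(containsAll([]string{"prod", "web"}, []string{"web", "prod", "canary"})) // true
	fmt.Println(containsAll([]string{"prod", "web"}, []string{"web", "canary"}))         // false
}
```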
// Get the local datacenter if not specified. // Get the local datacenter if not specified.
@ -265,7 +273,7 @@ func (d *Discovery) getDatacenter() error {
dc, ok := info["Config"]["Datacenter"].(string) dc, ok := info["Config"]["Datacenter"].(string)
if !ok { if !ok {
err := fmt.Errorf("Invalid value '%v' for Config.Datacenter", info["Config"]["Datacenter"]) err := errors.Errorf("invalid value '%v' for Config.Datacenter", info["Config"]["Datacenter"])
level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err) level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err)
return err return err
} }
@ -304,7 +312,7 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
} }
d.initialize(ctx) d.initialize(ctx)
if len(d.watchedServices) == 0 || d.watchedTag != "" { if len(d.watchedServices) == 0 || len(d.watchedTags) != 0 {
// We need to watch the catalog. // We need to watch the catalog.
ticker := time.NewTicker(d.refreshInterval) ticker := time.NewTicker(d.refreshInterval)
@ -322,7 +330,6 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
<-ticker.C <-ticker.C
} }
} }
} else { } else {
// We only have fully defined services. // We only have fully defined services.
for _, name := range d.watchedServices { for _, name := range d.watchedServices {
@ -335,17 +342,18 @@ func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
// Watch the catalog for new services we would like to watch. This is called only // Watch the catalog for new services we would like to watch. This is called only
// when we don't know yet the names of the services and need to ask Consul the // when we don't know yet the names of the services and need to ask Consul the
// entire list of services. // entire list of services.
func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.Group, lastIndex *uint64, services map[string]func()) error { func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.Group, lastIndex *uint64, services map[string]func()) {
catalog := d.client.Catalog() catalog := d.client.Catalog()
level.Debug(d.logger).Log("msg", "Watching services", "tag", d.watchedTag) level.Debug(d.logger).Log("msg", "Watching services", "tags", d.watchedTags)
t0 := time.Now() t0 := time.Now()
srvs, meta, err := catalog.Services(&consul.QueryOptions{ opts := &consul.QueryOptions{
WaitIndex: *lastIndex, WaitIndex: *lastIndex,
WaitTime: watchTimeout, WaitTime: watchTimeout,
AllowStale: d.allowStale, AllowStale: d.allowStale,
NodeMeta: d.watchedNodeMeta, NodeMeta: d.watchedNodeMeta,
}) }
srvs, meta, err := catalog.Services(opts.WithContext(ctx))
elapsed := time.Since(t0) elapsed := time.Since(t0)
rpcDuration.WithLabelValues("catalog", "services").Observe(elapsed.Seconds()) rpcDuration.WithLabelValues("catalog", "services").Observe(elapsed.Seconds())
@ -353,11 +361,11 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err) level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err)
rpcFailuresCount.Inc() rpcFailuresCount.Inc()
time.Sleep(retryInterval) time.Sleep(retryInterval)
return err return
} }
// If the index equals the previous one, the watch timed out with no update. // If the index equals the previous one, the watch timed out with no update.
if meta.LastIndex == *lastIndex { if meta.LastIndex == *lastIndex {
return nil return
} }
*lastIndex = meta.LastIndex *lastIndex = meta.LastIndex
@ -389,18 +397,17 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup.
// Send clearing target group. // Send clearing target group.
select { select {
case <-ctx.Done(): case <-ctx.Done():
return ctx.Err() return
case ch <- []*targetgroup.Group{{Source: name}}: case ch <- []*targetgroup.Group{{Source: name}}:
} }
} }
} }
return nil
} }
// consulService contains data belonging to the same service. // consulService contains data belonging to the same service.
type consulService struct { type consulService struct {
name string name string
tag string tags []string
labels model.LabelSet labels model.LabelSet
discovery *Discovery discovery *Discovery
client *consul.Client client *consul.Client
@ -414,7 +421,7 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G
discovery: d, discovery: d,
client: d.client, client: d.client,
name: name, name: name,
tag: d.watchedTag, tags: d.watchedTags,
labels: model.LabelSet{ labels: model.LabelSet{
serviceLabel: model.LabelValue(name), serviceLabel: model.LabelValue(name),
datacenterLabel: model.LabelValue(d.clientDatacenter), datacenterLabel: model.LabelValue(d.clientDatacenter),
@ -434,43 +441,47 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G
return return
default: default:
srv.watch(ctx, ch, catalog, &lastIndex) srv.watch(ctx, ch, catalog, &lastIndex)
<-ticker.C select {
case <-ticker.C:
case <-ctx.Done():
}
} }
} }
}() }()
} }
// Get updates for a service. // Get updates for a service.
func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Group, catalog *consul.Catalog, lastIndex *uint64) error { func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Group, catalog *consul.Catalog, lastIndex *uint64) {
level.Debug(srv.logger).Log("msg", "Watching service", "service", srv.name, "tag", srv.tag) level.Debug(srv.logger).Log("msg", "Watching service", "service", srv.name, "tags", srv.tags)
t0 := time.Now() t0 := time.Now()
nodes, meta, err := catalog.Service(srv.name, srv.tag, &consul.QueryOptions{ opts := &consul.QueryOptions{
WaitIndex: *lastIndex, WaitIndex: *lastIndex,
WaitTime: watchTimeout, WaitTime: watchTimeout,
AllowStale: srv.discovery.allowStale, AllowStale: srv.discovery.allowStale,
NodeMeta: srv.discovery.watchedNodeMeta, NodeMeta: srv.discovery.watchedNodeMeta,
}) }
nodes, meta, err := catalog.ServiceMultipleTags(srv.name, srv.tags, opts.WithContext(ctx))
elapsed := time.Since(t0) elapsed := time.Since(t0)
rpcDuration.WithLabelValues("catalog", "service").Observe(elapsed.Seconds()) rpcDuration.WithLabelValues("catalog", "service").Observe(elapsed.Seconds())
// Check the context before in order to exit early. // Check the context before in order to exit early.
select { select {
case <-ctx.Done(): case <-ctx.Done():
return ctx.Err() return
default: default:
// Continue. // Continue.
} }
if err != nil { if err != nil {
level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tag", srv.tag, "err", err) level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", srv.tags, "err", err)
rpcFailuresCount.Inc() rpcFailuresCount.Inc()
time.Sleep(retryInterval) time.Sleep(retryInterval)
return err return
} }
// If the index equals the previous one, the watch timed out with no update. // If the index equals the previous one, the watch timed out with no update.
if meta.LastIndex == *lastIndex { if meta.LastIndex == *lastIndex {
return nil return
} }
*lastIndex = meta.LastIndex *lastIndex = meta.LastIndex
@ -487,7 +498,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr
var tags = srv.tagSeparator + strings.Join(node.ServiceTags, srv.tagSeparator) + srv.tagSeparator var tags = srv.tagSeparator + strings.Join(node.ServiceTags, srv.tagSeparator) + srv.tagSeparator
// If the service address is not empty it should be used instead of the node address // If the service address is not empty it should be used instead of the node address
// since the service may be registered remotely through a different node // since the service may be registered remotely through a different node.
var addr string var addr string
if node.ServiceAddress != "" { if node.ServiceAddress != "" {
addr = net.JoinHostPort(node.ServiceAddress, fmt.Sprintf("%d", node.ServicePort)) addr = net.JoinHostPort(node.ServiceAddress, fmt.Sprintf("%d", node.ServicePort))
@ -505,25 +516,29 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr
serviceIDLabel: model.LabelValue(node.ServiceID), serviceIDLabel: model.LabelValue(node.ServiceID),
} }
// Add all key/value pairs from the node's metadata as their own labels // Add all key/value pairs from the node's metadata as their own labels.
for k, v := range node.NodeMeta { for k, v := range node.NodeMeta {
name := strutil.SanitizeLabelName(k) name := strutil.SanitizeLabelName(k)
labels[metaDataLabel+model.LabelName(name)] = model.LabelValue(v) labels[metaDataLabel+model.LabelName(name)] = model.LabelValue(v)
} }
// Add all key/value pairs from the service's metadata as their own labels // Add all key/value pairs from the service's metadata as their own labels.
for k, v := range node.ServiceMeta { for k, v := range node.ServiceMeta {
name := strutil.SanitizeLabelName(k) name := strutil.SanitizeLabelName(k)
labels[serviceMetaDataLabel+model.LabelName(name)] = model.LabelValue(v) labels[serviceMetaDataLabel+model.LabelName(name)] = model.LabelValue(v)
} }
// Add all key/value pairs from the service's tagged addresses as their own labels.
for k, v := range node.TaggedAddresses {
name := strutil.SanitizeLabelName(k)
labels[taggedAddressesLabel+model.LabelName(name)] = model.LabelValue(v)
}
tgroup.Targets = append(tgroup.Targets, labels) tgroup.Targets = append(tgroup.Targets, labels)
} }
select { select {
case <-ctx.Done(): case <-ctx.Done():
return ctx.Err()
case ch <- []*targetgroup.Group{&tgroup}: case ch <- []*targetgroup.Group{&tgroup}:
} }
return nil
} }
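Editor's note: the new TaggedAddresses block mirrors the NodeMeta and ServiceMeta handling, so Consul tagged addresses (for example lan and wan) surface as their own meta labels. A sketch of that map-to-LabelSet step; the prefix is left to the caller since taggedAddressesLabel is defined elsewhere in the file:

package sdexample

import (
    "github.com/prometheus/common/model"
    "github.com/prometheus/prometheus/util/strutil"
)

// metaLabels turns a metadata map into target labels. Keys are
// sanitized so arbitrary Consul keys (dashes, dots, ...) yield valid
// Prometheus label names.
func metaLabels(prefix string, kv map[string]string) model.LabelSet {
    ls := make(model.LabelSet, len(kv))
    for k, v := range kv {
        name := strutil.SanitizeLabelName(k)
        ls[model.LabelName(prefix+name)] = model.LabelValue(v)
    }
    return ls
}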
@ -34,7 +34,7 @@ func TestConfiguredService(t *testing.T) {
consulDiscovery, err := NewDiscovery(conf, nil) consulDiscovery, err := NewDiscovery(conf, nil)
if err != nil { if err != nil {
t.Errorf("Unexpected error when initialising discovery %v", err) t.Errorf("Unexpected error when initializing discovery %v", err)
} }
if !consulDiscovery.shouldWatch("configuredServiceName", []string{""}) { if !consulDiscovery.shouldWatch("configuredServiceName", []string{""}) {
t.Errorf("Expected service %s to be watched", "configuredServiceName") t.Errorf("Expected service %s to be watched", "configuredServiceName")
@ -47,12 +47,12 @@ func TestConfiguredService(t *testing.T) {
func TestConfiguredServiceWithTag(t *testing.T) { func TestConfiguredServiceWithTag(t *testing.T) {
conf := &SDConfig{ conf := &SDConfig{
Services: []string{"configuredServiceName"}, Services: []string{"configuredServiceName"},
ServiceTag: "http", ServiceTags: []string{"http"},
} }
consulDiscovery, err := NewDiscovery(conf, nil) consulDiscovery, err := NewDiscovery(conf, nil)
if err != nil { if err != nil {
t.Errorf("Unexpected error when initialising discovery %v", err) t.Errorf("Unexpected error when initializing discovery %v", err)
} }
if consulDiscovery.shouldWatch("configuredServiceName", []string{""}) { if consulDiscovery.shouldWatch("configuredServiceName", []string{""}) {
t.Errorf("Expected service %s to not be watched without tag", "configuredServiceName") t.Errorf("Expected service %s to not be watched without tag", "configuredServiceName")
@ -68,12 +68,102 @@ func TestConfiguredServiceWithTag(t *testing.T) {
} }
} }
func TestConfiguredServiceWithTags(t *testing.T) {
type testcase struct {
// What we've configured to watch.
conf *SDConfig
// The service we're checking if we should watch or not.
serviceName string
serviceTags []string
shouldWatch bool
}
cases := []testcase{
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1"},
},
serviceName: "configuredServiceName",
serviceTags: []string{""},
shouldWatch: false,
},
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1"},
},
serviceName: "configuredServiceName",
serviceTags: []string{"http", "v1"},
shouldWatch: true,
},
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1"},
},
serviceName: "nonConfiguredServiceName",
serviceTags: []string{""},
shouldWatch: false,
},
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1"},
},
serviceName: "nonConfiguredServiceName",
serviceTags: []string{"http, v1"},
shouldWatch: false,
},
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1"},
},
serviceName: "configuredServiceName",
serviceTags: []string{"http", "v1", "foo"},
shouldWatch: true,
},
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1", "foo"},
},
serviceName: "configuredServiceName",
serviceTags: []string{"http", "v1", "foo"},
shouldWatch: true,
},
testcase{
conf: &SDConfig{
Services: []string{"configuredServiceName"},
ServiceTags: []string{"http", "v1"},
},
serviceName: "configuredServiceName",
serviceTags: []string{"http", "v1", "v1"},
shouldWatch: true,
},
}
for _, tc := range cases {
consulDiscovery, err := NewDiscovery(tc.conf, nil)
if err != nil {
t.Errorf("Unexpected error when initializing discovery %v", err)
}
ret := consulDiscovery.shouldWatch(tc.serviceName, tc.serviceTags)
if ret != tc.shouldWatch {
t.Errorf("Expected should watch? %t, got %t. Watched service and tags: %s %+v, input was %s %+v", tc.shouldWatch, ret, tc.conf.Services, tc.conf.ServiceTags, tc.serviceName, tc.serviceTags)
}
}
}
func TestNonConfiguredService(t *testing.T) { func TestNonConfiguredService(t *testing.T) {
conf := &SDConfig{} conf := &SDConfig{}
consulDiscovery, err := NewDiscovery(conf, nil) consulDiscovery, err := NewDiscovery(conf, nil)
if err != nil { if err != nil {
t.Errorf("Unexpected error when initialising discovery %v", err) t.Errorf("Unexpected error when initializing discovery %v", err)
} }
if !consulDiscovery.shouldWatch("nonConfiguredServiceName", []string{""}) { if !consulDiscovery.shouldWatch("nonConfiguredServiceName", []string{""}) {
t.Errorf("Expected service %s to be watched", "nonConfiguredServiceName") t.Errorf("Expected service %s to be watched", "nonConfiguredServiceName")
@ -87,6 +177,7 @@ const (
"Node": "node1", "Node": "node1",
"Address": "1.1.1.1", "Address": "1.1.1.1",
"Datacenter": "test-dc", "Datacenter": "test-dc",
"TaggedAddresses": {"lan":"192.168.10.10","wan":"10.0.10.10"},
"NodeMeta": {"rack_name": "2304"}, "NodeMeta": {"rack_name": "2304"},
"ServiceID": "test", "ServiceID": "test",
"ServiceName": "test", "ServiceName": "test",
@ -194,7 +285,7 @@ func TestAllOptions(t *testing.T) {
config.Services = []string{"test"} config.Services = []string{"test"}
config.NodeMeta = map[string]string{"rack_name": "2304"} config.NodeMeta = map[string]string{"rack_name": "2304"}
config.ServiceTag = "tag1" config.ServiceTags = []string{"tag1"}
config.AllowStale = true config.AllowStale = true
config.Token = "fake-token" config.Token = "fake-token"
@ -24,8 +24,11 @@ import (
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/go-kit/kit/log/level"
"github.com/miekg/dns" "github.com/miekg/dns"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
) )
@ -76,16 +79,16 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err return err
} }
if len(c.Names) == 0 { if len(c.Names) == 0 {
return fmt.Errorf("DNS-SD config must contain at least one SRV record name") return errors.New("DNS-SD config must contain at least one SRV record name")
} }
switch strings.ToUpper(c.Type) { switch strings.ToUpper(c.Type) {
case "SRV": case "SRV":
case "A", "AAAA": case "A", "AAAA":
if c.Port == 0 { if c.Port == 0 {
return fmt.Errorf("a port is required in DNS-SD configs for all record types except SRV") return errors.New("a port is required in DNS-SD configs for all record types except SRV")
} }
default: default:
return fmt.Errorf("invalid DNS-SD records type %s", c.Type) return errors.Errorf("invalid DNS-SD records type %s", c.Type)
} }
return nil return nil
} }
@ -98,12 +101,13 @@ func init() {
// Discovery periodically performs DNS-SD requests. It implements // Discovery periodically performs DNS-SD requests. It implements
// the Discoverer interface. // the Discoverer interface.
type Discovery struct { type Discovery struct {
*refresh.Discovery
names []string names []string
interval time.Duration
port int port int
qtype uint16 qtype uint16
logger log.Logger logger log.Logger
lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error)
} }
// NewDiscovery returns a new Discovery which periodically refreshes its targets. // NewDiscovery returns a new Discovery which periodically refreshes its targets.
@ -121,51 +125,52 @@ func NewDiscovery(conf SDConfig, logger log.Logger) *Discovery {
case "SRV": case "SRV":
qtype = dns.TypeSRV qtype = dns.TypeSRV
} }
return &Discovery{ d := &Discovery{
names: conf.Names, names: conf.Names,
interval: time.Duration(conf.RefreshInterval),
qtype: qtype, qtype: qtype,
port: conf.Port, port: conf.Port,
logger: logger, logger: logger,
lookupFn: lookupWithSearchPath,
} }
d.Discovery = refresh.NewDiscovery(
logger,
"dns",
time.Duration(conf.RefreshInterval),
d.refresh,
)
return d
} }
// Run implements the Discoverer interface. func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { var (
ticker := time.NewTicker(d.interval) wg sync.WaitGroup
defer ticker.Stop() ch = make(chan *targetgroup.Group)
tgs = make([]*targetgroup.Group, 0, len(d.names))
// Get an initial set right away. )
d.refreshAll(ctx, ch)
for {
select {
case <-ticker.C:
d.refreshAll(ctx, ch)
case <-ctx.Done():
return
}
}
}
func (d *Discovery) refreshAll(ctx context.Context, ch chan<- []*targetgroup.Group) {
var wg sync.WaitGroup
wg.Add(len(d.names)) wg.Add(len(d.names))
for _, name := range d.names { for _, name := range d.names {
go func(n string) { go func(n string) {
if err := d.refresh(ctx, n, ch); err != nil { if err := d.refreshOne(ctx, n, ch); err != nil {
level.Error(d.logger).Log("msg", "Error refreshing DNS targets", "err", err) level.Error(d.logger).Log("msg", "Error refreshing DNS targets", "err", err)
} }
wg.Done() wg.Done()
}(name) }(name)
} }
go func() {
wg.Wait() wg.Wait()
close(ch)
}()
for tg := range ch {
tgs = append(tgs, tg)
}
return tgs, nil
} }
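Editor's note: this hunk is the heart of the DNS refactor. Run, the ticker, and the shared per-mechanism duration and failure metrics move into the new discovery/refresh package, and each provider now only supplies a refresh(ctx) callback. A toy provider in the same shape, assuming the refresh.NewDiscovery signature shown in this diff:

package sdexample

import (
    "context"
    "time"

    "github.com/go-kit/kit/log"

    "github.com/prometheus/prometheus/discovery/refresh"
    "github.com/prometheus/prometheus/discovery/targetgroup"
)

// staticSD embeds *refresh.Discovery, which provides Run; the provider
// only implements the periodic refresh itself.
type staticSD struct {
    *refresh.Discovery
    targets []string
}

func newStaticSD(logger log.Logger, targets []string) *staticSD {
    d := &staticSD{targets: targets}
    d.Discovery = refresh.NewDiscovery(logger, "static", 30*time.Second, d.refresh)
    return d
}

func (d *staticSD) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
    tg := &targetgroup.Group{Source: "static"}
    // Build model.LabelSet targets from d.targets here.
    return []*targetgroup.Group{tg}, nil
}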
func (d *Discovery) refresh(ctx context.Context, name string, ch chan<- []*targetgroup.Group) error { func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targetgroup.Group) error {
response, err := lookupWithSearchPath(name, d.qtype, d.logger) response, err := d.lookupFn(name, d.qtype, d.logger)
dnsSDLookupsCount.Inc() dnsSDLookupsCount.Inc()
if err != nil { if err != nil {
dnsSDLookupFailuresCount.Inc() dnsSDLookupFailuresCount.Inc()
@ -178,7 +183,7 @@ func (d *Discovery) refresh(ctx context.Context, name string, ch chan<- []*targe
} }
for _, record := range response.Answer { for _, record := range response.Answer {
target := model.LabelValue("") var target model.LabelValue
switch addr := record.(type) { switch addr := record.(type) {
case *dns.SRV: case *dns.SRV:
// Remove the final dot from rooted DNS names to make them look more usual. // Remove the final dot from rooted DNS names to make them look more usual.
@ -203,7 +208,7 @@ func (d *Discovery) refresh(ctx context.Context, name string, ch chan<- []*targe
select { select {
case <-ctx.Done(): case <-ctx.Done():
return ctx.Err() return ctx.Err()
case ch <- []*targetgroup.Group{tg}: case ch <- tg:
} }
return nil return nil
@ -214,7 +219,7 @@ func (d *Discovery) refresh(ctx context.Context, name string, ch chan<- []*targe
// //
// There are three possible outcomes: // There are three possible outcomes:
// //
// 1. One of the permutations of the given name is recognised as // 1. One of the permutations of the given name is recognized as
// "valid" by the DNS, in which case we consider ourselves "done" // "valid" by the DNS, in which case we consider ourselves "done"
// and that answer is returned. Note that, due to the way the DNS // and that answer is returned. Note that, due to the way the DNS
// handles "name has resource records, but none of the specified type", // handles "name has resource records, but none of the specified type",
@ -239,7 +244,7 @@ func (d *Discovery) refresh(ctx context.Context, name string, ch chan<- []*targe
func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) { func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
conf, err := dns.ClientConfigFromFile(resolvConf) conf, err := dns.ClientConfigFromFile(resolvConf)
if err != nil { if err != nil {
return nil, fmt.Errorf("could not load resolv.conf: %s", err) return nil, errors.Wrap(err, "could not load resolv.conf")
} }
allResponsesValid := true allResponsesValid := true
@ -265,7 +270,7 @@ func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Ms
return &dns.Msg{}, nil return &dns.Msg{}, nil
} }
// Outcome 3: boned. // Outcome 3: boned.
return nil, fmt.Errorf("could not resolve %q: all servers responded with errors to at least one search domain", name) return nil, errors.Errorf("could not resolve %q: all servers responded with errors to at least one search domain", name)
} }
// lookupFromAnyServer uses all configured servers to try and resolve a specific // lookupFromAnyServer uses all configured servers to try and resolve a specific
@ -301,7 +306,7 @@ func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logg
} }
} }
return nil, fmt.Errorf("could not resolve %s: no servers returned a viable answer", name) return nil, errors.Errorf("could not resolve %s: no servers returned a viable answer", name)
} }
// askServerForName makes a request to a specific DNS server for a specific // askServerForName makes a request to a specific DNS server for a specific
@ -317,19 +322,18 @@ func askServerForName(name string, queryType uint16, client *dns.Client, servAdd
} }
response, _, err := client.Exchange(msg, servAddr) response, _, err := client.Exchange(msg, servAddr)
if err == dns.ErrTruncated { if err != nil {
return nil, err
}
if response.Truncated {
if client.Net == "tcp" { if client.Net == "tcp" {
return nil, fmt.Errorf("got truncated message on TCP (64kiB limit exceeded?)") return nil, errors.New("got truncated message on TCP (64kiB limit exceeded?)")
} }
client.Net = "tcp" client.Net = "tcp"
return askServerForName(name, queryType, client, servAddr, false) return askServerForName(name, queryType, client, servAddr, false)
} }
if err != nil {
return nil, err
}
if msg.Id != response.Id {
return nil, fmt.Errorf("DNS ID mismatch, request: %d, response: %d", msg.Id, response.Id)
}
return response, nil return response, nil
} }
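Editor's note: truncation is now detected from the TC bit on the response rather than the removed dns.ErrTruncated sentinel, and the query is retried over TCP, where the 64 KiB message limit applies. A condensed sketch of that fallback with github.com/miekg/dns (retrying once instead of recursing, unlike the original):

package sdexample

import (
    "github.com/miekg/dns"
    "github.com/pkg/errors"
)

// queryWithTCPFallback asks one server and falls back to TCP when the
// UDP response comes back truncated.
func queryWithTCPFallback(name string, qtype uint16, server string) (*dns.Msg, error) {
    msg := new(dns.Msg)
    msg.SetQuestion(dns.Fqdn(name), qtype)

    client := &dns.Client{}
    resp, _, err := client.Exchange(msg, server)
    if err != nil {
        return nil, err
    }
    if resp.Truncated {
        if client.Net == "tcp" {
            return nil, errors.New("got truncated message on TCP")
        }
        client.Net = "tcp"
        resp, _, err = client.Exchange(msg, server)
    }
    return resp, err
}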
discovery/dns/dns_test.go (new file, 180 lines)
@ -0,0 +1,180 @@
// Copyright 2019 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package dns
import (
"context"
"fmt"
"net"
"testing"
"time"
"github.com/go-kit/kit/log"
"github.com/miekg/dns"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
func TestDNS(t *testing.T) {
testCases := []struct {
name string
config SDConfig
lookup func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error)
expected []*targetgroup.Group
}{
{
name: "A record query with error",
config: SDConfig{
Names: []string{"web.example.com."},
RefreshInterval: model.Duration(time.Minute),
Port: 80,
Type: "A",
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
return nil, fmt.Errorf("some error")
},
expected: []*targetgroup.Group{},
},
{
name: "A record query",
config: SDConfig{
Names: []string{"web.example.com."},
RefreshInterval: model.Duration(time.Minute),
Port: 80,
Type: "A",
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.A{A: net.IPv4(192, 0, 2, 2)},
},
},
nil
},
expected: []*targetgroup.Group{
&targetgroup.Group{
Source: "web.example.com.",
Targets: []model.LabelSet{
{"__address__": "192.0.2.2:80", "__meta_dns_name": "web.example.com."},
},
},
},
},
{
name: "AAAA record query",
config: SDConfig{
Names: []string{"web.example.com."},
RefreshInterval: model.Duration(time.Minute),
Port: 80,
Type: "AAAA",
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.AAAA{AAAA: net.IPv6loopback},
},
},
nil
},
expected: []*targetgroup.Group{
&targetgroup.Group{
Source: "web.example.com.",
Targets: []model.LabelSet{
{"__address__": "[::1]:80", "__meta_dns_name": "web.example.com."},
},
},
},
},
{
name: "SRV record query",
config: SDConfig{
Names: []string{"_mysql._tcp.db.example.com."},
RefreshInterval: model.Duration(time.Minute),
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.SRV{Port: 3306, Target: "db1.example.com."},
&dns.SRV{Port: 3306, Target: "db2.example.com."},
},
},
nil
},
expected: []*targetgroup.Group{
&targetgroup.Group{
Source: "_mysql._tcp.db.example.com.",
Targets: []model.LabelSet{
{"__address__": "db1.example.com:3306", "__meta_dns_name": "_mysql._tcp.db.example.com."},
{"__address__": "db2.example.com:3306", "__meta_dns_name": "_mysql._tcp.db.example.com."},
},
},
},
},
{
name: "SRV record query with unsupported resource records",
config: SDConfig{
Names: []string{"_mysql._tcp.db.example.com."},
RefreshInterval: model.Duration(time.Minute),
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
return &dns.Msg{
Answer: []dns.RR{
&dns.SRV{Port: 3306, Target: "db1.example.com."},
&dns.TXT{Txt: []string{"this should be discarded"}},
},
},
nil
},
expected: []*targetgroup.Group{
&targetgroup.Group{
Source: "_mysql._tcp.db.example.com.",
Targets: []model.LabelSet{
{"__address__": "db1.example.com:3306", "__meta_dns_name": "_mysql._tcp.db.example.com."},
},
},
},
},
{
name: "SRV record query with empty answer (NXDOMAIN)",
config: SDConfig{
Names: []string{"_mysql._tcp.db.example.com."},
RefreshInterval: model.Duration(time.Minute),
},
lookup: func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
return &dns.Msg{}, nil
},
expected: []*targetgroup.Group{
&targetgroup.Group{
Source: "_mysql._tcp.db.example.com.",
},
},
},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
sd := NewDiscovery(tc.config, nil)
sd.lookupFn = tc.lookup
tgs, err := sd.refresh(context.Background())
require.NoError(t, err)
require.Equal(t, tc.expected, tgs)
})
}
}
@ -25,13 +25,13 @@ import (
"github.com/aws/aws-sdk-go/aws/credentials/stscreds" "github.com/aws/aws-sdk-go/aws/credentials/stscreds"
"github.com/aws/aws-sdk-go/aws/ec2metadata" "github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/aws/aws-sdk-go/service/ec2" "github.com/prometheus/prometheus/discovery/refresh"
config_util "github.com/prometheus/common/config"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil" "github.com/prometheus/prometheus/util/strutil"
) )
@ -46,6 +46,7 @@ const (
ec2LabelPlatform = ec2Label + "platform" ec2LabelPlatform = ec2Label + "platform"
ec2LabelPublicDNS = ec2Label + "public_dns_name" ec2LabelPublicDNS = ec2Label + "public_dns_name"
ec2LabelPublicIP = ec2Label + "public_ip" ec2LabelPublicIP = ec2Label + "public_ip"
ec2LabelPrivateDNS = ec2Label + "private_dns_name"
ec2LabelPrivateIP = ec2Label + "private_ip" ec2LabelPrivateIP = ec2Label + "private_ip"
ec2LabelPrimarySubnetID = ec2Label + "primary_subnet_id" ec2LabelPrimarySubnetID = ec2Label + "primary_subnet_id"
ec2LabelSubnetID = ec2Label + "subnet_id" ec2LabelSubnetID = ec2Label + "subnet_id"
@ -54,23 +55,11 @@ const (
subnetSeparator = "," subnetSeparator = ","
) )
var ( // DefaultSDConfig is the default EC2 SD configuration.
ec2SDRefreshFailuresCount = prometheus.NewCounter( var DefaultSDConfig = SDConfig{
prometheus.CounterOpts{
Name: "prometheus_sd_ec2_refresh_failures_total",
Help: "The number of EC2-SD scrape failures.",
})
ec2SDRefreshDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Name: "prometheus_sd_ec2_refresh_duration_seconds",
Help: "The duration of a EC2-SD refresh in seconds.",
})
// DefaultSDConfig is the default EC2 SD configuration.
DefaultSDConfig = SDConfig{
Port: 80, Port: 80,
RefreshInterval: model.Duration(60 * time.Second), RefreshInterval: model.Duration(60 * time.Second),
} }
)
// Filter is the configuration for filtering EC2 instances. // Filter is the configuration for filtering EC2 instances.
type Filter struct { type Filter struct {
@ -107,33 +96,28 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
metadata := ec2metadata.New(sess) metadata := ec2metadata.New(sess)
region, err := metadata.Region() region, err := metadata.Region()
if err != nil { if err != nil {
return fmt.Errorf("EC2 SD configuration requires a region") return errors.New("EC2 SD configuration requires a region")
} }
c.Region = region c.Region = region
} }
for _, f := range c.Filters { for _, f := range c.Filters {
if len(f.Values) == 0 { if len(f.Values) == 0 {
return fmt.Errorf("EC2 SD configuration filter values cannot be empty") return errors.New("EC2 SD configuration filter values cannot be empty")
} }
} }
return nil return nil
} }
func init() {
prometheus.MustRegister(ec2SDRefreshFailuresCount)
prometheus.MustRegister(ec2SDRefreshDuration)
}
// Discovery periodically performs EC2-SD requests. It implements // Discovery periodically performs EC2-SD requests. It implements
// the Discoverer interface. // the Discoverer interface.
type Discovery struct { type Discovery struct {
*refresh.Discovery
aws *aws.Config aws *aws.Config
interval time.Duration interval time.Duration
profile string profile string
roleARN string roleARN string
port int port int
filters []*Filter filters []*Filter
logger log.Logger
} }
// NewDiscovery returns a new EC2Discovery which periodically refreshes its targets. // NewDiscovery returns a new EC2Discovery which periodically refreshes its targets.
@ -145,7 +129,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery {
if logger == nil { if logger == nil {
logger = log.NewNopLogger() logger = log.NewNopLogger()
} }
return &Discovery{ d := &Discovery{
aws: &aws.Config{ aws: &aws.Config{
Endpoint: &conf.Endpoint, Endpoint: &conf.Endpoint,
Region: &conf.Region, Region: &conf.Region,
@ -156,62 +140,23 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) *Discovery {
filters: conf.Filters, filters: conf.Filters,
interval: time.Duration(conf.RefreshInterval), interval: time.Duration(conf.RefreshInterval),
port: conf.Port, port: conf.Port,
logger: logger,
} }
d.Discovery = refresh.NewDiscovery(
logger,
"ec2",
time.Duration(conf.RefreshInterval),
d.refresh,
)
return d
} }
// Run implements the Discoverer interface. func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
ticker := time.NewTicker(d.interval)
defer ticker.Stop()
// Get an initial set right away.
tg, err := d.refresh()
if err != nil {
level.Error(d.logger).Log("msg", "Refresh failed", "err", err)
} else {
select {
case ch <- []*targetgroup.Group{tg}:
case <-ctx.Done():
return
}
}
for {
select {
case <-ticker.C:
tg, err := d.refresh()
if err != nil {
level.Error(d.logger).Log("msg", "Refresh failed", "err", err)
continue
}
select {
case ch <- []*targetgroup.Group{tg}:
case <-ctx.Done():
return
}
case <-ctx.Done():
return
}
}
}
func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
t0 := time.Now()
defer func() {
ec2SDRefreshDuration.Observe(time.Since(t0).Seconds())
if err != nil {
ec2SDRefreshFailuresCount.Inc()
}
}()
sess, err := session.NewSessionWithOptions(session.Options{ sess, err := session.NewSessionWithOptions(session.Options{
Config: *d.aws, Config: *d.aws,
Profile: d.profile, Profile: d.profile,
}) })
if err != nil { if err != nil {
return nil, fmt.Errorf("could not create aws session: %s", err) return nil, errors.Wrap(err, "could not create aws session")
} }
var ec2s *ec2.EC2 var ec2s *ec2.EC2
@ -221,7 +166,7 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
} else { } else {
ec2s = ec2.New(sess) ec2s = ec2.New(sess)
} }
tg = &targetgroup.Group{ tg := &targetgroup.Group{
Source: *d.aws.Region, Source: *d.aws.Region,
} }
@ -235,7 +180,7 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
input := &ec2.DescribeInstancesInput{Filters: filters} input := &ec2.DescribeInstancesInput{Filters: filters}
if err = ec2s.DescribeInstancesPages(input, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool { if err = ec2s.DescribeInstancesPagesWithContext(ctx, input, func(p *ec2.DescribeInstancesOutput, lastPage bool) bool {
for _, r := range p.Reservations { for _, r := range p.Reservations {
for _, inst := range r.Instances { for _, inst := range r.Instances {
if inst.PrivateIpAddress == nil { if inst.PrivateIpAddress == nil {
@ -250,6 +195,9 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
} }
labels[ec2LabelPrivateIP] = model.LabelValue(*inst.PrivateIpAddress) labels[ec2LabelPrivateIP] = model.LabelValue(*inst.PrivateIpAddress)
if inst.PrivateDnsName != nil {
labels[ec2LabelPrivateDNS] = model.LabelValue(*inst.PrivateDnsName)
}
addr := net.JoinHostPort(*inst.PrivateIpAddress, fmt.Sprintf("%d", d.port)) addr := net.JoinHostPort(*inst.PrivateIpAddress, fmt.Sprintf("%d", d.port))
labels[model.AddressLabel] = model.LabelValue(addr) labels[model.AddressLabel] = model.LabelValue(addr)
@ -300,7 +248,7 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
} }
return true return true
}); err != nil { }); err != nil {
return nil, fmt.Errorf("could not describe instances: %s", err) return nil, errors.Wrap(err, "could not describe instances")
} }
return tg, nil return []*targetgroup.Group{tg}, nil
} }
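Editor's note: switching to DescribeInstancesPagesWithContext threads the discovery context through every page request, so cancelling the manager aborts a long listing mid-pagination rather than after the current refresh. A self-contained sketch of the paginated call, with region handling simplified:

package sdexample

import (
    "context"

    "github.com/aws/aws-sdk-go/aws"
    "github.com/aws/aws-sdk-go/aws/session"
    "github.com/aws/aws-sdk-go/service/ec2"
    "github.com/pkg/errors"
)

// listPrivateIPs collects the private IP of every instance in region,
// page by page, honoring ctx cancellation between pages.
func listPrivateIPs(ctx context.Context, region string) ([]string, error) {
    sess, err := session.NewSession(&aws.Config{Region: aws.String(region)})
    if err != nil {
        return nil, errors.Wrap(err, "could not create aws session")
    }
    var ips []string
    input := &ec2.DescribeInstancesInput{}
    err = ec2.New(sess).DescribeInstancesPagesWithContext(ctx, input,
        func(p *ec2.DescribeInstancesOutput, lastPage bool) bool {
            for _, r := range p.Reservations {
                for _, inst := range r.Instances {
                    if inst.PrivateIpAddress != nil {
                        ips = append(ips, *inst.PrivateIpAddress)
                    }
                }
            }
            return true // continue to the next page
        })
    return ips, errors.Wrap(err, "could not describe instances")
}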
@ -16,7 +16,6 @@ package file
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"os" "os"
@ -28,11 +27,13 @@ import (
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
fsnotify "gopkg.in/fsnotify/fsnotify.v1"
yaml "gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
"gopkg.in/fsnotify/fsnotify.v1"
"gopkg.in/yaml.v2"
) )
var ( var (
@ -59,11 +60,11 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err return err
} }
if len(c.Files) == 0 { if len(c.Files) == 0 {
return fmt.Errorf("file service discovery config must contain at least one path name") return errors.New("file service discovery config must contain at least one path name")
} }
for _, name := range c.Files { for _, name := range c.Files {
if !patFileSDName.MatchString(name) { if !patFileSDName.MatchString(name) {
return fmt.Errorf("path name %q is not valid for file discovery", name) return errors.Errorf("path name %q is not valid for file discovery", name)
} }
} }
return nil return nil
@ -136,6 +137,7 @@ var (
prometheus.SummaryOpts{ prometheus.SummaryOpts{
Name: "prometheus_sd_file_scan_duration_seconds", Name: "prometheus_sd_file_scan_duration_seconds",
Help: "The duration of the File-SD scan in seconds.", Help: "The duration of the File-SD scan in seconds.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
}) })
fileSDReadErrorsCount = prometheus.NewCounter( fileSDReadErrorsCount = prometheus.NewCounter(
prometheus.CounterOpts{ prometheus.CounterOpts{
@ -382,7 +384,7 @@ func (d *Discovery) readFile(filename string) ([]*targetgroup.Group, error) {
return nil, err return nil, err
} }
default: default:
panic(fmt.Errorf("discovery.File.readFile: unhandled file extension %q", ext)) panic(errors.Errorf("discovery.File.readFile: unhandled file extension %q", ext))
} }
for i, tg := range targetGroups { for i, tg := range targetGroups {
@ -22,13 +22,13 @@ import (
"time" "time"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"golang.org/x/oauth2"
"golang.org/x/oauth2/google" "golang.org/x/oauth2/google"
compute "google.golang.org/api/compute/v1" compute "google.golang.org/api/compute/v1"
"google.golang.org/api/option"
"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil" "github.com/prometheus/prometheus/util/strutil"
) )
@ -50,24 +50,12 @@ const (
gceLabelMachineType = gceLabel + "machine_type" gceLabelMachineType = gceLabel + "machine_type"
) )
var ( // DefaultSDConfig is the default GCE SD configuration.
gceSDRefreshFailuresCount = prometheus.NewCounter( var DefaultSDConfig = SDConfig{
prometheus.CounterOpts{
Name: "prometheus_sd_gce_refresh_failures_total",
Help: "The number of GCE-SD refresh failures.",
})
gceSDRefreshDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Name: "prometheus_sd_gce_refresh_duration",
Help: "The duration of a GCE-SD refresh in seconds.",
})
// DefaultSDConfig is the default GCE SD configuration.
DefaultSDConfig = SDConfig{
Port: 80, Port: 80,
TagSeparator: ",", TagSeparator: ",",
RefreshInterval: model.Duration(60 * time.Second), RefreshInterval: model.Duration(60 * time.Second),
} }
)
// SDConfig is the configuration for GCE based service discovery. // SDConfig is the configuration for GCE based service discovery.
type SDConfig struct { type SDConfig struct {
@ -97,105 +85,59 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err return err
} }
if c.Project == "" { if c.Project == "" {
return fmt.Errorf("GCE SD configuration requires a project") return errors.New("GCE SD configuration requires a project")
} }
if c.Zone == "" { if c.Zone == "" {
return fmt.Errorf("GCE SD configuration requires a zone") return errors.New("GCE SD configuration requires a zone")
} }
return nil return nil
} }
func init() {
prometheus.MustRegister(gceSDRefreshFailuresCount)
prometheus.MustRegister(gceSDRefreshDuration)
}
// Discovery periodically performs GCE-SD requests. It implements // Discovery periodically performs GCE-SD requests. It implements
// the Discoverer interface. // the Discoverer interface.
type Discovery struct { type Discovery struct {
*refresh.Discovery
project string project string
zone string zone string
filter string filter string
client *http.Client client *http.Client
svc *compute.Service svc *compute.Service
isvc *compute.InstancesService isvc *compute.InstancesService
interval time.Duration
port int port int
tagSeparator string tagSeparator string
logger log.Logger
} }
// NewDiscovery returns a new Discovery which periodically refreshes its targets. // NewDiscovery returns a new Discovery which periodically refreshes its targets.
func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) { func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
if logger == nil { d := &Discovery{
logger = log.NewNopLogger()
}
gd := &Discovery{
project: conf.Project, project: conf.Project,
zone: conf.Zone, zone: conf.Zone,
filter: conf.Filter, filter: conf.Filter,
interval: time.Duration(conf.RefreshInterval),
port: conf.Port, port: conf.Port,
tagSeparator: conf.TagSeparator, tagSeparator: conf.TagSeparator,
logger: logger,
} }
var err error var err error
gd.client, err = google.DefaultClient(oauth2.NoContext, compute.ComputeReadonlyScope) d.client, err = google.DefaultClient(context.Background(), compute.ComputeReadonlyScope)
if err != nil { if err != nil {
return nil, fmt.Errorf("error setting up communication with GCE service: %s", err) return nil, errors.Wrap(err, "error setting up communication with GCE service")
} }
gd.svc, err = compute.New(gd.client) d.svc, err = compute.NewService(context.Background(), option.WithHTTPClient(d.client))
if err != nil { if err != nil {
return nil, fmt.Errorf("error setting up communication with GCE service: %s", err) return nil, errors.Wrap(err, "error setting up communication with GCE service")
} }
gd.isvc = compute.NewInstancesService(gd.svc) d.isvc = compute.NewInstancesService(d.svc)
return gd, nil
d.Discovery = refresh.NewDiscovery(
logger,
"gce",
time.Duration(conf.RefreshInterval),
d.refresh,
)
return d, nil
} }
// Run implements the Discoverer interface. func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { tg := &targetgroup.Group{
// Get an initial set right away.
tg, err := d.refresh()
if err != nil {
level.Error(d.logger).Log("msg", "Refresh failed", "err", err)
} else {
select {
case ch <- []*targetgroup.Group{tg}:
case <-ctx.Done():
}
}
ticker := time.NewTicker(d.interval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
tg, err := d.refresh()
if err != nil {
level.Error(d.logger).Log("msg", "Refresh failed", "err", err)
continue
}
select {
case ch <- []*targetgroup.Group{tg}:
case <-ctx.Done():
}
case <-ctx.Done():
return
}
}
}
func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
t0 := time.Now()
defer func() {
gceSDRefreshDuration.Observe(time.Since(t0).Seconds())
if err != nil {
gceSDRefreshFailuresCount.Inc()
}
}()
tg = &targetgroup.Group{
Source: fmt.Sprintf("GCE_%s_%s", d.project, d.zone), Source: fmt.Sprintf("GCE_%s_%s", d.project, d.zone),
} }
@ -203,7 +145,7 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
if len(d.filter) > 0 { if len(d.filter) > 0 {
ilc = ilc.Filter(d.filter) ilc = ilc.Filter(d.filter)
} }
err = ilc.Pages(context.TODO(), func(l *compute.InstanceList) error { err := ilc.Pages(ctx, func(l *compute.InstanceList) error {
for _, inst := range l.Items { for _, inst := range l.Items {
if len(inst.NetworkInterfaces) == 0 { if len(inst.NetworkInterfaces) == 0 {
continue continue
@ -260,7 +202,7 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
return nil return nil
}) })
if err != nil { if err != nil {
return tg, fmt.Errorf("error retrieving refresh targets from gce: %s", err) return nil, errors.Wrap(err, "error retrieving refresh targets from gce")
} }
return tg, nil return []*targetgroup.Group{tg}, nil
} }
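Editor's note: two API moves here: the deprecated compute.New(client) constructor becomes compute.NewService with option.WithHTTPClient, and Pages now receives the refresh context instead of context.TODO(). A trimmed sketch of the same call chain:

package sdexample

import (
    "context"

    "golang.org/x/oauth2/google"
    compute "google.golang.org/api/compute/v1"
    "google.golang.org/api/option"
)

// listInstanceNames pages through all instances in one zone, with ctx
// flowing into both client setup and pagination.
func listInstanceNames(ctx context.Context, project, zone string) ([]string, error) {
    client, err := google.DefaultClient(ctx, compute.ComputeReadonlyScope)
    if err != nil {
        return nil, err
    }
    svc, err := compute.NewService(ctx, option.WithHTTPClient(client))
    if err != nil {
        return nil, err
    }
    var names []string
    err = compute.NewInstancesService(svc).List(project, zone).
        Pages(ctx, func(l *compute.InstanceList) error {
            for _, inst := range l.Items {
                names = append(names, inst.Name)
            }
            return nil
        })
    return names, err
}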
@ -18,6 +18,7 @@ import (
"time" "time"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/metrics" "k8s.io/client-go/tools/metrics"
"k8s.io/client-go/util/workqueue" "k8s.io/client-go/util/workqueue"
@ -136,6 +137,22 @@ var (
}, },
[]string{"queue_name"}, []string{"queue_name"},
) )
clientGoWorkqueueUnfinishedWorkSecondsMetricVec = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: workqueueMetricsNamespace,
Name: "unfinished_work_seconds",
Help: "How long an item has remained unfinished in the work queue.",
},
[]string{"queue_name"},
)
clientGoWorkqueueLongestRunningProcessorMetricVec = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: workqueueMetricsNamespace,
Name: "longest_running_processor_seconds",
Help: "Duration of the longest running processor in the work queue.",
},
[]string{"queue_name"},
)
clientGoWorkqueueWorkDurationMetricVec = prometheus.NewSummaryVec( clientGoWorkqueueWorkDurationMetricVec = prometheus.NewSummaryVec(
prometheus.SummaryOpts{ prometheus.SummaryOpts{
Namespace: workqueueMetricsNamespace, Namespace: workqueueMetricsNamespace,
@ -153,6 +170,7 @@ type noopMetric struct{}
func (noopMetric) Inc() {} func (noopMetric) Inc() {}
func (noopMetric) Dec() {} func (noopMetric) Dec() {}
func (noopMetric) Observe(float64) {} func (noopMetric) Observe(float64) {}
func (noopMetric) Set(float64) {}
// Definition of client-go metrics adapters for HTTP requests observation // Definition of client-go metrics adapters for HTTP requests observation
type clientGoRequestMetricAdapter struct{} type clientGoRequestMetricAdapter struct{}
@ -218,6 +236,8 @@ func (f *clientGoWorkqueueMetricsProvider) Register(registerer prometheus.Regist
registerer.MustRegister(clientGoWorkqueueAddsMetricVec) registerer.MustRegister(clientGoWorkqueueAddsMetricVec)
registerer.MustRegister(clientGoWorkqueueLatencyMetricVec) registerer.MustRegister(clientGoWorkqueueLatencyMetricVec)
registerer.MustRegister(clientGoWorkqueueWorkDurationMetricVec) registerer.MustRegister(clientGoWorkqueueWorkDurationMetricVec)
registerer.MustRegister(clientGoWorkqueueUnfinishedWorkSecondsMetricVec)
registerer.MustRegister(clientGoWorkqueueLongestRunningProcessorMetricVec)
} }
func (f *clientGoWorkqueueMetricsProvider) NewDepthMetric(name string) workqueue.GaugeMetric { func (f *clientGoWorkqueueMetricsProvider) NewDepthMetric(name string) workqueue.GaugeMetric {
@ -226,21 +246,48 @@ func (f *clientGoWorkqueueMetricsProvider) NewDepthMetric(name string) workqueue
func (f *clientGoWorkqueueMetricsProvider) NewAddsMetric(name string) workqueue.CounterMetric { func (f *clientGoWorkqueueMetricsProvider) NewAddsMetric(name string) workqueue.CounterMetric {
return clientGoWorkqueueAddsMetricVec.WithLabelValues(name) return clientGoWorkqueueAddsMetricVec.WithLabelValues(name)
} }
func (f *clientGoWorkqueueMetricsProvider) NewLatencyMetric(name string) workqueue.SummaryMetric { func (f *clientGoWorkqueueMetricsProvider) NewLatencyMetric(name string) workqueue.HistogramMetric {
metric := clientGoWorkqueueLatencyMetricVec.WithLabelValues(name) metric := clientGoWorkqueueLatencyMetricVec.WithLabelValues(name)
// Convert microseconds to seconds for consistency across metrics. // Convert microseconds to seconds for consistency across metrics.
return prometheus.ObserverFunc(func(v float64) { return prometheus.ObserverFunc(func(v float64) {
metric.Observe(v / 1e6) metric.Observe(v / 1e6)
}) })
} }
func (f *clientGoWorkqueueMetricsProvider) NewWorkDurationMetric(name string) workqueue.SummaryMetric { func (f *clientGoWorkqueueMetricsProvider) NewWorkDurationMetric(name string) workqueue.HistogramMetric {
metric := clientGoWorkqueueWorkDurationMetricVec.WithLabelValues(name) metric := clientGoWorkqueueWorkDurationMetricVec.WithLabelValues(name)
// Convert microseconds to seconds for consistency across metrics. // Convert microseconds to seconds for consistency across metrics.
return prometheus.ObserverFunc(func(v float64) { return prometheus.ObserverFunc(func(v float64) {
metric.Observe(v / 1e6) metric.Observe(v / 1e6)
}) })
} }
func (f *clientGoWorkqueueMetricsProvider) NewUnfinishedWorkSecondsMetric(name string) workqueue.SettableGaugeMetric {
return clientGoWorkqueueUnfinishedWorkSecondsMetricVec.WithLabelValues(name)
}
func (f *clientGoWorkqueueMetricsProvider) NewLongestRunningProcessorSecondsMetric(name string) workqueue.SettableGaugeMetric {
return clientGoWorkqueueLongestRunningProcessorMetricVec.WithLabelValues(name)
}
func (clientGoWorkqueueMetricsProvider) NewRetriesMetric(name string) workqueue.CounterMetric { func (clientGoWorkqueueMetricsProvider) NewRetriesMetric(name string) workqueue.CounterMetric {
// Retries are not used so the metric is ommited. // Retries are not used so the metric is omitted.
return noopMetric{}
}
func (clientGoWorkqueueMetricsProvider) NewDeprecatedDepthMetric(name string) workqueue.GaugeMetric {
return noopMetric{}
}
func (clientGoWorkqueueMetricsProvider) NewDeprecatedAddsMetric(name string) workqueue.CounterMetric {
return noopMetric{}
}
func (clientGoWorkqueueMetricsProvider) NewDeprecatedLatencyMetric(name string) workqueue.SummaryMetric {
return noopMetric{}
}
func (f *clientGoWorkqueueMetricsProvider) NewDeprecatedWorkDurationMetric(name string) workqueue.SummaryMetric {
return noopMetric{}
}
func (f *clientGoWorkqueueMetricsProvider) NewDeprecatedUnfinishedWorkSecondsMetric(name string) workqueue.SettableGaugeMetric {
return noopMetric{}
}
func (f *clientGoWorkqueueMetricsProvider) NewDeprecatedLongestRunningProcessorMicrosecondsMetric(name string) workqueue.SettableGaugeMetric {
return noopMetric{}
}
func (clientGoWorkqueueMetricsProvider) NewDeprecatedRetriesMetric(name string) workqueue.CounterMetric {
return noopMetric{} return noopMetric{}
} }
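Editor's note, for context on how a provider like this takes effect: client-go's workqueue.SetProvider honors only its first call and applies only to queues created afterwards, so registration must happen before any informer builds a queue. A sketch, assuming p implements the full workqueue.MetricsProvider interface including the new settable-gauge methods:

package sdexample

import "k8s.io/client-go/util/workqueue"

// wireWorkqueueMetrics installs p for all subsequently created queues.
func wireWorkqueueMetrics(p workqueue.MetricsProvider) {
    workqueue.SetProvider(p)
    q := workqueue.NewNamed("example") // this queue reports through p
    defer q.ShutDown()
}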
@ -15,17 +15,18 @@ package kubernetes
import ( import (
"context" "context"
"fmt"
"net" "net"
"strconv" "strconv"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
apiv1 "k8s.io/api/core/v1" apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue" "k8s.io/client-go/util/workqueue"
"github.com/prometheus/prometheus/discovery/targetgroup"
) )
// Endpoints discovers new endpoint targets. // Endpoints discovers new endpoint targets.
@ -150,7 +151,7 @@ func (e *Endpoints) process(ctx context.Context, ch chan<- []*targetgroup.Group)
namespace, name, err := cache.SplitMetaNamespaceKey(key) namespace, name, err := cache.SplitMetaNamespaceKey(key)
if err != nil { if err != nil {
level.Error(e.logger).Log("msg", "spliting key failed", "key", key) level.Error(e.logger).Log("msg", "splitting key failed", "key", key)
return true return true
} }
@ -178,7 +179,7 @@ func convertToEndpoints(o interface{}) (*apiv1.Endpoints, error) {
return endpoints, nil return endpoints, nil
} }
return nil, fmt.Errorf("Received unexpected object: %v", o) return nil, errors.Errorf("received unexpected object: %v", o)
} }
func endpointsSource(ep *apiv1.Endpoints) string { func endpointsSource(ep *apiv1.Endpoints) string {
@ -191,6 +192,8 @@ func endpointsSourceFromNamespaceAndName(namespace, name string) string {
const ( const (
endpointsNameLabel = metaLabelPrefix + "endpoints_name" endpointsNameLabel = metaLabelPrefix + "endpoints_name"
endpointNodeName = metaLabelPrefix + "endpoint_node_name"
endpointHostname = metaLabelPrefix + "endpoint_hostname"
endpointReadyLabel = metaLabelPrefix + "endpoint_ready" endpointReadyLabel = metaLabelPrefix + "endpoint_ready"
endpointPortNameLabel = metaLabelPrefix + "endpoint_port_name" endpointPortNameLabel = metaLabelPrefix + "endpoint_port_name"
endpointPortProtocolLabel = metaLabelPrefix + "endpoint_port_protocol" endpointPortProtocolLabel = metaLabelPrefix + "endpoint_port_protocol"
@ -229,6 +232,13 @@ func (e *Endpoints) buildEndpoints(eps *apiv1.Endpoints) *targetgroup.Group {
target[model.LabelName(endpointAddressTargetNameLabel)] = lv(addr.TargetRef.Name) target[model.LabelName(endpointAddressTargetNameLabel)] = lv(addr.TargetRef.Name)
} }
if addr.NodeName != nil {
target[model.LabelName(endpointNodeName)] = lv(*addr.NodeName)
}
if addr.Hostname != "" {
target[model.LabelName(endpointHostname)] = lv(addr.Hostname)
}
pod := e.resolvePodRef(addr.TargetRef) pod := e.resolvePodRef(addr.TargetRef)
if pod == nil { if pod == nil {
// This target is not a Pod, so don't continue with Pod specific logic. // This target is not a Pod, so don't continue with Pod specific logic.
@ -324,11 +334,12 @@ func (e *Endpoints) resolvePodRef(ref *apiv1.ObjectReference) *apiv1.Pod {
p.Name = ref.Name p.Name = ref.Name
obj, exists, err := e.podStore.Get(p) obj, exists, err := e.podStore.Get(p)
if err != nil || !exists {
return nil
}
if err != nil { if err != nil {
level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err) level.Error(e.logger).Log("msg", "resolving pod ref failed", "err", err)
return nil
}
if !exists {
return nil
} }
return obj.(*apiv1.Pod) return obj.(*apiv1.Pod)
} }
@ -339,11 +350,12 @@ func (e *Endpoints) addServiceLabels(ns, name string, tg *targetgroup.Group) {
svc.Name = name svc.Name = name
obj, exists, err := e.serviceStore.Get(svc) obj, exists, err := e.serviceStore.Get(svc)
if !exists || err != nil {
return
}
if err != nil { if err != nil {
level.Error(e.logger).Log("msg", "retrieving service failed", "err", err) level.Error(e.logger).Log("msg", "retrieving service failed", "err", err)
return
}
if !exists {
return
} }
svc = obj.(*apiv1.Service) svc = obj.(*apiv1.Service)
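Editor's note: both store lookups above get the same fix. cache.Store.Get returns (obj, exists, err), and the old combined if !exists || err != nil branch silently swallowed real errors; checking err first, then exists, keeps the early return while making failures visible. The generic shape:

package sdexample

import (
    "github.com/go-kit/kit/log"
    "github.com/go-kit/kit/log/level"
    "k8s.io/client-go/tools/cache"
)

// getFromStore returns the stored object or nil, logging lookup errors
// instead of conflating them with a simple cache miss.
func getFromStore(logger log.Logger, store cache.Store, key interface{}) interface{} {
    obj, exists, err := store.Get(key)
    if err != nil {
        level.Error(logger).Log("msg", "store lookup failed", "err", err)
        return nil
    }
    if !exists {
        return nil
    }
    return obj
}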
@ -18,13 +18,14 @@ import (
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
) )
func makeEndpoints() *v1.Endpoints { func makeEndpoints() *v1.Endpoints {
var nodeName = "foobar"
return &v1.Endpoints{ return &v1.Endpoints{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: "testendpoints", Name: "testendpoints",
@ -35,6 +36,8 @@ func makeEndpoints() *v1.Endpoints {
Addresses: []v1.EndpointAddress{ Addresses: []v1.EndpointAddress{
{ {
IP: "1.2.3.4", IP: "1.2.3.4",
Hostname: "testendpoint1",
NodeName: &nodeName,
}, },
}, },
Ports: []v1.EndpointPort{ Ports: []v1.EndpointPort{
@ -69,14 +72,13 @@ func makeEndpoints() *v1.Endpoints {
} }
func TestEndpointsDiscoveryBeforeRun(t *testing.T) { func TestEndpointsDiscoveryBeforeRun(t *testing.T) {
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}) n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{})
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
beforeRun: func() { beforeRun: func() {
obj := makeEndpoints() obj := makeEndpoints()
c.CoreV1().Endpoints(obj.Namespace).Create(obj) c.CoreV1().Endpoints(obj.Namespace).Create(obj)
w.Endpoints().Add(obj)
}, },
expectedMaxItems: 1, expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -84,6 +86,8 @@ func TestEndpointsDiscoveryBeforeRun(t *testing.T) {
Targets: []model.LabelSet{ Targets: []model.LabelSet{
{ {
"__address__": "1.2.3.4:9000", "__address__": "1.2.3.4:9000",
"__meta_kubernetes_endpoint_hostname": "testendpoint1",
"__meta_kubernetes_endpoint_node_name": "foobar",
"__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true", "__meta_kubernetes_endpoint_ready": "true",
@ -148,7 +152,7 @@ func TestEndpointsDiscoveryAdd(t *testing.T) {
PodIP: "1.2.3.4", PodIP: "1.2.3.4",
}, },
} }
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, obj) n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, obj)
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
@ -181,7 +185,6 @@ func TestEndpointsDiscoveryAdd(t *testing.T) {
}, },
} }
c.CoreV1().Endpoints(obj.Namespace).Create(obj) c.CoreV1().Endpoints(obj.Namespace).Create(obj)
w.Endpoints().Add(obj)
}, },
expectedMaxItems: 1, expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -232,14 +235,13 @@ func TestEndpointsDiscoveryAdd(t *testing.T) {
} }
func TestEndpointsDiscoveryDelete(t *testing.T) { func TestEndpointsDiscoveryDelete(t *testing.T) {
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints()) n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
afterStart: func() { afterStart: func() {
obj := makeEndpoints() obj := makeEndpoints()
c.CoreV1().Endpoints(obj.Namespace).Delete(obj.Name, &metav1.DeleteOptions{}) c.CoreV1().Endpoints(obj.Namespace).Delete(obj.Name, &metav1.DeleteOptions{})
w.Endpoints().Delete(obj)
}, },
expectedMaxItems: 2, expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -251,7 +253,7 @@ func TestEndpointsDiscoveryDelete(t *testing.T) {
} }
func TestEndpointsDiscoveryUpdate(t *testing.T) { func TestEndpointsDiscoveryUpdate(t *testing.T) {
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints()) n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
@ -293,7 +295,6 @@ func TestEndpointsDiscoveryUpdate(t *testing.T) {
}, },
} }
c.CoreV1().Endpoints(obj.Namespace).Update(obj) c.CoreV1().Endpoints(obj.Namespace).Update(obj)
w.Endpoints().Modify(obj)
}, },
expectedMaxItems: 2, expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -323,7 +324,7 @@ func TestEndpointsDiscoveryUpdate(t *testing.T) {
} }
func TestEndpointsDiscoveryEmptySubsets(t *testing.T) { func TestEndpointsDiscoveryEmptySubsets(t *testing.T) {
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints()) n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
@ -336,7 +337,6 @@ func TestEndpointsDiscoveryEmptySubsets(t *testing.T) {
Subsets: []v1.EndpointSubset{}, Subsets: []v1.EndpointSubset{},
} }
c.CoreV1().Endpoints(obj.Namespace).Update(obj) c.CoreV1().Endpoints(obj.Namespace).Update(obj)
w.Endpoints().Modify(obj)
}, },
expectedMaxItems: 2, expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -352,7 +352,7 @@ func TestEndpointsDiscoveryEmptySubsets(t *testing.T) {
} }
func TestEndpointsDiscoveryWithService(t *testing.T) { func TestEndpointsDiscoveryWithService(t *testing.T) {
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints()) n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
@ -362,12 +362,11 @@ func TestEndpointsDiscoveryWithService(t *testing.T) {
Name: "testendpoints", Name: "testendpoints",
Namespace: "default", Namespace: "default",
Labels: map[string]string{ Labels: map[string]string{
"app": "test", "app/name": "test",
}, },
}, },
} }
c.CoreV1().Services(obj.Namespace).Create(obj) c.CoreV1().Services(obj.Namespace).Create(obj)
w.Services().Add(obj)
}, },
expectedMaxItems: 1, expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -375,6 +374,8 @@ func TestEndpointsDiscoveryWithService(t *testing.T) {
Targets: []model.LabelSet{ Targets: []model.LabelSet{
{ {
"__address__": "1.2.3.4:9000", "__address__": "1.2.3.4:9000",
"__meta_kubernetes_endpoint_hostname": "testendpoint1",
"__meta_kubernetes_endpoint_node_name": "foobar",
"__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true", "__meta_kubernetes_endpoint_ready": "true",
@ -395,7 +396,8 @@ func TestEndpointsDiscoveryWithService(t *testing.T) {
Labels: model.LabelSet{ Labels: model.LabelSet{
"__meta_kubernetes_namespace": "default", "__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints", "__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app": "test", "__meta_kubernetes_service_label_app_name": "test",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints", "__meta_kubernetes_service_name": "testendpoints",
}, },
Source: "endpoints/default/testendpoints", Source: "endpoints/default/testendpoints",
@ -405,7 +407,7 @@ func TestEndpointsDiscoveryWithService(t *testing.T) {
} }
func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) { func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
n, c, w := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints()) n, c := makeDiscovery(RoleEndpoint, NamespaceDiscovery{}, makeEndpoints())
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
@ -415,12 +417,11 @@ func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
Name: "testendpoints", Name: "testendpoints",
Namespace: "default", Namespace: "default",
Labels: map[string]string{ Labels: map[string]string{
"app": "test", "app/name": "test",
}, },
}, },
} }
c.CoreV1().Services(obj.Namespace).Create(obj) c.CoreV1().Services(obj.Namespace).Create(obj)
w.Services().Add(obj)
}, },
afterStart: func() { afterStart: func() {
obj := &v1.Service{ obj := &v1.Service{
@ -428,13 +429,12 @@ func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
Name: "testendpoints", Name: "testendpoints",
Namespace: "default", Namespace: "default",
Labels: map[string]string{ Labels: map[string]string{
"app": "svc", "app/name": "svc",
"component": "testing", "component": "testing",
}, },
}, },
} }
c.CoreV1().Services(obj.Namespace).Update(obj) c.CoreV1().Services(obj.Namespace).Update(obj)
w.Services().Modify(obj)
}, },
expectedMaxItems: 2, expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -442,6 +442,8 @@ func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
Targets: []model.LabelSet{ Targets: []model.LabelSet{
{ {
"__address__": "1.2.3.4:9000", "__address__": "1.2.3.4:9000",
"__meta_kubernetes_endpoint_hostname": "testendpoint1",
"__meta_kubernetes_endpoint_node_name": "foobar",
"__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true", "__meta_kubernetes_endpoint_ready": "true",
@ -462,9 +464,11 @@ func TestEndpointsDiscoveryWithServiceUpdate(t *testing.T) {
Labels: model.LabelSet{ Labels: model.LabelSet{
"__meta_kubernetes_namespace": "default", "__meta_kubernetes_namespace": "default",
"__meta_kubernetes_endpoints_name": "testendpoints", "__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app": "svc", "__meta_kubernetes_service_label_app_name": "svc",
"__meta_kubernetes_service_labelpresent_app_name": "true",
"__meta_kubernetes_service_name": "testendpoints", "__meta_kubernetes_service_name": "testendpoints",
"__meta_kubernetes_service_label_component": "testing", "__meta_kubernetes_service_label_component": "testing",
"__meta_kubernetes_service_labelpresent_component": "true",
}, },
Source: "endpoints/default/testendpoints", Source: "endpoints/default/testendpoints",
}, },
@ -540,7 +544,7 @@ func TestEndpointsDiscoveryNamespaces(t *testing.T) {
}, },
}, },
} }
n, _, _ := makeDiscovery(RoleEndpoint, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}, objs...) n, _ := makeDiscovery(RoleEndpoint, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}, objs...)
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
@ -550,6 +554,8 @@ func TestEndpointsDiscoveryNamespaces(t *testing.T) {
Targets: []model.LabelSet{ Targets: []model.LabelSet{
{ {
"__address__": "1.2.3.4:9000", "__address__": "1.2.3.4:9000",
"__meta_kubernetes_endpoint_hostname": "testendpoint1",
"__meta_kubernetes_endpoint_node_name": "foobar",
"__meta_kubernetes_endpoint_port_name": "testport", "__meta_kubernetes_endpoint_port_name": "testport",
"__meta_kubernetes_endpoint_port_protocol": "TCP", "__meta_kubernetes_endpoint_port_protocol": "TCP",
"__meta_kubernetes_endpoint_ready": "true", "__meta_kubernetes_endpoint_ready": "true",
@ -571,6 +577,7 @@ func TestEndpointsDiscoveryNamespaces(t *testing.T) {
"__meta_kubernetes_namespace": "ns1", "__meta_kubernetes_namespace": "ns1",
"__meta_kubernetes_endpoints_name": "testendpoints", "__meta_kubernetes_endpoints_name": "testendpoints",
"__meta_kubernetes_service_label_app": "app1", "__meta_kubernetes_service_label_app": "app1",
"__meta_kubernetes_service_labelpresent_app": "true",
"__meta_kubernetes_service_name": "testendpoints", "__meta_kubernetes_service_name": "testendpoints",
}, },
Source: "endpoints/ns1/testendpoints", Source: "endpoints/ns1/testendpoints",

View file

@ -15,16 +15,17 @@ package kubernetes
import ( import (
"context" "context"
"fmt"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
"k8s.io/api/extensions/v1beta1" "k8s.io/api/extensions/v1beta1"
"k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue" "k8s.io/client-go/util/workqueue"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
) )
// Ingress implements discovery of Kubernetes ingresses. // Ingress implements discovery of Kubernetes ingresses.
@ -118,7 +119,7 @@ func convertToIngress(o interface{}) (*v1beta1.Ingress, error) {
return ingress, nil return ingress, nil
} }
return nil, fmt.Errorf("Received unexpected object: %v", o) return nil, errors.Errorf("received unexpected object: %v", o)
} }
func ingressSource(s *v1beta1.Ingress) string { func ingressSource(s *v1beta1.Ingress) string {
@ -132,7 +133,9 @@ func ingressSourceFromNamespaceAndName(namespace, name string) string {
const ( const (
ingressNameLabel = metaLabelPrefix + "ingress_name" ingressNameLabel = metaLabelPrefix + "ingress_name"
ingressLabelPrefix = metaLabelPrefix + "ingress_label_" ingressLabelPrefix = metaLabelPrefix + "ingress_label_"
ingressLabelPresentPrefix = metaLabelPrefix + "ingress_labelpresent_"
ingressAnnotationPrefix = metaLabelPrefix + "ingress_annotation_" ingressAnnotationPrefix = metaLabelPrefix + "ingress_annotation_"
ingressAnnotationPresentPrefix = metaLabelPrefix + "ingress_annotationpresent_"
ingressSchemeLabel = metaLabelPrefix + "ingress_scheme" ingressSchemeLabel = metaLabelPrefix + "ingress_scheme"
ingressHostLabel = metaLabelPrefix + "ingress_host" ingressHostLabel = metaLabelPrefix + "ingress_host"
ingressPathLabel = metaLabelPrefix + "ingress_path" ingressPathLabel = metaLabelPrefix + "ingress_path"
@ -144,13 +147,15 @@ func ingressLabels(ingress *v1beta1.Ingress) model.LabelSet {
ls[namespaceLabel] = lv(ingress.Namespace) ls[namespaceLabel] = lv(ingress.Namespace)
for k, v := range ingress.Labels { for k, v := range ingress.Labels {
ln := strutil.SanitizeLabelName(ingressLabelPrefix + k) ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(ln)] = lv(v) ls[model.LabelName(ingressLabelPrefix+ln)] = lv(v)
ls[model.LabelName(ingressLabelPresentPrefix+ln)] = presentValue
} }
for k, v := range ingress.Annotations { for k, v := range ingress.Annotations {
ln := strutil.SanitizeLabelName(ingressAnnotationPrefix + k) ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(ln)] = lv(v) ls[model.LabelName(ingressAnnotationPrefix+ln)] = lv(v)
ls[model.LabelName(ingressAnnotationPresentPrefix+ln)] = presentValue
} }
return ls return ls
} }
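Note the reordering in both loops: the raw key is sanitized first and the prefix is prepended afterwards, so the value label and the new presence label share one sanitized key. Because the prefixes contain no characters that SanitizeLabelName rewrites, the value label names are unchanged from the old prefix-then-sanitize code; only the labelpresent/annotationpresent labels are new. A minimal sketch of the effect for the "test/label" key used in the updated tests:

package main

import (
	"fmt"

	"github.com/prometheus/prometheus/util/strutil"
)

func main() {
	ln := strutil.SanitizeLabelName("test/label") // "test_label"
	fmt.Println("__meta_kubernetes_ingress_label_" + ln)        // __meta_kubernetes_ingress_label_test_label
	fmt.Println("__meta_kubernetes_ingress_labelpresent_" + ln) // __meta_kubernetes_ingress_labelpresent_test_label
}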

View file

@ -36,8 +36,8 @@ func makeIngress(tls TLSMode) *v1beta1.Ingress {
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: "testingress", Name: "testingress",
Namespace: "default", Namespace: "default",
Labels: map[string]string{"testlabel": "testvalue"}, Labels: map[string]string{"test/label": "testvalue"},
Annotations: map[string]string{"testannotation": "testannotationvalue"}, Annotations: map[string]string{"test/annotation": "testannotationvalue"},
}, },
Spec: v1beta1.IngressSpec{ Spec: v1beta1.IngressSpec{
TLS: nil, TLS: nil,
@ -120,8 +120,10 @@ func expectedTargetGroups(ns string, tls TLSMode) map[string]*targetgroup.Group
Labels: model.LabelSet{ Labels: model.LabelSet{
"__meta_kubernetes_ingress_name": "testingress", "__meta_kubernetes_ingress_name": "testingress",
"__meta_kubernetes_namespace": lv(ns), "__meta_kubernetes_namespace": lv(ns),
"__meta_kubernetes_ingress_label_testlabel": "testvalue", "__meta_kubernetes_ingress_label_test_label": "testvalue",
"__meta_kubernetes_ingress_annotation_testannotation": "testannotationvalue", "__meta_kubernetes_ingress_labelpresent_test_label": "true",
"__meta_kubernetes_ingress_annotation_test_annotation": "testannotationvalue",
"__meta_kubernetes_ingress_annotationpresent_test_annotation": "true",
}, },
Source: key, Source: key,
}, },
@ -129,14 +131,13 @@ func expectedTargetGroups(ns string, tls TLSMode) map[string]*targetgroup.Group
} }
func TestIngressDiscoveryAdd(t *testing.T) { func TestIngressDiscoveryAdd(t *testing.T) {
n, c, w := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}) n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
afterStart: func() { afterStart: func() {
obj := makeIngress(TLSNo) obj := makeIngress(TLSNo)
c.ExtensionsV1beta1().Ingresses("default").Create(obj) c.ExtensionsV1beta1().Ingresses("default").Create(obj)
w.Ingresses().Add(obj)
}, },
expectedMaxItems: 1, expectedMaxItems: 1,
expectedRes: expectedTargetGroups("default", TLSNo), expectedRes: expectedTargetGroups("default", TLSNo),
@ -144,14 +145,13 @@ func TestIngressDiscoveryAdd(t *testing.T) {
} }
func TestIngressDiscoveryAddTLS(t *testing.T) { func TestIngressDiscoveryAddTLS(t *testing.T) {
n, c, w := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}) n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
afterStart: func() { afterStart: func() {
obj := makeIngress(TLSYes) obj := makeIngress(TLSYes)
c.ExtensionsV1beta1().Ingresses("default").Create(obj) c.ExtensionsV1beta1().Ingresses("default").Create(obj)
w.Ingresses().Add(obj)
}, },
expectedMaxItems: 1, expectedMaxItems: 1,
expectedRes: expectedTargetGroups("default", TLSYes), expectedRes: expectedTargetGroups("default", TLSYes),
@ -159,14 +159,13 @@ func TestIngressDiscoveryAddTLS(t *testing.T) {
} }
func TestIngressDiscoveryAddMixed(t *testing.T) { func TestIngressDiscoveryAddMixed(t *testing.T) {
n, c, w := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}}) n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"default"}})
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
afterStart: func() { afterStart: func() {
obj := makeIngress(TLSMixed) obj := makeIngress(TLSMixed)
c.ExtensionsV1beta1().Ingresses("default").Create(obj) c.ExtensionsV1beta1().Ingresses("default").Create(obj)
w.Ingresses().Add(obj)
}, },
expectedMaxItems: 1, expectedMaxItems: 1,
expectedRes: expectedTargetGroups("default", TLSMixed), expectedRes: expectedTargetGroups("default", TLSMixed),
@ -174,7 +173,7 @@ func TestIngressDiscoveryAddMixed(t *testing.T) {
} }
func TestIngressDiscoveryNamespaces(t *testing.T) { func TestIngressDiscoveryNamespaces(t *testing.T) {
n, c, w := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}) n, c := makeDiscovery(RoleIngress, NamespaceDiscovery{Names: []string{"ns1", "ns2"}})
expected := expectedTargetGroups("ns1", TLSNo) expected := expectedTargetGroups("ns1", TLSNo)
for k, v := range expectedTargetGroups("ns2", TLSNo) { for k, v := range expectedTargetGroups("ns2", TLSNo) {
@ -187,7 +186,6 @@ func TestIngressDiscoveryNamespaces(t *testing.T) {
obj := makeIngress(TLSNo) obj := makeIngress(TLSNo)
obj.Namespace = ns obj.Namespace = ns
c.ExtensionsV1beta1().Ingresses(obj.Namespace).Create(obj) c.ExtensionsV1beta1().Ingresses(obj.Namespace).Create(obj)
w.Ingresses().Add(obj)
} }
}, },
expectedMaxItems: 2, expectedMaxItems: 2,

View file

@ -15,18 +15,16 @@ package kubernetes
import ( import (
"context" "context"
"fmt" "reflect"
"io/ioutil"
"sync" "sync"
"time" "time"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
config_util "github.com/prometheus/common/config" config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
apiv1 "k8s.io/api/core/v1" apiv1 "k8s.io/api/core/v1"
extensionsv1beta1 "k8s.io/api/extensions/v1beta1" extensionsv1beta1 "k8s.io/api/extensions/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -35,6 +33,8 @@ import (
"k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest" "k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/cache"
"github.com/prometheus/prometheus/discovery/targetgroup"
) )
const ( const (
@ -43,6 +43,7 @@ const (
metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_" metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_"
namespaceLabel = metaLabelPrefix + "namespace" namespaceLabel = metaLabelPrefix + "namespace"
metricsNamespace = "prometheus_sd_kubernetes" metricsNamespace = "prometheus_sd_kubernetes"
presentValue = model.LabelValue("true")
) )
var ( var (
@ -80,7 +81,7 @@ func (c *Role) UnmarshalYAML(unmarshal func(interface{}) error) error {
case RoleNode, RolePod, RoleService, RoleEndpoint, RoleIngress: case RoleNode, RolePod, RoleService, RoleEndpoint, RoleIngress:
return nil return nil
default: default:
return fmt.Errorf("Unknown Kubernetes SD role %q", *c) return errors.Errorf("unknown Kubernetes SD role %q", *c)
} }
} }
@ -88,10 +89,7 @@ func (c *Role) UnmarshalYAML(unmarshal func(interface{}) error) error {
type SDConfig struct { type SDConfig struct {
APIServer config_util.URL `yaml:"api_server,omitempty"` APIServer config_util.URL `yaml:"api_server,omitempty"`
Role Role `yaml:"role"` Role Role `yaml:"role"`
BasicAuth *config_util.BasicAuth `yaml:"basic_auth,omitempty"` HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"`
BearerToken config_util.Secret `yaml:"bearer_token,omitempty"`
BearerTokenFile string `yaml:"bearer_token_file,omitempty"`
TLSConfig config_util.TLSConfig `yaml:"tls_config,omitempty"`
NamespaceDiscovery NamespaceDiscovery `yaml:"namespaces,omitempty"` NamespaceDiscovery NamespaceDiscovery `yaml:"namespaces,omitempty"`
} }
@ -104,18 +102,14 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err return err
} }
if c.Role == "" { if c.Role == "" {
return fmt.Errorf("role missing (one of: pod, service, endpoints, node)") return errors.Errorf("role missing (one of: pod, service, endpoints, node, ingress)")
} }
if len(c.BearerToken) > 0 && len(c.BearerTokenFile) > 0 { err = c.HTTPClientConfig.Validate()
return fmt.Errorf("at most one of bearer_token & bearer_token_file must be configured") if err != nil {
return err
} }
if c.BasicAuth != nil && (len(c.BearerToken) > 0 || len(c.BearerTokenFile) > 0) { if c.APIServer.URL == nil && !reflect.DeepEqual(c.HTTPClientConfig, config_util.HTTPClientConfig{}) {
return fmt.Errorf("at most one of basic_auth, bearer_token & bearer_token_file must be configured") return errors.Errorf("to use custom HTTP client configuration please provide the 'api_server' URL explicitly")
}
if c.APIServer.URL == nil &&
(c.BasicAuth != nil || c.BearerToken != "" || c.BearerTokenFile != "" ||
c.TLSConfig.CAFile != "" || c.TLSConfig.CertFile != "" || c.TLSConfig.KeyFile != "") {
return fmt.Errorf("to use custom authentication please provide the 'api_server' URL explicitly")
} }
return nil return nil
} }
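The dedicated basic_auth, bearer_token, bearer_token_file, and tls_config fields are folded into the inlined HTTPClientConfig, so existing YAML keeps parsing while validation is delegated to common/config; the reflect.DeepEqual check against the zero value then rejects any custom HTTP client setting when api_server is unset. A sketch of the fields this assumes HTTPClientConfig to carry (a hypothetical excerpt of github.com/prometheus/common/config from around this change; consult that package for the authoritative definition):

package config

// Hypothetical excerpt, not part of this diff.
type HTTPClientConfig struct {
	BasicAuth       *BasicAuth `yaml:"basic_auth,omitempty"`
	BearerToken     Secret     `yaml:"bearer_token,omitempty"`
	BearerTokenFile string     `yaml:"bearer_token_file,omitempty"`
	ProxyURL        URL        `yaml:"proxy_url,omitempty"`
	TLSConfig       TLSConfig  `yaml:"tls_config,omitempty"`
}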
@ -137,7 +131,7 @@ func init() {
prometheus.MustRegister(eventCount) prometheus.MustRegister(eventCount)
// Initialize metric vectors. // Initialize metric vectors.
for _, role := range []string{"endpoints", "node", "pod", "service"} { for _, role := range []string{"endpoints", "node", "pod", "service", "ingress"} {
for _, evt := range []string{"add", "delete", "update"} { for _, evt := range []string{"add", "delete", "update"} {
eventCount.WithLabelValues(role, evt) eventCount.WithLabelValues(role, evt)
} }
@ -195,50 +189,19 @@ func New(l log.Logger, conf *SDConfig) (*Discovery, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
// Because the handling of configuration parameters changes
// we should inform the user when their currently configured values
// will be ignored due to precedence of InClusterConfig
level.Info(l).Log("msg", "Using pod service account via in-cluster config") level.Info(l).Log("msg", "Using pod service account via in-cluster config")
if conf.TLSConfig.CAFile != "" {
level.Warn(l).Log("msg", "Configured TLS CA file is ignored when using pod service account")
}
if conf.TLSConfig.CertFile != "" || conf.TLSConfig.KeyFile != "" {
level.Warn(l).Log("msg", "Configured TLS client certificate is ignored when using pod service account")
}
if conf.BearerToken != "" {
level.Warn(l).Log("msg", "Configured auth token is ignored when using pod service account")
}
if conf.BasicAuth != nil {
level.Warn(l).Log("msg", "Configured basic authentication credentials are ignored when using pod service account")
}
} else { } else {
kcfg = &rest.Config{ rt, err := config_util.NewRoundTripperFromConfig(conf.HTTPClientConfig, "kubernetes_sd")
Host: conf.APIServer.String(),
TLSClientConfig: rest.TLSClientConfig{
CAFile: conf.TLSConfig.CAFile,
CertFile: conf.TLSConfig.CertFile,
KeyFile: conf.TLSConfig.KeyFile,
Insecure: conf.TLSConfig.InsecureSkipVerify,
},
}
token := string(conf.BearerToken)
if conf.BearerTokenFile != "" {
bf, err := ioutil.ReadFile(conf.BearerTokenFile)
if err != nil { if err != nil {
return nil, err return nil, err
} }
token = string(bf) kcfg = &rest.Config{
} Host: conf.APIServer.String(),
kcfg.BearerToken = token Transport: rt,
if conf.BasicAuth != nil {
kcfg.Username = conf.BasicAuth.Username
kcfg.Password = string(conf.BasicAuth.Password)
} }
} }
kcfg.UserAgent = "prometheus/discovery" kcfg.UserAgent = "Prometheus/discovery"
c, err := kubernetes.NewForConfig(kcfg) c, err := kubernetes.NewForConfig(kcfg)
if err != nil { if err != nil {

View file

@ -16,7 +16,6 @@ package kubernetes
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"sync"
"testing" "testing"
"time" "time"
@ -24,72 +23,21 @@ import (
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/testutil" "github.com/prometheus/prometheus/util/testutil"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/fake" "k8s.io/client-go/kubernetes/fake"
k8stesting "k8s.io/client-go/testing"
"k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/cache"
) )
type watcherFactory struct {
sync.RWMutex
watchers map[schema.GroupVersionResource]*watch.FakeWatcher
}
func (wf *watcherFactory) watchFor(gvr schema.GroupVersionResource) *watch.FakeWatcher {
wf.Lock()
defer wf.Unlock()
var fakewatch *watch.FakeWatcher
fakewatch, ok := wf.watchers[gvr]
if !ok {
fakewatch = watch.NewFakeWithChanSize(128, true)
wf.watchers[gvr] = fakewatch
}
return fakewatch
}
func (wf *watcherFactory) Nodes() *watch.FakeWatcher {
return wf.watchFor(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "nodes"})
}
func (wf *watcherFactory) Ingresses() *watch.FakeWatcher {
return wf.watchFor(schema.GroupVersionResource{Group: "extensions", Version: "v1beta1", Resource: "ingresses"})
}
func (wf *watcherFactory) Endpoints() *watch.FakeWatcher {
return wf.watchFor(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "endpoints"})
}
func (wf *watcherFactory) Services() *watch.FakeWatcher {
return wf.watchFor(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "services"})
}
func (wf *watcherFactory) Pods() *watch.FakeWatcher {
return wf.watchFor(schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"})
}
// makeDiscovery creates a kubernetes.Discovery instance for testing. // makeDiscovery creates a kubernetes.Discovery instance for testing.
func makeDiscovery(role Role, nsDiscovery NamespaceDiscovery, objects ...runtime.Object) (*Discovery, kubernetes.Interface, *watcherFactory) { func makeDiscovery(role Role, nsDiscovery NamespaceDiscovery, objects ...runtime.Object) (*Discovery, kubernetes.Interface) {
clientset := fake.NewSimpleClientset(objects...) clientset := fake.NewSimpleClientset(objects...)
// The client-go version we are using does not push watch events on
// Add/Update/Create, so we need to emit events manually.
// See https://github.com/kubernetes/kubernetes/issues/54075.
// TODO: update client-go and related packages to kubernetes-1.10.0+
wf := &watcherFactory{
watchers: make(map[schema.GroupVersionResource]*watch.FakeWatcher),
}
clientset.PrependWatchReactor("*", func(action k8stesting.Action) (handled bool, ret watch.Interface, err error) {
gvr := action.GetResource()
return true, wf.watchFor(gvr), nil
})
return &Discovery{ return &Discovery{
client: clientset, client: clientset,
logger: log.NewNopLogger(), logger: log.NewNopLogger(),
role: role, role: role,
namespaceDiscovery: &nsDiscovery, namespaceDiscovery: &nsDiscovery,
}, clientset, wf }, clientset
} }
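The deleted watcherFactory existed only because the previously vendored client-go fake clientset never delivered watch events, so every test had to inject them by hand (the kubernetes#54075 workaround noted above). With the client-go upgrade, the fake object tracker drives watches itself, which is why the third return value and all the w.*().Add/Modify/Delete calls disappear throughout this commit. A minimal sketch of the behavior the tests now rely on, assuming the upgraded fake clientset:

package kubernetes

import (
	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes/fake"
)

// exampleFakeWatch is illustrative only: a Create against the fake clientset
// now surfaces as a watch event without manual injection.
func exampleFakeWatch() {
	clientset := fake.NewSimpleClientset()
	w, _ := clientset.CoreV1().Pods("default").Watch(metav1.ListOptions{})
	pod := &apiv1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "p", Namespace: "default"}}
	clientset.CoreV1().Pods("default").Create(pod)
	<-w.ResultChan() // delivered automatically by the fake object tracker
}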
type k8sDiscoveryTest struct { type k8sDiscoveryTest struct {
@ -106,6 +54,7 @@ type k8sDiscoveryTest struct {
} }
func (d k8sDiscoveryTest) Run(t *testing.T) { func (d k8sDiscoveryTest) Run(t *testing.T) {
t.Helper()
ch := make(chan []*targetgroup.Group) ch := make(chan []*targetgroup.Group)
ctx, cancel := context.WithTimeout(context.Background(), time.Minute) ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel() defer cancel()
@ -156,7 +105,7 @@ Loop:
case <-time.After(timeout): case <-time.After(timeout):
// Because we use a queue, an object that is created then // Because we use a queue, an object that is created then
// deleted or updated may be processed only once. // deleted or updated may be processed only once.
// So possibliy we may skip events, timed out here. // So possibly we may skip events, timed out here.
t.Logf("timed out, got %d (max: %d) items, some events are skipped", len(allTgs), max) t.Logf("timed out, got %d (max: %d) items, some events are skipped", len(allTgs), max)
break Loop break Loop
} }
@ -176,6 +125,7 @@ Loop:
} }
func requireTargetGroups(t *testing.T, expected, res map[string]*targetgroup.Group) { func requireTargetGroups(t *testing.T, expected, res map[string]*targetgroup.Group) {
t.Helper()
b1, err := json.Marshal(expected) b1, err := json.Marshal(expected)
if err != nil { if err != nil {
panic(err) panic(err)

View file

@ -15,18 +15,19 @@ package kubernetes
import ( import (
"context" "context"
"fmt"
"net" "net"
"strconv" "strconv"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
apiv1 "k8s.io/api/core/v1" apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue" "k8s.io/client-go/util/workqueue"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil"
) )
const ( const (
@ -127,7 +128,7 @@ func convertToNode(o interface{}) (*apiv1.Node, error) {
return node, nil return node, nil
} }
return nil, fmt.Errorf("Received unexpected object: %v", o) return nil, errors.Errorf("received unexpected object: %v", o)
} }
func nodeSource(n *apiv1.Node) string { func nodeSource(n *apiv1.Node) string {
@ -141,7 +142,9 @@ func nodeSourceFromName(name string) string {
const ( const (
nodeNameLabel = metaLabelPrefix + "node_name" nodeNameLabel = metaLabelPrefix + "node_name"
nodeLabelPrefix = metaLabelPrefix + "node_label_" nodeLabelPrefix = metaLabelPrefix + "node_label_"
nodeLabelPresentPrefix = metaLabelPrefix + "node_labelpresent_"
nodeAnnotationPrefix = metaLabelPrefix + "node_annotation_" nodeAnnotationPrefix = metaLabelPrefix + "node_annotation_"
nodeAnnotationPresentPrefix = metaLabelPrefix + "node_annotationpresent_"
nodeAddressPrefix = metaLabelPrefix + "node_address_" nodeAddressPrefix = metaLabelPrefix + "node_address_"
) )
@ -151,13 +154,15 @@ func nodeLabels(n *apiv1.Node) model.LabelSet {
ls[nodeNameLabel] = lv(n.Name) ls[nodeNameLabel] = lv(n.Name)
for k, v := range n.Labels { for k, v := range n.Labels {
ln := strutil.SanitizeLabelName(nodeLabelPrefix + k) ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(ln)] = lv(v) ls[model.LabelName(nodeLabelPrefix+ln)] = lv(v)
ls[model.LabelName(nodeLabelPresentPrefix+ln)] = presentValue
} }
for k, v := range n.Annotations { for k, v := range n.Annotations {
ln := strutil.SanitizeLabelName(nodeAnnotationPrefix + k) ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(ln)] = lv(v) ls[model.LabelName(nodeAnnotationPrefix+ln)] = lv(v)
ls[model.LabelName(nodeAnnotationPresentPrefix+ln)] = presentValue
} }
return ls return ls
} }
@ -214,5 +219,5 @@ func nodeAddress(node *apiv1.Node) (string, map[apiv1.NodeAddressType][]string,
if addresses, ok := m[apiv1.NodeHostName]; ok { if addresses, ok := m[apiv1.NodeHostName]; ok {
return addresses[0], m, nil return addresses[0], m, nil
} }
return "", m, fmt.Errorf("host address unknown") return "", m, errors.New("host address unknown")
} }

View file

@ -19,7 +19,7 @@ import (
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
) )
@ -51,7 +51,7 @@ func makeEnumeratedNode(i int) *v1.Node {
} }
func TestNodeDiscoveryBeforeStart(t *testing.T) { func TestNodeDiscoveryBeforeStart(t *testing.T) {
n, c, w := makeDiscovery(RoleNode, NamespaceDiscovery{}) n, c := makeDiscovery(RoleNode, NamespaceDiscovery{})
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
@ -59,11 +59,10 @@ func TestNodeDiscoveryBeforeStart(t *testing.T) {
obj := makeNode( obj := makeNode(
"test", "test",
"1.2.3.4", "1.2.3.4",
map[string]string{"testlabel": "testvalue"}, map[string]string{"test-label": "testvalue"},
map[string]string{"testannotation": "testannotationvalue"}, map[string]string{"test-annotation": "testannotationvalue"},
) )
c.CoreV1().Nodes().Create(obj) c.CoreV1().Nodes().Create(obj)
w.Nodes().Add(obj)
}, },
expectedMaxItems: 1, expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -77,8 +76,10 @@ func TestNodeDiscoveryBeforeStart(t *testing.T) {
}, },
Labels: model.LabelSet{ Labels: model.LabelSet{
"__meta_kubernetes_node_name": "test", "__meta_kubernetes_node_name": "test",
"__meta_kubernetes_node_label_testlabel": "testvalue", "__meta_kubernetes_node_label_test_label": "testvalue",
"__meta_kubernetes_node_annotation_testannotation": "testannotationvalue", "__meta_kubernetes_node_labelpresent_test_label": "true",
"__meta_kubernetes_node_annotation_test_annotation": "testannotationvalue",
"__meta_kubernetes_node_annotationpresent_test_annotation": "true",
}, },
Source: "node/test", Source: "node/test",
}, },
@ -87,14 +88,13 @@ func TestNodeDiscoveryBeforeStart(t *testing.T) {
} }
func TestNodeDiscoveryAdd(t *testing.T) { func TestNodeDiscoveryAdd(t *testing.T) {
n, c, w := makeDiscovery(RoleNode, NamespaceDiscovery{}) n, c := makeDiscovery(RoleNode, NamespaceDiscovery{})
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
afterStart: func() { afterStart: func() {
obj := makeEnumeratedNode(1) obj := makeEnumeratedNode(1)
c.CoreV1().Nodes().Create(obj) c.CoreV1().Nodes().Create(obj)
w.Nodes().Add(obj)
}, },
expectedMaxItems: 1, expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -117,13 +117,12 @@ func TestNodeDiscoveryAdd(t *testing.T) {
func TestNodeDiscoveryDelete(t *testing.T) { func TestNodeDiscoveryDelete(t *testing.T) {
obj := makeEnumeratedNode(0) obj := makeEnumeratedNode(0)
n, c, w := makeDiscovery(RoleNode, NamespaceDiscovery{}, obj) n, c := makeDiscovery(RoleNode, NamespaceDiscovery{}, obj)
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
afterStart: func() { afterStart: func() {
c.CoreV1().Nodes().Delete(obj.Name, &metav1.DeleteOptions{}) c.CoreV1().Nodes().Delete(obj.Name, &metav1.DeleteOptions{})
w.Nodes().Delete(obj)
}, },
expectedMaxItems: 2, expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -135,14 +134,13 @@ func TestNodeDiscoveryDelete(t *testing.T) {
} }
func TestNodeDiscoveryUpdate(t *testing.T) { func TestNodeDiscoveryUpdate(t *testing.T) {
n, c, w := makeDiscovery(RoleNode, NamespaceDiscovery{}) n, c := makeDiscovery(RoleNode, NamespaceDiscovery{})
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
afterStart: func() { afterStart: func() {
obj1 := makeEnumeratedNode(0) obj1 := makeEnumeratedNode(0)
c.CoreV1().Nodes().Create(obj1) c.CoreV1().Nodes().Create(obj1)
w.Nodes().Add(obj1)
obj2 := makeNode( obj2 := makeNode(
"test0", "test0",
"1.2.3.4", "1.2.3.4",
@ -150,7 +148,6 @@ func TestNodeDiscoveryUpdate(t *testing.T) {
map[string]string{}, map[string]string{},
) )
c.CoreV1().Nodes().Update(obj2) c.CoreV1().Nodes().Update(obj2)
w.Nodes().Modify(obj2)
}, },
expectedMaxItems: 2, expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -164,6 +161,7 @@ func TestNodeDiscoveryUpdate(t *testing.T) {
}, },
Labels: model.LabelSet{ Labels: model.LabelSet{
"__meta_kubernetes_node_label_Unschedulable": "true", "__meta_kubernetes_node_label_Unschedulable": "true",
"__meta_kubernetes_node_labelpresent_Unschedulable": "true",
"__meta_kubernetes_node_name": "test0", "__meta_kubernetes_node_name": "test0",
}, },
Source: "node/test0", Source: "node/test0",

View file

@ -15,13 +15,13 @@ package kubernetes
import ( import (
"context" "context"
"fmt"
"net" "net"
"strconv" "strconv"
"strings" "strings"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1" apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -131,7 +131,7 @@ func convertToPod(o interface{}) (*apiv1.Pod, error) {
return pod, nil return pod, nil
} }
return nil, fmt.Errorf("Received unexpected object: %v", o) return nil, errors.Errorf("received unexpected object: %v", o)
} }
const ( const (
@ -141,10 +141,13 @@ const (
podContainerPortNameLabel = metaLabelPrefix + "pod_container_port_name" podContainerPortNameLabel = metaLabelPrefix + "pod_container_port_name"
podContainerPortNumberLabel = metaLabelPrefix + "pod_container_port_number" podContainerPortNumberLabel = metaLabelPrefix + "pod_container_port_number"
podContainerPortProtocolLabel = metaLabelPrefix + "pod_container_port_protocol" podContainerPortProtocolLabel = metaLabelPrefix + "pod_container_port_protocol"
podContainerIsInit = metaLabelPrefix + "pod_container_init"
podReadyLabel = metaLabelPrefix + "pod_ready" podReadyLabel = metaLabelPrefix + "pod_ready"
podPhaseLabel = metaLabelPrefix + "pod_phase" podPhaseLabel = metaLabelPrefix + "pod_phase"
podLabelPrefix = metaLabelPrefix + "pod_label_" podLabelPrefix = metaLabelPrefix + "pod_label_"
podLabelPresentPrefix = metaLabelPrefix + "pod_labelpresent_"
podAnnotationPrefix = metaLabelPrefix + "pod_annotation_" podAnnotationPrefix = metaLabelPrefix + "pod_annotation_"
podAnnotationPresentPrefix = metaLabelPrefix + "pod_annotationpresent_"
podNodeNameLabel = metaLabelPrefix + "pod_node_name" podNodeNameLabel = metaLabelPrefix + "pod_node_name"
podHostIPLabel = metaLabelPrefix + "pod_host_ip" podHostIPLabel = metaLabelPrefix + "pod_host_ip"
podUID = metaLabelPrefix + "pod_uid" podUID = metaLabelPrefix + "pod_uid"
@ -185,13 +188,15 @@ func podLabels(pod *apiv1.Pod) model.LabelSet {
} }
for k, v := range pod.Labels { for k, v := range pod.Labels {
ln := strutil.SanitizeLabelName(podLabelPrefix + k) ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(ln)] = lv(v) ls[model.LabelName(podLabelPrefix+ln)] = lv(v)
ls[model.LabelName(podLabelPresentPrefix+ln)] = presentValue
} }
for k, v := range pod.Annotations { for k, v := range pod.Annotations {
ln := strutil.SanitizeLabelName(podAnnotationPrefix + k) ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(ln)] = lv(v) ls[model.LabelName(podAnnotationPrefix+ln)] = lv(v)
ls[model.LabelName(podAnnotationPresentPrefix+ln)] = presentValue
} }
return ls return ls
@ -209,7 +214,10 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group {
tg.Labels = podLabels(pod) tg.Labels = podLabels(pod)
tg.Labels[namespaceLabel] = lv(pod.Namespace) tg.Labels[namespaceLabel] = lv(pod.Namespace)
for _, c := range pod.Spec.Containers { containers := append(pod.Spec.Containers, pod.Spec.InitContainers...)
for i, c := range containers {
isInit := i >= len(pod.Spec.Containers)
// If no ports are defined for the container, create an anonymous // If no ports are defined for the container, create an anonymous
// target per container. // target per container.
if len(c.Ports) == 0 { if len(c.Ports) == 0 {
@ -218,6 +226,7 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group {
tg.Targets = append(tg.Targets, model.LabelSet{ tg.Targets = append(tg.Targets, model.LabelSet{
model.AddressLabel: lv(pod.Status.PodIP), model.AddressLabel: lv(pod.Status.PodIP),
podContainerNameLabel: lv(c.Name), podContainerNameLabel: lv(c.Name),
podContainerIsInit: lv(strconv.FormatBool(isInit)),
}) })
continue continue
} }
@ -232,6 +241,7 @@ func (p *Pod) buildPod(pod *apiv1.Pod) *targetgroup.Group {
podContainerPortNumberLabel: lv(ports), podContainerPortNumberLabel: lv(ports),
podContainerPortNameLabel: lv(port.Name), podContainerPortNameLabel: lv(port.Name),
podContainerPortProtocolLabel: lv(string(port.Protocol)), podContainerPortProtocolLabel: lv(string(port.Protocol)),
podContainerIsInit: lv(strconv.FormatBool(isInit)),
}) })
} }
} }
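Appending InitContainers after the regular Containers makes the init flag a simple index comparison: entries at i >= len(pod.Spec.Containers) can only have come from pod.Spec.InitContainers. For a pod with one regular container exposing port 9000 and one portless init container (the makeInitContainerPods fixture below), the resulting targets are, in sketch:

// Regular container: gets the port-derived address.
// {"__address__": "1.2.3.4:9000",
//  "__meta_kubernetes_pod_container_name": "testcontainer",
//  "__meta_kubernetes_pod_container_init": "false", ...}
//
// Init container without ports: falls into the anonymous-target branch.
// {"__address__": "1.2.3.4",
//  "__meta_kubernetes_pod_container_name": "initcontainer",
//  "__meta_kubernetes_pod_container_init": "true"}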

View file

@ -19,7 +19,7 @@ import (
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
) )
@ -33,8 +33,8 @@ func makeMultiPortPods() *v1.Pod {
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: "testpod", Name: "testpod",
Namespace: "default", Namespace: "default",
Labels: map[string]string{"testlabel": "testvalue"}, Labels: map[string]string{"test/label": "testvalue"},
Annotations: map[string]string{"testannotation": "testannotationvalue"}, Annotations: map[string]string{"test/annotation": "testannotationvalue"},
UID: types.UID("abc123"), UID: types.UID("abc123"),
OwnerReferences: []metav1.OwnerReference{ OwnerReferences: []metav1.OwnerReference{
{ {
@ -117,6 +117,48 @@ func makePods() *v1.Pod {
} }
} }
func makeInitContainerPods() *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "testpod",
Namespace: "default",
UID: types.UID("abc123"),
},
Spec: v1.PodSpec{
NodeName: "testnode",
Containers: []v1.Container{
{
Name: "testcontainer",
Ports: []v1.ContainerPort{
{
Name: "testport",
Protocol: v1.ProtocolTCP,
ContainerPort: int32(9000),
},
},
},
},
InitContainers: []v1.Container{
{
Name: "initcontainer",
},
},
},
Status: v1.PodStatus{
PodIP: "1.2.3.4",
HostIP: "2.3.4.5",
Phase: "Pending",
Conditions: []v1.PodCondition{
{
Type: v1.PodReady,
Status: v1.ConditionFalse,
},
},
},
}
}
func expectedPodTargetGroups(ns string) map[string]*targetgroup.Group { func expectedPodTargetGroups(ns string) map[string]*targetgroup.Group {
key := fmt.Sprintf("pod/%s/testpod", ns) key := fmt.Sprintf("pod/%s/testpod", ns)
return map[string]*targetgroup.Group{ return map[string]*targetgroup.Group{
@ -128,6 +170,7 @@ func expectedPodTargetGroups(ns string) map[string]*targetgroup.Group {
"__meta_kubernetes_pod_container_port_name": "testport", "__meta_kubernetes_pod_container_port_name": "testport",
"__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_number": "9000",
"__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_container_init": "false",
}, },
}, },
Labels: model.LabelSet{ Labels: model.LabelSet{
@ -146,14 +189,13 @@ func expectedPodTargetGroups(ns string) map[string]*targetgroup.Group {
} }
func TestPodDiscoveryBeforeRun(t *testing.T) { func TestPodDiscoveryBeforeRun(t *testing.T) {
n, c, w := makeDiscovery(RolePod, NamespaceDiscovery{}) n, c := makeDiscovery(RolePod, NamespaceDiscovery{})
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
beforeRun: func() { beforeRun: func() {
obj := makeMultiPortPods() obj := makeMultiPortPods()
c.CoreV1().Pods(obj.Namespace).Create(obj) c.CoreV1().Pods(obj.Namespace).Create(obj)
w.Pods().Add(obj)
}, },
expectedMaxItems: 1, expectedMaxItems: 1,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -165,6 +207,7 @@ func TestPodDiscoveryBeforeRun(t *testing.T) {
"__meta_kubernetes_pod_container_port_name": "testport0", "__meta_kubernetes_pod_container_port_name": "testport0",
"__meta_kubernetes_pod_container_port_number": "9000", "__meta_kubernetes_pod_container_port_number": "9000",
"__meta_kubernetes_pod_container_port_protocol": "TCP", "__meta_kubernetes_pod_container_port_protocol": "TCP",
"__meta_kubernetes_pod_container_init": "false",
}, },
{ {
"__address__": "1.2.3.4:9001", "__address__": "1.2.3.4:9001",
@ -172,17 +215,21 @@ func TestPodDiscoveryBeforeRun(t *testing.T) {
"__meta_kubernetes_pod_container_port_name": "testport1", "__meta_kubernetes_pod_container_port_name": "testport1",
"__meta_kubernetes_pod_container_port_number": "9001", "__meta_kubernetes_pod_container_port_number": "9001",
"__meta_kubernetes_pod_container_port_protocol": "UDP", "__meta_kubernetes_pod_container_port_protocol": "UDP",
"__meta_kubernetes_pod_container_init": "false",
}, },
{ {
"__address__": "1.2.3.4", "__address__": "1.2.3.4",
"__meta_kubernetes_pod_container_name": "testcontainer1", "__meta_kubernetes_pod_container_name": "testcontainer1",
"__meta_kubernetes_pod_container_init": "false",
}, },
}, },
Labels: model.LabelSet{ Labels: model.LabelSet{
"__meta_kubernetes_pod_name": "testpod", "__meta_kubernetes_pod_name": "testpod",
"__meta_kubernetes_namespace": "default", "__meta_kubernetes_namespace": "default",
"__meta_kubernetes_pod_label_testlabel": "testvalue", "__meta_kubernetes_pod_label_test_label": "testvalue",
"__meta_kubernetes_pod_annotation_testannotation": "testannotationvalue", "__meta_kubernetes_pod_labelpresent_test_label": "true",
"__meta_kubernetes_pod_annotation_test_annotation": "testannotationvalue",
"__meta_kubernetes_pod_annotationpresent_test_annotation": "true",
"__meta_kubernetes_pod_node_name": "testnode", "__meta_kubernetes_pod_node_name": "testnode",
"__meta_kubernetes_pod_ip": "1.2.3.4", "__meta_kubernetes_pod_ip": "1.2.3.4",
"__meta_kubernetes_pod_host_ip": "2.3.4.5", "__meta_kubernetes_pod_host_ip": "2.3.4.5",
@ -198,15 +245,39 @@ func TestPodDiscoveryBeforeRun(t *testing.T) {
}.Run(t) }.Run(t)
} }
func TestPodDiscoveryInitContainer(t *testing.T) {
n, c := makeDiscovery(RolePod, NamespaceDiscovery{})
ns := "default"
key := fmt.Sprintf("pod/%s/testpod", ns)
expected := expectedPodTargetGroups(ns)
expected[key].Targets = append(expected[key].Targets, model.LabelSet{
"__address__": "1.2.3.4",
"__meta_kubernetes_pod_container_name": "initcontainer",
"__meta_kubernetes_pod_container_init": "true",
})
expected[key].Labels["__meta_kubernetes_pod_phase"] = "Pending"
expected[key].Labels["__meta_kubernetes_pod_ready"] = "false"
k8sDiscoveryTest{
discovery: n,
beforeRun: func() {
obj := makeInitContainerPods()
c.CoreV1().Pods(obj.Namespace).Create(obj)
},
expectedMaxItems: 1,
expectedRes: expected,
}.Run(t)
}
func TestPodDiscoveryAdd(t *testing.T) { func TestPodDiscoveryAdd(t *testing.T) {
n, c, w := makeDiscovery(RolePod, NamespaceDiscovery{}) n, c := makeDiscovery(RolePod, NamespaceDiscovery{})
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
afterStart: func() { afterStart: func() {
obj := makePods() obj := makePods()
c.CoreV1().Pods(obj.Namespace).Create(obj) c.CoreV1().Pods(obj.Namespace).Create(obj)
w.Pods().Add(obj)
}, },
expectedMaxItems: 1, expectedMaxItems: 1,
expectedRes: expectedPodTargetGroups("default"), expectedRes: expectedPodTargetGroups("default"),
@ -215,14 +286,13 @@ func TestPodDiscoveryAdd(t *testing.T) {
func TestPodDiscoveryDelete(t *testing.T) { func TestPodDiscoveryDelete(t *testing.T) {
obj := makePods() obj := makePods()
n, c, w := makeDiscovery(RolePod, NamespaceDiscovery{}, obj) n, c := makeDiscovery(RolePod, NamespaceDiscovery{}, obj)
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
afterStart: func() { afterStart: func() {
obj := makePods() obj := makePods()
c.CoreV1().Pods(obj.Namespace).Delete(obj.Name, &metav1.DeleteOptions{}) c.CoreV1().Pods(obj.Namespace).Delete(obj.Name, &metav1.DeleteOptions{})
w.Pods().Delete(obj)
}, },
expectedMaxItems: 2, expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -260,14 +330,13 @@ func TestPodDiscoveryUpdate(t *testing.T) {
HostIP: "2.3.4.5", HostIP: "2.3.4.5",
}, },
} }
n, c, w := makeDiscovery(RolePod, NamespaceDiscovery{}, obj) n, c := makeDiscovery(RolePod, NamespaceDiscovery{}, obj)
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
afterStart: func() { afterStart: func() {
obj := makePods() obj := makePods()
c.CoreV1().Pods(obj.Namespace).Create(obj) c.CoreV1().Pods(obj.Namespace).Update(obj)
w.Pods().Modify(obj)
}, },
expectedMaxItems: 2, expectedMaxItems: 2,
expectedRes: expectedPodTargetGroups("default"), expectedRes: expectedPodTargetGroups("default"),
@ -275,7 +344,7 @@ func TestPodDiscoveryUpdate(t *testing.T) {
} }
func TestPodDiscoveryUpdateEmptyPodIP(t *testing.T) { func TestPodDiscoveryUpdateEmptyPodIP(t *testing.T) {
n, c, w := makeDiscovery(RolePod, NamespaceDiscovery{}) n, c := makeDiscovery(RolePod, NamespaceDiscovery{})
initialPod := makePods() initialPod := makePods()
updatedPod := makePods() updatedPod := makePods()
@ -285,11 +354,9 @@ func TestPodDiscoveryUpdateEmptyPodIP(t *testing.T) {
discovery: n, discovery: n,
beforeRun: func() { beforeRun: func() {
c.CoreV1().Pods(initialPod.Namespace).Create(initialPod) c.CoreV1().Pods(initialPod.Namespace).Create(initialPod)
w.Pods().Add(initialPod)
}, },
afterStart: func() { afterStart: func() {
c.CoreV1().Pods(updatedPod.Namespace).Create(updatedPod) c.CoreV1().Pods(updatedPod.Namespace).Update(updatedPod)
w.Pods().Modify(updatedPod)
}, },
expectedMaxItems: 2, expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -301,7 +368,7 @@ func TestPodDiscoveryUpdateEmptyPodIP(t *testing.T) {
} }
func TestPodDiscoveryNamespaces(t *testing.T) { func TestPodDiscoveryNamespaces(t *testing.T) {
n, c, w := makeDiscovery(RolePod, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}) n, c := makeDiscovery(RolePod, NamespaceDiscovery{Names: []string{"ns1", "ns2"}})
expected := expectedPodTargetGroups("ns1") expected := expectedPodTargetGroups("ns1")
for k, v := range expectedPodTargetGroups("ns2") { for k, v := range expectedPodTargetGroups("ns2") {
@ -314,7 +381,6 @@ func TestPodDiscoveryNamespaces(t *testing.T) {
pod := makePods() pod := makePods()
pod.Namespace = ns pod.Namespace = ns
c.CoreV1().Pods(pod.Namespace).Create(pod) c.CoreV1().Pods(pod.Namespace).Create(pod)
w.Pods().Add(pod)
} }
}, },
expectedMaxItems: 2, expectedMaxItems: 2,

View file

@ -15,12 +15,12 @@ package kubernetes
import ( import (
"context" "context"
"fmt"
"net" "net"
"strconv" "strconv"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
apiv1 "k8s.io/api/core/v1" apiv1 "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/cache"
@ -43,7 +43,7 @@ func NewService(l log.Logger, inf cache.SharedInformer) *Service {
if l == nil { if l == nil {
l = log.NewNopLogger() l = log.NewNopLogger()
} }
s := &Service{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("ingress")} s := &Service{logger: l, informer: inf, store: inf.GetStore(), queue: workqueue.NewNamed("service")}
s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ s.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(o interface{}) { AddFunc: func(o interface{}) {
eventCount.WithLabelValues("service", "add").Inc() eventCount.WithLabelValues("service", "add").Inc()
@ -123,7 +123,7 @@ func convertToService(o interface{}) (*apiv1.Service, error) {
if ok { if ok {
return service, nil return service, nil
} }
return nil, fmt.Errorf("Received unexpected object: %v", o) return nil, errors.Errorf("received unexpected object: %v", o)
} }
func serviceSource(s *apiv1.Service) string { func serviceSource(s *apiv1.Service) string {
@ -137,9 +137,13 @@ func serviceSourceFromNamespaceAndName(namespace, name string) string {
const ( const (
serviceNameLabel = metaLabelPrefix + "service_name" serviceNameLabel = metaLabelPrefix + "service_name"
serviceLabelPrefix = metaLabelPrefix + "service_label_" serviceLabelPrefix = metaLabelPrefix + "service_label_"
serviceLabelPresentPrefix = metaLabelPrefix + "service_labelpresent_"
serviceAnnotationPrefix = metaLabelPrefix + "service_annotation_" serviceAnnotationPrefix = metaLabelPrefix + "service_annotation_"
serviceAnnotationPresentPrefix = metaLabelPrefix + "service_annotationpresent_"
servicePortNameLabel = metaLabelPrefix + "service_port_name" servicePortNameLabel = metaLabelPrefix + "service_port_name"
servicePortProtocolLabel = metaLabelPrefix + "service_port_protocol" servicePortProtocolLabel = metaLabelPrefix + "service_port_protocol"
serviceClusterIPLabel = metaLabelPrefix + "service_cluster_ip"
serviceExternalNameLabel = metaLabelPrefix + "service_external_name"
) )
func serviceLabels(svc *apiv1.Service) model.LabelSet { func serviceLabels(svc *apiv1.Service) model.LabelSet {
@ -149,13 +153,15 @@ func serviceLabels(svc *apiv1.Service) model.LabelSet {
ls[namespaceLabel] = lv(svc.Namespace) ls[namespaceLabel] = lv(svc.Namespace)
for k, v := range svc.Labels { for k, v := range svc.Labels {
ln := strutil.SanitizeLabelName(serviceLabelPrefix + k) ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(ln)] = lv(v) ls[model.LabelName(serviceLabelPrefix+ln)] = lv(v)
ls[model.LabelName(serviceLabelPresentPrefix+ln)] = presentValue
} }
for k, v := range svc.Annotations { for k, v := range svc.Annotations {
ln := strutil.SanitizeLabelName(serviceAnnotationPrefix + k) ln := strutil.SanitizeLabelName(k)
ls[model.LabelName(ln)] = lv(v) ls[model.LabelName(serviceAnnotationPrefix+ln)] = lv(v)
ls[model.LabelName(serviceAnnotationPresentPrefix+ln)] = presentValue
} }
return ls return ls
} }
@ -169,11 +175,19 @@ func (s *Service) buildService(svc *apiv1.Service) *targetgroup.Group {
for _, port := range svc.Spec.Ports { for _, port := range svc.Spec.Ports {
addr := net.JoinHostPort(svc.Name+"."+svc.Namespace+".svc", strconv.FormatInt(int64(port.Port), 10)) addr := net.JoinHostPort(svc.Name+"."+svc.Namespace+".svc", strconv.FormatInt(int64(port.Port), 10))
tg.Targets = append(tg.Targets, model.LabelSet{ labelSet := model.LabelSet{
model.AddressLabel: lv(addr), model.AddressLabel: lv(addr),
servicePortNameLabel: lv(port.Name), servicePortNameLabel: lv(port.Name),
servicePortProtocolLabel: lv(string(port.Protocol)), servicePortProtocolLabel: lv(string(port.Protocol)),
}) }
if svc.Spec.Type == apiv1.ServiceTypeExternalName {
labelSet[serviceExternalNameLabel] = lv(svc.Spec.ExternalName)
} else {
labelSet[serviceClusterIPLabel] = lv(svc.Spec.ClusterIP)
}
tg.Targets = append(tg.Targets, labelSet)
} }
return tg return tg
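With this branch, every service target carries exactly one of the two new labels, depending on the service type. For the fixtures in the tests that follow:

// ClusterIP service:
//   "__meta_kubernetes_service_cluster_ip": "10.0.0.1"
// ExternalName service:
//   "__meta_kubernetes_service_external_name": "FooExternalName"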

View file

@ -19,7 +19,7 @@ import (
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
"k8s.io/api/core/v1" v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
) )
@ -28,8 +28,8 @@ func makeMultiPortService() *v1.Service {
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: "testservice", Name: "testservice",
Namespace: "default", Namespace: "default",
Labels: map[string]string{"testlabel": "testvalue"}, Labels: map[string]string{"test-label": "testvalue"},
Annotations: map[string]string{"testannotation": "testannotationvalue"}, Annotations: map[string]string{"test-annotation": "testannotationvalue"},
}, },
Spec: v1.ServiceSpec{ Spec: v1.ServiceSpec{
Ports: []v1.ServicePort{ Ports: []v1.ServicePort{
@ -44,6 +44,8 @@ func makeMultiPortService() *v1.Service {
Port: int32(30901), Port: int32(30901),
}, },
}, },
Type: v1.ServiceTypeClusterIP,
ClusterIP: "10.0.0.1",
}, },
} }
} }
@ -62,6 +64,8 @@ func makeSuffixedService(suffix string) *v1.Service {
Port: int32(30900), Port: int32(30900),
}, },
}, },
Type: v1.ServiceTypeClusterIP,
ClusterIP: "10.0.0.1",
}, },
} }
} }
@ -70,23 +74,45 @@ func makeService() *v1.Service {
return makeSuffixedService("") return makeSuffixedService("")
} }
func makeExternalService() *v1.Service {
return &v1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: "testservice-external",
Namespace: "default",
},
Spec: v1.ServiceSpec{
Ports: []v1.ServicePort{
{
Name: "testport",
Protocol: v1.ProtocolTCP,
Port: int32(31900),
},
},
Type: v1.ServiceTypeExternalName,
ExternalName: "FooExternalName",
},
}
}
func TestServiceDiscoveryAdd(t *testing.T) { func TestServiceDiscoveryAdd(t *testing.T) {
n, c, w := makeDiscovery(RoleService, NamespaceDiscovery{}) n, c := makeDiscovery(RoleService, NamespaceDiscovery{})
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
afterStart: func() { afterStart: func() {
obj := makeService() obj := makeService()
c.CoreV1().Services(obj.Namespace).Create(obj) c.CoreV1().Services(obj.Namespace).Create(obj)
w.Services().Add(obj) obj = makeExternalService()
c.CoreV1().Services(obj.Namespace).Create(obj)
}, },
expectedMaxItems: 1, expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
"svc/default/testservice": { "svc/default/testservice": {
Targets: []model.LabelSet{ Targets: []model.LabelSet{
{ {
"__meta_kubernetes_service_port_protocol": "TCP", "__meta_kubernetes_service_port_protocol": "TCP",
"__address__": "testservice.default.svc:30900", "__address__": "testservice.default.svc:30900",
"__meta_kubernetes_service_cluster_ip": "10.0.0.1",
"__meta_kubernetes_service_port_name": "testport", "__meta_kubernetes_service_port_name": "testport",
}, },
}, },
@ -96,19 +122,33 @@ func TestServiceDiscoveryAdd(t *testing.T) {
}, },
Source: "svc/default/testservice", Source: "svc/default/testservice",
}, },
"svc/default/testservice-external": {
Targets: []model.LabelSet{
{
"__meta_kubernetes_service_port_protocol": "TCP",
"__address__": "testservice-external.default.svc:31900",
"__meta_kubernetes_service_port_name": "testport",
"__meta_kubernetes_service_external_name": "FooExternalName",
},
},
Labels: model.LabelSet{
"__meta_kubernetes_service_name": "testservice-external",
"__meta_kubernetes_namespace": "default",
},
Source: "svc/default/testservice-external",
},
}, },
}.Run(t) }.Run(t)
} }
func TestServiceDiscoveryDelete(t *testing.T) { func TestServiceDiscoveryDelete(t *testing.T) {
n, c, w := makeDiscovery(RoleService, NamespaceDiscovery{}, makeService()) n, c := makeDiscovery(RoleService, NamespaceDiscovery{}, makeService())
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
afterStart: func() { afterStart: func() {
obj := makeService() obj := makeService()
c.CoreV1().Services(obj.Namespace).Delete(obj.Name, &metav1.DeleteOptions{}) c.CoreV1().Services(obj.Namespace).Delete(obj.Name, &metav1.DeleteOptions{})
w.Services().Delete(obj)
}, },
expectedMaxItems: 2, expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -120,14 +160,13 @@ func TestServiceDiscoveryDelete(t *testing.T) {
} }
func TestServiceDiscoveryUpdate(t *testing.T) { func TestServiceDiscoveryUpdate(t *testing.T) {
n, c, w := makeDiscovery(RoleService, NamespaceDiscovery{}, makeService()) n, c := makeDiscovery(RoleService, NamespaceDiscovery{}, makeService())
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
afterStart: func() { afterStart: func() {
obj := makeMultiPortService() obj := makeMultiPortService()
c.CoreV1().Services(obj.Namespace).Update(obj) c.CoreV1().Services(obj.Namespace).Update(obj)
w.Services().Modify(obj)
}, },
expectedMaxItems: 2, expectedMaxItems: 2,
expectedRes: map[string]*targetgroup.Group{ expectedRes: map[string]*targetgroup.Group{
@ -136,19 +175,23 @@ func TestServiceDiscoveryUpdate(t *testing.T) {
{ {
"__meta_kubernetes_service_port_protocol": "TCP", "__meta_kubernetes_service_port_protocol": "TCP",
"__address__": "testservice.default.svc:30900", "__address__": "testservice.default.svc:30900",
"__meta_kubernetes_service_cluster_ip": "10.0.0.1",
"__meta_kubernetes_service_port_name": "testport0", "__meta_kubernetes_service_port_name": "testport0",
}, },
{ {
"__meta_kubernetes_service_port_protocol": "UDP", "__meta_kubernetes_service_port_protocol": "UDP",
"__address__": "testservice.default.svc:30901", "__address__": "testservice.default.svc:30901",
"__meta_kubernetes_service_cluster_ip": "10.0.0.1",
"__meta_kubernetes_service_port_name": "testport1", "__meta_kubernetes_service_port_name": "testport1",
}, },
}, },
Labels: model.LabelSet{ Labels: model.LabelSet{
"__meta_kubernetes_service_name": "testservice", "__meta_kubernetes_service_name": "testservice",
"__meta_kubernetes_namespace": "default", "__meta_kubernetes_namespace": "default",
"__meta_kubernetes_service_label_testlabel": "testvalue", "__meta_kubernetes_service_label_test_label": "testvalue",
"__meta_kubernetes_service_annotation_testannotation": "testannotationvalue", "__meta_kubernetes_service_labelpresent_test_label": "true",
"__meta_kubernetes_service_annotation_test_annotation": "testannotationvalue",
"__meta_kubernetes_service_annotationpresent_test_annotation": "true",
}, },
Source: "svc/default/testservice", Source: "svc/default/testservice",
}, },
@ -157,7 +200,7 @@ func TestServiceDiscoveryUpdate(t *testing.T) {
} }
func TestServiceDiscoveryNamespaces(t *testing.T) { func TestServiceDiscoveryNamespaces(t *testing.T) {
n, c, w := makeDiscovery(RoleService, NamespaceDiscovery{Names: []string{"ns1", "ns2"}}) n, c := makeDiscovery(RoleService, NamespaceDiscovery{Names: []string{"ns1", "ns2"}})
k8sDiscoveryTest{ k8sDiscoveryTest{
discovery: n, discovery: n,
@ -166,7 +209,6 @@ func TestServiceDiscoveryNamespaces(t *testing.T) {
obj := makeService() obj := makeService()
obj.Namespace = ns obj.Namespace = ns
c.CoreV1().Services(obj.Namespace).Create(obj) c.CoreV1().Services(obj.Namespace).Create(obj)
w.Services().Add(obj)
} }
}, },
expectedMaxItems: 2, expectedMaxItems: 2,
@ -176,6 +218,7 @@ func TestServiceDiscoveryNamespaces(t *testing.T) {
{ {
"__meta_kubernetes_service_port_protocol": "TCP", "__meta_kubernetes_service_port_protocol": "TCP",
"__address__": "testservice.ns1.svc:30900", "__address__": "testservice.ns1.svc:30900",
"__meta_kubernetes_service_cluster_ip": "10.0.0.1",
"__meta_kubernetes_service_port_name": "testport", "__meta_kubernetes_service_port_name": "testport",
}, },
}, },
@ -190,6 +233,7 @@ func TestServiceDiscoveryNamespaces(t *testing.T) {
{ {
"__meta_kubernetes_service_port_protocol": "TCP", "__meta_kubernetes_service_port_protocol": "TCP",
"__address__": "testservice.ns2.svc:30900", "__address__": "testservice.ns2.svc:30900",
"__meta_kubernetes_service_cluster_ip": "10.0.0.1",
"__meta_kubernetes_service_port_name": "testport", "__meta_kubernetes_service_port_name": "testport",
}, },
}, },
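For context on the expectations above: the new *_labelpresent_* and *_annotationpresent_* meta labels pair every sanitized label or annotation key with a constant "true" flag, so relabel rules can match on key existence alone. A minimal, self-contained sketch of that derivation (illustrative, not the exact Prometheus code):

package main

import (
	"fmt"

	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/util/strutil"
)

// serviceLabels sketches the pattern: each key is sanitized
// ("test-label" -> "test_label") and emitted twice, once with its
// value and once as a "labelpresent" flag set to "true".
func serviceLabels(labels map[string]string) model.LabelSet {
	ls := model.LabelSet{}
	for k, v := range labels {
		name := strutil.SanitizeLabelName(k)
		ls[model.LabelName("__meta_kubernetes_service_label_"+name)] = model.LabelValue(v)
		ls[model.LabelName("__meta_kubernetes_service_labelpresent_"+name)] = "true"
	}
	return ls
}

func main() {
	fmt.Println(serviceLabels(map[string]string{"test-label": "testvalue"}))
}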

View file

@ -41,11 +41,12 @@ import (
) )
var ( var (
failedConfigs = prometheus.NewCounter( failedConfigs = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Name: "prometheus_sd_configs_failed_total", Name: "prometheus_sd_configs_failed_total",
Help: "Total number of service discovery configurations that failed to load.", Help: "Total number of service discovery configurations that failed to load.",
}, },
[]string{"name"},
) )
discoveredTargets = prometheus.NewGaugeVec( discoveredTargets = prometheus.NewGaugeVec(
prometheus.GaugeOpts{ prometheus.GaugeOpts{
@ -54,23 +55,26 @@ var (
}, },
[]string{"name", "config"}, []string{"name", "config"},
) )
receivedUpdates = prometheus.NewCounter( receivedUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Name: "prometheus_sd_received_updates_total", Name: "prometheus_sd_received_updates_total",
Help: "Total number of update events received from the SD providers.", Help: "Total number of update events received from the SD providers.",
}, },
[]string{"name"},
) )
delayedUpdates = prometheus.NewCounter( delayedUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Name: "prometheus_sd_updates_delayed_total", Name: "prometheus_sd_updates_delayed_total",
Help: "Total number of update events that couldn't be sent immediately.", Help: "Total number of update events that couldn't be sent immediately.",
}, },
[]string{"name"},
) )
sentUpdates = prometheus.NewCounter( sentUpdates = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Name: "prometheus_sd_updates_total", Name: "prometheus_sd_updates_total",
Help: "Total number of update events sent to the SD consumers.", Help: "Total number of update events sent to the SD consumers.",
}, },
[]string{"name"},
) )
) )
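Because these metrics are now vectors keyed by the owning manager's name, each discovery manager exports its own series (the "scrape" and "notify" names below are assumptions for illustration). A small runnable sketch of the pattern:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	received := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "prometheus_sd_received_updates_total",
			Help: "Total number of update events received from the SD providers.",
		},
		[]string{"name"},
	)
	reg := prometheus.NewRegistry()
	reg.MustRegister(received)

	// Each manager increments its own child series.
	received.WithLabelValues("scrape").Inc()
	received.WithLabelValues("scrape").Inc()
	received.WithLabelValues("notify").Inc()

	mfs, _ := reg.Gather()
	for _, mf := range mfs {
		for _, m := range mf.GetMetric() {
			fmt.Println(mf.GetName(), m.GetLabel()[0].GetValue(), m.GetCounter().GetValue())
		}
	}
}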
@ -226,7 +230,7 @@ func (m *Manager) updater(ctx context.Context, p *provider, updates chan []*targ
case <-ctx.Done(): case <-ctx.Done():
return return
case tgs, ok := <-updates: case tgs, ok := <-updates:
receivedUpdates.Inc() receivedUpdates.WithLabelValues(m.name).Inc()
if !ok { if !ok {
level.Debug(m.logger).Log("msg", "discoverer channel closed", "provider", p.name) level.Debug(m.logger).Log("msg", "discoverer channel closed", "provider", p.name)
return return
@ -255,11 +259,11 @@ func (m *Manager) sender() {
case <-ticker.C: // Some discoverers send updates too often so we throttle these with the ticker. case <-ticker.C: // Some discoverers send updates too often so we throttle these with the ticker.
select { select {
case <-m.triggerSend: case <-m.triggerSend:
sentUpdates.Inc() sentUpdates.WithLabelValues(m.name).Inc()
select { select {
case m.syncCh <- m.allGroups(): case m.syncCh <- m.allGroups():
default: default:
delayedUpdates.Inc() delayedUpdates.WithLabelValues(m.name).Inc()
level.Debug(m.logger).Log("msg", "discovery receiver's channel was full so will retry the next cycle") level.Debug(m.logger).Log("msg", "discovery receiver's channel was full so will retry the next cycle")
select { select {
case m.triggerSend <- struct{}{}: case m.triggerSend <- struct{}{}:
@ -314,11 +318,13 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
} }
func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setName string) { func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setName string) {
var added bool
add := func(cfg interface{}, newDiscoverer func() (Discoverer, error)) { add := func(cfg interface{}, newDiscoverer func() (Discoverer, error)) {
t := reflect.TypeOf(cfg).String() t := reflect.TypeOf(cfg).String()
for _, p := range m.providers { for _, p := range m.providers {
if reflect.DeepEqual(cfg, p.config) { if reflect.DeepEqual(cfg, p.config) {
p.subs = append(p.subs, setName) p.subs = append(p.subs, setName)
added = true
return return
} }
} }
@ -326,7 +332,7 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam
d, err := newDiscoverer() d, err := newDiscoverer()
if err != nil { if err != nil {
level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", t) level.Error(m.logger).Log("msg", "Cannot create service discovery", "err", err, "type", t)
failedConfigs.Inc() failedConfigs.WithLabelValues(m.name).Inc()
return return
} }
@ -337,6 +343,7 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam
subs: []string{setName}, subs: []string{setName},
} }
m.providers = append(m.providers, &provider) m.providers = append(m.providers, &provider)
added = true
} }
for _, c := range cfg.DNSSDConfigs { for _, c := range cfg.DNSSDConfigs {
@ -401,7 +408,17 @@ func (m *Manager) registerProviders(cfg sd_config.ServiceDiscoveryConfig, setNam
} }
if len(cfg.StaticConfigs) > 0 { if len(cfg.StaticConfigs) > 0 {
add(setName, func() (Discoverer, error) { add(setName, func() (Discoverer, error) {
return &StaticProvider{cfg.StaticConfigs}, nil return &StaticProvider{TargetGroups: cfg.StaticConfigs}, nil
})
}
if !added {
// Add an empty target group to force the refresh of the corresponding
// scrape pool and to notify the receiver that this target set has no
// current targets.
// This can happen when the combined set of SD configurations is empty
// or when none of the SD configurations could be instantiated.
add(setName, func() (Discoverer, error) {
return &StaticProvider{TargetGroups: []*targetgroup.Group{{}}}, nil
}) })
} }
} }
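The fallback above means a receiver always gets at least one update for every configured target set, even an empty one, instead of silently keeping stale targets. A hedged sketch of what that provider effectively sends (the helper name is invented for illustration):

package main

import (
	"context"
	"fmt"

	"github.com/prometheus/prometheus/discovery/targetgroup"
)

// emptyFallback mimics the provider registered above when no SD config is
// usable: it sends exactly one update containing a single empty group.
func emptyFallback(ctx context.Context, ch chan<- []*targetgroup.Group) {
	select {
	case ch <- []*targetgroup.Group{{}}:
	case <-ctx.Done():
	}
}

func main() {
	ch := make(chan []*targetgroup.Group, 1)
	emptyFallback(context.Background(), ch)
	tgs := <-ch
	fmt.Println(len(tgs), len(tgs[0].Targets)) // 1 0
}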

View file

@ -51,7 +51,7 @@ func TestTargetUpdatesOrder(t *testing.T) {
expectedTargets: nil, expectedTargets: nil,
}, },
{ {
title: "Multips TPs no updates", title: "Multiple TPs no updates",
updates: map[string][]update{ updates: map[string][]update{
"tp1": {}, "tp1": {},
"tp2": {}, "tp2": {},
@ -719,6 +719,7 @@ func assertEqualGroups(t *testing.T, got, expected []*targetgroup.Group, msg fun
} }
func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Group, poolKey poolKey, label string, present bool) { func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Group, poolKey poolKey, label string, present bool) {
t.Helper()
if _, ok := tSets[poolKey]; !ok { if _, ok := tSets[poolKey]; !ok {
t.Fatalf("'%s' should be present in Pool keys: %v", poolKey, tSets) t.Fatalf("'%s' should be present in Pool keys: %v", poolKey, tSets)
return return
@ -741,7 +742,7 @@ func verifyPresence(t *testing.T, tSets map[poolKey]map[string]*targetgroup.Grou
if !present { if !present {
msg = "not" msg = "not"
} }
t.Fatalf("'%s' should %s be present in Targets labels: %v", label, msg, mergedTargets) t.Fatalf("%q should %s be present in Targets labels: %q", label, msg, mergedTargets)
} }
} }
@ -781,7 +782,7 @@ scrape_configs:
- targets: ["foo:9090"] - targets: ["foo:9090"]
` `
if err := yaml.UnmarshalStrict([]byte(sTwo), cfg); err != nil { if err := yaml.UnmarshalStrict([]byte(sTwo), cfg); err != nil {
t.Fatalf("Unable to load YAML config sOne: %s", err) t.Fatalf("Unable to load YAML config sTwo: %s", err)
} }
c = make(map[string]sd_config.ServiceDiscoveryConfig) c = make(map[string]sd_config.ServiceDiscoveryConfig)
for _, v := range cfg.ScrapeConfigs { for _, v := range cfg.ScrapeConfigs {
@ -794,6 +795,67 @@ scrape_configs:
verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "string/0"}, "{__address__=\"bar:9090\"}", false) verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "string/0"}, "{__address__=\"bar:9090\"}", false)
} }
// TestTargetSetRecreatesEmptyStaticConfigs ensures that reloading a config file after
// removing all targets from the static_configs sends an update with empty targetGroups.
// This is required to signal the receiver that this target set has no current targets.
func TestTargetSetRecreatesEmptyStaticConfigs(t *testing.T) {
cfg := &config.Config{}
sOne := `
scrape_configs:
- job_name: 'prometheus'
static_configs:
- targets: ["foo:9090"]
`
if err := yaml.UnmarshalStrict([]byte(sOne), cfg); err != nil {
t.Fatalf("Unable to load YAML config sOne: %s", err)
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
discoveryManager := NewManager(ctx, log.NewNopLogger())
discoveryManager.updatert = 100 * time.Millisecond
go discoveryManager.Run()
c := make(map[string]sd_config.ServiceDiscoveryConfig)
for _, v := range cfg.ScrapeConfigs {
c[v.JobName] = v.ServiceDiscoveryConfig
}
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
verifyPresence(t, discoveryManager.targets, poolKey{setName: "prometheus", provider: "string/0"}, "{__address__=\"foo:9090\"}", true)
sTwo := `
scrape_configs:
- job_name: 'prometheus'
static_configs:
`
if err := yaml.UnmarshalStrict([]byte(sTwo), cfg); err != nil {
t.Fatalf("Unable to load YAML config sTwo: %s", err)
}
c = make(map[string]sd_config.ServiceDiscoveryConfig)
for _, v := range cfg.ScrapeConfigs {
c[v.JobName] = v.ServiceDiscoveryConfig
}
discoveryManager.ApplyConfig(c)
<-discoveryManager.SyncCh()
pkey := poolKey{setName: "prometheus", provider: "string/0"}
targetGroups, ok := discoveryManager.targets[pkey]
if !ok {
t.Fatalf("'%v' should be present in target groups", pkey)
}
group, ok := targetGroups[""]
if !ok {
t.Fatalf("missing '' key in target groups %v", targetGroups)
}
if len(group.Targets) != 0 {
t.Fatalf("Invalid number of targets: expected 0, got %d", len(group.Targets))
}
}
func TestIdenticalConfigurationsAreCoalesced(t *testing.T) { func TestIdenticalConfigurationsAreCoalesced(t *testing.T) {
tmpFile, err := ioutil.TempFile("", "sd") tmpFile, err := ioutil.TempFile("", "sd")
if err != nil { if err != nil {
@ -926,7 +988,7 @@ func TestCoordinationWithReceiver(t *testing.T) {
expected: []expect{ expected: []expect{
{ {
tgs: map[string][]*targetgroup.Group{ tgs: map[string][]*targetgroup.Group{
"once1": []*targetgroup.Group{ "once1": {
{ {
Source: "tg1", Source: "tg1",
Targets: []model.LabelSet{{"__instance__": "1"}}, Targets: []model.LabelSet{{"__instance__": "1"}},
@ -936,13 +998,13 @@ func TestCoordinationWithReceiver(t *testing.T) {
}, },
{ {
tgs: map[string][]*targetgroup.Group{ tgs: map[string][]*targetgroup.Group{
"once1": []*targetgroup.Group{ "once1": {
{ {
Source: "tg1", Source: "tg1",
Targets: []model.LabelSet{{"__instance__": "1"}}, Targets: []model.LabelSet{{"__instance__": "1"}},
}, },
}, },
"mock1": []*targetgroup.Group{ "mock1": {
{ {
Source: "tg2", Source: "tg2",
Targets: []model.LabelSet{{"__instance__": "2"}}, Targets: []model.LabelSet{{"__instance__": "2"}},
@ -979,7 +1041,7 @@ func TestCoordinationWithReceiver(t *testing.T) {
{ {
delay: 2 * updateDelay, delay: 2 * updateDelay,
tgs: map[string][]*targetgroup.Group{ tgs: map[string][]*targetgroup.Group{
"mock1": []*targetgroup.Group{ "mock1": {
{ {
Source: "tg1", Source: "tg1",
Targets: []model.LabelSet{{"__instance__": "1"}}, Targets: []model.LabelSet{{"__instance__": "1"}},
@ -990,7 +1052,7 @@ func TestCoordinationWithReceiver(t *testing.T) {
{ {
delay: 4 * updateDelay, delay: 4 * updateDelay,
tgs: map[string][]*targetgroup.Group{ tgs: map[string][]*targetgroup.Group{
"mock1": []*targetgroup.Group{ "mock1": {
{ {
Source: "tg1", Source: "tg1",
Targets: []model.LabelSet{{"__instance__": "1"}}, Targets: []model.LabelSet{{"__instance__": "1"}},

View file

@ -17,6 +17,7 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"io/ioutil" "io/ioutil"
"math/rand" "math/rand"
"net" "net"
@ -26,10 +27,11 @@ import (
"time" "time"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
config_util "github.com/prometheus/common/config" config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil" "github.com/prometheus/prometheus/util/strutil"
) )
@ -54,29 +56,12 @@ const (
portMappingLabelPrefix = metaLabelPrefix + "port_mapping_label_" portMappingLabelPrefix = metaLabelPrefix + "port_mapping_label_"
// portDefinitionLabelPrefix is the prefix for the application portDefinitions labels. // portDefinitionLabelPrefix is the prefix for the application portDefinitions labels.
portDefinitionLabelPrefix = metaLabelPrefix + "port_definition_label_" portDefinitionLabelPrefix = metaLabelPrefix + "port_definition_label_"
// Constants for instrumentation.
namespace = "prometheus"
) )
var ( // DefaultSDConfig is the default Marathon SD configuration.
refreshFailuresCount = prometheus.NewCounter( var DefaultSDConfig = SDConfig{
prometheus.CounterOpts{
Namespace: namespace,
Name: "sd_marathon_refresh_failures_total",
Help: "The number of Marathon-SD refresh failures.",
})
refreshDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Namespace: namespace,
Name: "sd_marathon_refresh_duration_seconds",
Help: "The duration of a Marathon-SD refresh in seconds.",
})
// DefaultSDConfig is the default Marathon SD configuration.
DefaultSDConfig = SDConfig{
RefreshInterval: model.Duration(30 * time.Second), RefreshInterval: model.Duration(30 * time.Second),
} }
)
// SDConfig is the configuration for services running on Marathon. // SDConfig is the configuration for services running on Marathon.
type SDConfig struct { type SDConfig struct {
@ -96,43 +81,33 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err return err
} }
if len(c.Servers) == 0 { if len(c.Servers) == 0 {
return fmt.Errorf("marathon_sd: must contain at least one Marathon server") return errors.New("marathon_sd: must contain at least one Marathon server")
} }
if len(c.AuthToken) > 0 && len(c.AuthTokenFile) > 0 { if len(c.AuthToken) > 0 && len(c.AuthTokenFile) > 0 {
return fmt.Errorf("marathon_sd: at most one of auth_token & auth_token_file must be configured") return errors.New("marathon_sd: at most one of auth_token & auth_token_file must be configured")
} }
if c.HTTPClientConfig.BasicAuth != nil && (len(c.AuthToken) > 0 || len(c.AuthTokenFile) > 0) { if c.HTTPClientConfig.BasicAuth != nil && (len(c.AuthToken) > 0 || len(c.AuthTokenFile) > 0) {
return fmt.Errorf("marathon_sd: at most one of basic_auth, auth_token & auth_token_file must be configured") return errors.New("marathon_sd: at most one of basic_auth, auth_token & auth_token_file must be configured")
} }
if (len(c.HTTPClientConfig.BearerToken) > 0 || len(c.HTTPClientConfig.BearerTokenFile) > 0) && (len(c.AuthToken) > 0 || len(c.AuthTokenFile) > 0) { if (len(c.HTTPClientConfig.BearerToken) > 0 || len(c.HTTPClientConfig.BearerTokenFile) > 0) && (len(c.AuthToken) > 0 || len(c.AuthTokenFile) > 0) {
return fmt.Errorf("marathon_sd: at most one of bearer_token, bearer_token_file, auth_token & auth_token_file must be configured") return errors.New("marathon_sd: at most one of bearer_token, bearer_token_file, auth_token & auth_token_file must be configured")
} }
return c.HTTPClientConfig.Validate() return c.HTTPClientConfig.Validate()
} }
func init() {
prometheus.MustRegister(refreshFailuresCount)
prometheus.MustRegister(refreshDuration)
}
const appListPath string = "/v2/apps/?embed=apps.tasks" const appListPath string = "/v2/apps/?embed=apps.tasks"
// Discovery provides service discovery based on a Marathon instance. // Discovery provides service discovery based on a Marathon instance.
type Discovery struct { type Discovery struct {
*refresh.Discovery
client *http.Client client *http.Client
servers []string servers []string
refreshInterval time.Duration
lastRefresh map[string]*targetgroup.Group lastRefresh map[string]*targetgroup.Group
appsClient AppListClient appsClient appListClient
logger log.Logger
} }
// NewDiscovery returns a new Marathon Discovery. // NewDiscovery returns a new Marathon Discovery.
func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) { func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
rt, err := config_util.NewRoundTripperFromConfig(conf.HTTPClientConfig, "marathon_sd") rt, err := config_util.NewRoundTripperFromConfig(conf.HTTPClientConfig, "marathon_sd")
if err != nil { if err != nil {
return nil, err return nil, err
@ -147,13 +122,18 @@ func NewDiscovery(conf SDConfig, logger log.Logger) (*Discovery, error) {
return nil, err return nil, err
} }
return &Discovery{ d := &Discovery{
client: &http.Client{Transport: rt}, client: &http.Client{Transport: rt},
servers: conf.Servers, servers: conf.Servers,
refreshInterval: time.Duration(conf.RefreshInterval),
appsClient: fetchApps, appsClient: fetchApps,
logger: logger, }
}, nil d.Discovery = refresh.NewDiscovery(
logger,
"marathon",
time.Duration(conf.RefreshInterval),
d.refresh,
)
return d, nil
} }
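Marathon SD now delegates its polling loop to the shared refresh helper, which replaces the deleted hand-rolled Run/updateServices loop and standardizes refresh duration and failure instrumentation across discoverers. A minimal sketch of the same wiring for a hypothetical discoverer:

package main

import (
	"context"
	"time"

	"github.com/go-kit/kit/log"
	"github.com/prometheus/prometheus/discovery/refresh"
	"github.com/prometheus/prometheus/discovery/targetgroup"
)

// newStubDiscovery shows the refresh.NewDiscovery wiring: the helper owns
// the ticker loop, context cancellation, and instrumentation, and calls
// the refresh function once per interval.
func newStubDiscovery(logger log.Logger, interval time.Duration) *refresh.Discovery {
	refreshFn := func(ctx context.Context) ([]*targetgroup.Group, error) {
		// A real discoverer would query its backend here.
		return []*targetgroup.Group{{Source: "stub"}}, nil
	}
	return refresh.NewDiscovery(logger, "stub", interval, refreshFn)
}

func main() {
	_ = newStubDiscovery(log.NewNopLogger(), 30*time.Second)
}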
type authTokenRoundTripper struct { type authTokenRoundTripper struct {
@ -185,7 +165,7 @@ func newAuthTokenFileRoundTripper(tokenFile string, rt http.RoundTripper) (http.
// fail-fast if we can't read the file. // fail-fast if we can't read the file.
_, err := ioutil.ReadFile(tokenFile) _, err := ioutil.ReadFile(tokenFile)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to read auth token file %s: %s", tokenFile, err) return nil, errors.Wrapf(err, "unable to read auth token file %s", tokenFile)
} }
return &authTokenFileRoundTripper{tokenFile, rt}, nil return &authTokenFileRoundTripper{tokenFile, rt}, nil
} }
@ -193,7 +173,7 @@ func newAuthTokenFileRoundTripper(tokenFile string, rt http.RoundTripper) (http.
func (rt *authTokenFileRoundTripper) RoundTrip(request *http.Request) (*http.Response, error) { func (rt *authTokenFileRoundTripper) RoundTrip(request *http.Request) (*http.Response, error) {
b, err := ioutil.ReadFile(rt.authTokenFile) b, err := ioutil.ReadFile(rt.authTokenFile)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to read auth token file %s: %s", rt.authTokenFile, err) return nil, errors.Wrapf(err, "unable to read auth token file %s", rt.authTokenFile)
} }
authToken := strings.TrimSpace(string(b)) authToken := strings.TrimSpace(string(b))
@ -204,33 +184,10 @@ func (rt *authTokenFileRoundTripper) RoundTrip(request *http.Request) (*http.Res
return rt.rt.RoundTrip(request) return rt.rt.RoundTrip(request)
} }
// Run implements the Discoverer interface. func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { targetMap, err := d.fetchTargetGroups(ctx)
for {
select {
case <-ctx.Done():
return
case <-time.After(d.refreshInterval):
err := d.updateServices(ctx, ch)
if err != nil { if err != nil {
level.Error(d.logger).Log("msg", "Error while updating services", "err", err) return nil, err
}
}
}
}
func (d *Discovery) updateServices(ctx context.Context, ch chan<- []*targetgroup.Group) (err error) {
t0 := time.Now()
defer func() {
refreshDuration.Observe(time.Since(t0).Seconds())
if err != nil {
refreshFailuresCount.Inc()
}
}()
targetMap, err := d.fetchTargetGroups()
if err != nil {
return err
} }
all := make([]*targetgroup.Group, 0, len(targetMap)) all := make([]*targetgroup.Group, 0, len(targetMap))
@ -240,154 +197,143 @@ func (d *Discovery) updateServices(ctx context.Context, ch chan<- []*targetgroup
select { select {
case <-ctx.Done(): case <-ctx.Done():
return ctx.Err() return nil, ctx.Err()
case ch <- all: default:
} }
// Remove services which did disappear. // Remove services which have disappeared.
for source := range d.lastRefresh { for source := range d.lastRefresh {
_, ok := targetMap[source] _, ok := targetMap[source]
if !ok { if !ok {
select { all = append(all, &targetgroup.Group{Source: source})
case <-ctx.Done():
return ctx.Err()
case ch <- []*targetgroup.Group{{Source: source}}:
level.Debug(d.logger).Log("msg", "Removing group", "source", source)
}
} }
} }
d.lastRefresh = targetMap d.lastRefresh = targetMap
return nil return all, nil
} }
func (d *Discovery) fetchTargetGroups() (map[string]*targetgroup.Group, error) { func (d *Discovery) fetchTargetGroups(ctx context.Context) (map[string]*targetgroup.Group, error) {
url := RandomAppsURL(d.servers) url := randomAppsURL(d.servers)
apps, err := d.appsClient(d.client, url) apps, err := d.appsClient(ctx, d.client, url)
if err != nil { if err != nil {
return nil, err return nil, err
} }
groups := AppsToTargetGroups(apps) groups := appsToTargetGroups(apps)
return groups, nil return groups, nil
} }
// Task describes one instance of a service running on Marathon. // task describes one instance of a service running on Marathon.
type Task struct { type task struct {
ID string `json:"id"` ID string `json:"id"`
Host string `json:"host"` Host string `json:"host"`
Ports []uint32 `json:"ports"` Ports []uint32 `json:"ports"`
IPAddresses []IPAddress `json:"ipAddresses"` IPAddresses []ipAddress `json:"ipAddresses"`
} }
// IPAddress describes the address and protocol the container's network interface is bound to. // ipAddress describes the address and protocol the container's network interface is bound to.
type IPAddress struct { type ipAddress struct {
Address string `json:"ipAddress"` Address string `json:"ipAddress"`
Proto string `json:"protocol"` Proto string `json:"protocol"`
} }
// PortMapping describes in which port the process are binding inside the docker container. // portMapping describes which port the process binds to inside the docker container.
type PortMapping struct { type portMapping struct {
Labels map[string]string `json:"labels"` Labels map[string]string `json:"labels"`
ContainerPort uint32 `json:"containerPort"` ContainerPort uint32 `json:"containerPort"`
HostPort uint32 `json:"hostPort"`
ServicePort uint32 `json:"servicePort"` ServicePort uint32 `json:"servicePort"`
} }
// DockerContainer describes a container which uses the docker runtime. // dockerContainer describes a container that uses the docker runtime.
type DockerContainer struct { type dockerContainer struct {
Image string `json:"image"` Image string `json:"image"`
PortMappings []PortMapping `json:"portMappings"` PortMappings []portMapping `json:"portMappings"`
} }
// Container describes the runtime an app in running in. // container describes the runtime an app is running in.
type Container struct { type container struct {
Docker DockerContainer `json:"docker"` Docker dockerContainer `json:"docker"`
PortMappings []PortMapping `json:"portMappings"` PortMappings []portMapping `json:"portMappings"`
} }
// PortDefinition describes which load balancer port you should access to access the service. // portDefinition describes which load balancer port to use to access the service.
type PortDefinition struct { type portDefinition struct {
Labels map[string]string `json:"labels"` Labels map[string]string `json:"labels"`
Port uint32 `json:"port"` Port uint32 `json:"port"`
} }
// Network describes the name and type of network the container is attached to. // network describes the name and type of network the container is attached to.
type Network struct { type network struct {
Name string `json:"name"` Name string `json:"name"`
Mode string `json:"mode"` Mode string `json:"mode"`
} }
// App describes a service running on Marathon. // app describes a service running on Marathon.
type App struct { type app struct {
ID string `json:"id"` ID string `json:"id"`
Tasks []Task `json:"tasks"` Tasks []task `json:"tasks"`
RunningTasks int `json:"tasksRunning"` RunningTasks int `json:"tasksRunning"`
Labels map[string]string `json:"labels"` Labels map[string]string `json:"labels"`
Container Container `json:"container"` Container container `json:"container"`
PortDefinitions []PortDefinition `json:"portDefinitions"` PortDefinitions []portDefinition `json:"portDefinitions"`
Networks []Network `json:"networks"` Networks []network `json:"networks"`
RequirePorts bool `json:"requirePorts"`
} }
// isContainerNet checks if the app's first network is set to mode 'container'. // isContainerNet checks if the app's first network is set to mode 'container'.
func (app App) isContainerNet() bool { func (app app) isContainerNet() bool {
return len(app.Networks) > 0 && app.Networks[0].Mode == "container" return len(app.Networks) > 0 && app.Networks[0].Mode == "container"
} }
// AppList is a list of Marathon apps. // appList is a list of Marathon apps.
type AppList struct { type appList struct {
Apps []App `json:"apps"` Apps []app `json:"apps"`
} }
// AppListClient defines a function that can be used to get an application list from marathon. // appListClient defines a function that can be used to get an application list from Marathon.
type AppListClient func(client *http.Client, url string) (*AppList, error) type appListClient func(ctx context.Context, client *http.Client, url string) (*appList, error)
// fetchApps requests a list of applications from a marathon server. // fetchApps requests a list of applications from a Marathon server.
func fetchApps(client *http.Client, url string) (*AppList, error) { func fetchApps(ctx context.Context, client *http.Client, url string) (*appList, error) {
request, err := http.NewRequest("GET", url, nil) request, err := http.NewRequest("GET", url, nil)
if err != nil { if err != nil {
return nil, err return nil, err
} }
request = request.WithContext(ctx)
resp, err := client.Do(request) resp, err := client.Do(request)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer func() {
io.Copy(ioutil.Discard, resp.Body)
resp.Body.Close()
}()
if (resp.StatusCode < 200) || (resp.StatusCode >= 300) { if (resp.StatusCode < 200) || (resp.StatusCode >= 300) {
return nil, fmt.Errorf("Non 2xx status '%v' response during marathon service discovery", resp.StatusCode) return nil, errors.Errorf("non 2xx status '%v' response during marathon service discovery", resp.StatusCode)
} }
body, err := ioutil.ReadAll(resp.Body) var apps appList
err = json.NewDecoder(resp.Body).Decode(&apps)
if err != nil { if err != nil {
return nil, err return nil, errors.Wrapf(err, "%q", url)
} }
return &apps, nil
apps, err := parseAppJSON(body)
if err != nil {
return nil, fmt.Errorf("%v in %s", err, url)
}
return apps, nil
} }
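Two details in the new fetchApps are easy to miss: the response body is decoded as a stream, and whatever the decoder leaves behind is drained before closing so the keep-alive connection can be reused. The pattern in isolation (a sketch; getJSON and the URL are illustrative):

package main

import (
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
)

// getJSON decodes a JSON response and drains the body before closing so the
// underlying TCP connection can be returned to the pool and reused.
func getJSON(client *http.Client, url string, v interface{}) error {
	resp, err := client.Get(url)
	if err != nil {
		return err
	}
	defer func() {
		io.Copy(ioutil.Discard, resp.Body) // drain what the decoder left behind
		resp.Body.Close()
	}()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return fmt.Errorf("non 2xx status %d", resp.StatusCode)
	}
	return json.NewDecoder(resp.Body).Decode(v)
}

func main() {
	var v interface{}
	_ = getJSON(http.DefaultClient, "http://localhost:8080/v2/apps?embed=apps.tasks", &v) // URL is illustrative
}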
func parseAppJSON(body []byte) (*AppList, error) { // randomAppsURL randomly selects a server from an array and creates
apps := &AppList{}
err := json.Unmarshal(body, apps)
if err != nil {
return nil, err
}
return apps, nil
}
// RandomAppsURL randomly selects a server from an array and creates
// an URL pointing to the app list. // a URL pointing to the app list.
func RandomAppsURL(servers []string) string { func randomAppsURL(servers []string) string {
// TODO: If possible update server list from Marathon at some point. // TODO: If possible update server list from Marathon at some point.
server := servers[rand.Intn(len(servers))] server := servers[rand.Intn(len(servers))]
return fmt.Sprintf("%s%s", server, appListPath) return fmt.Sprintf("%s%s", server, appListPath)
} }
// AppsToTargetGroups takes an array of Marathon apps and converts them into target groups. // appsToTargetGroups takes an array of Marathon apps and converts them into target groups.
func AppsToTargetGroups(apps *AppList) map[string]*targetgroup.Group { func appsToTargetGroups(apps *appList) map[string]*targetgroup.Group {
tgroups := map[string]*targetgroup.Group{} tgroups := map[string]*targetgroup.Group{}
for _, a := range apps.Apps { for _, a := range apps.Apps {
group := createTargetGroup(&a) group := createTargetGroup(&a)
@ -396,7 +342,7 @@ func AppsToTargetGroups(apps *AppList) map[string]*targetgroup.Group {
return tgroups return tgroups
} }
func createTargetGroup(app *App) *targetgroup.Group { func createTargetGroup(app *app) *targetgroup.Group {
var ( var (
targets = targetsForApp(app) targets = targetsForApp(app)
appName = model.LabelValue(app.ID) appName = model.LabelValue(app.ID)
@ -419,7 +365,7 @@ func createTargetGroup(app *App) *targetgroup.Group {
return tg return tg
} }
func targetsForApp(app *App) []model.LabelSet { func targetsForApp(app *app) []model.LabelSet {
targets := make([]model.LabelSet, 0, len(app.Tasks)) targets := make([]model.LabelSet, 0, len(app.Tasks))
var ports []uint32 var ports []uint32
@ -446,8 +392,12 @@ func targetsForApp(app *App) []model.LabelSet {
for i := 0; i < len(app.PortDefinitions); i++ { for i := 0; i < len(app.PortDefinitions); i++ {
labels[i] = app.PortDefinitions[i].Labels labels[i] = app.PortDefinitions[i].Labels
// When requirePorts is false, this port becomes the 'servicePort', not the listen port.
// In this case, the port needs to be taken from the task instead of the app.
if app.RequirePorts {
ports[i] = app.PortDefinitions[i].Port ports[i] = app.PortDefinitions[i].Port
} }
}
prefix = portDefinitionLabelPrefix prefix = portDefinitionLabelPrefix
} }
@ -467,6 +417,13 @@ func targetsForApp(app *App) []model.LabelSet {
// Iterate over the ports we gathered using one of the methods above. // Iterate over the ports we gathered using one of the methods above.
for i, port := range ports { for i, port := range ports {
// A zero port here means that either the portMapping has a zero port defined,
// or there is a portDefinition with requirePorts set to false. This means the port
// is auto-generated by Mesos and needs to be looked up in the task.
if port == 0 && len(t.Ports) == len(ports) {
port = t.Ports[i]
}
// Each port represents a possible Prometheus target. // Each port represents a possible Prometheus target.
targetAddress := targetEndpoint(&t, port, app.isContainerNet()) targetAddress := targetEndpoint(&t, port, app.isContainerNet())
target := model.LabelSet{ target := model.LabelSet{
@ -492,7 +449,7 @@ func targetsForApp(app *App) []model.LabelSet {
} }
// Generate a target endpoint string in host:port format. // Generate a target endpoint string in host:port format.
func targetEndpoint(task *Task, port uint32, containerNet bool) string { func targetEndpoint(task *task, port uint32, containerNet bool) string {
var host string var host string
@ -507,7 +464,7 @@ func targetEndpoint(task *Task, port uint32, containerNet bool) string {
} }
// Get a list of ports and a list of labels from a PortMapping. // Get a list of ports and a list of labels from a portMapping.
func extractPortMapping(portMappings []PortMapping, containerNet bool) ([]uint32, []map[string]string) { func extractPortMapping(portMappings []portMapping, containerNet bool) ([]uint32, []map[string]string) {
ports := make([]uint32, len(portMappings)) ports := make([]uint32, len(portMappings))
labels := make([]map[string]string, len(portMappings)) labels := make([]map[string]string, len(portMappings))
@ -520,8 +477,10 @@ func extractPortMapping(portMappings []PortMapping, containerNet bool) ([]uint32
// If the app is in a container network, connect directly to the container port. // If the app is in a container network, connect directly to the container port.
ports[i] = portMappings[i].ContainerPort ports[i] = portMappings[i].ContainerPort
} else { } else {
// Otherwise, connect to the randomly-generated service port. // Otherwise, connect to the allocated host port for the container.
ports[i] = portMappings[i].ServicePort // Note that this host port is likely set to 0 in the app definition, which means it is
// automatically generated and needs to be extracted from the task's 'ports' array at a later stage.
ports[i] = portMappings[i].HostPort
} }
} }
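Taken together, the requirePorts and zero-port rules reduce to this: any port the app definition leaves at 0 (requirePorts set to false, or a zero hostPort mapping) is filled in from the Mesos-assigned ports on the task. A compact sketch of that resolution (the function name is illustrative):

package main

import "fmt"

// resolvePorts mirrors the zero-port rule above: any port left as 0 by the
// app definition is filled in from the task's auto-assigned ports, provided
// the task carries one port per app-level port.
func resolvePorts(appPorts, taskPorts []uint32) []uint32 {
	out := make([]uint32, len(appPorts))
	for i, p := range appPorts {
		if p == 0 && len(taskPorts) == len(appPorts) {
			p = taskPorts[i]
		}
		out[i] = p
	}
	return out
}

func main() {
	// requirePorts=false: both definitions are 0, the task carries 1234 and 5678.
	fmt.Println(resolvePorts([]uint32{0, 0}, []uint32{1234, 5678})) // [1234 5678]
	// requirePorts=true: the definitions already carry the listen ports.
	fmt.Println(resolvePorts([]uint32{31000, 32000}, []uint32{31000, 32000}))
}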

View file

@ -20,7 +20,6 @@ import (
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"testing" "testing"
"time"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
@ -32,86 +31,86 @@ var (
conf = SDConfig{Servers: testServers} conf = SDConfig{Servers: testServers}
) )
func testUpdateServices(client AppListClient, ch chan []*targetgroup.Group) error { func testUpdateServices(client appListClient) ([]*targetgroup.Group, error) {
md, err := NewDiscovery(conf, nil) md, err := NewDiscovery(conf, nil)
if err != nil { if err != nil {
return err return nil, err
} }
if client != nil {
md.appsClient = client md.appsClient = client
return md.updateServices(context.Background(), ch) }
return md.refresh(context.Background())
} }
func TestMarathonSDHandleError(t *testing.T) { func TestMarathonSDHandleError(t *testing.T) {
var ( var (
errTesting = errors.New("testing failure") errTesting = errors.New("testing failure")
ch = make(chan []*targetgroup.Group, 1) client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) {
client = func(client *http.Client, url string) (*AppList, error) { return nil, errTesting } return nil, errTesting
}
) )
if err := testUpdateServices(client, ch); err != errTesting { tgs, err := testUpdateServices(client)
if err != errTesting {
t.Fatalf("Expected error: %s", err) t.Fatalf("Expected error: %s", err)
} }
select { if len(tgs) != 0 {
case tg := <-ch: t.Fatalf("Got group: %s", tgs)
t.Fatalf("Got group: %s", tg)
default:
} }
} }
func TestMarathonSDEmptyList(t *testing.T) { func TestMarathonSDEmptyList(t *testing.T) {
var ( var (
ch = make(chan []*targetgroup.Group, 1) client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) { return &appList{}, nil }
client = func(client *http.Client, url string) (*AppList, error) { return &AppList{}, nil }
) )
if err := testUpdateServices(client, ch); err != nil { tgs, err := testUpdateServices(client)
if err != nil {
t.Fatalf("Got error: %s", err) t.Fatalf("Got error: %s", err)
} }
select { if len(tgs) > 0 {
case tg := <-ch: t.Fatalf("Got group: %v", tgs)
if len(tg) > 0 {
t.Fatalf("Got group: %v", tg)
}
default:
} }
} }
func marathonTestAppList(labels map[string]string, runningTasks int) *AppList { func marathonTestAppList(labels map[string]string, runningTasks int) *appList {
var ( var (
task = Task{ t = task{
ID: "test-task-1", ID: "test-task-1",
Host: "mesos-slave1", Host: "mesos-slave1",
} }
docker = DockerContainer{ docker = dockerContainer{
Image: "repo/image:tag", Image: "repo/image:tag",
} }
portMappings = []PortMapping{ portMappings = []portMapping{
{Labels: labels, ServicePort: 31000}, {Labels: labels, HostPort: 31000},
} }
container = Container{Docker: docker, PortMappings: portMappings} container = container{Docker: docker, PortMappings: portMappings}
app = App{ a = app{
ID: "test-service", ID: "test-service",
Tasks: []Task{task}, Tasks: []task{t},
RunningTasks: runningTasks, RunningTasks: runningTasks,
Labels: labels, Labels: labels,
Container: container, Container: container,
} }
) )
return &AppList{ return &appList{
Apps: []App{app}, Apps: []app{a},
} }
} }
func TestMarathonSDSendGroup(t *testing.T) { func TestMarathonSDSendGroup(t *testing.T) {
var ( var (
ch = make(chan []*targetgroup.Group, 1) client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) {
client = func(client *http.Client, url string) (*AppList, error) {
return marathonTestAppList(marathonValidLabel, 1), nil return marathonTestAppList(marathonValidLabel, 1), nil
} }
) )
if err := testUpdateServices(client, ch); err != nil { tgs, err := testUpdateServices(client)
if err != nil {
t.Fatalf("Got error: %s", err) t.Fatalf("Got error: %s", err)
} }
select { if len(tgs) != 1 {
case tgs := <-ch: t.Fatal("Expected 1 target group, got", len(tgs))
}
tg := tgs[0] tg := tgs[0]
if tg.Source != "test-service" { if tg.Source != "test-service" {
@ -127,116 +126,86 @@ func TestMarathonSDSendGroup(t *testing.T) {
if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "yes" { if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "yes" {
t.Fatalf("Wrong first portMappings label from the first port: %s", tgt[model.AddressLabel]) t.Fatalf("Wrong first portMappings label from the first port: %s", tgt[model.AddressLabel])
} }
default:
t.Fatal("Did not get a target group.")
}
} }
func TestMarathonSDRemoveApp(t *testing.T) { func TestMarathonSDRemoveApp(t *testing.T) {
var ch = make(chan []*targetgroup.Group, 1)
md, err := NewDiscovery(conf, nil) md, err := NewDiscovery(conf, nil)
if err != nil { if err != nil {
t.Fatalf("%s", err) t.Fatalf("%s", err)
} }
md.appsClient = func(client *http.Client, url string) (*AppList, error) { md.appsClient = func(_ context.Context, _ *http.Client, _ string) (*appList, error) {
return marathonTestAppList(marathonValidLabel, 1), nil return marathonTestAppList(marathonValidLabel, 1), nil
} }
if err := md.updateServices(context.Background(), ch); err != nil { tgs, err := md.refresh(context.Background())
if err != nil {
t.Fatalf("Got error on first update: %s", err) t.Fatalf("Got error on first update: %s", err)
} }
up1 := (<-ch)[0] if len(tgs) != 1 {
t.Fatal("Expected 1 targetgroup, got", len(tgs))
}
tg1 := tgs[0]
md.appsClient = func(client *http.Client, url string) (*AppList, error) { md.appsClient = func(_ context.Context, _ *http.Client, _ string) (*appList, error) {
return marathonTestAppList(marathonValidLabel, 0), nil return marathonTestAppList(marathonValidLabel, 0), nil
} }
if err := md.updateServices(context.Background(), ch); err != nil { tgs, err = md.refresh(context.Background())
if err != nil {
t.Fatalf("Got error on second update: %s", err) t.Fatalf("Got error on second update: %s", err)
} }
up2 := (<-ch)[0] if len(tgs) != 1 {
t.Fatal("Expected 1 targetgroup, got", len(tgs))
}
tg2 := tgs[0]
if up2.Source != up1.Source { if tg2.Source != tg1.Source {
t.Fatalf("Source is different: %s", up2) t.Fatalf("Source is different: %s != %s", tg1.Source, tg2.Source)
if len(up2.Targets) > 0 { }
t.Fatalf("Got a non-empty target set: %s", up2.Targets) if len(tg2.Targets) > 0 {
} t.Fatalf("Got a non-empty target set: %s", tg2.Targets)
} }
} }
func TestMarathonSDRunAndStop(t *testing.T) { func marathonTestAppListWithMultiplePorts(labels map[string]string, runningTasks int) *appList {
var ( var (
refreshInterval = model.Duration(time.Millisecond * 10) t = task{
conf = SDConfig{Servers: testServers, RefreshInterval: refreshInterval}
ch = make(chan []*targetgroup.Group)
doneCh = make(chan error)
)
md, err := NewDiscovery(conf, nil)
if err != nil {
t.Fatalf("%s", err)
}
md.appsClient = func(client *http.Client, url string) (*AppList, error) {
return marathonTestAppList(marathonValidLabel, 1), nil
}
ctx, cancel := context.WithCancel(context.Background())
go func() {
md.Run(ctx, ch)
close(doneCh)
}()
timeout := time.After(md.refreshInterval * 3)
for {
select {
case <-ch:
cancel()
case <-doneCh:
cancel()
return
case <-timeout:
t.Fatalf("Update took too long.")
}
}
}
func marathonTestAppListWithMultiplePorts(labels map[string]string, runningTasks int) *AppList {
var (
task = Task{
ID: "test-task-1", ID: "test-task-1",
Host: "mesos-slave1", Host: "mesos-slave1",
} }
docker = DockerContainer{ docker = dockerContainer{
Image: "repo/image:tag", Image: "repo/image:tag",
} }
portMappings = []PortMapping{ portMappings = []portMapping{
{Labels: labels, ServicePort: 31000}, {Labels: labels, HostPort: 31000},
{Labels: make(map[string]string), ServicePort: 32000}, {Labels: make(map[string]string), HostPort: 32000},
} }
container = Container{Docker: docker, PortMappings: portMappings} container = container{Docker: docker, PortMappings: portMappings}
app = App{ a = app{
ID: "test-service", ID: "test-service",
Tasks: []Task{task}, Tasks: []task{t},
RunningTasks: runningTasks, RunningTasks: runningTasks,
Labels: labels, Labels: labels,
Container: container, Container: container,
} }
) )
return &AppList{ return &appList{
Apps: []App{app}, Apps: []app{a},
} }
} }
func TestMarathonSDSendGroupWithMultiplePort(t *testing.T) { func TestMarathonSDSendGroupWithMultiplePort(t *testing.T) {
var ( var (
ch = make(chan []*targetgroup.Group, 1) client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) {
client = func(client *http.Client, url string) (*AppList, error) {
return marathonTestAppListWithMultiplePorts(marathonValidLabel, 1), nil return marathonTestAppListWithMultiplePorts(marathonValidLabel, 1), nil
} }
) )
if err := testUpdateServices(client, ch); err != nil { tgs, err := testUpdateServices(client)
if err != nil {
t.Fatalf("Got error: %s", err) t.Fatalf("Got error: %s", err)
} }
select { if len(tgs) != 1 {
case tgs := <-ch: t.Fatal("Expected 1 target group, got", len(tgs))
}
tg := tgs[0] tg := tgs[0]
if tg.Source != "test-service" { if tg.Source != "test-service" {
@ -259,45 +228,43 @@ func TestMarathonSDSendGroupWithMultiplePort(t *testing.T) {
if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" { if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" {
t.Fatalf("Wrong portMappings label from the second port: %s", tgt[model.AddressLabel]) t.Fatalf("Wrong portMappings label from the second port: %s", tgt[model.AddressLabel])
} }
default:
t.Fatal("Did not get a target group.")
}
} }
func marathonTestZeroTaskPortAppList(labels map[string]string, runningTasks int) *AppList { func marathonTestZeroTaskPortAppList(labels map[string]string, runningTasks int) *appList {
var ( var (
task = Task{ t = task{
ID: "test-task-2", ID: "test-task-2",
Host: "mesos-slave-2", Host: "mesos-slave-2",
Ports: []uint32{}, Ports: []uint32{},
} }
docker = DockerContainer{Image: "repo/image:tag"} docker = dockerContainer{Image: "repo/image:tag"}
container = Container{Docker: docker} container = container{Docker: docker}
app = App{ a = app{
ID: "test-service-zero-ports", ID: "test-service-zero-ports",
Tasks: []Task{task}, Tasks: []task{t},
RunningTasks: runningTasks, RunningTasks: runningTasks,
Labels: labels, Labels: labels,
Container: container, Container: container,
} }
) )
return &AppList{ return &appList{
Apps: []App{app}, Apps: []app{a},
} }
} }
func TestMarathonZeroTaskPorts(t *testing.T) { func TestMarathonZeroTaskPorts(t *testing.T) {
var ( var (
ch = make(chan []*targetgroup.Group, 1) client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) {
client = func(client *http.Client, url string) (*AppList, error) {
return marathonTestZeroTaskPortAppList(marathonValidLabel, 1), nil return marathonTestZeroTaskPortAppList(marathonValidLabel, 1), nil
} }
) )
if err := testUpdateServices(client, ch); err != nil { tgs, err := testUpdateServices(client)
if err != nil {
t.Fatalf("Got error: %s", err) t.Fatalf("Got error: %s", err)
} }
select { if len(tgs) != 1 {
case tgs := <-ch: t.Fatal("Expected 1 target group, got", len(tgs))
}
tg := tgs[0] tg := tgs[0]
if tg.Source != "test-service-zero-ports" { if tg.Source != "test-service-zero-ports" {
@ -306,16 +273,9 @@ func TestMarathonZeroTaskPorts(t *testing.T) {
if len(tg.Targets) != 0 { if len(tg.Targets) != 0 {
t.Fatalf("Wrong number of targets: %v", tg.Targets) t.Fatalf("Wrong number of targets: %v", tg.Targets)
} }
default:
t.Fatal("Did not get a target group.")
}
} }
func Test500ErrorHttpResponseWithValidJSONBody(t *testing.T) { func Test500ErrorHttpResponseWithValidJSONBody(t *testing.T) {
var (
ch = make(chan []*targetgroup.Group, 1)
client = fetchApps
)
// Simulate 500 error with a valid JSON response. // Simulate 500 error with a valid JSON response.
respHandler := func(w http.ResponseWriter, r *http.Request) { respHandler := func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusInternalServerError) w.WriteHeader(http.StatusInternalServerError)
@ -332,51 +292,128 @@ func Test500ErrorHttpResponseWithValidJSONBody(t *testing.T) {
}() }()
// Setup conf for the test case. // Setup conf for the test case.
conf = SDConfig{Servers: []string{ts.URL}} conf = SDConfig{Servers: []string{ts.URL}}
// Execute test case and validate behaviour. // Execute test case and validate behavior.
if err := testUpdateServices(client, ch); err == nil { _, err := testUpdateServices(nil)
t.Fatalf("Expected error for 5xx HTTP response from marathon server") if err == nil {
t.Fatalf("Expected error for 5xx HTTP response from marathon server, got nil")
} }
} }
func marathonTestAppListWithPortDefinitions(labels map[string]string, runningTasks int) *AppList { func marathonTestAppListWithPortDefinitions(labels map[string]string, runningTasks int) *appList {
var ( var (
task = Task{ t = task{
ID: "test-task-1", ID: "test-task-1",
Host: "mesos-slave1", Host: "mesos-slave1",
// Auto-generated ports when requirePorts is false
Ports: []uint32{1234, 5678},
} }
docker = DockerContainer{ docker = dockerContainer{
Image: "repo/image:tag", Image: "repo/image:tag",
} }
container = Container{Docker: docker} container = container{Docker: docker}
app = App{ a = app{
ID: "test-service", ID: "test-service",
Tasks: []Task{task}, Tasks: []task{t},
RunningTasks: runningTasks, RunningTasks: runningTasks,
Labels: labels, Labels: labels,
Container: container, Container: container,
PortDefinitions: []PortDefinition{ PortDefinitions: []portDefinition{
{Labels: make(map[string]string), Port: 31000}, {Labels: make(map[string]string), Port: 31000},
{Labels: labels, Port: 32000}, {Labels: labels, Port: 32000},
}, },
RequirePorts: false, // default
} }
) )
return &AppList{ return &appList{
Apps: []App{app}, Apps: []app{a},
} }
} }
func TestMarathonSDSendGroupWithPortDefinitions(t *testing.T) { func TestMarathonSDSendGroupWithPortDefinitions(t *testing.T) {
var ( var (
ch = make(chan []*targetgroup.Group, 1) client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) {
client = func(client *http.Client, url string) (*AppList, error) {
return marathonTestAppListWithPortDefinitions(marathonValidLabel, 1), nil return marathonTestAppListWithPortDefinitions(marathonValidLabel, 1), nil
} }
) )
if err := testUpdateServices(client, ch); err != nil { tgs, err := testUpdateServices(client)
if err != nil {
t.Fatalf("Got error: %s", err) t.Fatalf("Got error: %s", err)
} }
select { if len(tgs) != 1 {
case tgs := <-ch: t.Fatal("Expected 1 target group, got", len(tgs))
}
tg := tgs[0]
if tg.Source != "test-service" {
t.Fatalf("Wrong target group name: %s", tg.Source)
}
if len(tg.Targets) != 2 {
t.Fatalf("Wrong number of targets: %v", tg.Targets)
}
tgt := tg.Targets[0]
if tgt[model.AddressLabel] != "mesos-slave1:1234" {
t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel])
}
if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" {
t.Fatalf("Wrong first portMappings label from the first port: %s", tgt[model.AddressLabel])
}
if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" {
t.Fatalf("Wrong first portDefinitions label from the first port: %s", tgt[model.AddressLabel])
}
tgt = tg.Targets[1]
if tgt[model.AddressLabel] != "mesos-slave1:5678" {
t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel])
}
if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" {
t.Fatalf("Wrong portMappings label from the second port: %s", tgt[model.AddressLabel])
}
if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "yes" {
t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel])
}
}
func marathonTestAppListWithPortDefinitionsRequirePorts(labels map[string]string, runningTasks int) *appList {
var (
t = task{
ID: "test-task-1",
Host: "mesos-slave1",
Ports: []uint32{31000, 32000},
}
docker = dockerContainer{
Image: "repo/image:tag",
}
container = container{Docker: docker}
a = app{
ID: "test-service",
Tasks: []task{t},
RunningTasks: runningTasks,
Labels: labels,
Container: container,
PortDefinitions: []portDefinition{
{Labels: make(map[string]string), Port: 31000},
{Labels: labels, Port: 32000},
},
RequirePorts: true,
}
)
return &appList{
Apps: []app{a},
}
}
func TestMarathonSDSendGroupWithPortDefinitionsRequirePorts(t *testing.T) {
var (
client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) {
return marathonTestAppListWithPortDefinitionsRequirePorts(marathonValidLabel, 1), nil
}
)
tgs, err := testUpdateServices(client)
if err != nil {
t.Fatalf("Got error: %s", err)
}
if len(tgs) != 1 {
t.Fatal("Expected 1 target group, got", len(tgs))
}
tg := tgs[0] tg := tgs[0]
if tg.Source != "test-service" { if tg.Source != "test-service" {
@ -405,47 +442,45 @@ func TestMarathonSDSendGroupWithPortDefinitions(t *testing.T) {
if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "yes" { if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "yes" {
t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel]) t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel])
} }
default:
t.Fatal("Did not get a target group.")
}
} }
func marathonTestAppListWithPorts(labels map[string]string, runningTasks int) *AppList { func marathonTestAppListWithPorts(labels map[string]string, runningTasks int) *appList {
var ( var (
task = Task{ t = task{
ID: "test-task-1", ID: "test-task-1",
Host: "mesos-slave1", Host: "mesos-slave1",
Ports: []uint32{31000, 32000}, Ports: []uint32{31000, 32000},
} }
docker = DockerContainer{ docker = dockerContainer{
Image: "repo/image:tag", Image: "repo/image:tag",
} }
container = Container{Docker: docker} container = container{Docker: docker}
app = App{ a = app{
ID: "test-service", ID: "test-service",
Tasks: []Task{task}, Tasks: []task{t},
RunningTasks: runningTasks, RunningTasks: runningTasks,
Labels: labels, Labels: labels,
Container: container, Container: container,
} }
) )
return &AppList{ return &appList{
Apps: []App{app}, Apps: []app{a},
} }
} }
func TestMarathonSDSendGroupWithPorts(t *testing.T) { func TestMarathonSDSendGroupWithPorts(t *testing.T) {
var ( var (
ch = make(chan []*targetgroup.Group, 1) client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) {
client = func(client *http.Client, url string) (*AppList, error) {
return marathonTestAppListWithPorts(marathonValidLabel, 1), nil return marathonTestAppListWithPorts(marathonValidLabel, 1), nil
} }
) )
if err := testUpdateServices(client, ch); err != nil { tgs, err := testUpdateServices(client)
if err != nil {
t.Fatalf("Got error: %s", err) t.Fatalf("Got error: %s", err)
} }
select { if len(tgs) != 1 {
case tgs := <-ch: t.Fatal("Expected 1 target group, got", len(tgs))
}
tg := tgs[0] tg := tgs[0]
if tg.Source != "test-service" { if tg.Source != "test-service" {
@ -474,52 +509,54 @@ func TestMarathonSDSendGroupWithPorts(t *testing.T) {
if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" {
t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel]) t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel])
} }
default:
t.Fatal("Did not get a target group.")
}
} }
func marathonTestAppListWithContainerPortMappings(labels map[string]string, runningTasks int) *AppList { func marathonTestAppListWithContainerPortMappings(labels map[string]string, runningTasks int) *appList {
var ( var (
task = Task{ t = task{
ID: "test-task-1", ID: "test-task-1",
Host: "mesos-slave1", Host: "mesos-slave1",
} Ports: []uint32{
docker = DockerContainer{ 12345, // 'Automatically-generated' port
Image: "repo/image:tag", 32000,
}
container = Container{
Docker: docker,
PortMappings: []PortMapping{
{Labels: labels, ServicePort: 31000},
{Labels: make(map[string]string), ServicePort: 32000},
}, },
} }
app = App{ docker = dockerContainer{
Image: "repo/image:tag",
}
container = container{
Docker: docker,
PortMappings: []portMapping{
{Labels: labels, HostPort: 0},
{Labels: make(map[string]string), HostPort: 32000},
},
}
a = app{
ID: "test-service", ID: "test-service",
Tasks: []Task{task}, Tasks: []task{t},
RunningTasks: runningTasks, RunningTasks: runningTasks,
Labels: labels, Labels: labels,
Container: container, Container: container,
} }
) )
return &AppList{ return &appList{
Apps: []App{app}, Apps: []app{a},
} }
} }
func TestMarathonSDSendGroupWithContainerPortMappings(t *testing.T) { func TestMarathonSDSendGroupWithContainerPortMappings(t *testing.T) {
var ( var (
ch = make(chan []*targetgroup.Group, 1) client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) {
client = func(client *http.Client, url string) (*AppList, error) {
return marathonTestAppListWithContainerPortMappings(marathonValidLabel, 1), nil return marathonTestAppListWithContainerPortMappings(marathonValidLabel, 1), nil
} }
) )
if err := testUpdateServices(client, ch); err != nil { tgs, err := testUpdateServices(client)
if err != nil {
t.Fatalf("Got error: %s", err) t.Fatalf("Got error: %s", err)
} }
select { if len(tgs) != 1 {
case tgs := <-ch: t.Fatal("Expected 1 target group, got", len(tgs))
}
tg := tgs[0] tg := tgs[0]
if tg.Source != "test-service" { if tg.Source != "test-service" {
@ -529,7 +566,7 @@ func TestMarathonSDSendGroupWithContainerPortMappings(t *testing.T) {
t.Fatalf("Wrong number of targets: %v", tg.Targets) t.Fatalf("Wrong number of targets: %v", tg.Targets)
} }
tgt := tg.Targets[0] tgt := tg.Targets[0]
if tgt[model.AddressLabel] != "mesos-slave1:31000" { if tgt[model.AddressLabel] != "mesos-slave1:12345" {
t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel])
} }
if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "yes" { if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "yes" {
@ -548,52 +585,54 @@ func TestMarathonSDSendGroupWithContainerPortMappings(t *testing.T) {
if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" {
t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel]) t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel])
} }
default:
t.Fatal("Did not get a target group.")
}
} }
func marathonTestAppListWithDockerContainerPortMappings(labels map[string]string, runningTasks int) *AppList { func marathonTestAppListWithDockerContainerPortMappings(labels map[string]string, runningTasks int) *appList {
var ( var (
task = Task{ t = task{
ID: "test-task-1", ID: "test-task-1",
Host: "mesos-slave1", Host: "mesos-slave1",
} Ports: []uint32{
docker = DockerContainer{ 31000,
Image: "repo/image:tag", 12345, // 'Automatically-generated' port
PortMappings: []PortMapping{
{Labels: labels, ServicePort: 31000},
{Labels: make(map[string]string), ServicePort: 32000},
}, },
} }
container = Container{ docker = dockerContainer{
Image: "repo/image:tag",
PortMappings: []portMapping{
{Labels: labels, HostPort: 31000},
{Labels: make(map[string]string), HostPort: 0},
},
}
container = container{
Docker: docker, Docker: docker,
} }
app = App{ a = app{
ID: "test-service", ID: "test-service",
Tasks: []Task{task}, Tasks: []task{t},
RunningTasks: runningTasks, RunningTasks: runningTasks,
Labels: labels, Labels: labels,
Container: container, Container: container,
} }
) )
return &AppList{ return &appList{
Apps: []App{app}, Apps: []app{a},
} }
} }
func TestMarathonSDSendGroupWithDockerContainerPortMappings(t *testing.T) { func TestMarathonSDSendGroupWithDockerContainerPortMappings(t *testing.T) {
var ( var (
ch = make(chan []*targetgroup.Group, 1) client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) {
client = func(client *http.Client, url string) (*AppList, error) {
return marathonTestAppListWithDockerContainerPortMappings(marathonValidLabel, 1), nil return marathonTestAppListWithDockerContainerPortMappings(marathonValidLabel, 1), nil
} }
) )
if err := testUpdateServices(client, ch); err != nil { tgs, err := testUpdateServices(client)
if err != nil {
t.Fatalf("Got error: %s", err) t.Fatalf("Got error: %s", err)
} }
select { if len(tgs) != 1 {
case tgs := <-ch: t.Fatal("Expected 1 target group, got", len(tgs))
}
tg := tgs[0] tg := tgs[0]
if tg.Source != "test-service" { if tg.Source != "test-service" {
@ -613,7 +652,7 @@ func TestMarathonSDSendGroupWithDockerContainerPortMappings(t *testing.T) {
t.Fatalf("Wrong first portDefinitions label from the first port: %s", tgt[model.AddressLabel]) t.Fatalf("Wrong first portDefinitions label from the first port: %s", tgt[model.AddressLabel])
} }
tgt = tg.Targets[1] tgt = tg.Targets[1]
if tgt[model.AddressLabel] != "mesos-slave1:32000" { if tgt[model.AddressLabel] != "mesos-slave1:12345" {
t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel]) t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel])
} }
if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" { if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" {
@ -622,60 +661,58 @@ func TestMarathonSDSendGroupWithDockerContainerPortMappings(t *testing.T) {
if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" {
t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel]) t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel])
} }
default:
t.Fatal("Did not get a target group.")
}
} }
func marathonTestAppListWithContainerNetworkAndPortMappings(labels map[string]string, runningTasks int) *AppList { func marathonTestAppListWithContainerNetworkAndPortMappings(labels map[string]string, runningTasks int) *appList {
var ( var (
task = Task{ t = task{
ID: "test-task-1", ID: "test-task-1",
Host: "mesos-slave1", Host: "mesos-slave1",
IPAddresses: []IPAddress{ IPAddresses: []ipAddress{
{Address: "1.2.3.4"}, {Address: "1.2.3.4"},
}, },
} }
docker = DockerContainer{ docker = dockerContainer{
Image: "repo/image:tag", Image: "repo/image:tag",
} }
portMappings = []PortMapping{ portMappings = []portMapping{
{Labels: labels, ContainerPort: 8080, ServicePort: 31000}, {Labels: labels, ContainerPort: 8080, HostPort: 31000},
{Labels: make(map[string]string), ContainerPort: 1234, ServicePort: 32000}, {Labels: make(map[string]string), ContainerPort: 1234, HostPort: 32000},
} }
container = Container{ container = container{
Docker: docker, Docker: docker,
PortMappings: portMappings, PortMappings: portMappings,
} }
networks = []Network{ networks = []network{
{Mode: "container", Name: "test-network"}, {Mode: "container", Name: "test-network"},
} }
app = App{ a = app{
ID: "test-service", ID: "test-service",
Tasks: []Task{task}, Tasks: []task{t},
RunningTasks: runningTasks, RunningTasks: runningTasks,
Labels: labels, Labels: labels,
Container: container, Container: container,
Networks: networks, Networks: networks,
} }
) )
return &AppList{ return &appList{
Apps: []App{app}, Apps: []app{a},
} }
} }
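This fixture additionally attaches a "container" network, and the addresses asserted in the surrounding tests (1.2.3.4:…) show targets being built from the task's IP rather than the agent host. A hedged sketch of that selection, again reusing the diff's lowercase types rather than quoting the real source:

// Sketch of the addressing rule exercised here: on a container network,
// use the task IP plus the container port; otherwise the agent host plus
// the host port.
func targetAddress(t task, m portMapping, onContainerNetwork bool) string {
	if onContainerNetwork && len(t.IPAddresses) > 0 {
		return net.JoinHostPort(t.IPAddresses[0].Address, strconv.Itoa(int(m.ContainerPort)))
	}
	return net.JoinHostPort(t.Host, strconv.Itoa(int(m.HostPort)))
}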
func TestMarathonSDSendGroupWithContainerNetworkAndPortMapping(t *testing.T) { func TestMarathonSDSendGroupWithContainerNetworkAndPortMapping(t *testing.T) {
var ( var (
ch = make(chan []*targetgroup.Group, 1) client = func(_ context.Context, _ *http.Client, _ string) (*appList, error) {
client = func(client *http.Client, url string) (*AppList, error) {
return marathonTestAppListWithContainerNetworkAndPortMappings(marathonValidLabel, 1), nil return marathonTestAppListWithContainerNetworkAndPortMappings(marathonValidLabel, 1), nil
} }
) )
if err := testUpdateServices(client, ch); err != nil { tgs, err := testUpdateServices(client)
if err != nil {
t.Fatalf("Got error: %s", err) t.Fatalf("Got error: %s", err)
} }
select { if len(tgs) != 1 {
case tgs := <-ch: t.Fatal("Expected 1 target group, got", len(tgs))
}
tg := tgs[0] tg := tgs[0]
if tg.Source != "test-service" { if tg.Source != "test-service" {
@ -704,89 +741,4 @@ func TestMarathonSDSendGroupWithContainerNetworkAndPortMapping(t *testing.T) {
if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" { if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" {
t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel]) t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel])
} }
default:
t.Fatal("Did not get a target group.")
}
}
func marathonTestAppListWithContainerNetworkAndPortDefinition(labels map[string]string, runningTasks int) *AppList {
var (
task = Task{
ID: "test-task-1",
Host: "mesos-slave1",
IPAddresses: []IPAddress{
{Address: "1.2.3.4"},
},
}
docker = DockerContainer{
Image: "repo/image:tag",
}
portDefinitions = []PortDefinition{
{Labels: labels, Port: 8080},
{Labels: make(map[string]string), Port: 1234},
}
container = Container{
Docker: docker,
}
networks = []Network{
{Mode: "container", Name: "test-network"},
}
app = App{
ID: "test-service",
Tasks: []Task{task},
RunningTasks: runningTasks,
Labels: labels,
Container: container,
Networks: networks,
PortDefinitions: portDefinitions,
}
)
return &AppList{
Apps: []App{app},
}
}
func TestMarathonSDSendGroupWithContainerNetworkAndPortDefinition(t *testing.T) {
var (
ch = make(chan []*targetgroup.Group, 1)
client = func(client *http.Client, url string) (*AppList, error) {
return marathonTestAppListWithContainerNetworkAndPortDefinition(marathonValidLabel, 1), nil
}
)
if err := testUpdateServices(client, ch); err != nil {
t.Fatalf("Got error: %s", err)
}
select {
case tgs := <-ch:
tg := tgs[0]
if tg.Source != "test-service" {
t.Fatalf("Wrong target group name: %s", tg.Source)
}
if len(tg.Targets) != 2 {
t.Fatalf("Wrong number of targets: %v", tg.Targets)
}
tgt := tg.Targets[0]
if tgt[model.AddressLabel] != "1.2.3.4:8080" {
t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel])
}
if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" {
t.Fatalf("Wrong first portMappings label from the first port: %s", tgt[model.AddressLabel])
}
if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "yes" {
t.Fatalf("Wrong first portDefinitions label from the first port: %s", tgt[model.AddressLabel])
}
tgt = tg.Targets[1]
if tgt[model.AddressLabel] != "1.2.3.4:1234" {
t.Fatalf("Wrong target address: %s", tgt[model.AddressLabel])
}
if tgt[model.LabelName(portMappingLabelPrefix+"prometheus")] != "" {
t.Fatalf("Wrong portMappings label from the second port: %s", tgt[model.AddressLabel])
}
if tgt[model.LabelName(portDefinitionLabelPrefix+"prometheus")] != "" {
t.Fatalf("Wrong portDefinitions label from the second port: %s", tgt[model.AddressLabel])
}
default:
t.Fatal("Did not get a target group.")
}
} }
View file
@ -17,15 +17,15 @@ import (
"context" "context"
"fmt" "fmt"
"net" "net"
"time"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud"
"github.com/gophercloud/gophercloud/openstack" "github.com/gophercloud/gophercloud/openstack"
"github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/hypervisors" "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/hypervisors"
"github.com/gophercloud/gophercloud/pagination" "github.com/gophercloud/gophercloud/pagination"
"github.com/pkg/errors"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
) )
@ -42,74 +42,28 @@ type HypervisorDiscovery struct {
provider *gophercloud.ProviderClient provider *gophercloud.ProviderClient
authOpts *gophercloud.AuthOptions authOpts *gophercloud.AuthOptions
region string region string
interval time.Duration
logger log.Logger logger log.Logger
port int port int
} }
// NewHypervisorDiscovery returns a new hypervisor discovery. // newHypervisorDiscovery returns a new hypervisor discovery.
func NewHypervisorDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions, func newHypervisorDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions,
interval time.Duration, port int, region string, l log.Logger) *HypervisorDiscovery { port int, region string, l log.Logger) *HypervisorDiscovery {
return &HypervisorDiscovery{provider: provider, authOpts: opts, return &HypervisorDiscovery{provider: provider, authOpts: opts,
region: region, interval: interval, port: port, logger: l} region: region, port: port, logger: l}
} }
// Run implements the Discoverer interface. func (h *HypervisorDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
func (h *HypervisorDiscovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) { h.provider.Context = ctx
// Get an initial set right away. err := openstack.Authenticate(h.provider, *h.authOpts)
tg, err := h.refresh()
if err != nil { if err != nil {
level.Error(h.logger).Log("msg", "Unable refresh target groups", "err", err.Error()) return nil, errors.Wrap(err, "could not authenticate to OpenStack")
} else {
select {
case ch <- []*targetgroup.Group{tg}:
case <-ctx.Done():
return
}
}
ticker := time.NewTicker(h.interval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
tg, err := h.refresh()
if err != nil {
level.Error(h.logger).Log("msg", "Unable refresh target groups", "err", err.Error())
continue
}
select {
case ch <- []*targetgroup.Group{tg}:
case <-ctx.Done():
return
}
case <-ctx.Done():
return
}
}
}
func (h *HypervisorDiscovery) refresh() (*targetgroup.Group, error) {
var err error
t0 := time.Now()
defer func() {
refreshDuration.Observe(time.Since(t0).Seconds())
if err != nil {
refreshFailuresCount.Inc()
}
}()
err = openstack.Authenticate(h.provider, *h.authOpts)
if err != nil {
return nil, fmt.Errorf("could not authenticate to OpenStack: %s", err)
} }
client, err := openstack.NewComputeV2(h.provider, gophercloud.EndpointOpts{ client, err := openstack.NewComputeV2(h.provider, gophercloud.EndpointOpts{
Region: h.region, Region: h.region,
}) })
if err != nil { if err != nil {
return nil, fmt.Errorf("could not create OpenStack compute session: %s", err) return nil, errors.Wrap(err, "could not create OpenStack compute session")
} }
tg := &targetgroup.Group{ tg := &targetgroup.Group{
@ -121,7 +75,7 @@ func (h *HypervisorDiscovery) refresh() (*targetgroup.Group, error) {
err = pagerHypervisors.EachPage(func(page pagination.Page) (bool, error) { err = pagerHypervisors.EachPage(func(page pagination.Page) (bool, error) {
hypervisorList, err := hypervisors.ExtractHypervisors(page) hypervisorList, err := hypervisors.ExtractHypervisors(page)
if err != nil { if err != nil {
return false, fmt.Errorf("could not extract hypervisors: %s", err) return false, errors.Wrap(err, "could not extract hypervisors")
} }
for _, hypervisor := range hypervisorList { for _, hypervisor := range hypervisorList {
labels := model.LabelSet{} labels := model.LabelSet{}
@ -140,5 +94,5 @@ func (h *HypervisorDiscovery) refresh() (*targetgroup.Group, error) {
return nil, err return nil, err
} }
return tg, nil return []*targetgroup.Group{tg}, nil
} }
View file
@ -14,6 +14,8 @@
package openstack package openstack
import ( import (
"context"
"strings"
"testing" "testing"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
@ -38,7 +40,7 @@ func (s *OpenstackSDHypervisorTestSuite) SetupTest(t *testing.T) {
s.Mock.HandleAuthSuccessfully() s.Mock.HandleAuthSuccessfully()
} }
func (s *OpenstackSDHypervisorTestSuite) openstackAuthSuccess() (Discovery, error) { func (s *OpenstackSDHypervisorTestSuite) openstackAuthSuccess() (refresher, error) {
conf := SDConfig{ conf := SDConfig{
IdentityEndpoint: s.Mock.Endpoint(), IdentityEndpoint: s.Mock.Endpoint(),
Password: "test", Password: "test",
@ -47,7 +49,7 @@ func (s *OpenstackSDHypervisorTestSuite) openstackAuthSuccess() (Discovery, erro
Region: "RegionOne", Region: "RegionOne",
Role: "hypervisor", Role: "hypervisor",
} }
return NewDiscovery(&conf, nil) return newRefresher(&conf, nil)
} }
func TestOpenstackSDHypervisorRefresh(t *testing.T) { func TestOpenstackSDHypervisorRefresh(t *testing.T) {
@ -56,7 +58,10 @@ func TestOpenstackSDHypervisorRefresh(t *testing.T) {
mock.SetupTest(t) mock.SetupTest(t)
hypervisor, _ := mock.openstackAuthSuccess() hypervisor, _ := mock.openstackAuthSuccess()
tg, err := hypervisor.refresh() ctx := context.Background()
tgs, err := hypervisor.refresh(ctx)
testutil.Equals(t, 1, len(tgs))
tg := tgs[0]
testutil.Ok(t, err) testutil.Ok(t, err)
testutil.Assert(t, tg != nil, "") testutil.Assert(t, tg != nil, "")
testutil.Assert(t, tg.Targets != nil, "") testutil.Assert(t, tg.Targets != nil, "")
@ -78,3 +83,17 @@ func TestOpenstackSDHypervisorRefresh(t *testing.T) {
mock.TearDownSuite() mock.TearDownSuite()
} }
func TestOpenstackSDHypervisorRefreshWithDoneContext(t *testing.T) {
mock := &OpenstackSDHypervisorTestSuite{}
mock.SetupTest(t)
hypervisor, _ := mock.openstackAuthSuccess()
ctx, cancel := context.WithCancel(context.Background())
cancel()
_, err := hypervisor.refresh(ctx)
testutil.NotOk(t, err, "")
testutil.Assert(t, strings.Contains(err.Error(), context.Canceled.Error()), "%q doesn't contain %q", err, context.Canceled)
mock.TearDownSuite()
}
View file
@ -17,7 +17,6 @@ import (
"context" "context"
"fmt" "fmt"
"net" "net"
"time"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" "github.com/go-kit/kit/log/level"
@ -26,6 +25,7 @@ import (
"github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/floatingips" "github.com/gophercloud/gophercloud/openstack/compute/v2/extensions/floatingips"
"github.com/gophercloud/gophercloud/openstack/compute/v2/servers" "github.com/gophercloud/gophercloud/openstack/compute/v2/servers"
"github.com/gophercloud/gophercloud/pagination" "github.com/gophercloud/gophercloud/pagination"
"github.com/pkg/errors"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/strutil" "github.com/prometheus/prometheus/util/strutil"
@ -34,13 +34,15 @@ import (
const ( const (
openstackLabelPrefix = model.MetaLabelPrefix + "openstack_" openstackLabelPrefix = model.MetaLabelPrefix + "openstack_"
openstackLabelAddressPool = openstackLabelPrefix + "address_pool" openstackLabelAddressPool = openstackLabelPrefix + "address_pool"
openstackLabelInstanceFlavor = openstackLabelPrefix + "instance_flavor"
openstackLabelInstanceID = openstackLabelPrefix + "instance_id" openstackLabelInstanceID = openstackLabelPrefix + "instance_id"
openstackLabelInstanceName = openstackLabelPrefix + "instance_name" openstackLabelInstanceName = openstackLabelPrefix + "instance_name"
openstackLabelInstanceStatus = openstackLabelPrefix + "instance_status" openstackLabelInstanceStatus = openstackLabelPrefix + "instance_status"
openstackLabelInstanceFlavor = openstackLabelPrefix + "instance_flavor"
openstackLabelPublicIP = openstackLabelPrefix + "public_ip"
openstackLabelPrivateIP = openstackLabelPrefix + "private_ip" openstackLabelPrivateIP = openstackLabelPrefix + "private_ip"
openstackLabelProjectID = openstackLabelPrefix + "project_id"
openstackLabelPublicIP = openstackLabelPrefix + "public_ip"
openstackLabelTagPrefix = openstackLabelPrefix + "tag_" openstackLabelTagPrefix = openstackLabelPrefix + "tag_"
openstackLabelUserID = openstackLabelPrefix + "user_id"
) )
// InstanceDiscovery discovers OpenStack instances. // InstanceDiscovery discovers OpenStack instances.
@ -48,57 +50,19 @@ type InstanceDiscovery struct {
provider *gophercloud.ProviderClient provider *gophercloud.ProviderClient
authOpts *gophercloud.AuthOptions authOpts *gophercloud.AuthOptions
region string region string
interval time.Duration
logger log.Logger logger log.Logger
port int port int
allTenants bool allTenants bool
} }
// NewInstanceDiscovery returns a new instance discovery. // NewInstanceDiscovery returns a new instance discovery.
func NewInstanceDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions, func newInstanceDiscovery(provider *gophercloud.ProviderClient, opts *gophercloud.AuthOptions,
interval time.Duration, port int, region string, allTenants bool, l log.Logger) *InstanceDiscovery { port int, region string, allTenants bool, l log.Logger) *InstanceDiscovery {
if l == nil { if l == nil {
l = log.NewNopLogger() l = log.NewNopLogger()
} }
return &InstanceDiscovery{provider: provider, authOpts: opts, return &InstanceDiscovery{provider: provider, authOpts: opts,
region: region, interval: interval, port: port, allTenants: allTenants, logger: l} region: region, port: port, allTenants: allTenants, logger: l}
}
// Run implements the Discoverer interface.
func (i *InstanceDiscovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
// Get an initial set right away.
tg, err := i.refresh()
if err != nil {
level.Error(i.logger).Log("msg", "Unable to refresh target groups", "err", err.Error())
} else {
select {
case ch <- []*targetgroup.Group{tg}:
case <-ctx.Done():
return
}
}
ticker := time.NewTicker(i.interval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
tg, err := i.refresh()
if err != nil {
level.Error(i.logger).Log("msg", "Unable to refresh target groups", "err", err.Error())
continue
}
select {
case ch <- []*targetgroup.Group{tg}:
case <-ctx.Done():
return
}
case <-ctx.Done():
return
}
}
} }
type floatingIPKey struct { type floatingIPKey struct {
@ -106,25 +70,17 @@ type floatingIPKey struct {
fixed string fixed string
} }
func (i *InstanceDiscovery) refresh() (*targetgroup.Group, error) { func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
var err error i.provider.Context = ctx
t0 := time.Now() err := openstack.Authenticate(i.provider, *i.authOpts)
defer func() {
refreshDuration.Observe(time.Since(t0).Seconds())
if err != nil { if err != nil {
refreshFailuresCount.Inc() return nil, errors.Wrap(err, "could not authenticate to OpenStack")
}
}()
err = openstack.Authenticate(i.provider, *i.authOpts)
if err != nil {
return nil, fmt.Errorf("could not authenticate to OpenStack: %s", err)
} }
client, err := openstack.NewComputeV2(i.provider, gophercloud.EndpointOpts{ client, err := openstack.NewComputeV2(i.provider, gophercloud.EndpointOpts{
Region: i.region, Region: i.region,
}) })
if err != nil { if err != nil {
return nil, fmt.Errorf("could not create OpenStack compute session: %s", err) return nil, errors.Wrap(err, "could not create OpenStack compute session")
} }
// OpenStack API reference // OpenStack API reference
@ -135,7 +91,7 @@ func (i *InstanceDiscovery) refresh() (*targetgroup.Group, error) {
err = pagerFIP.EachPage(func(page pagination.Page) (bool, error) { err = pagerFIP.EachPage(func(page pagination.Page) (bool, error) {
result, err := floatingips.ExtractFloatingIPs(page) result, err := floatingips.ExtractFloatingIPs(page)
if err != nil { if err != nil {
return false, fmt.Errorf("could not extract floatingips: %s", err) return false, errors.Wrap(err, "could not extract floatingips")
} }
for _, ip := range result { for _, ip := range result {
// Skip not associated ips // Skip not associated ips
@ -161,9 +117,12 @@ func (i *InstanceDiscovery) refresh() (*targetgroup.Group, error) {
Source: fmt.Sprintf("OS_" + i.region), Source: fmt.Sprintf("OS_" + i.region),
} }
err = pager.EachPage(func(page pagination.Page) (bool, error) { err = pager.EachPage(func(page pagination.Page) (bool, error) {
if ctx.Err() != nil {
return false, errors.Wrap(ctx.Err(), "could not extract instances")
}
instanceList, err := servers.ExtractServers(page) instanceList, err := servers.ExtractServers(page)
if err != nil { if err != nil {
return false, fmt.Errorf("could not extract instances: %s", err) return false, errors.Wrap(err, "could not extract instances")
} }
for _, s := range instanceList { for _, s := range instanceList {
@ -176,6 +135,8 @@ func (i *InstanceDiscovery) refresh() (*targetgroup.Group, error) {
openstackLabelInstanceID: model.LabelValue(s.ID), openstackLabelInstanceID: model.LabelValue(s.ID),
openstackLabelInstanceStatus: model.LabelValue(s.Status), openstackLabelInstanceStatus: model.LabelValue(s.Status),
openstackLabelInstanceName: model.LabelValue(s.Name), openstackLabelInstanceName: model.LabelValue(s.Name),
openstackLabelProjectID: model.LabelValue(s.TenantID),
openstackLabelUserID: model.LabelValue(s.UserID),
} }
id, ok := s.Flavor["id"].(string) id, ok := s.Flavor["id"].(string)
@ -234,5 +195,5 @@ func (i *InstanceDiscovery) refresh() (*targetgroup.Group, error) {
return nil, err return nil, err
} }
return tg, nil return []*targetgroup.Group{tg}, nil
} }
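The ctx.Err() probe at the top of the pager callback is what lets a long, multi-page server listing stop promptly once the caller cancels, instead of running to completion. The same pattern in isolation, as a generic sketch:

// Condensed form of the cooperative cancellation used above: check the
// context between units of work so cancellation is noticed between pages.
func eachPage(ctx context.Context, pages []string, handle func(string) error) error {
	for _, p := range pages {
		if err := ctx.Err(); err != nil {
			return err // context cancelled or deadline exceeded
		}
		if err := handle(p); err != nil {
			return err
		}
	}
	return nil
}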
View file
@ -14,7 +14,9 @@
package openstack package openstack
import ( import (
"context"
"fmt" "fmt"
"strings"
"testing" "testing"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
@ -40,7 +42,7 @@ func (s *OpenstackSDInstanceTestSuite) SetupTest(t *testing.T) {
s.Mock.HandleAuthSuccessfully() s.Mock.HandleAuthSuccessfully()
} }
func (s *OpenstackSDInstanceTestSuite) openstackAuthSuccess() (Discovery, error) { func (s *OpenstackSDInstanceTestSuite) openstackAuthSuccess() (refresher, error) {
conf := SDConfig{ conf := SDConfig{
IdentityEndpoint: s.Mock.Endpoint(), IdentityEndpoint: s.Mock.Endpoint(),
Password: "test", Password: "test",
@ -50,7 +52,7 @@ func (s *OpenstackSDInstanceTestSuite) openstackAuthSuccess() (Discovery, error)
Role: "instance", Role: "instance",
AllTenants: true, AllTenants: true,
} }
return NewDiscovery(&conf, nil) return newRefresher(&conf, nil)
} }
func TestOpenstackSDInstanceRefresh(t *testing.T) { func TestOpenstackSDInstanceRefresh(t *testing.T) {
@ -61,15 +63,19 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
instance, err := mock.openstackAuthSuccess() instance, err := mock.openstackAuthSuccess()
testutil.Ok(t, err) testutil.Ok(t, err)
tg, err := instance.refresh() ctx := context.Background()
tgs, err := instance.refresh(ctx)
testutil.Ok(t, err) testutil.Ok(t, err)
testutil.Equals(t, 1, len(tgs))
tg := tgs[0]
testutil.Assert(t, tg != nil, "") testutil.Assert(t, tg != nil, "")
testutil.Assert(t, tg.Targets != nil, "") testutil.Assert(t, tg.Targets != nil, "")
testutil.Equals(t, 4, len(tg.Targets)) testutil.Equals(t, 4, len(tg.Targets))
for i, lbls := range []model.LabelSet{ for i, lbls := range []model.LabelSet{
model.LabelSet{ {
"__address__": model.LabelValue("10.0.0.32:0"), "__address__": model.LabelValue("10.0.0.32:0"),
"__meta_openstack_instance_flavor": model.LabelValue("1"), "__meta_openstack_instance_flavor": model.LabelValue("1"),
"__meta_openstack_instance_id": model.LabelValue("ef079b0c-e610-4dfb-b1aa-b49f07ac48e5"), "__meta_openstack_instance_id": model.LabelValue("ef079b0c-e610-4dfb-b1aa-b49f07ac48e5"),
@ -78,8 +84,10 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
"__meta_openstack_private_ip": model.LabelValue("10.0.0.32"), "__meta_openstack_private_ip": model.LabelValue("10.0.0.32"),
"__meta_openstack_public_ip": model.LabelValue("10.10.10.2"), "__meta_openstack_public_ip": model.LabelValue("10.10.10.2"),
"__meta_openstack_address_pool": model.LabelValue("private"), "__meta_openstack_address_pool": model.LabelValue("private"),
"__meta_openstack_project_id": model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"),
"__meta_openstack_user_id": model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"),
}, },
model.LabelSet{ {
"__address__": model.LabelValue("10.0.0.31:0"), "__address__": model.LabelValue("10.0.0.31:0"),
"__meta_openstack_instance_flavor": model.LabelValue("1"), "__meta_openstack_instance_flavor": model.LabelValue("1"),
"__meta_openstack_instance_id": model.LabelValue("9e5476bd-a4ec-4653-93d6-72c93aa682ba"), "__meta_openstack_instance_id": model.LabelValue("9e5476bd-a4ec-4653-93d6-72c93aa682ba"),
@ -87,8 +95,10 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
"__meta_openstack_instance_name": model.LabelValue("derp"), "__meta_openstack_instance_name": model.LabelValue("derp"),
"__meta_openstack_private_ip": model.LabelValue("10.0.0.31"), "__meta_openstack_private_ip": model.LabelValue("10.0.0.31"),
"__meta_openstack_address_pool": model.LabelValue("private"), "__meta_openstack_address_pool": model.LabelValue("private"),
"__meta_openstack_project_id": model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"),
"__meta_openstack_user_id": model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"),
}, },
model.LabelSet{ {
"__address__": model.LabelValue("10.0.0.33:0"), "__address__": model.LabelValue("10.0.0.33:0"),
"__meta_openstack_instance_flavor": model.LabelValue("4"), "__meta_openstack_instance_flavor": model.LabelValue("4"),
"__meta_openstack_instance_id": model.LabelValue("9e5476bd-a4ec-4653-93d6-72c93aa682bb"), "__meta_openstack_instance_id": model.LabelValue("9e5476bd-a4ec-4653-93d6-72c93aa682bb"),
@ -97,8 +107,10 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
"__meta_openstack_private_ip": model.LabelValue("10.0.0.33"), "__meta_openstack_private_ip": model.LabelValue("10.0.0.33"),
"__meta_openstack_address_pool": model.LabelValue("private"), "__meta_openstack_address_pool": model.LabelValue("private"),
"__meta_openstack_tag_env": model.LabelValue("prod"), "__meta_openstack_tag_env": model.LabelValue("prod"),
"__meta_openstack_project_id": model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"),
"__meta_openstack_user_id": model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"),
}, },
model.LabelSet{ {
"__address__": model.LabelValue("10.0.0.34:0"), "__address__": model.LabelValue("10.0.0.34:0"),
"__meta_openstack_instance_flavor": model.LabelValue("4"), "__meta_openstack_instance_flavor": model.LabelValue("4"),
"__meta_openstack_instance_id": model.LabelValue("9e5476bd-a4ec-4653-93d6-72c93aa682bb"), "__meta_openstack_instance_id": model.LabelValue("9e5476bd-a4ec-4653-93d6-72c93aa682bb"),
@ -108,6 +120,8 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
"__meta_openstack_address_pool": model.LabelValue("private"), "__meta_openstack_address_pool": model.LabelValue("private"),
"__meta_openstack_tag_env": model.LabelValue("prod"), "__meta_openstack_tag_env": model.LabelValue("prod"),
"__meta_openstack_public_ip": model.LabelValue("10.10.10.4"), "__meta_openstack_public_ip": model.LabelValue("10.10.10.4"),
"__meta_openstack_project_id": model.LabelValue("fcad67a6189847c4aecfa3c81a05783b"),
"__meta_openstack_user_id": model.LabelValue("9349aff8be7545ac9d2f1d00999a23cd"),
}, },
} { } {
t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) { t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) {
@ -117,3 +131,17 @@ func TestOpenstackSDInstanceRefresh(t *testing.T) {
mock.TearDownSuite() mock.TearDownSuite()
} }
func TestOpenstackSDInstanceRefreshWithDoneContext(t *testing.T) {
mock := &OpenstackSDHypervisorTestSuite{}
mock.SetupTest(t)
hypervisor, _ := mock.openstackAuthSuccess()
ctx, cancel := context.WithCancel(context.Background())
cancel()
_, err := hypervisor.refresh(ctx)
testutil.NotOk(t, err, "")
testutil.Assert(t, strings.Contains(err.Error(), context.Canceled.Error()), "%q doesn't contain %q", err, context.Canceled)
mock.TearDownSuite()
}
View file
@ -247,7 +247,7 @@ func (m *SDMock) HandleHypervisorListSuccessfully() {
testHeader(m.t, r, "X-Auth-Token", tokenID) testHeader(m.t, r, "X-Auth-Token", tokenID)
w.Header().Add("Content-Type", "application/json") w.Header().Add("Content-Type", "application/json")
fmt.Fprintf(w, hypervisorListBody) fmt.Fprint(w, hypervisorListBody)
}) })
} }
@ -544,7 +544,7 @@ func (m *SDMock) HandleServerListSuccessfully() {
testHeader(m.t, r, "X-Auth-Token", tokenID) testHeader(m.t, r, "X-Auth-Token", tokenID)
w.Header().Add("Content-Type", "application/json") w.Header().Add("Content-Type", "application/json")
fmt.Fprintf(w, serverListBody) fmt.Fprint(w, serverListBody)
}) })
} }
@ -583,6 +583,6 @@ func (m *SDMock) HandleFloatingIPListSuccessfully() {
testHeader(m.t, r, "X-Auth-Token", tokenID) testHeader(m.t, r, "X-Auth-Token", tokenID)
w.Header().Add("Content-Type", "application/json") w.Header().Add("Content-Type", "application/json")
fmt.Fprintf(w, listOutput) fmt.Fprint(w, listOutput)
}) })
} }
View file
@ -15,38 +15,26 @@ package openstack
import ( import (
"context" "context"
"errors"
"fmt"
"net/http" "net/http"
"time" "time"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/gophercloud/gophercloud" "github.com/gophercloud/gophercloud"
"github.com/gophercloud/gophercloud/openstack" "github.com/gophercloud/gophercloud/openstack"
"github.com/mwitkow/go-conntrack" conntrack "github.com/mwitkow/go-conntrack"
"github.com/prometheus/client_golang/prometheus" "github.com/pkg/errors"
config_util "github.com/prometheus/common/config" config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
) )
var ( // DefaultSDConfig is the default OpenStack SD configuration.
refreshFailuresCount = prometheus.NewCounter( var DefaultSDConfig = SDConfig{
prometheus.CounterOpts{
Name: "prometheus_sd_openstack_refresh_failures_total",
Help: "The number of OpenStack-SD scrape failures.",
})
refreshDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Name: "prometheus_sd_openstack_refresh_duration_seconds",
Help: "The duration of an OpenStack-SD refresh in seconds.",
})
// DefaultSDConfig is the default OpenStack SD configuration.
DefaultSDConfig = SDConfig{
Port: 80, Port: 80,
RefreshInterval: model.Duration(60 * time.Second), RefreshInterval: model.Duration(60 * time.Second),
} }
)
// SDConfig is the configuration for OpenStack based service discovery. // SDConfig is the configuration for OpenStack based service discovery.
type SDConfig struct { type SDConfig struct {
@ -58,6 +46,9 @@ type SDConfig struct {
ProjectID string `yaml:"project_id"` ProjectID string `yaml:"project_id"`
DomainName string `yaml:"domain_name"` DomainName string `yaml:"domain_name"`
DomainID string `yaml:"domain_id"` DomainID string `yaml:"domain_id"`
ApplicationCredentialName string `yaml:"application_credential_name"`
ApplicationCredentialID string `yaml:"application_credential_id"`
ApplicationCredentialSecret config_util.Secret `yaml:"application_credential_secret"`
Role Role `yaml:"role"` Role Role `yaml:"role"`
Region string `yaml:"region"` Region string `yaml:"region"`
RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
@ -66,7 +57,7 @@ type SDConfig struct {
TLSConfig config_util.TLSConfig `yaml:"tls_config,omitempty"` TLSConfig config_util.TLSConfig `yaml:"tls_config,omitempty"`
} }
// OpenStackRole is role of the target in OpenStack. // Role is the role of the target in OpenStack.
type Role string type Role string
// The valid options for OpenStackRole. // The valid options for OpenStackRole.
@ -88,7 +79,7 @@ func (c *Role) UnmarshalYAML(unmarshal func(interface{}) error) error {
case OpenStackRoleHypervisor, OpenStackRoleInstance: case OpenStackRoleHypervisor, OpenStackRoleInstance:
return nil return nil
default: default:
return fmt.Errorf("unknown OpenStack SD role %q", *c) return errors.Errorf("unknown OpenStack SD role %q", *c)
} }
} }
@ -101,28 +92,34 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err return err
} }
if c.Role == "" { if c.Role == "" {
return fmt.Errorf("role missing (one of: instance, hypervisor)") return errors.New("role missing (one of: instance, hypervisor)")
} }
if c.Region == "" { if c.Region == "" {
return fmt.Errorf("Openstack SD configuration requires a region") return errors.New("openstack SD configuration requires a region")
} }
return nil return nil
} }
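Pulled together, the new application-credential fields and the validation above admit a scrape configuration along these lines (endpoint and credential values are placeholders, not taken from this change):

scrape_configs:
  - job_name: openstack-instances
    openstack_sd_configs:
      - role: instance
        region: RegionOne
        identity_endpoint: https://keystone.example.org:5000/v3
        application_credential_id: abc123            # placeholder
        application_credential_secret: s3cr3tvalue   # placeholder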
func init() { type refresher interface {
prometheus.MustRegister(refreshFailuresCount) refresh(context.Context) ([]*targetgroup.Group, error)
prometheus.MustRegister(refreshDuration)
} }
// Discovery periodically performs OpenStack-SD requests. It implements // NewDiscovery returns a new OpenStack Discoverer which periodically refreshes its targets.
// the Discoverer interface. func NewDiscovery(conf *SDConfig, l log.Logger) (*refresh.Discovery, error) {
type Discovery interface { r, err := newRefresher(conf, l)
Run(ctx context.Context, ch chan<- []*targetgroup.Group) if err != nil {
refresh() (tg *targetgroup.Group, err error) return nil, err
}
return refresh.NewDiscovery(
l,
"openstack",
time.Duration(conf.RefreshInterval),
r.refresh,
), nil
} }
// NewDiscovery returns a new OpenStackDiscovery which periodically refreshes its targets. func newRefresher(conf *SDConfig, l log.Logger) (refresher, error) {
func NewDiscovery(conf *SDConfig, l log.Logger) (Discovery, error) {
var opts gophercloud.AuthOptions var opts gophercloud.AuthOptions
if conf.IdentityEndpoint == "" { if conf.IdentityEndpoint == "" {
var err error var err error
@ -140,6 +137,9 @@ func NewDiscovery(conf *SDConfig, l log.Logger) (Discovery, error) {
TenantID: conf.ProjectID, TenantID: conf.ProjectID,
DomainName: conf.DomainName, DomainName: conf.DomainName,
DomainID: conf.DomainID, DomainID: conf.DomainID,
ApplicationCredentialID: conf.ApplicationCredentialID,
ApplicationCredentialName: conf.ApplicationCredentialName,
ApplicationCredentialSecret: string(conf.ApplicationCredentialSecret),
} }
} }
client, err := openstack.NewClient(opts.IdentityEndpoint) client, err := openstack.NewClient(opts.IdentityEndpoint)
@ -163,14 +163,9 @@ func NewDiscovery(conf *SDConfig, l log.Logger) (Discovery, error) {
} }
switch conf.Role { switch conf.Role {
case OpenStackRoleHypervisor: case OpenStackRoleHypervisor:
hypervisor := NewHypervisorDiscovery(client, &opts, return newHypervisorDiscovery(client, &opts, conf.Port, conf.Region, l), nil
time.Duration(conf.RefreshInterval), conf.Port, conf.Region, l)
return hypervisor, nil
case OpenStackRoleInstance: case OpenStackRoleInstance:
instance := NewInstanceDiscovery(client, &opts, return newInstanceDiscovery(client, &opts, conf.Port, conf.Region, conf.AllTenants, l), nil
time.Duration(conf.RefreshInterval), conf.Port, conf.Region, conf.AllTenants, l)
return instance, nil
default:
return nil, errors.New("unknown OpenStack discovery role")
} }
return nil, errors.New("unknown OpenStack discovery role")
} }
View file
@ -0,0 +1,118 @@
// Copyright 2019 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package refresh
import (
"context"
"time"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/prometheus/discovery/targetgroup"
)
var (
failuresCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "prometheus_sd_refresh_failures_total",
Help: "Number of refresh failures for the given SD mechanism.",
},
[]string{"mechanism"},
)
duration = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Name: "prometheus_sd_refresh_duration_seconds",
Help: "The duration of a refresh in seconds for the given SD mechanism.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
},
[]string{"mechanism"},
)
)
func init() {
prometheus.MustRegister(duration, failuresCount)
}
// Discovery implements the Discoverer interface.
type Discovery struct {
logger log.Logger
interval time.Duration
refreshf func(ctx context.Context) ([]*targetgroup.Group, error)
failures prometheus.Counter
duration prometheus.Observer
}
// NewDiscovery returns a Discoverer function that calls a refresh() function at every interval.
func NewDiscovery(l log.Logger, mech string, interval time.Duration, refreshf func(ctx context.Context) ([]*targetgroup.Group, error)) *Discovery {
if l == nil {
l = log.NewNopLogger()
}
return &Discovery{
logger: l,
interval: interval,
refreshf: refreshf,
failures: failuresCount.WithLabelValues(mech),
duration: duration.WithLabelValues(mech),
}
}
// Run implements the Discoverer interface.
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
// Get an initial set right away.
tgs, err := d.refresh(ctx)
if err != nil {
level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error())
} else {
select {
case ch <- tgs:
case <-ctx.Done():
return
}
}
ticker := time.NewTicker(d.interval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
tgs, err := d.refresh(ctx)
if err != nil {
level.Error(d.logger).Log("msg", "Unable to refresh target groups", "err", err.Error())
continue
}
select {
case ch <- tgs:
case <-ctx.Done():
return
}
case <-ctx.Done():
return
}
}
}
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
now := time.Now()
defer d.duration.Observe(time.Since(now).Seconds())
tgs, err := d.refreshf(ctx)
if err != nil {
d.failures.Inc()
}
return tgs, err
}
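With this package in place, a discovery mechanism only has to supply the refresh callback, exactly as the OpenStack and Triton changes elsewhere in this diff do. A minimal sketch with placeholder names (mySD, "mysd") that are illustrations, not code from this change:

// Placeholder SD mechanism wired through refresh.NewDiscovery.
type mySD struct{}

func (s *mySD) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
	return []*targetgroup.Group{{Source: "example"}}, nil
}

func newMyDiscovery(l log.Logger) *refresh.Discovery {
	sd := &mySD{}
	return refresh.NewDiscovery(l, "mysd", 30*time.Second, sd.refresh)
}

Every adopter then shares the prometheus_sd_refresh_duration_seconds and prometheus_sd_refresh_failures_total metrics, distinguished only by the mechanism label.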
View file
@ -0,0 +1,83 @@
// Copyright 2019 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package refresh
import (
"context"
"fmt"
"testing"
"time"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/testutil"
)
func TestRefresh(t *testing.T) {
tg1 := []*targetgroup.Group{
{
Source: "tg",
Targets: []model.LabelSet{
{
model.LabelName("t1"): model.LabelValue("v1"),
},
{
model.LabelName("t2"): model.LabelValue("v2"),
},
},
Labels: model.LabelSet{
model.LabelName("l1"): model.LabelValue("lv1"),
},
},
}
tg2 := []*targetgroup.Group{
{
Source: "tg",
},
}
var i int
refresh := func(ctx context.Context) ([]*targetgroup.Group, error) {
i++
switch i {
case 1:
return tg1, nil
case 2:
return tg2, nil
}
return nil, fmt.Errorf("some error")
}
interval := time.Millisecond
d := NewDiscovery(nil, "test", interval, refresh)
ch := make(chan []*targetgroup.Group)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go d.Run(ctx, ch)
tg := <-ch
testutil.Equals(t, tg1, tg)
tg = <-ch
testutil.Equals(t, tg2, tg)
tick := time.NewTicker(2 * interval)
defer tick.Stop()
select {
case <-ch:
t.Fatal("Unexpected target group")
case <-tick.C:
}
}
View file
@ -17,6 +17,7 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"io/ioutil" "io/ioutil"
"net/http" "net/http"
"net/url" "net/url"
@ -24,12 +25,12 @@ import (
"time" "time"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level" conntrack "github.com/mwitkow/go-conntrack"
"github.com/mwitkow/go-conntrack" "github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus" config_util "github.com/prometheus/common/config"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
config_util "github.com/prometheus/common/config" "github.com/prometheus/prometheus/discovery/refresh"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
) )
@ -41,27 +42,14 @@ const (
tritonLabelMachineBrand = tritonLabel + "machine_brand" tritonLabelMachineBrand = tritonLabel + "machine_brand"
tritonLabelMachineImage = tritonLabel + "machine_image" tritonLabelMachineImage = tritonLabel + "machine_image"
tritonLabelServerID = tritonLabel + "server_id" tritonLabelServerID = tritonLabel + "server_id"
namespace = "prometheus"
) )
var ( // DefaultSDConfig is the default Triton SD configuration.
refreshFailuresCount = prometheus.NewCounter( var DefaultSDConfig = SDConfig{
prometheus.CounterOpts{
Name: "prometheus_sd_triton_refresh_failures_total",
Help: "The number of Triton-SD scrape failures.",
})
refreshDuration = prometheus.NewSummary(
prometheus.SummaryOpts{
Name: "prometheus_sd_triton_refresh_duration_seconds",
Help: "The duration of a Triton-SD refresh in seconds.",
})
// DefaultSDConfig is the default Triton SD configuration.
DefaultSDConfig = SDConfig{
Port: 9163, Port: 9163,
RefreshInterval: model.Duration(60 * time.Second), RefreshInterval: model.Duration(60 * time.Second),
Version: 1, Version: 1,
} }
)
// SDConfig is the configuration for Triton based service discovery. // SDConfig is the configuration for Triton based service discovery.
type SDConfig struct { type SDConfig struct {
@ -84,27 +72,22 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err return err
} }
if c.Account == "" { if c.Account == "" {
return fmt.Errorf("Triton SD configuration requires an account") return errors.New("triton SD configuration requires an account")
} }
if c.DNSSuffix == "" { if c.DNSSuffix == "" {
return fmt.Errorf("Triton SD configuration requires a dns_suffix") return errors.New("triton SD configuration requires a dns_suffix")
} }
if c.Endpoint == "" { if c.Endpoint == "" {
return fmt.Errorf("Triton SD configuration requires an endpoint") return errors.New("triton SD configuration requires an endpoint")
} }
if c.RefreshInterval <= 0 { if c.RefreshInterval <= 0 {
return fmt.Errorf("Triton SD configuration requires RefreshInterval to be a positive integer") return errors.New("triton SD configuration requires RefreshInterval to be a positive integer")
} }
return nil return nil
} }
func init() {
prometheus.MustRegister(refreshFailuresCount)
prometheus.MustRegister(refreshDuration)
}
// DiscoveryResponse models a JSON response from the Triton discovery. // DiscoveryResponse models a JSON response from the Triton discovery.
type DiscoveryResponse struct { type discoveryResponse struct {
Containers []struct { Containers []struct {
Groups []string `json:"groups"` Groups []string `json:"groups"`
ServerUUID string `json:"server_uuid"` ServerUUID string `json:"server_uuid"`
@ -118,18 +101,14 @@ type DiscoveryResponse struct {
// Discovery periodically performs Triton-SD requests. It implements // Discovery periodically performs Triton-SD requests. It implements
// the Discoverer interface. // the Discoverer interface.
type Discovery struct { type Discovery struct {
*refresh.Discovery
client *http.Client client *http.Client
interval time.Duration interval time.Duration
logger log.Logger
sdConfig *SDConfig sdConfig *SDConfig
} }
// New returns a new Discovery which periodically refreshes its targets. // New returns a new Discovery which periodically refreshes its targets.
func New(logger log.Logger, conf *SDConfig) (*Discovery, error) { func New(logger log.Logger, conf *SDConfig) (*Discovery, error) {
if logger == nil {
logger = log.NewNopLogger()
}
tls, err := config_util.NewTLSConfig(&conf.TLSConfig) tls, err := config_util.NewTLSConfig(&conf.TLSConfig)
if err != nil { if err != nil {
return nil, err return nil, err
@ -144,79 +123,55 @@ func New(logger log.Logger, conf *SDConfig) (*Discovery, error) {
} }
client := &http.Client{Transport: transport} client := &http.Client{Transport: transport}
return &Discovery{ d := &Discovery{
client: client, client: client,
interval: time.Duration(conf.RefreshInterval), interval: time.Duration(conf.RefreshInterval),
logger: logger,
sdConfig: conf, sdConfig: conf,
}, nil }
d.Discovery = refresh.NewDiscovery(
logger,
"triton",
time.Duration(conf.RefreshInterval),
d.refresh,
)
return d, nil
} }
// Run implements the Discoverer interface. func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
func (d *Discovery) Run(ctx context.Context, ch chan<- []*targetgroup.Group) {
defer close(ch)
ticker := time.NewTicker(d.interval)
defer ticker.Stop()
// Get an initial set right away.
tg, err := d.refresh()
if err != nil {
level.Error(d.logger).Log("msg", "Refreshing targets failed", "err", err)
} else {
ch <- []*targetgroup.Group{tg}
}
for {
select {
case <-ticker.C:
tg, err := d.refresh()
if err != nil {
level.Error(d.logger).Log("msg", "Refreshing targets failed", "err", err)
} else {
ch <- []*targetgroup.Group{tg}
}
case <-ctx.Done():
return
}
}
}
func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
t0 := time.Now()
defer func() {
refreshDuration.Observe(time.Since(t0).Seconds())
if err != nil {
refreshFailuresCount.Inc()
}
}()
var endpoint = fmt.Sprintf("https://%s:%d/v%d/discover", d.sdConfig.Endpoint, d.sdConfig.Port, d.sdConfig.Version) var endpoint = fmt.Sprintf("https://%s:%d/v%d/discover", d.sdConfig.Endpoint, d.sdConfig.Port, d.sdConfig.Version)
if len(d.sdConfig.Groups) > 0 { if len(d.sdConfig.Groups) > 0 {
groups := url.QueryEscape(strings.Join(d.sdConfig.Groups, ",")) groups := url.QueryEscape(strings.Join(d.sdConfig.Groups, ","))
endpoint = fmt.Sprintf("%s?groups=%s", endpoint, groups) endpoint = fmt.Sprintf("%s?groups=%s", endpoint, groups)
} }
tg = &targetgroup.Group{ tg := &targetgroup.Group{
Source: endpoint, Source: endpoint,
} }
resp, err := d.client.Get(endpoint) req, err := http.NewRequest("GET", endpoint, nil)
if err != nil { if err != nil {
return tg, fmt.Errorf("an error occurred when requesting targets from the discovery endpoint. %s", err) return nil, err
}
req = req.WithContext(ctx)
resp, err := d.client.Do(req)
if err != nil {
return nil, errors.Wrap(err, "an error occurred when requesting targets from the discovery endpoint")
} }
defer resp.Body.Close() defer func() {
io.Copy(ioutil.Discard, resp.Body)
resp.Body.Close()
}()
data, err := ioutil.ReadAll(resp.Body) data, err := ioutil.ReadAll(resp.Body)
if err != nil { if err != nil {
return tg, fmt.Errorf("an error occurred when reading the response body. %s", err) return nil, errors.Wrap(err, "an error occurred when reading the response body")
} }
dr := DiscoveryResponse{} dr := discoveryResponse{}
err = json.Unmarshal(data, &dr) err = json.Unmarshal(data, &dr)
if err != nil { if err != nil {
return tg, fmt.Errorf("an error occurred unmarshaling the disovery response json. %s", err) return nil, errors.Wrap(err, "an error occurred unmarshaling the discovery response json")
} }
for _, container := range dr.Containers { for _, container := range dr.Containers {
@ -238,5 +193,5 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
tg.Targets = append(tg.Targets, labels) tg.Targets = append(tg.Targets, labels)
} }
return tg, nil return []*targetgroup.Group{tg}, nil
} }
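Two details in the rewritten refresh deserve a note: the request is bound to the caller's context (exercised by TestTritonSDRefreshCancelled below), and the response body is drained before being closed. The drain matters for connection reuse; restated with a comment:

// Drain-then-close, as above: reading the body to EOF before Close lets
// net/http hand the TCP connection back to its keep-alive pool instead of
// tearing it down after every refresh.
defer func() {
	io.Copy(ioutil.Discard, resp.Body)
	resp.Body.Close()
}()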
View file
@ -23,11 +23,10 @@ import (
"strconv" "strconv"
"strings" "strings"
"testing" "testing"
"time"
"github.com/prometheus/common/config" "github.com/prometheus/common/config"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/util/testutil" "github.com/prometheus/prometheus/util/testutil"
) )
@ -67,8 +66,12 @@ var (
} }
) )
func newTritonDiscovery(c SDConfig) (*Discovery, error) {
return New(nil, &c)
}
func TestTritonSDNew(t *testing.T) { func TestTritonSDNew(t *testing.T) {
td, err := New(nil, &conf) td, err := newTritonDiscovery(conf)
testutil.Ok(t, err) testutil.Ok(t, err)
testutil.Assert(t, td != nil, "") testutil.Assert(t, td != nil, "")
testutil.Assert(t, td.client != nil, "") testutil.Assert(t, td.client != nil, "")
@ -81,13 +84,13 @@ func TestTritonSDNew(t *testing.T) {
} }
func TestTritonSDNewBadConfig(t *testing.T) { func TestTritonSDNewBadConfig(t *testing.T) {
td, err := New(nil, &badconf) td, err := newTritonDiscovery(badconf)
testutil.NotOk(t, err, "") testutil.NotOk(t, err, "")
testutil.Assert(t, td == nil, "") testutil.Assert(t, td == nil, "")
} }
func TestTritonSDNewGroupsConfig(t *testing.T) { func TestTritonSDNewGroupsConfig(t *testing.T) {
td, err := New(nil, &groupsconf) td, err := newTritonDiscovery(groupsconf)
testutil.Ok(t, err) testutil.Ok(t, err)
testutil.Assert(t, td != nil, "") testutil.Assert(t, td != nil, "")
testutil.Assert(t, td.client != nil, "") testutil.Assert(t, td.client != nil, "")
@ -100,33 +103,6 @@ func TestTritonSDNewGroupsConfig(t *testing.T) {
testutil.Equals(t, groupsconf.Port, td.sdConfig.Port) testutil.Equals(t, groupsconf.Port, td.sdConfig.Port)
} }
func TestTritonSDRun(t *testing.T) {
var (
td, err = New(nil, &conf)
ch = make(chan []*targetgroup.Group)
ctx, cancel = context.WithCancel(context.Background())
)
testutil.Ok(t, err)
testutil.Assert(t, td != nil, "")
wait := make(chan struct{})
go func() {
td.Run(ctx, ch)
close(wait)
}()
select {
case <-time.After(60 * time.Millisecond):
// Expected.
case tgs := <-ch:
t.Fatalf("Unexpected target groups in triton discovery: %s", tgs)
}
cancel()
<-wait
}
func TestTritonSDRefreshNoTargets(t *testing.T) { func TestTritonSDRefreshNoTargets(t *testing.T) {
tgts := testTritonSDRefresh(t, "{\"containers\":[]}") tgts := testTritonSDRefresh(t, "{\"containers\":[]}")
testutil.Assert(t, tgts == nil, "") testutil.Assert(t, tgts == nil, "")
@ -160,21 +136,29 @@ func TestTritonSDRefreshMultipleTargets(t *testing.T) {
func TestTritonSDRefreshNoServer(t *testing.T) { func TestTritonSDRefreshNoServer(t *testing.T) {
var ( var (
td, err = New(nil, &conf) td, _ = newTritonDiscovery(conf)
) )
testutil.Ok(t, err)
testutil.Assert(t, td != nil, "")
tg, rerr := td.refresh() _, err := td.refresh(context.Background())
testutil.NotOk(t, rerr, "") testutil.NotOk(t, err, "")
testutil.Equals(t, strings.Contains(rerr.Error(), "an error occurred when requesting targets from the discovery endpoint."), true) testutil.Equals(t, strings.Contains(err.Error(), "an error occurred when requesting targets from the discovery endpoint"), true)
testutil.Assert(t, tg != nil, "") }
testutil.Assert(t, tg.Targets == nil, "")
func TestTritonSDRefreshCancelled(t *testing.T) {
var (
td, _ = newTritonDiscovery(conf)
)
ctx, cancel := context.WithCancel(context.Background())
cancel()
_, err := td.refresh(ctx)
testutil.NotOk(t, err, "")
testutil.Equals(t, strings.Contains(err.Error(), context.Canceled.Error()), true)
} }
func testTritonSDRefresh(t *testing.T, dstr string) []model.LabelSet { func testTritonSDRefresh(t *testing.T, dstr string) []model.LabelSet {
var ( var (
td, err = New(nil, &conf) td, _ = newTritonDiscovery(conf)
s = httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { s = httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, dstr) fmt.Fprintln(w, dstr)
})) }))
@ -182,26 +166,25 @@ func testTritonSDRefresh(t *testing.T, dstr string) []model.LabelSet {
defer s.Close() defer s.Close()
u, uperr := url.Parse(s.URL) u, err := url.Parse(s.URL)
testutil.Ok(t, uperr) testutil.Ok(t, err)
testutil.Assert(t, u != nil, "") testutil.Assert(t, u != nil, "")
host, strport, sherr := net.SplitHostPort(u.Host) host, strport, err := net.SplitHostPort(u.Host)
testutil.Ok(t, sherr) testutil.Ok(t, err)
testutil.Assert(t, host != "", "") testutil.Assert(t, host != "", "")
testutil.Assert(t, strport != "", "") testutil.Assert(t, strport != "", "")
port, atoierr := strconv.Atoi(strport) port, err := strconv.Atoi(strport)
testutil.Ok(t, atoierr) testutil.Ok(t, err)
testutil.Assert(t, port != 0, "") testutil.Assert(t, port != 0, "")
td.sdConfig.Port = port td.sdConfig.Port = port
tgs, err := td.refresh(context.Background())
testutil.Ok(t, err) testutil.Ok(t, err)
testutil.Assert(t, td != nil, "") testutil.Equals(t, 1, len(tgs))
tg := tgs[0]
tg, err := td.refresh()
testutil.Ok(t, err)
testutil.Assert(t, tg != nil, "") testutil.Assert(t, tg != nil, "")
return tg.Targets return tg.Targets
View file
@ -23,6 +23,7 @@ import (
"time" "time"
"github.com/go-kit/kit/log" "github.com/go-kit/kit/log"
"github.com/pkg/errors"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/samuel/go-zookeeper/zk" "github.com/samuel/go-zookeeper/zk"
@ -58,14 +59,14 @@ func (c *ServersetSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
return err return err
} }
if len(c.Servers) == 0 { if len(c.Servers) == 0 {
return fmt.Errorf("serverset SD config must contain at least one Zookeeper server") return errors.New("serverset SD config must contain at least one Zookeeper server")
} }
if len(c.Paths) == 0 { if len(c.Paths) == 0 {
return fmt.Errorf("serverset SD config must contain at least one path") return errors.New("serverset SD config must contain at least one path")
} }
for _, path := range c.Paths { for _, path := range c.Paths {
if !strings.HasPrefix(path, "/") { if !strings.HasPrefix(path, "/") {
return fmt.Errorf("serverset SD config paths must begin with '/': %s", path) return errors.Errorf("serverset SD config paths must begin with '/': %s", path)
} }
} }
return nil return nil
@ -87,14 +88,14 @@ func (c *NerveSDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
return err return err
} }
if len(c.Servers) == 0 { if len(c.Servers) == 0 {
return fmt.Errorf("nerve SD config must contain at least one Zookeeper server") return errors.New("nerve SD config must contain at least one Zookeeper server")
} }
if len(c.Paths) == 0 { if len(c.Paths) == 0 {
return fmt.Errorf("nerve SD config must contain at least one path") return errors.New("nerve SD config must contain at least one path")
} }
for _, path := range c.Paths { for _, path := range c.Paths {
if !strings.HasPrefix(path, "/") { if !strings.HasPrefix(path, "/") {
return fmt.Errorf("nerve SD config paths must begin with '/': %s", path) return errors.Errorf("nerve SD config paths must begin with '/': %s", path)
} }
} }
return nil return nil
@ -223,7 +224,7 @@ func parseServersetMember(data []byte, path string) (model.LabelSet, error) {
member := serversetMember{} member := serversetMember{}
if err := json.Unmarshal(data, &member); err != nil { if err := json.Unmarshal(data, &member); err != nil {
return nil, fmt.Errorf("error unmarshaling serverset member %q: %s", path, err) return nil, errors.Wrapf(err, "error unmarshaling serverset member %q", path)
} }
labels := model.LabelSet{} labels := model.LabelSet{}
@ -265,7 +266,7 @@ func parseNerveMember(data []byte, path string) (model.LabelSet, error) {
member := nerveMember{} member := nerveMember{}
err := json.Unmarshal(data, &member) err := json.Unmarshal(data, &member)
if err != nil { if err != nil {
return nil, fmt.Errorf("error unmarshaling nerve member %q: %s", path, err) return nil, errors.Wrapf(err, "error unmarshaling nerve member %q", path)
} }
labels := model.LabelSet{} labels := model.LabelSet{}

View file

@ -22,7 +22,7 @@ An example rules file with an alert would be:
groups: groups:
- name: example - name: example
rules: rules:
- alert: HighErrorRate - alert: HighRequestLatency
expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5 expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5
for: 10m for: 10m
labels: labels:
@ -42,9 +42,11 @@ The `annotations` clause specifies a set of informational labels that can be use
#### Templating #### Templating
Label and annotation values can be templated using [console templates](https://prometheus.io/docs/visualization/consoles). Label and annotation values can be templated using [console
The `$labels` variable holds the label key/value pairs of an alert instance templates](https://prometheus.io/docs/visualization/consoles). The `$labels`
and `$value` holds the evaluated value of an alert instance. variable holds the label key/value pairs of an alert instance. The configured
external labels can be accessed via the `$externalLabels` variable. The
`$value` variable holds the evaluated value of an alert instance.
# To insert a firing element's label values: # To insert a firing element's label values:
{{ $labels.<labelname> }} {{ $labels.<labelname> }}

View file

@ -24,7 +24,7 @@ This will also reload any configured rule files.
To specify which configuration file to load, use the `--config.file` flag. To specify which configuration file to load, use the `--config.file` flag.
The file is written in [YAML format](http://en.wikipedia.org/wiki/YAML), The file is written in [YAML format](https://en.wikipedia.org/wiki/YAML),
defined by the scheme described below. defined by the scheme described below.
Brackets indicate that a parameter is optional. For non-list parameters the Brackets indicate that a parameter is optional. For non-list parameters the
value is set to the specified default. value is set to the specified default.
@ -135,6 +135,16 @@ job_name: <job_name>
# when a time series does not have a given label yet and are ignored otherwise. # when a time series does not have a given label yet and are ignored otherwise.
[ honor_labels: <boolean> | default = false ] [ honor_labels: <boolean> | default = false ]
# honor_timestamps controls whether Prometheus respects the timestamps present
# in scraped data.
#
# If honor_timestamps is set to "true", the timestamps of the metrics exposed
# by the target will be used.
#
# If honor_timestamps is set to "false", the timestamps of the metrics exposed
# by the target will be ignored.
[ honor_timestamps: <boolean> | default = true ]
# Configures the protocol scheme used for requests. # Configures the protocol scheme used for requests.
[ scheme: <scheme> | default = http ] [ scheme: <scheme> | default = http ]
@ -246,7 +256,7 @@ A `tls_config` allows configuring TLS connections.
[ key_file: <filename> ] [ key_file: <filename> ]
# ServerName extension to indicate the name of the server. # ServerName extension to indicate the name of the server.
# http://tools.ietf.org/html/rfc4366#section-3.1 # https://tools.ietf.org/html/rfc4366#section-3.1
[ server_name: <string> ] [ server_name: <string> ]
# Disable validation of the server certificate. # Disable validation of the server certificate.
@ -264,9 +274,12 @@ The following meta labels are available on targets during relabeling:
* `__meta_azure_machine_name`: the machine name * `__meta_azure_machine_name`: the machine name
* `__meta_azure_machine_os_type`: the machine operating system * `__meta_azure_machine_os_type`: the machine operating system
* `__meta_azure_machine_private_ip`: the machine's private IP * `__meta_azure_machine_private_ip`: the machine's private IP
* `__meta_azure_machine_public_ip`: the machine's public IP if it exists
* `__meta_azure_machine_resource_group`: the machine's resource group * `__meta_azure_machine_resource_group`: the machine's resource group
* `__meta_azure_machine_tag_<tagname>`: each tag value of the machine * `__meta_azure_machine_tag_<tagname>`: each tag value of the machine
* `__meta_azure_machine_scale_set`: the name of the scale set which the vm is part of (this value is only set if you are using a [scale set](https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/)) * `__meta_azure_machine_scale_set`: the name of the scale set which the vm is part of (this value is only set if you are using a [scale set](https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/))
* `__meta_azure_subscription_id`: the subscription ID
* `__meta_azure_tenant_id`: the tenant ID
See below for the configuration options for Azure discovery: See below for the configuration options for Azure discovery:
@ -274,14 +287,18 @@ See below for the configuration options for Azure discovery:
# The information to access the Azure API. # The information to access the Azure API.
# The Azure environment. # The Azure environment.
[ environment: <string> | default = AzurePublicCloud ] [ environment: <string> | default = AzurePublicCloud ]
# The subscription ID.
# The authentication method, either OAuth or ManagedIdentity.
# See https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/overview
[ authentication_method: <string> | default = OAuth ]
# The subscription ID. Always required.
subscription_id: <string> subscription_id: <string>
# The tenant ID. # Optional tenant ID. Only required with authentication_method OAuth.
tenant_id: <string> [ tenant_id: <string> ]
# The client ID. # Optional client ID. Only required with authentication_method OAuth.
client_id: <string> [ client_id: <string> ]
# The client secret. # Optional client secret. Only required with authentication_method OAuth.
client_secret: <secret> [ client_secret: <secret> ]
# Refresh interval to re-read the instance list. # Refresh interval to re-read the instance list.
[ refresh_interval: <duration> | default = 300s ] [ refresh_interval: <duration> | default = 300s ]
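For instance, with the new `authentication_method`, a VM that runs Prometheus under a managed identity no longer needs client credentials at all. A minimal sketch (the subscription ID and port are illustrative):

```yaml
azure_sd_configs:
  - authentication_method: ManagedIdentity
    # Only the subscription ID is required with ManagedIdentity;
    # tenant_id, client_id, and client_secret can be omitted.
    subscription_id: 11111111-2222-3333-4444-555555555555
    port: 9100
```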
@ -300,6 +317,7 @@ The following meta labels are available on targets during [relabeling](#relabel_
* `__meta_consul_address`: the address of the target * `__meta_consul_address`: the address of the target
* `__meta_consul_dc`: the datacenter name for the target * `__meta_consul_dc`: the datacenter name for the target
* `__meta_consul_tagged_address_<key>`: each node tagged address key value of the target
* `__meta_consul_metadata_<key>`: each node metadata key value of the target * `__meta_consul_metadata_<key>`: each node metadata key value of the target
* `__meta_consul_node`: the node name defined for the target * `__meta_consul_node`: the node name defined for the target
* `__meta_consul_service_address`: the service address of the target * `__meta_consul_service_address`: the service address of the target
@ -330,8 +348,9 @@ services:
# See https://www.consul.io/api/catalog.html#list-nodes-for-service to know more # See https://www.consul.io/api/catalog.html#list-nodes-for-service to know more
# about the possible filters that can be used. # about the possible filters that can be used.
# An optional tag used to filter nodes for a given service. # An optional list of tags used to filter nodes for a given service. Services must contain all tags in the list.
[ tag: <string> ] tags:
[ - <string> ]
# Node metadata used to filter nodes for a given service. # Node metadata used to filter nodes for a given service.
[ node_meta: [ node_meta:
@ -340,7 +359,7 @@ services:
# The string by which Consul tags are joined into the tag label. # The string by which Consul tags are joined into the tag label.
[ tag_separator: <string> | default = , ] [ tag_separator: <string> | default = , ]
# Allow stale Consul results (see https://www.consul.io/api/index.html#consistency-modes). Will reduce load on Consul. # Allow stale Consul results (see https://www.consul.io/api/features/consistency.html). Will reduce load on Consul.
[ allow_stale: <bool> ] [ allow_stale: <bool> ]
# The time after which the provided names are refreshed. # The time after which the provided names are refreshed.
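Since `tag` is replaced by a list, a service can now be filtered on several tags at once. A hedged sketch (server address, service, and tag names are illustrative):

```yaml
consul_sd_configs:
  - server: 'localhost:8500'
    services: ['web']
    # Only nodes whose 'web' service carries both tags are selected.
    tags: ['production', 'canary']
    allow_stale: true
```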
@ -407,6 +426,7 @@ The following meta labels are available on targets during [relabeling](#relabel_
* `__meta_ec2_owner_id`: the ID of the AWS account that owns the EC2 instance * `__meta_ec2_owner_id`: the ID of the AWS account that owns the EC2 instance
* `__meta_ec2_platform`: the Operating System platform, set to 'windows' on Windows servers, absent otherwise * `__meta_ec2_platform`: the Operating System platform, set to 'windows' on Windows servers, absent otherwise
* `__meta_ec2_primary_subnet_id`: the subnet ID of the primary network interface, if available * `__meta_ec2_primary_subnet_id`: the subnet ID of the primary network interface, if available
* `__meta_ec2_private_dns_name`: the private DNS name of the instance, if available
* `__meta_ec2_private_ip`: the private IP address of the instance, if present * `__meta_ec2_private_ip`: the private IP address of the instance, if present
* `__meta_ec2_public_dns_name`: the public DNS name of the instance, if available * `__meta_ec2_public_dns_name`: the public DNS name of the instance, if available
* `__meta_ec2_public_ip`: the public IP address of the instance, if available * `__meta_ec2_public_ip`: the public IP address of the instance, if available
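As a sketch of how the new meta label might be used (purely illustrative), a relabel rule can rewrite the default `instance` label to the instance's private DNS name:

```yaml
relabel_configs:
  - source_labels: [__meta_ec2_private_dns_name]
    # Only rewrite the instance label when a private DNS name exists.
    regex: '(.+)'
    target_label: instance
```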
@ -484,14 +504,16 @@ interface.
The following meta labels are available on targets during [relabeling](#relabel_config): The following meta labels are available on targets during [relabeling](#relabel_config):
* `__meta_openstack_address_pool`: the pool of the private IP.
* `__meta_openstack_instance_flavor`: the flavor of the OpenStack instance.
* `__meta_openstack_instance_id`: the OpenStack instance ID. * `__meta_openstack_instance_id`: the OpenStack instance ID.
* `__meta_openstack_instance_name`: the OpenStack instance name. * `__meta_openstack_instance_name`: the OpenStack instance name.
* `__meta_openstack_instance_status`: the status of the OpenStack instance. * `__meta_openstack_instance_status`: the status of the OpenStack instance.
* `__meta_openstack_instance_flavor`: the flavor of the OpenStack instance.
* `__meta_openstack_public_ip`: the public IP of the OpenStack instance.
* `__meta_openstack_private_ip`: the private IP of the OpenStack instance. * `__meta_openstack_private_ip`: the private IP of the OpenStack instance.
* `__meta_openstack_address_pool`: the pool of the private IP. * `__meta_openstack_project_id`: the project (tenant) owning this instance.
* `__meta_openstack_public_ip`: the public IP of the OpenStack instance.
* `__meta_openstack_tag_<tagkey>`: each tag value of the instance. * `__meta_openstack_tag_<tagkey>`: each tag value of the instance.
* `__meta_openstack_user_id`: the user account owning the tenant.
See below for the configuration options for OpenStack discovery: See below for the configuration options for OpenStack discovery:
@ -532,6 +554,17 @@ region: <string>
[ project_name: <string> ] [ project_name: <string> ]
[ project_id: <string> ] [ project_id: <string> ]
# The application_credential_id or application_credential_name fields are
# required if using an application credential to authenticate. Some providers
# allow you to create an application credential to authenticate rather than a
# password.
[ application_credential_name: <string> ]
[ application_credential_id: <string> ]
# The application_credential_secret field is required if using an application
# credential to authenticate.
[ application_credential_secret: <secret> ]
# Whether the service discovery should list all instances for all projects. # Whether the service discovery should list all instances for all projects.
# It is only relevant for the 'instance' role and usually requires admin permissions. # It is only relevant for the 'instance' role and usually requires admin permissions.
[ all_tenants: <boolean> | default: false ] [ all_tenants: <boolean> | default: false ]
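A hedged sketch of instance discovery that authenticates with an application credential instead of a password (the endpoint and credential values are illustrative):

```yaml
openstack_sd_configs:
  - role: instance
    region: RegionOne
    identity_endpoint: https://keystone.example.org:5000/v3
    application_credential_id: 0a1b2c3d4e5f
    application_credential_secret: 'example-secret'
```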
@ -659,7 +692,7 @@ service account and place the credential file in one of the expected locations.
### `<kubernetes_sd_config>` ### `<kubernetes_sd_config>`
Kubernetes SD configurations allow retrieving scrape targets from Kubernetes SD configurations allow retrieving scrape targets from
[Kubernetes'](http://kubernetes.io/) REST API and always staying synchronized with [Kubernetes'](https://kubernetes.io/) REST API and always staying synchronized with
the cluster state. the cluster state.
One of the following `role` types can be configured to discover targets: One of the following `role` types can be configured to discover targets:
@ -676,7 +709,9 @@ Available meta labels:
* `__meta_kubernetes_node_name`: The name of the node object. * `__meta_kubernetes_node_name`: The name of the node object.
* `__meta_kubernetes_node_label_<labelname>`: Each label from the node object. * `__meta_kubernetes_node_label_<labelname>`: Each label from the node object.
* `__meta_kubernetes_node_labelpresent_<labelname>`: `true` for each label from the node object.
* `__meta_kubernetes_node_annotation_<annotationname>`: Each annotation from the node object. * `__meta_kubernetes_node_annotation_<annotationname>`: Each annotation from the node object.
* `__meta_kubernetes_node_annotationpresent_<annotationname>`: `true` for each annotation from the node object.
* `__meta_kubernetes_node_address_<address_type>`: The first address for each node address type, if it exists. * `__meta_kubernetes_node_address_<address_type>`: The first address for each node address type, if it exists.
In addition, the `instance` label for the node will be set to the node name In addition, the `instance` label for the node will be set to the node name
@ -692,9 +727,13 @@ service port.
Available meta labels: Available meta labels:
* `__meta_kubernetes_namespace`: The namespace of the service object. * `__meta_kubernetes_namespace`: The namespace of the service object.
* `__meta_kubernetes_service_annotation_<annotationname>`: Each annotation from the service object.
* `__meta_kubernetes_service_annotationpresent_<annotationname>`: `true` for each annotation of the service object.
* `__meta_kubernetes_service_cluster_ip`: The cluster IP address of the service. (Does not apply to services of type ExternalName)
* `__meta_kubernetes_service_external_name`: The DNS name of the service. (Applies to services of type ExternalName)
* `__meta_kubernetes_service_label_<labelname>`: Each label from the service object.
* `__meta_kubernetes_service_labelpresent_<labelname>`: `true` for each label of the service object.
* `__meta_kubernetes_service_name`: The name of the service object. * `__meta_kubernetes_service_name`: The name of the service object.
* `__meta_kubernetes_service_label_<labelname>`: The label of the service object.
* `__meta_kubernetes_service_annotation_<annotationname>`: The annotation of the service object.
* `__meta_kubernetes_service_port_name`: Name of the service port for the target. * `__meta_kubernetes_service_port_name`: Name of the service port for the target.
* `__meta_kubernetes_service_port_number`: Number of the service port for the target. * `__meta_kubernetes_service_port_number`: Number of the service port for the target.
* `__meta_kubernetes_service_port_protocol`: Protocol of the service port for the target. * `__meta_kubernetes_service_port_protocol`: Protocol of the service port for the target.
@ -710,8 +749,11 @@ Available meta labels:
* `__meta_kubernetes_namespace`: The namespace of the pod object. * `__meta_kubernetes_namespace`: The namespace of the pod object.
* `__meta_kubernetes_pod_name`: The name of the pod object. * `__meta_kubernetes_pod_name`: The name of the pod object.
* `__meta_kubernetes_pod_ip`: The pod IP of the pod object. * `__meta_kubernetes_pod_ip`: The pod IP of the pod object.
* `__meta_kubernetes_pod_label_<labelname>`: The label of the pod object. * `__meta_kubernetes_pod_label_<labelname>`: Each label from the pod object.
* `__meta_kubernetes_pod_annotation_<annotationname>`: The annotation of the pod object. * `__meta_kubernetes_pod_labelpresent_<labelname>`: `true` for each label from the pod object.
* `__meta_kubernetes_pod_annotation_<annotationname>`: Each annotation from the pod object.
* `__meta_kubernetes_pod_annotationpresent_<annotationname>`: `true` for each annotation from the pod object.
* `__meta_kubernetes_pod_container_init`: `true` if the container is an [InitContainer](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/)
* `__meta_kubernetes_pod_container_name`: Name of the container the target address points to. * `__meta_kubernetes_pod_container_name`: Name of the container the target address points to.
* `__meta_kubernetes_pod_container_port_name`: Name of the container port. * `__meta_kubernetes_pod_container_port_name`: Name of the container port.
* `__meta_kubernetes_pod_container_port_number`: Number of the container port. * `__meta_kubernetes_pod_container_port_number`: Number of the container port.
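The new `*labelpresent*` and `*annotationpresent*` meta labels make it possible to select objects by the mere presence of a label, regardless of its value. A minimal sketch that keeps only pods carrying an `app` label (the label name is illustrative):

```yaml
relabel_configs:
  - source_labels: [__meta_kubernetes_pod_labelpresent_app]
    regex: 'true'
    action: keep
```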
@ -737,6 +779,8 @@ Available meta labels:
* `__meta_kubernetes_endpoints_name`: The names of the endpoints object. * `__meta_kubernetes_endpoints_name`: The names of the endpoints object.
* For all targets discovered directly from the endpoints list (those not additionally inferred * For all targets discovered directly from the endpoints list (those not additionally inferred
from underlying pods), the following labels are attached: from underlying pods), the following labels are attached:
* `__meta_kubernetes_endpoint_hostname`: Hostname of the endpoint.
* `__meta_kubernetes_endpoint_node_name`: Name of the node hosting the endpoint.
* `__meta_kubernetes_endpoint_ready`: Set to `true` or `false` for the endpoint's ready state. * `__meta_kubernetes_endpoint_ready`: Set to `true` or `false` for the endpoint's ready state.
* `__meta_kubernetes_endpoint_port_name`: Name of the endpoint port. * `__meta_kubernetes_endpoint_port_name`: Name of the endpoint port.
* `__meta_kubernetes_endpoint_port_protocol`: Protocol of the endpoint port. * `__meta_kubernetes_endpoint_port_protocol`: Protocol of the endpoint port.
@ -755,8 +799,10 @@ Available meta labels:
* `__meta_kubernetes_namespace`: The namespace of the ingress object. * `__meta_kubernetes_namespace`: The namespace of the ingress object.
* `__meta_kubernetes_ingress_name`: The name of the ingress object. * `__meta_kubernetes_ingress_name`: The name of the ingress object.
* `__meta_kubernetes_ingress_label_<labelname>`: The label of the ingress object. * `__meta_kubernetes_ingress_label_<labelname>`: Each label from the ingress object.
* `__meta_kubernetes_ingress_annotation_<annotationname>`: The annotation of the ingress object. * `__meta_kubernetes_ingress_labelpresent_<labelname>`: `true` for each label from the ingress object.
* `__meta_kubernetes_ingress_annotation_<annotationname>`: Each annotation from the ingress object.
* `__meta_kubernetes_ingress_annotationpresent_<annotationname>`: `true` for each annotation from the ingress object.
* `__meta_kubernetes_ingress_scheme`: Protocol scheme of ingress, `https` if TLS * `__meta_kubernetes_ingress_scheme`: Protocol scheme of ingress, `https` if TLS
config is set. Defaults to `http`. config is set. Defaults to `http`.
* `__meta_kubernetes_ingress_path`: Path from ingress spec. Defaults to `/`. * `__meta_kubernetes_ingress_path`: Path from ingress spec. Defaults to `/`.
@ -791,6 +837,9 @@ basic_auth:
# Optional bearer token file authentication information. # Optional bearer token file authentication information.
[ bearer_token_file: <filename> ] [ bearer_token_file: <filename> ]
# Optional proxy URL.
[ proxy_url: <string> ]
# TLS configuration. # TLS configuration.
tls_config: tls_config:
[ <tls_config> ] [ <tls_config> ]
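For clusters that are only reachable through an egress proxy, the new `proxy_url` setting can be combined with an explicit API server address. A sketch under that assumption (both URLs are illustrative):

```yaml
kubernetes_sd_configs:
  - role: pod
    api_server: https://kubeapi.example.org
    proxy_url: http://proxy.example.org:3128
```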
@ -914,7 +963,7 @@ Serverset SD configurations allow retrieving scrape targets from [Serversets]
(https://github.com/twitter/finagle/tree/master/finagle-serversets) which are (https://github.com/twitter/finagle/tree/master/finagle-serversets) which are
stored in [Zookeeper](https://zookeeper.apache.org/). Serversets are commonly stored in [Zookeeper](https://zookeeper.apache.org/). Serversets are commonly
used by [Finagle](https://twitter.github.io/finagle/) and used by [Finagle](https://twitter.github.io/finagle/) and
[Aurora](http://aurora.apache.org/). [Aurora](https://aurora.apache.org/).
The following meta labels are available on targets during relabeling: The following meta labels are available on targets during relabeling:
@ -974,7 +1023,7 @@ groups:
# The port to use for discovery and metric scraping. # The port to use for discovery and metric scraping.
[ port: <int> | default = 9163 ] [ port: <int> | default = 9163 ]
# The interval which should should be used for refreshing target containers. # The interval which should be used for refreshing target containers.
[ refresh_interval: <duration> | default = 60s ] [ refresh_interval: <duration> | default = 60s ]
# The Triton discovery API version. # The Triton discovery API version.
@ -1020,7 +1069,8 @@ Additional labels prefixed with `__meta_` may be available during the
relabeling phase. They are set by the service discovery mechanism that provided relabeling phase. They are set by the service discovery mechanism that provided
the target and vary between mechanisms. the target and vary between mechanisms.
Labels starting with `__` will be removed from the label set after relabeling is completed. Labels starting with `__` will be removed from the label set after target
relabeling is completed.
If a relabeling step needs to store a label value only temporarily (as the If a relabeling step needs to store a label value only temporarily (as the
input to a subsequent relabeling step), use the `__tmp` label name prefix. This input to a subsequent relabeling step), use the `__tmp` label name prefix. This
@ -1242,12 +1292,12 @@ queue_config:
[ capacity: <int> | default = 10000 ] [ capacity: <int> | default = 10000 ]
# Maximum number of shards, i.e. amount of concurrency. # Maximum number of shards, i.e. amount of concurrency.
[ max_shards: <int> | default = 1000 ] [ max_shards: <int> | default = 1000 ]
# Minimum number of shards, i.e. amount of concurrency.
[ min_shards: <int> | default = 1 ]
# Maximum number of samples per send. # Maximum number of samples per send.
[ max_samples_per_send: <int> | default = 100 ] [ max_samples_per_send: <int> | default = 100 ]
# Maximum time a sample will wait in buffer. # Maximum time a sample will wait in buffer.
[ batch_send_deadline: <duration> | default = 5s ] [ batch_send_deadline: <duration> | default = 5s ]
# Maximum number of times to retry a batch on recoverable errors.
[ max_retries: <int> | default = 3 ]
# Initial retry delay. Gets doubled for every retry. # Initial retry delay. Gets doubled for every retry.
[ min_backoff: <duration> | default = 30ms ] [ min_backoff: <duration> | default = 30ms ]
# Maximum retry delay. # Maximum retry delay.
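A hedged sketch of a `remote_write` section that spells out the queue parameters above, including the new `min_shards` setting (the endpoint URL is illustrative, and the values simply mirror the documented defaults):

```yaml
remote_write:
  - url: https://remote-storage.example.org/api/v1/write
    queue_config:
      capacity: 10000
      min_shards: 1
      max_shards: 1000
      max_samples_per_send: 100
      batch_send_deadline: 5s
      min_backoff: 30ms
      max_backoff: 100ms
```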

View file

@ -9,7 +9,7 @@ Prometheus supports templating in the annotations and labels of alerts,
as well as in served console pages. Templates have the ability to run as well as in served console pages. Templates have the ability to run
queries against the local database, iterate over data, use conditionals, queries against the local database, iterate over data, use conditionals,
format data, etc. The Prometheus templating language is based on the [Go format data, etc. The Prometheus templating language is based on the [Go
templating](http://golang.org/pkg/text/template/) system. templating](https://golang.org/pkg/text/template/) system.
## Simple alert field templates ## Simple alert field templates
@ -60,7 +60,7 @@ formatting of results, and linking to the [expression browser](https://prometheu
```go ```go
{{ with printf "node_memory_MemTotal{job='node',instance='%s'}" .Params.instance | query }} {{ with printf "node_memory_MemTotal{job='node',instance='%s'}" .Params.instance | query }}
{{ . | first | value | humanize1024}}B {{ . | first | value | humanize1024 }}B
{{ end }} {{ end }}
``` ```
@ -80,7 +80,7 @@ If accessed as `console.html?instance=hostname`, `.Params.instance` will evaluat
<td>Transmitted</td> <td>Transmitted</td>
<td>{{ with printf "rate(node_network_transmit_bytes{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device | query }}{{ . | first | value | humanize }}B/s{{end}}</td> <td>{{ with printf "rate(node_network_transmit_bytes{job='node',instance='%s',device='%s'}[5m])" .Labels.instance .Labels.device | query }}{{ . | first | value | humanize }}B/s{{end}}</td>
</tr>{{ end }} </tr>{{ end }}
<table> </table>
``` ```
Here we iterate over all network devices and display the network traffic for each. Here we iterate over all network devices and display the network traffic for each.

View file

@ -9,7 +9,7 @@ Prometheus supports templating in the annotations and labels of alerts,
as well as in served console pages. Templates have the ability to run as well as in served console pages. Templates have the ability to run
queries against the local database, iterate over data, use conditionals, queries against the local database, iterate over data, use conditionals,
format data, etc. The Prometheus templating language is based on the [Go format data, etc. The Prometheus templating language is based on the [Go
templating](http://golang.org/pkg/text/template/) system. templating](https://golang.org/pkg/text/template/) system.
## Data Structures ## Data Structures
@ -31,7 +31,7 @@ The metric name of the sample is encoded in a special `__name__` label in the `L
## Functions ## Functions
In addition to the [default In addition to the [default
functions](http://golang.org/pkg/text/template/#hdr-Functions) provided by Go functions](https://golang.org/pkg/text/template/#hdr-Functions) provided by Go
templating, Prometheus provides functions for easier processing of query templating, Prometheus provides functions for easier processing of query
results in templates. results in templates.
@ -53,9 +53,10 @@ If functions are used in a pipeline, the pipeline value is passed as the last ar
| Name | Arguments | Returns | Notes | | Name | Arguments | Returns | Notes |
| ------------- | --------------| --------| --------- | | ------------- | --------------| --------| --------- |
| humanize | number | string | Converts a number to a more readable format, using [metric prefixes](http://en.wikipedia.org/wiki/Metric_prefix). | humanize | number | string | Converts a number to a more readable format, using [metric prefixes](https://en.wikipedia.org/wiki/Metric_prefix).
| humanize1024 | number | string | Like `humanize`, but uses 1024 as the base rather than 1000. | | humanize1024 | number | string | Like `humanize`, but uses 1024 as the base rather than 1000. |
| humanizeDuration | number | string | Converts a duration in seconds to a more readable format. | | humanizeDuration | number | string | Converts a duration in seconds to a more readable format. |
| humanizePercentage | number | string | Converts a ratio value to a fraction of 100. |
| humanizeTimestamp | number | string | Converts a Unix timestamp in seconds to a more readable format. | | humanizeTimestamp | number | string | Converts a Unix timestamp in seconds to a more readable format. |
Humanizing functions are intended to produce reasonable output for consumption Humanizing functions are intended to produce reasonable output for consumption
@ -66,11 +67,11 @@ versions.
| Name | Arguments | Returns | Notes | | Name | Arguments | Returns | Notes |
| ------------- | ------------- | ------- | ----------- | | ------------- | ------------- | ------- | ----------- |
| title | string | string | [strings.Title](http://golang.org/pkg/strings/#Title), capitalises first character of each word.| | title | string | string | [strings.Title](https://golang.org/pkg/strings/#Title), capitalises first character of each word.|
| toUpper | string | string | [strings.ToUpper](http://golang.org/pkg/strings/#ToUpper), converts all characters to upper case.| | toUpper | string | string | [strings.ToUpper](https://golang.org/pkg/strings/#ToUpper), converts all characters to upper case.|
| toLower | string | string | [strings.ToLower](http://golang.org/pkg/strings/#ToLower), converts all characters to lower case.| | toLower | string | string | [strings.ToLower](https://golang.org/pkg/strings/#ToLower), converts all characters to lower case.|
| match | pattern, text | boolean | [regexp.MatchString](http://golang.org/pkg/regexp/#MatchString) Tests for an unanchored regexp match. | | match | pattern, text | boolean | [regexp.MatchString](https://golang.org/pkg/regexp/#MatchString) Tests for an unanchored regexp match. |
| reReplaceAll | pattern, replacement, text | string | [Regexp.ReplaceAllString](http://golang.org/pkg/regexp/#Regexp.ReplaceAllString) Regexp substitution, unanchored. | | reReplaceAll | pattern, replacement, text | string | [Regexp.ReplaceAllString](https://golang.org/pkg/regexp/#Regexp.ReplaceAllString) Regexp substitution, unanchored. |
| graphLink | expr | string | Returns path to graph view in the [expression browser](https://prometheus.io/docs/visualization/browser/) for the expression. | | graphLink | expr | string | Returns path to graph view in the [expression browser](https://prometheus.io/docs/visualization/browser/) for the expression. |
| tableLink | expr | string | Returns path to tabular ("Console") view in the [expression browser](https://prometheus.io/docs/visualization/browser/) for the expression. | | tableLink | expr | string | Returns path to tabular ("Console") view in the [expression browser](https://prometheus.io/docs/visualization/browser/) for the expression. |
@ -89,8 +90,10 @@ parameterize templates, and have a few other differences.
### Alert field templates ### Alert field templates
`.Value` and `.Labels` contain the alert value and labels. They are also exposed `.Value`, `.Labels`, and `.ExternalLabels` contain the alert value, the alert
as the `$value` and `$labels` variables for convenience. labels, and the globally configured external labels, respectively. They are
also exposed as the `$value`, `$labels`, and `$externalLabels` variables for
convenience.
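For example, a rule annotation can combine all three; a sketch assuming a `cluster` external label is configured:

```yaml
groups:
  - name: example
    rules:
      - alert: HighRequestLatency
        expr: job:request_latency_seconds:mean5m{job="myjob"} > 0.5
        for: 10m
        annotations:
          summary: 'Latency is {{ $value }}s on {{ $labels.instance }} in cluster {{ $externalLabels.cluster }}'
```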
### Console templates ### Console templates
@ -98,13 +101,15 @@ Consoles are exposed on `/consoles/`, and sourced from the directory pointed to
by the `--web.console.templates` flag. by the `--web.console.templates` flag.
Console templates are rendered with Console templates are rendered with
[html/template](http://golang.org/pkg/html/template/), which provides [html/template](https://golang.org/pkg/html/template/), which provides
auto-escaping. To bypass the auto-escaping use the `safe*` functions. auto-escaping. To bypass the auto-escaping use the `safe*` functions.
URL parameters are available as a map in `.Params`. To access multiple URL URL parameters are available as a map in `.Params`. To access multiple URL
parameters by the same name, `.RawParams` is a map of the list values for each parameters by the same name, `.RawParams` is a map of the list values for each
parameter. The URL path is available in `.Path`, excluding the `/consoles/` parameter. The URL path is available in `.Path`, excluding the `/consoles/`
prefix. prefix. The globally configured external labels are available as
`.ExternalLabels`. There are also convenience variables for all four:
`$rawParams`, `$params`, `$path`, and `$externalLabels`.
Consoles also have access to all the templates defined with `{{define Consoles also have access to all the templates defined with `{{define
"templateName"}}...{{end}}` found in `*.lib` files in the directory pointed to "templateName"}}...{{end}}` found in `*.lib` files in the directory pointed to

View file

@ -18,7 +18,7 @@ You can use `promtool` to test your rules.
## Test file format ## Test file format
```yaml ```yaml
# This is a list of rule files to consider for testing. # This is a list of rule files to consider for testing. Globs are supported.
rule_files: rule_files:
[ - <file_name> ] [ - <file_name> ]

View file

@ -222,8 +222,7 @@ groups:
expr: avg(rate(rpc_durations_seconds_count[5m])) by (job, service) expr: avg(rate(rpc_durations_seconds_count[5m])) by (job, service)
``` ```
To make Prometheus pick up this new rule, add a `rule_files` statement to the To make Prometheus pick up this new rule, add a `rule_files` statement in your `prometheus.yml`. The config should now
`global` configuration section in your `prometheus.yml`. The config should now
look like this: look like this:
```yaml ```yaml

View file

@ -2,12 +2,12 @@
# todo: internal # todo: internal
--- ---
# Prometheus 2.0 # Prometheus
Welcome to the documentation of the Prometheus server. Welcome to the documentation of the Prometheus server.
The documentation is available alongside all the project documentation at The documentation is available alongside all the project documentation at
[prometheus.io](https://prometheus.io/docs/prometheus/2.0/). [prometheus.io](https://prometheus.io/docs/prometheus/latest/).
## Content ## Content

View file

@ -20,7 +20,7 @@ the respective repository.
All Prometheus services are available as Docker images on All Prometheus services are available as Docker images on
[Quay.io](https://quay.io/repository/prometheus/prometheus) or [Quay.io](https://quay.io/repository/prometheus/prometheus) or
[Docker Hub[(https://hub.docker.com/u/prom/). [Docker Hub](https://hub.docker.com/u/prom/).
Running Prometheus on Docker is as simple as `docker run -p 9090:9090 Running Prometheus on Docker is as simple as `docker run -p 9090:9090
prom/prometheus`. This starts Prometheus with a sample prom/prometheus`. This starts Prometheus with a sample

View file

@ -18,12 +18,16 @@ and one of the following HTTP response codes:
- `400 Bad Request` when parameters are missing or incorrect. - `400 Bad Request` when parameters are missing or incorrect.
- `422 Unprocessable Entity` when an expression can't be executed - `422 Unprocessable Entity` when an expression can't be executed
([RFC4918](http://tools.ietf.org/html/rfc4918#page-78)). ([RFC4918](https://tools.ietf.org/html/rfc4918#page-78)).
- `503 Service Unavailable` when queries time out or abort. - `503 Service Unavailable` when queries time out or abort.
Other non-`2xx` codes may be returned for errors occurring before the API Other non-`2xx` codes may be returned for errors occurring before the API
endpoint is reached. endpoint is reached.
An array of warnings may be returned if there are errors that do
not inhibit the request execution. All of the data that was successfully
collected will be returned in the data field.
The JSON response envelope format is as follows: The JSON response envelope format is as follows:
``` ```
@ -34,7 +38,11 @@ The JSON response envelope format is as follows:
// Only set if status is "error". The data field may still hold // Only set if status is "error". The data field may still hold
// additional data. // additional data.
"errorType": "<string>", "errorType": "<string>",
"error": "<string>" "error": "<string>",
// Only if there were warnings while executing the request.
// There will still be data in the data field.
"warnings": ["<string>"]
} }
``` ```
@ -66,6 +74,7 @@ The following endpoint evaluates an instant query at a single point in time:
``` ```
GET /api/v1/query GET /api/v1/query
POST /api/v1/query
``` ```
URL query parameters: URL query parameters:
@ -77,6 +86,10 @@ URL query parameters:
The current server time is used if the `time` parameter is omitted. The current server time is used if the `time` parameter is omitted.
You can URL-encode these parameters directly in the request body by using the `POST` method and
`Content-Type: application/x-www-form-urlencoded` header. This is useful when specifying a large
query that may breach server-side URL character limits.
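For example, `curl http://localhost:9090/api/v1/query --data-urlencode 'query=up'` submits the same instant query as a form-encoded `POST` body instead of a URL parameter.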
The `data` section of the query result has the following format: The `data` section of the query result has the following format:
``` ```
@ -127,6 +140,7 @@ The following endpoint evaluates an expression query over a range of time:
``` ```
GET /api/v1/query_range GET /api/v1/query_range
POST /api/v1/query_range
``` ```
URL query parameters: URL query parameters:
@ -138,6 +152,10 @@ URL query parameters:
- `timeout=<duration>`: Evaluation timeout. Optional. Defaults to and - `timeout=<duration>`: Evaluation timeout. Optional. Defaults to and
is capped by the value of the `--query.timeout` flag. is capped by the value of the `--query.timeout` flag.
You can URL-encode these parameters directly in the request body by using the `POST` method and
`Content-Type: application/x-www-form-urlencoded` header. This is useful when specifying a large
query that may breach server-side URL character limits.
The `data` section of the query result has the following format: The `data` section of the query result has the following format:
``` ```
@ -197,6 +215,7 @@ The following endpoint returns the list of time series that match a certain labe
``` ```
GET /api/v1/series GET /api/v1/series
POST /api/v1/series
``` ```
URL query parameters: URL query parameters:
@ -206,6 +225,10 @@ URL query parameters:
- `start=<rfc3339 | unix_timestamp>`: Start timestamp. - `start=<rfc3339 | unix_timestamp>`: Start timestamp.
- `end=<rfc3339 | unix_timestamp>`: End timestamp. - `end=<rfc3339 | unix_timestamp>`: End timestamp.
You can URL-encode these parameters directly in the request body by using the `POST` method and
`Content-Type: application/x-www-form-urlencoded` header. This is useful when specifying a large
or dynamic number of series selectors that may breach server-side URL character limits.
The `data` section of the query result consists of a list of objects that The `data` section of the query result consists of a list of objects that
contain the label name/value pairs which identify each series. contain the label name/value pairs which identify each series.
@ -213,7 +236,7 @@ The following example returns all series that match either of the selectors
`up` or `process_start_time_seconds{job="prometheus"}`: `up` or `process_start_time_seconds{job="prometheus"}`:
```json ```json
$ curl -g 'http://localhost:9090/api/v1/series?match[]=up&match[]=process_start_time_seconds{job="prometheus"}' $ curl -g 'http://localhost:9090/api/v1/series?' --data-urlencode='match[]=up' --data-urlencode='match[]=process_start_time_seconds{job="prometheus"}'
{ {
"status" : "success", "status" : "success",
"data" : [ "data" : [
@ -236,6 +259,49 @@ $ curl -g 'http://localhost:9090/api/v1/series?match[]=up&match[]=process_start_
} }
``` ```
### Getting label names
The following endpoint returns a list of label names:
```
GET /api/v1/labels
POST /api/v1/labels
```
The `data` section of the JSON response is a list of string label names.
Here is an example.
```json
$ curl 'localhost:9090/api/v1/labels'
{
"status": "success",
"data": [
"__name__",
"call",
"code",
"config",
"dialer_name",
"endpoint",
"event",
"goversion",
"handler",
"instance",
"interval",
"job",
"le",
"listener_name",
"name",
"quantile",
"reason",
"role",
"scrape_job",
"slice",
"version"
]
}
```
### Querying label values ### Querying label values
The following endpoint returns a list of label values for a provided label name: The following endpoint returns a list of label values for a provided label name:
@ -244,7 +310,7 @@ The following endpoint returns a list of label values for a provided label name:
GET /api/v1/label/<label_name>/values GET /api/v1/label/<label_name>/values
``` ```
The `data` section of the JSON response is a list of string label names. The `data` section of the JSON response is a list of string label values.
This example queries for all label values for the `job` label: This example queries for all label values for the `job` label:
@ -399,7 +465,7 @@ $ curl http://localhost:9090/api/v1/rules
"severity": "page" "severity": "page"
}, },
"state": "firing", "state": "firing",
"value": 1 "value": "1e+00"
} }
], ],
"annotations": { "annotations": {
@ -456,7 +522,7 @@ $ curl http://localhost:9090/api/v1/alerts
"alertname": "my-alert" "alertname": "my-alert"
}, },
"state": "firing", "state": "firing",
"value": 1 "value": "1e+00"
} }
] ]
}, },
@ -642,9 +708,14 @@ Snapshot creates a snapshot of all current data into `snapshots/<datetime>-<rand
It will optionally skip snapshotting data that is only present in the head block, and which has not yet been compacted to disk. It will optionally skip snapshotting data that is only present in the head block, and which has not yet been compacted to disk.
``` ```
POST /api/v1/admin/tsdb/snapshot?skip_head=<bool> POST /api/v1/admin/tsdb/snapshot
PUT /api/v1/admin/tsdb/snapshot
``` ```
URL query parameters:
- `skip_head=<bool>`: Skip data present in the head block. Optional.
```json ```json
$ curl -XPOST http://localhost:9090/api/v1/admin/tsdb/snapshot $ curl -XPOST http://localhost:9090/api/v1/admin/tsdb/snapshot
{ {
@ -654,10 +725,9 @@ $ curl -XPOST http://localhost:9090/api/v1/admin/tsdb/snapshot
} }
} }
``` ```
The snapshot now exists at `<data-dir>/snapshots/20171210T211224Z-2be650b6d019eb54` The snapshot now exists at `<data-dir>/snapshots/20171210T211224Z-2be650b6d019eb54`
*New in v2.1* *New in v2.1 and supports PUT from v2.9*
### Delete Series ### Delete Series
DeleteSeries deletes data for a selection of series in a time range. The actual data still exists on disk and is cleaned up in future compactions or can be explicitly cleaned up by hitting the Clean Tombstones endpoint. DeleteSeries deletes data for a selection of series in a time range. The actual data still exists on disk and is cleaned up in future compactions or can be explicitly cleaned up by hitting the Clean Tombstones endpoint.
@ -666,6 +736,7 @@ If successful, a `204` is returned.
``` ```
POST /api/v1/admin/tsdb/delete_series POST /api/v1/admin/tsdb/delete_series
PUT /api/v1/admin/tsdb/delete_series
``` ```
URL query parameters: URL query parameters:
@ -682,7 +753,7 @@ Example:
$ curl -X POST \ $ curl -X POST \
-g 'http://localhost:9090/api/v1/admin/tsdb/delete_series?match[]=up&match[]=process_start_time_seconds{job="prometheus"}' -g 'http://localhost:9090/api/v1/admin/tsdb/delete_series?match[]=up&match[]=process_start_time_seconds{job="prometheus"}'
``` ```
*New in v2.1* *New in v2.1 and supports PUT from v2.9*
### Clean Tombstones ### Clean Tombstones
CleanTombstones removes the deleted data from disk and cleans up the existing tombstones. This can be used after deleting series to free up space. CleanTombstones removes the deleted data from disk and cleans up the existing tombstones. This can be used after deleting series to free up space.
@ -691,6 +762,7 @@ If successful, a `204` is returned.
``` ```
POST /api/v1/admin/tsdb/clean_tombstones POST /api/v1/admin/tsdb/clean_tombstones
PUT /api/v1/admin/tsdb/clean_tombstones
``` ```
This takes no parameters or body. This takes no parameters or body.
@ -699,4 +771,4 @@ This takes no parameters or body.
$ curl -XPOST http://localhost:9090/api/v1/admin/tsdb/clean_tombstones $ curl -XPOST http://localhost:9090/api/v1/admin/tsdb/clean_tombstones
``` ```
*New in v2.1* *New in v2.1 and supports PUT from v2.9*

View file

@ -6,10 +6,11 @@ sort_rank: 1
# Querying Prometheus # Querying Prometheus
Prometheus provides a functional expression language that lets the user select Prometheus provides a functional query language called PromQL (Prometheus Query
and aggregate time series data in real time. The result of an expression can Language) that lets the user select and aggregate time series data in real
either be shown as a graph, viewed as tabular data in Prometheus's expression time. The result of an expression can either be shown as a graph, viewed as
browser, or consumed by external systems via the [HTTP API](api.md). tabular data in Prometheus's expression browser, or consumed by external
systems via the [HTTP API](api.md).
## Examples ## Examples
@ -87,8 +88,8 @@ against regular expressions. The following label matching operators exist:
* `=`: Select labels that are exactly equal to the provided string. * `=`: Select labels that are exactly equal to the provided string.
* `!=`: Select labels that are not equal to the provided string. * `!=`: Select labels that are not equal to the provided string.
* `=~`: Select labels that regex-match the provided string (or substring). * `=~`: Select labels that regex-match the provided string.
* `!~`: Select labels that do not regex-match the provided string (or substring). * `!~`: Select labels that do not regex-match the provided string.
For example, this selects all `http_requests_total` time series for `staging`, For example, this selects all `http_requests_total` time series for `staging`,
`testing`, and `development` environments and HTTP methods other than `GET`. `testing`, and `development` environments and HTTP methods other than `GET`.
@ -169,6 +170,14 @@ The same works for range vectors. This returns the 5-minutes rate that
rate(http_requests_total[5m] offset 1w) rate(http_requests_total[5m] offset 1w)
## Subquery
Subquery allows you to run an instant query for a given range and resolution. The result of a subquery is a range vector.
Syntax: `<instant_query> '[' <range> ':' [<resolution>] ']' [ offset <duration> ]`
* `<resolution>` is optional. Default is the global evaluation interval.
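For example, `rate(http_requests_total[5m])[30m:1m]` returns the 5-minute rate of `http_requests_total`, evaluated at a resolution of one minute over the last 30 minutes.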
## Operators ## Operators
Prometheus supports many binary and aggregation operators. These are described Prometheus supports many binary and aggregation operators. These are described

Some files were not shown because too many files have changed in this diff