diff --git a/.circleci/config.yml b/.circleci/config.yml index 8210552866..6b5b0bb655 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,17 +6,28 @@ executors: # should also be updated. golang: docker: - - image: circleci/golang:1.11 + - image: circleci/golang:1.12 jobs: test: executor: golang - resource_class: large steps: - checkout - run: make promu - - run: make check_license style unused staticcheck build check_assets + - run: + command: make check_license style unused lint build check_assets + environment: + # Run garbage collection more aggressively to avoid getting OOMed during the lint phase. + GOGC: "20" + - run: + command: | + curl -s -L https://github.com/protocolbuffers/protobuf/releases/download/v3.5.1/protoc-3.5.1-linux-x86_64.zip > /tmp/protoc.zip + unzip -d /tmp /tmp/protoc.zip + chmod +x /tmp/bin/protoc + echo 'export PATH=/tmp/bin:$PATH' >> $BASH_ENV + source $BASH_ENV + make proto - run: git diff --exit-code - store_artifacts: path: prometheus @@ -46,11 +57,11 @@ jobs: steps: - checkout - - setup_remote_docker + - setup_remote_docker: + version: 18.06.0-ce + - run: docker run --privileged linuxkit/binfmt:v0.6 - attach_workspace: at: . - - run: ln -s .build/linux-amd64/prometheus prometheus - - run: ln -s .build/linux-amd64/promtool promtool - run: make docker - run: make docker DOCKER_REPO=quay.io/prometheus - run: docker images @@ -58,16 +69,17 @@ jobs: - run: docker login -u $QUAY_LOGIN -p $QUAY_PASSWORD quay.io - run: make docker-publish - run: make docker-publish DOCKER_REPO=quay.io/prometheus + - run: make docker-manifest + - run: make docker-manifest DOCKER_REPO=quay.io/prometheus docker_hub_release_tags: executor: golang steps: - checkout - - setup_remote_docker - - run: mkdir -v -p ${HOME}/bin - - run: curl -L 'https://github.com/aktau/github-release/releases/download/v0.7.2/linux-amd64-github-release.tar.bz2' | tar xvjf - --strip-components 3 -C ${HOME}/bin - - run: echo 'export PATH=${HOME}/bin:${PATH}' >> ${BASH_ENV} + - setup_remote_docker: + version: 18.06.0-ce + - run: docker run --privileged linuxkit/binfmt:v0.6 - attach_workspace: at: .
- run: make promu @@ -77,19 +89,29 @@ jobs: - store_artifacts: path: .tarballs destination: releases - - run: ln -s .build/linux-amd64/prometheus prometheus - - run: ln -s .build/linux-amd64/promtool promtool - run: make docker DOCKER_IMAGE_TAG=$CIRCLE_TAG - run: make docker DOCKER_IMAGE_TAG=$CIRCLE_TAG DOCKER_REPO=quay.io/prometheus - run: docker login -u $DOCKER_LOGIN -p $DOCKER_PASSWORD - run: docker login -u $QUAY_LOGIN -p $QUAY_PASSWORD quay.io + - run: make docker-publish DOCKER_IMAGE_TAG="$CIRCLE_TAG" + - run: make docker-publish DOCKER_IMAGE_TAG="$CIRCLE_TAG" DOCKER_REPO=quay.io/prometheus + - run: make docker-manifest DOCKER_IMAGE_TAG="$CIRCLE_TAG" + - run: make docker-manifest DOCKER_IMAGE_TAG="$CIRCLE_TAG" DOCKER_REPO=quay.io/prometheus - run: | if [[ "$CIRCLE_TAG" =~ ^v[0-9]+(\.[0-9]+){2}$ ]]; then make docker-tag-latest DOCKER_IMAGE_TAG="$CIRCLE_TAG" make docker-tag-latest DOCKER_IMAGE_TAG="$CIRCLE_TAG" DOCKER_REPO=quay.io/prometheus + make docker-publish DOCKER_IMAGE_TAG="latest" + make docker-publish DOCKER_IMAGE_TAG="latest" DOCKER_REPO=quay.io/prometheus + make docker-manifest DOCKER_IMAGE_TAG="latest" + make docker-manifest DOCKER_IMAGE_TAG="latest" DOCKER_REPO=quay.io/prometheus fi - - run: make docker-publish - - run: make docker-publish DOCKER_REPO=quay.io/prometheus + + makefile_sync: + executor: golang + steps: + - checkout + - run: ./scripts/sync_makefiles.sh workflows: version: 2 @@ -104,6 +126,7 @@ workflows: tags: only: /.*/ - docker_hub_master: + context: org-context requires: - test - build @@ -111,6 +134,7 @@ workflows: branches: only: master - docker_hub_release_tags: + context: org-context requires: - test - build @@ -119,3 +143,14 @@ workflows: only: /^v[0-9]+(\.[0-9]+){2}(-.+|[^-.]*)$/ branches: ignore: /.*/ + nightly: + triggers: + - schedule: + cron: "0 0 * * *" + filters: + branches: + only: + - master + jobs: + - makefile_sync: + context: org-context diff --git a/.dockerignore b/.dockerignore index a4d092b226..07a4d4f571 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,3 +3,5 @@ data/ .tarballs/ !.build/linux-amd64/ +!.build/linux-armv7/ +!.build/linux-arm64/ diff --git a/.github/lock.yml b/.github/lock.yml new file mode 100644 index 0000000000..bed690b33b --- /dev/null +++ b/.github/lock.yml @@ -0,0 +1,35 @@ +# Configuration for Lock Threads - https://github.com/dessant/lock-threads + +# Number of days of inactivity before a closed issue or pull request is locked +daysUntilLock: 180 + +# Skip issues and pull requests created before a given timestamp. Timestamp must +# follow ISO 8601 (`YYYY-MM-DD`). Set to `false` to disable +skipCreatedBefore: false + +# Issues and pull requests with these labels will be ignored. Set to `[]` to disable +exemptLabels: [] + +# Label to add before locking, such as `outdated`. Set to `false` to disable +lockLabel: false + +# Comment to post before locking. Set to `false` to disable +lockComment: false + +# Assign `resolved` as the reason for locking. 
Set to `false` to disable +setLockReason: false + +# Limit to only `issues` or `pulls` +only: issues + +# Optionally, specify configuration settings just for `issues` or `pulls` +# issues: +# exemptLabels: +# - help-wanted +# lockLabel: outdated + +# pulls: +# daysUntilLock: 30 + +# Repository to extend settings from +# _extends: repo diff --git a/.gitignore b/.gitignore index 05ffbb95f2..de38f8dc75 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,7 @@ *# .#* -*-stamp /*.yaml /*.yml -/*.rules *.exe /prometheus @@ -12,12 +10,9 @@ benchmark.txt /data /cmd/prometheus/data /cmd/prometheus/debug -/.build -/.release -/.tarballs -!/circle.yml !/.travis.yml !/.promu.yml +!/.golangci.yml /documentation/examples/remote_storage/remote_storage_adapter/remote_storage_adapter /documentation/examples/remote_storage/example_write_adapter/example_writer_adapter diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000000..1a05236e27 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,13 @@ +run: + modules-download-mode: vendor + deadline: 5m + +issues: + exclude-rules: + - path: _test.go + linters: + - errcheck + +linters-settings: + errcheck: + exclude: scripts/errcheck_excludes.txt diff --git a/.promu.yml b/.promu.yml index 68016677b5..9fc1271d09 100644 --- a/.promu.yml +++ b/.promu.yml @@ -1,7 +1,7 @@ go: # Whenever the Go version is updated here, .travis.yml and # .circle/config.yml should also be updated. - version: 1.11 + version: 1.12 repository: path: github.com/prometheus/prometheus build: @@ -12,11 +12,11 @@ build: path: ./cmd/promtool flags: -mod=vendor -a -tags netgo ldflags: | - -X {{repoPath}}/vendor/github.com/prometheus/common/version.Version={{.Version}} - -X {{repoPath}}/vendor/github.com/prometheus/common/version.Revision={{.Revision}} - -X {{repoPath}}/vendor/github.com/prometheus/common/version.Branch={{.Branch}} - -X {{repoPath}}/vendor/github.com/prometheus/common/version.BuildUser={{user}}@{{host}} - -X {{repoPath}}/vendor/github.com/prometheus/common/version.BuildDate={{date "20060102-15:04:05"}} + -X github.com/prometheus/common/version.Version={{.Version}} + -X github.com/prometheus/common/version.Revision={{.Revision}} + -X github.com/prometheus/common/version.Branch={{.Branch}} + -X github.com/prometheus/common/version.BuildUser={{user}}@{{host}} + -X github.com/prometheus/common/version.BuildDate={{date "20060102-15:04:05"}} tarball: files: - consoles diff --git a/.travis.yml b/.travis.yml index bb704e9cd4..f1e2287aae 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,14 +1,18 @@ -sudo: false - language: go # Whenever the Go version is updated here, .circleci/config.yml and .promu.yml # should also be updated. go: -- 1.11.x +- 1.12.x go_import_path: github.com/prometheus/prometheus +# This ensures that the local cache is filled before running the CI. +# travis_retry retries the command 3 times if it fails as we've experienced +# random issues on Travis. +before_install: +- travis_retry make deps + script: -- make check_license style unused test staticcheck check_assets +- make check_license style unused test lint check_assets - git diff --exit-code diff --git a/CHANGELOG.md b/CHANGELOG.md index d8ead71453..88b8144c92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,185 @@ +## 2.10.0 / 2019-05-25 + +* [CHANGE/BUGFIX] API: Encode alert values as string to correctly represent Inf/NaN. #5582 +* [FEATURE] Template expansion: Make external labels available as `$externalLabels` in alert and console template expansion. 
#5463 +* [FEATURE] TSDB: Add `prometheus_tsdb_wal_segment_current` metric for the WAL segment index that TSDB is currently writing to. tsdb#601 +* [FEATURE] Scrape: Add `scrape_series_added` per-scrape metric. #5546 +* [ENHANCEMENT] Discovery/kubernetes: Add labels `__meta_kubernetes_endpoint_node_name` and `__meta_kubernetes_endpoint_hostname`. #5571 +* [ENHANCEMENT] Discovery/azure: Add label `__meta_azure_machine_public_ip`. #5475 +* [ENHANCEMENT] TSDB: Simplify mergedPostings.Seek, resulting in better performance if there are many posting lists. tsdb#595 +* [ENHANCEMENT] Log filesystem type on startup. #5558 +* [ENHANCEMENT] Cmd/promtool: Use POST requests for Query and QueryRange. client_golang#557 +* [ENHANCEMENT] Web: Sort alerts by group name. #5448 +* [ENHANCEMENT] Console templates: Add convenience variables `$rawParams`, `$params`, `$path`. #5463 +* [BUGFIX] TSDB: Don't panic when running out of disk space and recover nicely from the condition. tsdb#582 +* [BUGFIX] TSDB: Correctly handle empty labels. tsdb#594 +* [BUGFIX] TSDB: Don't crash on an unknown tombstone reference. tsdb#604 +* [BUGFIX] Storage/remote: Remove queue-manager specific metrics if queue no longer exists. #5445 #5485 #5555 +* [BUGFIX] PromQL: Correctly display `{__name__="a"}`. #5552 +* [BUGFIX] Discovery/kubernetes: Use `service` rather than `ingress` as the name for the service workqueue. #5520 +* [BUGFIX] Discovery/azure: Don't panic on a VM with a public IP. #5587 +* [BUGFIX] Discovery/triton: Always read HTTP body to completion. #5596 +* [BUGFIX] Web: Fixed Content-Type for js and css instead of using `/etc/mime.types`. #5551 + +## 2.9.2 / 2019-04-24 + +* [BUGFIX] Make sure subquery range is taken into account for selection #5467 +* [BUGFIX] Exhaust every request body before closing it #5166 +* [BUGFIX] Cmd/promtool: return errors from rule evaluations #5483 +* [BUGFIX] Remote Storage: string interner should not panic in release #5487 +* [BUGFIX] Fix memory allocation regression in mergedPostings.Seek tsdb#586 + +## 2.9.1 / 2019-04-16 + +* [BUGFIX] Discovery/kubernetes: fix missing label sanitization #5462 +* [BUGFIX] Remote_write: Prevent reshard concurrent with calling stop #5460 + +## 2.9.0 / 2019-04-15 + +This release uses Go 1.12, which includes a change in how memory is released +to Linux. This will cause RSS to be reported as higher; however, this is harmless +and the memory is available to the kernel when it needs it. + +* [CHANGE/ENHANCEMENT] Update Consul to support catalog.ServiceMultipleTags. #5151 +* [FEATURE] Add honor_timestamps scrape option. #5304 +* [ENHANCEMENT] Discovery/kubernetes: add present labels for labels/annotations. #5443 +* [ENHANCEMENT] OpenStack SD: Add ProjectID and UserID meta labels. #5431 +* [ENHANCEMENT] Add GODEBUG and retention to the runtime page. #5324 #5322 +* [ENHANCEMENT] Add support for POSTing to /series endpoint. #5422 +* [ENHANCEMENT] Support PUT methods for Lifecycle and Admin APIs. #5376 +* [ENHANCEMENT] Scrape: Add global jitter for HA server. #5181 +* [ENHANCEMENT] Check for cancellation on every step of a range evaluation. #5131 +* [ENHANCEMENT] String interning for labels & values in the remote_write path. #5316 +* [ENHANCEMENT] Don't lose the scrape cache on a failed scrape. #5414 +* [ENHANCEMENT] Reload cert files from disk automatically. common#173 +* [ENHANCEMENT] Use fixed length millisecond timestamp format for logs. common#172 +* [ENHANCEMENT] Performance improvements for postings.
tsdb#509 tsdb#572 +* [BUGFIX] Remote Write: fix checkpoint reading. #5429 +* [BUGFIX] Check if label value is valid when unmarshaling external labels from YAML. #5316 +* [BUGFIX] Promparse: sort all labels when parsing. #5372 +* [BUGFIX] Reload rules: copy state on both name and labels. #5368 +* [BUGFIX] Exponentiation operator to drop metric name in result of operation. #5329 +* [BUGFIX] Config: resolve more file paths. #5284 +* [BUGFIX] Promtool: resolve relative paths in alert test files. #5336 +* [BUGFIX] Set TLSHandshakeTimeout in HTTP transport. common#179 +* [BUGFIX] Use fsync to be more resilient to machine crashes. tsdb#573 tsdb#578 +* [BUGFIX] Keep series that are still in WAL in checkpoints. tsdb#577 +* [BUGFIX] Fix output sample values for scalar-to-vector comparison operations. #5454 + +## 2.8.1 / 2019-03-28 + +* [BUGFIX] Display the job labels in `/targets` which was removed accidentally. #5406 + +## 2.8.0 / 2019-03-12 + +This release uses Write-Ahead Logging (WAL) for the remote_write API. This currently causes a slight increase in memory usage, which will be addressed in future releases. + +* [CHANGE] Default time retention is used only when no size based retention is specified. These are flags where time retention is specified by the flag `--storage.tsdb.retention` and size retention by `--storage.tsdb.retention.size`. #5216 +* [CHANGE] `prometheus_tsdb_storage_blocks_bytes_total` is now `prometheus_tsdb_storage_blocks_bytes`. prometheus/tsdb#506 +* [FEATURE] [EXPERIMENTAL] Time overlapping blocks are now allowed; vertical compaction and vertical query merge. It is an optional feature which is controlled by the `--storage.tsdb.allow-overlapping-blocks` flag, disabled by default. prometheus/tsdb#370 +* [ENHANCEMENT] Use the WAL for remote_write API. #4588 +* [ENHANCEMENT] Query performance improvements. prometheus/tsdb#531 +* [ENHANCEMENT] UI enhancements with upgrade to Bootstrap 4. #5226 +* [ENHANCEMENT] Reduce time that Alertmanagers are in flux when reloaded. #5126 +* [ENHANCEMENT] Limit number of metrics displayed on UI to 10000. #5139 +* [ENHANCEMENT] (1) Remember All/Unhealthy choice on target-overview when reloading page. (2) Resize text-input area on Graph page on mouseclick. #5201 +* [ENHANCEMENT] In `histogram_quantile` merge buckets with equivalent le values. #5158. +* [ENHANCEMENT] Show list of offending labels in the error message in many-to-many scenarios. #5189 +* [ENHANCEMENT] Show `Storage Retention` criteria in effect on `/status` page. #5322 +* [BUGFIX] Fix sorting of rule groups. #5260 +* [BUGFIX] Fix support for password_file and bearer_token_file in Kubernetes SD. #5211 +* [BUGFIX] Scrape: catch errors when creating HTTP clients #5182. Adds new metrics: + * `prometheus_target_scrape_pools_total` + * `prometheus_target_scrape_pools_failed_total` + * `prometheus_target_scrape_pool_reloads_total` + * `prometheus_target_scrape_pool_reloads_failed_total` +* [BUGFIX] Fix panic when aggregator param is not a literal. #5290 + +## 2.7.2 / 2019-03-02 + +* [BUGFIX] `prometheus_rule_group_last_evaluation_timestamp_seconds` is now a unix timestamp. #5186 + +## 2.7.1 / 2019-01-31 + +This release has a fix for a Stored DOM XSS vulnerability that can be triggered when using the query history functionality. Thanks to Dor Tumarkin from Checkmarx for reporting it. + +* [BUGFIX/SECURITY] Fix a Stored DOM XSS vulnerability with query history. #5163 +* [BUGFIX] `prometheus_rule_group_last_duration_seconds` now reports seconds instead of nanoseconds.
#5153 +* [BUGFIX] Make sure the targets are consistently sorted in the targets page. #5161 + +## 2.7.0 / 2019-01-28 + +We're rolling back the Dockerfile changes introduced in 2.6.0. If you made changes to your docker deployment in 2.6.0, you will need to roll them back. This release also adds experimental support for disk size based retention. To accommodate that we are deprecating the flag `storage.tsdb.retention` in favour of `storage.tsdb.retention.time`. We print a warning if the flag is in use, but it will function without breaking until Prometheus 3.0. + +* [CHANGE] Rollback Dockerfile to version at 2.5.0. Rollback of the breaking change introduced in 2.6.0. #5122 +* [FEATURE] Add subqueries to PromQL. #4831 +* [FEATURE] [EXPERIMENTAL] Add support for disk size based retention. Note that we don't consider the WAL size which could be significant and the time based retention policy also applies. #5109 prometheus/tsdb#343 +* [FEATURE] Add CORS origin flag. #5011 +* [ENHANCEMENT] Consul SD: Add tagged address to the discovery metadata. #5001 +* [ENHANCEMENT] Kubernetes SD: Add service external IP and external name to the discovery metadata. #4940 +* [ENHANCEMENT] Azure SD: Add support for Managed Identity authentication. #4590 +* [ENHANCEMENT] Azure SD: Add tenant and subscription IDs to the discovery metadata. #4969 +* [ENHANCEMENT] OpenStack SD: Add support for application credentials based authentication. #4968 +* [ENHANCEMENT] Add metric for number of rule groups loaded. #5090 +* [BUGFIX] Avoid duplicate tests for alert unit tests. #4964 +* [BUGFIX] Don't depend on given order when comparing samples in alert unit testing. #5049 +* [BUGFIX] Make sure the retention period doesn't overflow. #5112 +* [BUGFIX] Make sure the blocks don't get very large. #5112 +* [BUGFIX] Don't generate blocks with no samples. prometheus/tsdb#374 +* [BUGFIX] Reintroduce metric for WAL corruptions. prometheus/tsdb#473 + +## 2.6.1 / 2019-01-15 + +* [BUGFIX] Azure SD: Fix discovery getting stuck sometimes. #5088 +* [BUGFIX] Marathon SD: Use `Tasks.Ports` when `RequirePorts` is `false`. #5026 +* [BUGFIX] Promtool: Fix "out-of-order sample" errors when testing rules. #5069 + +## 2.6.0 / 2018-12-17 + +* [CHANGE] Remove default flags from the container's entrypoint, run Prometheus from `/etc/prometheus` and symlink the storage directory to `/etc/prometheus/data`. #4976 +* [CHANGE] Promtool: Remove the `update` command. #3839 +* [FEATURE] Add JSON log format via the `--log.format` flag. #4876 +* [FEATURE] API: Add /api/v1/labels endpoint to get all label names. #4835 +* [FEATURE] Web: Allow setting the page's title via the `--web.ui-title` flag. #4841 +* [ENHANCEMENT] Add `prometheus_tsdb_lowest_timestamp_seconds`, `prometheus_tsdb_head_min_time_seconds` and `prometheus_tsdb_head_max_time_seconds` metrics. #4888 +* [ENHANCEMENT] Add `rule_group_last_evaluation_timestamp_seconds` metric. #4852 +* [ENHANCEMENT] Add `prometheus_template_text_expansion_failures_total` and `prometheus_template_text_expansions_total` metrics. #4747 +* [ENHANCEMENT] Set consistent User-Agent header in outgoing requests. #4891 +* [ENHANCEMENT] Azure SD: Error out at load time when authentication parameters are missing. #4907 +* [ENHANCEMENT] EC2 SD: Add the machine's private DNS name to the discovery metadata. #4693 +* [ENHANCEMENT] EC2 SD: Add the operating system's platform to the discovery metadata. #4663 +* [ENHANCEMENT] Kubernetes SD: Add the pod's phase to the discovery metadata. 
#4824 +* [ENHANCEMENT] Kubernetes SD: Log Kubernetes messages. #4931 +* [ENHANCEMENT] Promtool: Collect CPU and trace profiles. #4897 +* [ENHANCEMENT] Promtool: Support writing output as JSON. #4848 +* [ENHANCEMENT] Remote Read: Return available data if remote read fails partially. #4832 +* [ENHANCEMENT] Remote Write: Improve queue performance. #4772 +* [ENHANCEMENT] Remote Write: Add min_shards parameter to set the minimum number of shards. #4924 +* [ENHANCEMENT] TSDB: Improve WAL reading. #4953 +* [ENHANCEMENT] TSDB: Memory improvements. #4953 +* [ENHANCEMENT] Web: Log stack traces on panic. #4221 +* [ENHANCEMENT] Web UI: Add copy to clipboard button for configuration. #4410 +* [ENHANCEMENT] Web UI: Support console queries at specific times. #4764 +* [ENHANCEMENT] Web UI: group targets by job then instance. #4898 #4806 +* [BUGFIX] Deduplicate handler labels for HTTP metrics. #4732 +* [BUGFIX] Fix leaked queriers causing shutdowns to hang. #4922 +* [BUGFIX] Fix configuration loading panics on nil pointer slice elements. #4942 +* [BUGFIX] API: Correctly skip mismatching targets on /api/v1/targets/metadata. #4905 +* [BUGFIX] API: Better rounding for incoming query timestamps. #4941 +* [BUGFIX] Azure SD: Fix panic. #4867 +* [BUGFIX] Console templates: Fix hover when the metric has a null value. #4906 +* [BUGFIX] Discovery: Remove all targets when the scrape configuration gets empty. #4819 +* [BUGFIX] Marathon SD: Fix leaked connections. #4915 +* [BUGFIX] Marathon SD: Use 'hostPort' member of portMapping to construct target endpoints. #4887 +* [BUGFIX] PromQL: Fix a goroutine leak in the lexer/parser. #4858 +* [BUGFIX] Scrape: Pass through content-type for non-compressed output. #4912 +* [BUGFIX] Scrape: Fix deadlock in the scrape's manager. #4894 +* [BUGFIX] Scrape: Scrape targets at fixed intervals even after Prometheus restarts. #4926 +* [BUGFIX] TSDB: Support restored snapshots including the head properly. #4953 +* [BUGFIX] TSDB: Repair WAL when the last record in a segment is torn. #4953 +* [BUGFIX] TSDB: Fix unclosed file readers on Windows systems. #4997 +* [BUGFIX] Web: Avoid proxy to connect to the local gRPC server. #4572 + ## 2.5.0 / 2018-11-06 * [CHANGE] Group targets by scrape config instead of job name. #4806 #4526 @@ -54,13 +236,13 @@ This release includes multiple bugfixes and features. Further, the WAL implement * [FEATURE] Persist alert 'for' state across restarts #4061 * [FEATURE] Add API providing per target metric metadata #4183 * [FEATURE] Add API providing recording and alerting rules #4318 #4501 -* [ENHANCEMENT] Brand new WAL implementation for TSDB. Forwards incompatible with previous WAL. -* [ENHANCEMENT] Show rule evaluation errors in UI #4457 +* [ENHANCEMENT] Brand new WAL implementation for TSDB. Forwards incompatible with previous WAL. 
+* [ENHANCEMENT] Show rule evaluation errors in UI #4457 * [ENHANCEMENT] Throttle resends of alerts to Alertmanager #4538 * [ENHANCEMENT] Send EndsAt along with the alert to Alertmanager #4550 * [ENHANCEMENT] Limit the samples returned by remote read endpoint #4532 * [ENHANCEMENT] Limit the data read in through remote read #4239 -* [ENHANCEMENT] Coalesce identical SD configuations #3912 +* [ENHANCEMENT] Coalesce identical SD configurations #3912 * [ENHANCEMENT] `promtool`: Add new commands for debugging and querying #4247 #4308 #4346 #4454 * [ENHANCEMENT] Update console examples for node_exporter v0.16.0 #4208 * [ENHANCEMENT] Optimize PromQL aggregations #4248 @@ -90,13 +272,13 @@ This release includes multiple bugfixes and features. Further, the WAL implement * [BUGFIX] discovery/kubernetes/ingress: fix scheme discovery #4329 * [BUGFIX] Fix race in zookeeper sd #4355 * [BUGFIX] Better timeout handling in promql #4291 #4300 -* [BUGFIX] Propogate errors when selecting series from the tsdb #4136 +* [BUGFIX] Propagate errors when selecting series from the tsdb #4136 ## 2.3.1 / 2018-06-19 * [BUGFIX] Avoid infinite loop on duplicate NaN values. #4275 * [BUGFIX] Fix nil pointer deference when using various API endpoints #4282 -* [BUGFIX] config: set target group source index during unmarshalling #4245 +* [BUGFIX] config: set target group source index during unmarshaling #4245 * [BUGFIX] discovery/file: fix logging #4178 * [BUGFIX] kubernetes_sd: fix namespace filtering #4285 * [BUGFIX] web: restore old path prefix behavior #4273 @@ -110,7 +292,7 @@ This release includes multiple bugfixes and features. Further, the WAL implement * [FEATURE] Add security headers to HTTP server responses * [FEATURE] Pass query hints via remote read API * [FEATURE] Basic auth passwords can now be configured via file across all configuration -* [ENHANCEMENT] Optimise PromQL and API serialization for memory usage and allocations +* [ENHANCEMENT] Optimize PromQL and API serialization for memory usage and allocations * [ENHANCEMENT] Limit number of dropped targets in web UI * [ENHANCEMENT] Consul and EC2 service discovery allow using server-side filtering for performance improvement * [ENHANCEMENT] Add advanced filtering configuration to EC2 service discovery @@ -133,7 +315,7 @@ This release includes multiple bugfixes and features. Further, the WAL implement * [BUGFIX] Fix data loss in TSDB on compaction * [BUGFIX] Correctly stop timer in remote-write path -* [BUGFIX] Fix deadlock triggerd by loading targets page +* [BUGFIX] Fix deadlock triggered by loading targets page * [BUGFIX] Fix incorrect buffering of samples on range selection queries * [BUGFIX] Handle large index files on windows properly @@ -526,7 +708,7 @@ This is a breaking change to the Kubernetes service discovery. * [ENHANCEMENT] Message on empty Alerts page. * [ENHANCEMENT] Various internal code refactorings and clean-ups. * [ENHANCEMENT] Various improvements in the build system. -* [BUGFIX] Catch errors when unmarshalling delta/doubleDelta encoded chunks. +* [BUGFIX] Catch errors when unmarshaling delta/doubleDelta encoded chunks. * [BUGFIX] Fix data race in lexer and lexer test. * [BUGFIX] Trim stray whitespace from bearer token file. * [BUGFIX] Avoid divide-by-zero panic on query_range?step=0. @@ -1118,7 +1300,7 @@ All changes: from embedding into the binary. Those files are only used for debugging, and then you can use -web.use-local-assets. By including fewer files, the RAM usage during compilation is much more manageable. 
-* [ENHANCEMENT] Help link points to http://prometheus.github.io now. +* [ENHANCEMENT] Help link points to https://prometheus.github.io now. * [FEATURE] Consoles for haproxy and cloudwatch. * [BUGFIX] Several fixes to graphs in consoles. * [CLEANUP] Removed a file size check that did not check anything. @@ -1211,4 +1393,4 @@ All changes: * [BUGFIX] Built from Go 1.2.1, which has internal fixes to race conditions in garbage collection handling. * [ENHANCEMENT] Internal storage interface refactoring that allows building e.g. the `rule_checker` tool without LevelDB dynamic library dependencies. * [ENHANCEMENT] Cleanups around shutdown handling. -* [PERFORMANCE] Preparations for better memory reuse during marshalling / unmarshalling. +* [PERFORMANCE] Preparations for better memory reuse during marshaling / unmarshaling. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1e5dae17d3..a965a07f14 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,7 +17,7 @@ Prometheus uses GitHub to manage reviews of pull requests. Comments](https://code.google.com/p/go-wiki/wiki/CodeReviewComments) and the _Formatting and style_ section of Peter Bourgon's [Go: Best Practices for Production - Environments](http://peter.bourgon.org/go-in-production/#formatting-and-style). + Environments](https://peter.bourgon.org/go-in-production/#formatting-and-style). * Be sure to sign off on the [DCO](https://github.com/probot/dco#how-it-works) @@ -40,7 +40,9 @@ go build ./cmd/prometheus/ make test # Make sure all the tests pass before you commit and push :) ``` -All our issues are regularly tagged so that you can also filter down the issues involving the components you want to work on. For our labelling policy refer [the wiki page](https://github.com/prometheus/prometheus/wiki/Label-Names-and-Descriptions). +We use [`golangci-lint`](https://github.com/golangci/golangci-lint) for linting the code. If it reports an issue and you think that the warning needs to be disregarded or is a false-positive, you can add a special comment `//nolint:linter1[,linter2,...]` before the offending line. Use this sparingly though; fixing the code to comply with the linter's recommendation is in general the preferred course of action. + +All our issues are regularly tagged so that you can also filter down the issues involving the components you want to work on. For our labeling policy refer to [the wiki page](https://github.com/prometheus/prometheus/wiki/Label-Names-and-Descriptions). ## Pull Request Checklist @@ -54,7 +56,7 @@ All our issues are regularly tagged so that you can also filter down the issues ## Dependency management -The Prometheus project uses [Go modules](https://golang.org/cmd/go/#hdr-Modules__module_versions__and_more) to manage dependencies on external packages. This requires a working Go environment with version 1.11 or greater installed. +The Prometheus project uses [Go modules](https://golang.org/cmd/go/#hdr-Modules__module_versions__and_more) to manage dependencies on external packages. This requires a working Go environment with version 1.12 or greater installed. All dependencies are vendored in the `vendor/` directory.
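As a concrete illustration of the `//nolint` convention described in the CONTRIBUTING.md hunk above, here is a minimal, hypothetical Go snippet; the function name and message are invented, only the directive syntax comes from the guideline. It silences the errcheck linter for a single, deliberately unchecked call:

```go
package main

import (
	"fmt"
	"os"
)

// printStartupNotice is an invented example. The trailing //nolint:errcheck
// directive asks golangci-lint's errcheck linter to ignore the deliberately
// unchecked error returned by fmt.Fprintln on this one line.
func printStartupNotice() {
	fmt.Fprintln(os.Stderr, "prometheus: starting up") //nolint:errcheck
}

func main() {
	printStartupNotice()
}
```

As the guideline itself notes, fixing the underlying issue is generally preferable to suppressing the warning.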
diff --git a/Dockerfile b/Dockerfile index b04e660932..e42b18ef68 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,12 @@ -FROM quay.io/prometheus/busybox:latest +ARG ARCH="amd64" +ARG OS="linux" +FROM quay.io/prometheus/busybox-${OS}-${ARCH}:latest LABEL maintainer="The Prometheus Authors " -COPY prometheus /bin/prometheus -COPY promtool /bin/promtool +ARG ARCH="amd64" +ARG OS="linux" +COPY .build/${OS}-${ARCH}/prometheus /bin/prometheus +COPY .build/${OS}-${ARCH}/promtool /bin/promtool COPY documentation/examples/prometheus.yml /etc/prometheus/prometheus.yml COPY console_libraries/ /usr/share/prometheus/console_libraries/ COPY consoles/ /usr/share/prometheus/consoles/ @@ -15,8 +19,8 @@ USER nobody EXPOSE 9090 VOLUME [ "/prometheus" ] WORKDIR /prometheus -ENTRYPOINT [ "/bin/prometheus", \ +ENTRYPOINT [ "/bin/prometheus" ] +CMD [ "--config.file=/etc/prometheus/prometheus.yml", \ "--storage.tsdb.path=/prometheus", \ - "--web.console.libraries=/etc/prometheus/console_libraries", \ - "--web.console.templates=/etc/prometheus/consoles", \ - "--config.file=/etc/prometheus/prometheus.yml" ] + "--web.console.libraries=/usr/share/prometheus/console_libraries", \ + "--web.console.templates=/usr/share/prometheus/consoles" ] diff --git a/MAINTAINERS.md b/MAINTAINERS.md index 48330b13e9..e00da702c0 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -1,6 +1,6 @@ Maintainers of this repository with their focus areas: * Brian Brazil @brian-brazil: Console templates; semantics of PromQL, service discovery, and relabeling. -* Fabian Reinartz @fabxc: PromQL parsing and evaluation; implementation of retrieval, alert notification, and service discovery. +* Fabian Reinartz @fabxc: PromQL parsing and evaluation; implementation of retrieval, alert notification, and service discovery. * Julius Volz @juliusv: Remote storage integrations; web UI. diff --git a/Makefile b/Makefile index b4cc0eef95..6b13f9b67c 100644 --- a/Makefile +++ b/Makefile @@ -11,24 +11,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Needs to be defined before including Makefile.common to auto-generate targets +DOCKER_ARCHS ?= amd64 armv7 arm64 + include Makefile.common -STATICCHECK_IGNORE = \ - github.com/prometheus/prometheus/discovery/kubernetes/kubernetes.go:SA1019 \ - github.com/prometheus/prometheus/discovery/kubernetes/node.go:SA1019 \ - github.com/prometheus/prometheus/documentation/examples/remote_storage/remote_storage_adapter/main.go:SA1019 \ - github.com/prometheus/prometheus/pkg/textparse/promlex.l.go:SA4006 \ - github.com/prometheus/prometheus/pkg/textparse/openmetricslex.l.go:SA4006 \ - github.com/prometheus/prometheus/pkg/pool/pool.go:SA6002 \ - github.com/prometheus/prometheus/promql/engine.go:SA6002 \ - github.com/prometheus/prometheus/prompb/rpc.pb.gw.go:SA1019 - DOCKER_IMAGE_NAME ?= prometheus -# Go modules needs the bzr binary because of the dependency on launchpad.net/gocheck. 
-$(eval $(call PRECHECK_COMMAND_template,bzr)) -PRECHECK_OPTIONS_bzr = version - .PHONY: assets assets: @echo ">> writing assets" diff --git a/Makefile.common b/Makefile.common index 0248fee5a2..d7aea1b86f 100644 --- a/Makefile.common +++ b/Makefile.common @@ -29,12 +29,15 @@ GO ?= go GOFMT ?= $(GO)fmt FIRST_GOPATH := $(firstword $(subst :, ,$(shell $(GO) env GOPATH))) GOOPTS ?= +GOHOSTOS ?= $(shell $(GO) env GOHOSTOS) +GOHOSTARCH ?= $(shell $(GO) env GOHOSTARCH) GO_VERSION ?= $(shell $(GO) version) GO_VERSION_NUMBER ?= $(word 3, $(GO_VERSION)) PRE_GO_111 ?= $(shell echo $(GO_VERSION_NUMBER) | grep -E 'go1\.(10|[0-9])\.') -unexport GOVENDOR +GOVENDOR := +GO111MODULE := ifeq (, $(PRE_GO_111)) ifneq (,$(wildcard go.mod)) # Enforce Go modules support just in case the directory is inside GOPATH (and for Travis CI). @@ -55,32 +58,58 @@ $(warning Some recipes may not work as expected as the current Go runtime is '$( # This repository isn't using Go modules (yet). GOVENDOR := $(FIRST_GOPATH)/bin/govendor endif - - unexport GO111MODULE endif PROMU := $(FIRST_GOPATH)/bin/promu -STATICCHECK := $(FIRST_GOPATH)/bin/staticcheck pkgs = ./... -GO_VERSION ?= $(shell $(GO) version) -GO_BUILD_PLATFORM ?= $(subst /,-,$(lastword $(GO_VERSION))) +ifeq (arm, $(GOHOSTARCH)) + GOHOSTARM ?= $(shell GOARM= $(GO) env GOARM) + GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH)v$(GOHOSTARM) +else + GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH) +endif -PROMU_VERSION ?= 0.2.0 +PROMU_VERSION ?= 0.4.0 PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz +GOLANGCI_LINT := +GOLANGCI_LINT_OPTS ?= +GOLANGCI_LINT_VERSION ?= v1.16.0 +# golangci-lint only supports linux, darwin and windows platforms on i386/amd64. +# windows isn't included here because of the path separator being different. +ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin)) + ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386)) + GOLANGCI_LINT := $(FIRST_GOPATH)/bin/golangci-lint + endif +endif + PREFIX ?= $(shell pwd) BIN_DIR ?= $(shell pwd) DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD)) +DOCKERFILE_PATH ?= ./ DOCKER_REPO ?= prom -.PHONY: all -all: precheck style staticcheck unused build test +DOCKER_ARCHS ?= amd64 + +BUILD_DOCKER_ARCHS = $(addprefix common-docker-,$(DOCKER_ARCHS)) +PUBLISH_DOCKER_ARCHS = $(addprefix common-docker-publish-,$(DOCKER_ARCHS)) +TAG_DOCKER_ARCHS = $(addprefix common-docker-tag-latest-,$(DOCKER_ARCHS)) + +ifeq ($(GOHOSTARCH),amd64) + ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux freebsd darwin windows)) + # Only supported on amd64 + test-flags := -race + endif +endif # This rule is used to forward a target like "build" to "common-build". This # allows a new "build" target to be defined in a Makefile which includes this # one and override "common-build" without override warnings. %: common-% ; +.PHONY: common-all +common-all: precheck style check_license lint unused build test + .PHONY: common-style common-style: @echo ">> checking code style" @@ -102,6 +131,15 @@ common-check_license: exit 1; \ fi +.PHONY: common-deps +common-deps: + @echo ">> getting dependencies" +ifdef GO111MODULE + GO111MODULE=$(GO111MODULE) $(GO) mod download +else + $(GO) get $(GOOPTS) -t ./... 
+endif + .PHONY: common-test-short common-test-short: @echo ">> running short tests" @@ -110,26 +148,35 @@ common-test-short: .PHONY: common-test common-test: @echo ">> running all tests" - GO111MODULE=$(GO111MODULE) $(GO) test -race $(GOOPTS) $(pkgs) + GO111MODULE=$(GO111MODULE) $(GO) test $(test-flags) $(GOOPTS) $(pkgs) .PHONY: common-format common-format: @echo ">> formatting code" - GO111MODULE=$(GO111MODULE) $(GO) fmt $(GOOPTS) $(pkgs) + GO111MODULE=$(GO111MODULE) $(GO) fmt $(pkgs) .PHONY: common-vet common-vet: @echo ">> vetting code" GO111MODULE=$(GO111MODULE) $(GO) vet $(GOOPTS) $(pkgs) -.PHONY: common-staticcheck -common-staticcheck: $(STATICCHECK) - @echo ">> running staticcheck" +.PHONY: common-lint +common-lint: $(GOLANGCI_LINT) +ifdef GOLANGCI_LINT + @echo ">> running golangci-lint" ifdef GO111MODULE - GO111MODULE=$(GO111MODULE) $(STATICCHECK) -ignore "$(STATICCHECK_IGNORE)" -checks "SA*" $(pkgs) +# 'go list' needs to be executed before staticcheck to prepopulate the modules cache. +# Otherwise staticcheck might fail randomly for some reason not yet explained. + GO111MODULE=$(GO111MODULE) $(GO) list -e -compiled -test=true -export=false -deps=true -find=false -tags= -- ./... > /dev/null + GO111MODULE=$(GO111MODULE) $(GOLANGCI_LINT) run $(GOLANGCI_LINT_OPTS) $(pkgs) else - $(STATICCHECK) -ignore "$(STATICCHECK_IGNORE)" $(pkgs) + $(GOLANGCI_LINT) run $(pkgs) endif +endif + +# For backward-compatibility. +.PHONY: common-staticcheck +common-staticcheck: lint .PHONY: common-unused common-unused: $(GOVENDOR) @@ -140,8 +187,9 @@ else ifdef GO111MODULE @echo ">> running check for unused/missing packages in go.mod" GO111MODULE=$(GO111MODULE) $(GO) mod tidy +ifeq (,$(wildcard vendor)) @git diff --exit-code -- go.sum go.mod -ifneq (,$(wildcard vendor)) +else @echo ">> running check for unused packages in vendor/" GO111MODULE=$(GO111MODULE) $(GO) mod vendor @git diff --exit-code -- go.sum go.mod vendor/ @@ -159,45 +207,50 @@ common-tarball: promu @echo ">> building release tarball" $(PROMU) tarball --prefix $(PREFIX) $(BIN_DIR) -.PHONY: common-docker -common-docker: - docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" . 
+.PHONY: common-docker $(BUILD_DOCKER_ARCHS) +common-docker: $(BUILD_DOCKER_ARCHS) +$(BUILD_DOCKER_ARCHS): common-docker-%: + docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" \ + --build-arg ARCH="$*" \ + --build-arg OS="linux" \ + $(DOCKERFILE_PATH) -.PHONY: common-docker-publish -common-docker-publish: - docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)" +.PHONY: common-docker-publish $(PUBLISH_DOCKER_ARCHS) +common-docker-publish: $(PUBLISH_DOCKER_ARCHS) +$(PUBLISH_DOCKER_ARCHS): common-docker-publish-%: + docker push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" -.PHONY: common-docker-tag-latest -common-docker-tag-latest: - docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):latest" +.PHONY: common-docker-tag-latest $(TAG_DOCKER_ARCHS) +common-docker-tag-latest: $(TAG_DOCKER_ARCHS) +$(TAG_DOCKER_ARCHS): common-docker-tag-latest-%: + docker tag "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:latest" + +.PHONY: common-docker-manifest +common-docker-manifest: + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create -a "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" $(foreach ARCH,$(DOCKER_ARCHS),$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$(ARCH):$(DOCKER_IMAGE_TAG)) + DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" .PHONY: promu promu: $(PROMU) $(PROMU): - curl -s -L $(PROMU_URL) | tar -xvz -C /tmp - mkdir -v -p $(FIRST_GOPATH)/bin - cp -v /tmp/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(PROMU) + $(eval PROMU_TMP := $(shell mktemp -d)) + curl -s -L $(PROMU_URL) | tar -xvzf - -C $(PROMU_TMP) + mkdir -p $(FIRST_GOPATH)/bin + cp $(PROMU_TMP)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM)/promu $(FIRST_GOPATH)/bin/promu + rm -r $(PROMU_TMP) .PHONY: proto proto: @echo ">> generating code from proto files" @./scripts/genproto.sh -.PHONY: $(STATICCHECK) -$(STATICCHECK): -ifdef GO111MODULE -# Get staticcheck from a temporary directory to avoid modifying the local go.{mod,sum}. -# See https://github.com/golang/go/issues/27643. -# For now, we are using the next branch of staticcheck because master isn't compatible yet with Go modules. - tmpModule=$$(mktemp -d 2>&1) && \ - mkdir -p $${tmpModule}/staticcheck && \ - cd "$${tmpModule}"/staticcheck && \ - GO111MODULE=on $(GO) mod init example.com/staticcheck && \ - GO111MODULE=on GOOS= GOARCH= $(GO) get -u honnef.co/go/tools/cmd/staticcheck@next && \ - rm -rf $${tmpModule}; -else - GOOS= GOARCH= GO111MODULE=off $(GO) get -u honnef.co/go/tools/cmd/staticcheck +ifdef GOLANGCI_LINT +$(GOLANGCI_LINT): + mkdir -p $(FIRST_GOPATH)/bin + curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/$(GOLANGCI_LINT_VERSION)/install.sh \ + | sed -e '/install -d/d' \ + | sh -s -- -b $(FIRST_GOPATH)/bin $(GOLANGCI_LINT_VERSION) endif ifdef GOVENDOR @@ -212,9 +265,8 @@ precheck:: define PRECHECK_COMMAND_template = precheck:: $(1)_precheck - PRECHECK_COMMAND_$(1) ?= $(1) $$(strip $$(PRECHECK_OPTIONS_$(1))) -.PHONE: $(1)_precheck +.PHONY: $(1)_precheck $(1)_precheck: @if ! $$(PRECHECK_COMMAND_$(1)) 1>/dev/null 2>&1; then \ echo "Execution of '$$(PRECHECK_COMMAND_$(1))' command failed. 
Is $(1) installed?"; \ diff --git a/NOTICE b/NOTICE index 2e141355cf..e36e57e527 100644 --- a/NOTICE +++ b/NOTICE @@ -2,13 +2,13 @@ The Prometheus systems and service monitoring server Copyright 2012-2015 The Prometheus Authors This product includes software developed at -SoundCloud Ltd. (http://soundcloud.com/). +SoundCloud Ltd. (https://soundcloud.com/). The following components are included in this product: Bootstrap -http://getbootstrap.com +https://getbootstrap.com Copyright 2011-2014 Twitter, Inc. Licensed under the MIT License @@ -52,7 +52,7 @@ Copyright jQuery Foundation and other contributors Licensed under the MIT License Protocol Buffers for Go with Gadgets -http://github.com/gogo/protobuf/ +https://github.com/gogo/protobuf/ Copyright (c) 2013, The GoGo Authors. See source code for license details. @@ -67,7 +67,7 @@ Copyright 2013 Matt T. Proud Licensed under the Apache License, Version 2.0 DNS library in Go -http://miek.nl/posts/2014/Aug/16/go-dns-package/ +https://miek.nl/2014/august/16/go-dns-package/ Copyright 2009 The Go Authors, 2011 Miek Gieben See https://github.com/miekg/dns/blob/master/LICENSE for license details. diff --git a/README.md b/README.md index 326244c2f1..908a2cccfb 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ -# Prometheus [![Build Status](https://travis-ci.org/prometheus/prometheus.svg)][travis] +# Prometheus +[![Build Status](https://travis-ci.org/prometheus/prometheus.svg)][travis] [![CircleCI](https://circleci.com/gh/prometheus/prometheus/tree/master.svg?style=shield)][circleci] [![Docker Repository on Quay](https://quay.io/repository/prometheus/prometheus/status)][quay] [![Docker Pulls](https://img.shields.io/docker/pulls/prom/prometheus.svg?maxAge=604800)][hub] @@ -57,7 +58,7 @@ Prometheus will now be reachable at http://localhost:9090/. ### Building from source To build Prometheus from the source code yourself you need to have a working -Go environment with [version 1.11 or greater installed](http://golang.org/doc/install). +Go environment with [version 1.12 or greater installed](https://golang.org/doc/install). You can directly use the `go` tool to download and install the `prometheus` and `promtool` binaries into your `GOPATH`: @@ -86,7 +87,7 @@ The Makefile provides several targets: ## More information - * The source code is periodically indexed: [Prometheus Core](http://godoc.org/github.com/prometheus/prometheus). + * The source code is periodically indexed: [Prometheus Core](https://godoc.org/github.com/prometheus/prometheus). * You will find a Travis CI configuration in `.travis.yml`. * See the [Community page](https://prometheus.io/community) for how to reach the Prometheus developers and users on various communication channels. diff --git a/RELEASE.md b/RELEASE.md index 0ff5131a78..2eaf656c84 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,28 +1,35 @@ # Releases -This page describes the release process and the currently planned schedule for upcoming releases as well as the respective release schepherds. Release shepards are chosen on a voluntary basis. +This page describes the release process and the currently planned schedule for upcoming releases as well as the respective release shepherd. Release shepherds are chosen on a voluntary basis. ## Release schedule Release cadence of first pre-releases being cut is 6 weeks. 
-| release series | date of first pre-release (year-month-day) | release shepard | +| release series | date of first pre-release (year-month-day) | release shepherd | |----------------|--------------------------------------------|---------------------------------------------| | v2.4 | 2018-09-06 | Goutham Veeramachaneni (GitHub: @gouthamve) | | v2.5 | 2018-10-24 | Frederic Branczyk (GitHub: @brancz) | | v2.6 | 2018-12-05 | Simon Pasquier (GitHub: @simonpasquier) | -| v2.7 | 2019-01-16 | **searching for volunteer** | +| v2.7 | 2019-01-16 | Goutham Veeramachaneni (GitHub: @gouthamve) | +| v2.8 | 2019-02-27 | Ganesh Vernekar (GitHub: @codesome) | +| v2.9 | 2019-04-10 | Brian Brazil (GitHub: @brian-brazil) | +| v2.10 | 2019-05-22 | Björn Rabenstein (GitHub: @beorn7) | +| v2.11 | 2019-07-03 | Frederic Branczyk (GitHub: @brancz) | +| v2.12 | 2019-08-14 | Julius Volz (GitHub: @juliusv) | +| v2.13 | 2019-09-25 | Krasi Georgiev (GitHub: @krasi-georgiev) | +| v2.14 | 2019-11-06 | **searching for volunteer** | If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice. -## Release shepard responsibilities +## Release shepherd responsibilities -The release shepard is responsible for the entire release series of a minor release, meaning all pre- and patch releases of a minor release. The process starts with the initial pre-release. +The release shepherd is responsible for the entire release series of a minor release, meaning all pre- and patch releases of a minor release. The process formally starts with the initial pre-release, but some preparations should be done a few days in advance. -* The first pre-release is scheduled according to the above schedule. -* With the pre-release the release shepard is responsible for running and monitoring a benchmark run of the pre-release for 3 days, after which, if successful, the pre-release is promoted to a stable release. -* Once a pre-release has been released, the `master` branch of the repository is frozen for any feature work, only critical bug fix work concerning the minor release is merged. -* Pre-releases are done from `master`, after pre-releases are promoted to the stable release a `release-major.minor` branch is created. +* We aim to keep the master branch in a working state at all times. In principle, it should be possible to cut a release from master at any time. In practice, things might not work out as nicely. A few days before the pre-release is scheduled, the shepherd should check the state of master. Following their best judgement, the shepherd should try to expedite bug fixes that are still in progress but should make it into the release. On the other hand, the shepherd may hold back merging last-minute invasive and risky changes that are better suited for the next minor release. +* On the date listed in the table above, the release shepherd cuts the first pre-release (using the suffix `-rc.0`) and creates a new branch called `release-<major>.<minor>` starting at the commit tagged for the pre-release. In general, a pre-release is considered a release candidate (that's what `rc` stands for) and should therefore not contain any known bugs that are planned to be fixed in the final release. +* With the pre-release, the release shepherd is responsible for running and monitoring a benchmark run of the pre-release for 3 days, after which, if successful, the pre-release is promoted to a stable release.
+* If regressions or critical bugs are detected, they need to get fixed before cutting a new pre-release (called `-rc.1`, `-rc.2`, etc.). See the next section for details on cutting an individual release. @@ -32,13 +39,13 @@ These instructions are currently valid for the Prometheus server, i.e. the [prom ### Branch management and versioning strategy -We use [Semantic Versioning](http://semver.org/). +We use [Semantic Versioning](https://semver.org/). We maintain a separate branch for each minor release, named `release-<major>.<minor>`, e.g. `release-1.1`, `release-2.0`. -The usual flow is to merge new features and changes into the master branch and to merge bug fixes into the latest release branch. Bug fixes are then merged into master from the latest release branch. The master branch should always contain all commits from the latest release branch. Whether merging master back into a release branch makes more sense is left up to the shepard's judgement. +The usual flow is to merge new features and changes into the master branch and to merge bug fixes into the latest release branch. Bug fixes are then merged into master from the latest release branch. The master branch should always contain all commits from the latest release branch. As long as master hasn't deviated from the release branch, new commits can also go to master, followed by merging master back into the release branch. -If a bug fix got accidentally merged into master, cherry-pick commits have to be created in the latest release branch, which then have to be merged back into master. Try to avoid that situation. +If a bug fix got accidentally merged into master after non-bug-fix changes in master, the bug-fix commits have to be cherry-picked into the release branch, which then have to be merged back into master. Try to avoid that situation. Maintaining the release branches for older minor releases happens on a best effort basis. @@ -88,13 +95,13 @@ If the release has happened in the latest release branch, merge the changes into To update the docs, a PR needs to be created to `prometheus/docs`. See [this PR](https://github.com/prometheus/docs/pull/952/files) for inspiration. -Once the binaries have been uploaded, announce the release on `prometheus-users@googlegroups.com`. Start the subject with `[ANN]`. Check out previous announcement mails for inspiration. +Once the binaries have been uploaded, announce the release on `prometheus-announce@googlegroups.com`. (Please do not use `prometheus-users@googlegroups.com` for announcements anymore.) Check out previous announcement mails for inspiration. ### Pre-releases The following changes to the above procedures apply: -* In line with [Semantic Versioning](http://semver.org/), append something like `-rc.0` to the version (with the corresponding changes to the tag name, the release name etc.). +* In line with [Semantic Versioning](https://semver.org/), append something like `-rc.0` to the version (with the corresponding changes to the tag name, the release name etc.). * Tick the _This is a pre-release_ box when drafting the release in the Github UI. * Still update `CHANGELOG.md`, but when you cut the final release later, merge all the changes from the pre-releases into the one final update.
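The stable-versus-pre-release distinction drawn in RELEASE.md above is what the CircleCI tag filters earlier in this diff rely on: only tags matching `^v[0-9]+(\.[0-9]+){2}$` (i.e. no `-rc.N` suffix) are also tagged as `latest`. A small, self-contained Go sketch of that check, with example tag values:

```go
package main

import (
	"fmt"
	"regexp"
)

// stableTag mirrors the pattern used in .circleci/config.yml earlier in this
// diff to decide whether a release tag should also be published as "latest".
var stableTag = regexp.MustCompile(`^v[0-9]+(\.[0-9]+){2}$`)

func main() {
	for _, tag := range []string{"v2.10.0", "v2.10.0-rc.0"} {
		fmt.Printf("%-13s stable release: %v\n", tag, stableTag.MatchString(tag))
	}
}
```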
diff --git a/VERSION b/VERSION index 437459cd94..10c2c0c3d6 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.5.0 +2.10.0 diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index f4856fabb8..5529f0dcb9 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -26,6 +26,7 @@ import ( "os" "os/signal" "path/filepath" + "regexp" "runtime" "strings" "sync" @@ -34,22 +35,23 @@ import ( "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" - "github.com/oklog/oklog/pkg/group" + conntrack "github.com/mwitkow/go-conntrack" + "github.com/oklog/run" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" - "github.com/prometheus/common/version" - prom_runtime "github.com/prometheus/prometheus/pkg/runtime" - "gopkg.in/alecthomas/kingpin.v2" - k8s_runtime "k8s.io/apimachinery/pkg/util/runtime" - - "github.com/mwitkow/go-conntrack" "github.com/prometheus/common/promlog" + "github.com/prometheus/common/version" + kingpin "gopkg.in/alecthomas/kingpin.v2" + "k8s.io/klog" + promlogflag "github.com/prometheus/common/promlog/flag" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/discovery" sd_config "github.com/prometheus/prometheus/discovery/config" "github.com/prometheus/prometheus/notifier" + "github.com/prometheus/prometheus/pkg/relabel" + prom_runtime "github.com/prometheus/prometheus/pkg/runtime" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/scrape" @@ -69,10 +71,19 @@ var ( Name: "prometheus_config_last_reload_success_timestamp_seconds", Help: "Timestamp of the last successful configuration reload.", }) + + defaultRetentionString = "15d" + defaultRetentionDuration model.Duration ) func init() { prometheus.MustRegister(version.NewCollector("prometheus")) + + var err error + defaultRetentionDuration, err = model.ParseDuration(defaultRetentionString) + if err != nil { + panic(err) + } } func main() { @@ -81,6 +92,11 @@ func main() { runtime.SetMutexProfileFraction(20) } + var ( + oldFlagRetentionDuration model.Duration + newFlagRetentionDuration model.Duration + ) + cfg := struct { configFile string @@ -99,13 +115,15 @@ func main() { queryMaxSamples int RemoteFlushDeadline model.Duration - prometheusURL string + prometheusURL string + corsRegexString string - logLevel promlog.AllowedLevel + promlogConfig promlog.Config }{ notifier: notifier.Options{ Registerer: prometheus.DefaultRegisterer, }, + promlogConfig: promlog.Config{}, } a := kingpin.New(filepath.Base(os.Args[0]), "The Prometheus monitoring server") @@ -150,6 +168,12 @@ func main() { a.Flag("web.console.libraries", "Path to the console library directory."). Default("console_libraries").StringVar(&cfg.web.ConsoleLibrariesPath) + a.Flag("web.page-title", "Document title of Prometheus instance."). + Default("Prometheus Time Series Collection and Processing Server").StringVar(&cfg.web.PageTitle) + + a.Flag("web.cors.origin", `Regex for CORS origin. It is fully anchored. Example: 'https?://(domain1|domain2)\.com'`). + Default(".*").StringVar(&cfg.corsRegexString) + a.Flag("storage.tsdb.path", "Base path for metrics storage."). Default("data/").StringVar(&cfg.localStoragePath) @@ -157,15 +181,28 @@ func main() { Hidden().Default("2h").SetValue(&cfg.tsdb.MinBlockDuration) a.Flag("storage.tsdb.max-block-duration", - "Maximum duration compacted blocks may span. For use in testing. (Defaults to 10% of the retention period)."). + "Maximum duration compacted blocks may span. 
For use in testing. (Defaults to 10% of the retention period.)"). Hidden().PlaceHolder("").SetValue(&cfg.tsdb.MaxBlockDuration) - a.Flag("storage.tsdb.retention", "How long to retain samples in storage."). - Default("15d").SetValue(&cfg.tsdb.Retention) + a.Flag("storage.tsdb.wal-segment-size", + "Size at which to split the tsdb WAL segment files. Example: 100MB"). + Hidden().PlaceHolder("").BytesVar(&cfg.tsdb.WALSegmentSize) + + a.Flag("storage.tsdb.retention", "[DEPRECATED] How long to retain samples in storage. This flag has been deprecated, use \"storage.tsdb.retention.time\" instead."). + SetValue(&oldFlagRetentionDuration) + + a.Flag("storage.tsdb.retention.time", "How long to retain samples in storage. When this flag is set it overrides \"storage.tsdb.retention\". If neither this flag nor \"storage.tsdb.retention\" nor \"storage.tsdb.retention.size\" is set, the retention time defaults to "+defaultRetentionString+"."). + SetValue(&newFlagRetentionDuration) + + a.Flag("storage.tsdb.retention.size", "[EXPERIMENTAL] Maximum number of bytes that can be stored for blocks. Units supported: KB, MB, GB, TB, PB. This flag is experimental and can be changed in future releases."). + BytesVar(&cfg.tsdb.MaxBytes) a.Flag("storage.tsdb.no-lockfile", "Do not create lockfile in data directory."). Default("false").BoolVar(&cfg.tsdb.NoLockfile) + a.Flag("storage.tsdb.allow-overlapping-blocks", "[EXPERIMENTAL] Allow overlapping blocks, which in turn enables vertical compaction and vertical query merge."). + Default("false").BoolVar(&cfg.tsdb.AllowOverlappingBlocks) + a.Flag("storage.remote.flush-deadline", "How long to wait flushing sample on shutdown or config reload."). Default("1m").PlaceHolder("").SetValue(&cfg.RemoteFlushDeadline) @@ -175,10 +212,10 @@ func main() { a.Flag("storage.remote.read-concurrent-limit", "Maximum number of concurrent remote read calls. 0 means no limit."). Default("10").IntVar(&cfg.web.RemoteReadConcurrencyLimit) - a.Flag("rules.alert.for-outage-tolerance", "Max time to tolerate prometheus outage for restoring 'for' state of alert."). + a.Flag("rules.alert.for-outage-tolerance", "Max time to tolerate prometheus outage for restoring \"for\" state of alert."). Default("1h").SetValue(&cfg.outageTolerance) - a.Flag("rules.alert.for-grace-period", "Minimum duration between alert and restored 'for' state. This is maintained only for alerts with configured 'for' time greater than grace period."). + a.Flag("rules.alert.for-grace-period", "Minimum duration between alert and restored \"for\" state. This is maintained only for alerts with configured \"for\" time greater than grace period."). Default("10m").SetValue(&cfg.forGracePeriod) a.Flag("rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager."). @@ -190,7 +227,7 @@ func main() { a.Flag("alertmanager.timeout", "Timeout for sending alerts to Alertmanager."). Default("10s").SetValue(&cfg.notifierTimeout) - a.Flag("query.lookback-delta", "The delta difference allowed for retrieving metrics during expression evaluations."). + a.Flag("query.lookback-delta", "The maximum lookback duration for retrieving metrics during expression evaluations."). Default("5m").SetValue(&cfg.lookbackDelta) a.Flag("query.timeout", "Maximum time a query may take before being aborted."). @@ -198,10 +235,11 @@ func main() { a.Flag("query.max-concurrency", "Maximum number of queries executed concurrently."). 
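The retention flags above take `model.Duration` values, and the `15d` default is itself produced by `model.ParseDuration` in `init()`. A minimal standalone sketch of that parsing, outside the kingpin flag machinery:

```go
package main

import (
	"fmt"

	"github.com/prometheus/common/model"
)

// Standalone sketch of the default-retention handling above: "15d" is parsed
// once via model.ParseDuration and applied only when neither time- nor
// size-based retention has been set on the command line.
func main() {
	d, err := model.ParseDuration("15d")
	if err != nil {
		panic(err)
	}
	fmt.Println("default retention:", d) // prints: default retention: 15d
}
```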
Default("20").IntVar(&cfg.queryConcurrency) - a.Flag("query.max-samples", "Maximum number of samples a single query can load into memory. Note that queries will fail if they would load more samples than this into memory, so this also limits the number of samples a query can return."). + + a.Flag("query.max-samples", "Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return."). Default("50000000").IntVar(&cfg.queryMaxSamples) - promlogflag.AddFlags(a, &cfg.logLevel) + promlogflag.AddFlags(a, &cfg.promlogConfig) _, err := a.Parse(os.Args[1:]) if err != nil { @@ -210,12 +248,20 @@ func main() { os.Exit(2) } + logger := promlog.New(&cfg.promlogConfig) + cfg.web.ExternalURL, err = computeExternalURL(cfg.prometheusURL, cfg.web.ListenAddress) if err != nil { fmt.Fprintln(os.Stderr, errors.Wrapf(err, "parse external URL %q", cfg.prometheusURL)) os.Exit(2) } + cfg.web.CORSOrigin, err = compileCORSRegexString(cfg.corsRegexString) + if err != nil { + fmt.Fprintln(os.Stderr, errors.Wrapf(err, "could not compile CORS regex string %q", cfg.corsRegexString)) + os.Exit(2) + } + cfg.web.ReadTimeout = time.Duration(cfg.webTimeout) // Default -web.route-prefix to path of -web.external-url. if cfg.web.RoutePrefix == "" { @@ -224,22 +270,54 @@ func main() { // RoutePrefix must always be at least '/'. cfg.web.RoutePrefix = "/" + strings.Trim(cfg.web.RoutePrefix, "/") - if cfg.tsdb.MaxBlockDuration == 0 { - cfg.tsdb.MaxBlockDuration = cfg.tsdb.Retention / 10 + { // Time retention settings. + if oldFlagRetentionDuration != 0 { + level.Warn(logger).Log("deprecation_notice", "'storage.tsdb.retention' flag is deprecated use 'storage.tsdb.retention.time' instead.") + cfg.tsdb.RetentionDuration = oldFlagRetentionDuration + } + + // When the new flag is set it takes precedence. + if newFlagRetentionDuration != 0 { + cfg.tsdb.RetentionDuration = newFlagRetentionDuration + } + + if cfg.tsdb.RetentionDuration == 0 && cfg.tsdb.MaxBytes == 0 { + cfg.tsdb.RetentionDuration = defaultRetentionDuration + level.Info(logger).Log("msg", "no time or size retention was set so using the default time retention", "duration", defaultRetentionDuration) + } + + // Check for overflows. This limits our max retention to 100y. + if cfg.tsdb.RetentionDuration < 0 { + y, err := model.ParseDuration("100y") + if err != nil { + panic(err) + } + cfg.tsdb.RetentionDuration = y + level.Warn(logger).Log("msg", "time retention value is too high. Limiting to: "+y.String()) + } + } + + { // Max block size settings. + if cfg.tsdb.MaxBlockDuration == 0 { + maxBlockDuration, err := model.ParseDuration("31d") + if err != nil { + panic(err) + } + // When the time retention is set and not too big use to define the max block duration. + if cfg.tsdb.RetentionDuration != 0 && cfg.tsdb.RetentionDuration/10 < maxBlockDuration { + maxBlockDuration = cfg.tsdb.RetentionDuration / 10 + } + + cfg.tsdb.MaxBlockDuration = maxBlockDuration + } } promql.LookbackDelta = time.Duration(cfg.lookbackDelta) + promql.SetDefaultEvaluationInterval(time.Duration(config.DefaultGlobalConfig.EvaluationInterval)) - logger := promlog.New(cfg.logLevel) - - // XXX(fabxc): Kubernetes does background logging which we can only customize by modifying - // a global variable. - // Ultimately, here is the best place to set it. 
- k8s_runtime.ErrorHandlers = []func(error){ - func(err error) { - level.Error(log.With(logger, "component", "k8s_client_runtime")).Log("err", err) - }, - } + // Above level 6, the k8s client would log bearer tokens in clear-text. + klog.ClampLevel(6) + klog.SetLogger(log.With(logger, "component", "k8s_client_runtime")) level.Info(logger).Log("msg", "Starting Prometheus", "version", version.Info()) level.Info(logger).Log("build_context", version.BuildContext()) @@ -249,7 +327,7 @@ func main() { var ( localStorage = &tsdb.ReadyStorage{} - remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), localStorage.StartTime, time.Duration(cfg.RemoteFlushDeadline)) + remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, cfg.localStoragePath, time.Duration(cfg.RemoteFlushDeadline)) fanoutStorage = storage.NewFanout(logger, localStorage, remoteStorage) ) @@ -257,7 +335,7 @@ func main() { ctxWeb, cancelWeb = context.WithCancel(context.Background()) ctxRule = context.Background() - notifier = notifier.NewManager(&cfg.notifier, log.With(logger, "component", "notifier")) + notifierManager = notifier.NewManager(&cfg.notifier, log.With(logger, "component", "notifier")) ctxScrape, cancelScrape = context.WithCancel(context.Background()) discoveryManagerScrape = discovery.NewManager(ctxScrape, log.With(logger, "component", "discovery manager scrape"), discovery.Name("scrape")) @@ -280,7 +358,7 @@ func main() { Appendable: fanoutStorage, TSDB: localStorage, QueryFunc: rules.EngineQueryFunc(queryEngine, fanoutStorage), - NotifyFunc: sendAlerts(notifier, cfg.web.ExternalURL.String()), + NotifyFunc: sendAlerts(notifierManager, cfg.web.ExternalURL.String()), Context: ctxRule, ExternalURL: cfg.web.ExternalURL, Registerer: prometheus.DefaultRegisterer, @@ -297,7 +375,8 @@ func main() { cfg.web.QueryEngine = queryEngine cfg.web.ScrapeManager = scrapeManager cfg.web.RuleManager = ruleManager - cfg.web.Notifier = notifier + cfg.web.Notifier = notifierManager + cfg.web.TSDBCfg = cfg.tsdb cfg.web.Version = &web.PrometheusVersion{ Version: version.Version, @@ -333,7 +412,6 @@ func main() { webHandler.ApplyConfig, // The Scrape and notifier managers need to reload before the Discovery manager as // they need to read the most updated config when receiving the new targets list. - notifier.ApplyConfig, scrapeManager.ApplyConfig, func(cfg *config.Config) error { c := make(map[string]sd_config.ServiceDiscoveryConfig) @@ -342,6 +420,7 @@ func main() { } return discoveryManagerScrape.ApplyConfig(c) }, + notifierManager.ApplyConfig, func(cfg *config.Config) error { c := make(map[string]sd_config.ServiceDiscoveryConfig) for _, v := range cfg.AlertingConfig.AlertmanagerConfigs { @@ -355,17 +434,21 @@ func main() { return discoveryManagerNotify.ApplyConfig(c) }, func(cfg *config.Config) error { - // Get all rule files matching the configuration oaths. + // Get all rule files matching the configuration paths. var files []string for _, pat := range cfg.RuleFiles { fs, err := filepath.Glob(pat) if err != nil { // The only error can be a bad pattern. - return fmt.Errorf("error retrieving rule files for %s: %s", pat, err) + return errors.Wrapf(err, "error retrieving rule files for %s", pat) } files = append(files, fs...) 
} - return ruleManager.Update(time.Duration(cfg.GlobalConfig.EvaluationInterval), files) + return ruleManager.Update( + time.Duration(cfg.GlobalConfig.EvaluationInterval), + files, + cfg.GlobalConfig.ExternalLabels, + ) }, } @@ -392,7 +475,7 @@ func main() { }) } - var g group.Group + var g run.Group { // Termination handler. term := make(chan os.Signal, 1) @@ -522,7 +605,7 @@ func main() { } if err := reloadConfig(cfg.configFile, logger, reloaders...); err != nil { - return fmt.Errorf("error loading config from %q: %s", cfg.configFile, err) + return errors.Wrapf(err, "error loading config from %q", cfg.configFile) } reloadReady.Close() @@ -560,6 +643,11 @@ func main() { g.Add( func() error { level.Info(logger).Log("msg", "Starting TSDB ...") + if cfg.tsdb.WALSegmentSize != 0 { + if cfg.tsdb.WALSegmentSize < 10*1024*1024 || cfg.tsdb.WALSegmentSize > 256*1024*1024 { + return errors.New("flag 'storage.tsdb.wal-segment-size' must be set between 10MB and 256MB") + } + } db, err := tsdb.Open( cfg.localStoragePath, log.With(logger, "component", "tsdb"), @@ -567,9 +655,19 @@ func main() { &cfg.tsdb, ) if err != nil { - return fmt.Errorf("opening storage failed: %s", err) + return errors.Wrapf(err, "opening storage failed") } + level.Info(logger).Log("fs_type", prom_runtime.Statfs(cfg.localStoragePath)) level.Info(logger).Log("msg", "TSDB started") + level.Debug(logger).Log("msg", "TSDB options", + "MinBlockDuration", cfg.tsdb.MinBlockDuration, + "MaxBlockDuration", cfg.tsdb.MaxBlockDuration, + "MaxBytes", cfg.tsdb.MaxBytes, + "NoLockfile", cfg.tsdb.NoLockfile, + "RetentionDuration", cfg.tsdb.RetentionDuration, + "WALSegmentSize", cfg.tsdb.WALSegmentSize, + "AllowOverlappingBlocks", cfg.tsdb.AllowOverlappingBlocks, + ) startTimeMargin := int64(2 * time.Duration(cfg.tsdb.MinBlockDuration).Seconds() * 1000) localStorage.Set(db, startTimeMargin) @@ -590,7 +688,7 @@ func main() { g.Add( func() error { if err := webHandler.Run(ctxWeb); err != nil { - return fmt.Errorf("error starting web server: %s", err) + return errors.Wrapf(err, "error starting web server") } return nil }, @@ -612,12 +710,12 @@ func main() { // so we wait until the config is fully loaded. 
<-reloadReady.C - notifier.Run(discoveryManagerNotify.SyncCh()) + notifierManager.Run(discoveryManagerNotify.SyncCh()) level.Info(logger).Log("msg", "Notifier manager stopped") return nil }, func(err error) { - notifier.Stop() + notifierManager.Stop() }, ) } @@ -642,7 +740,7 @@ func reloadConfig(filename string, logger log.Logger, rls ...func(*config.Config conf, err := config.LoadFile(filename) if err != nil { - return fmt.Errorf("couldn't load configuration (--config.file=%q): %v", filename, err) + return errors.Wrapf(err, "couldn't load configuration (--config.file=%q)", filename) } failed := false @@ -653,8 +751,10 @@ func reloadConfig(filename string, logger log.Logger, rls ...func(*config.Config } } if failed { - return fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename) + return errors.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename) } + + promql.SetDefaultEvaluationInterval(time.Duration(conf.GlobalConfig.EvaluationInterval)) level.Info(logger).Log("msg", "Completed loading of configuration file", "filename", filename) return nil } @@ -664,6 +764,15 @@ func startsOrEndsWithQuote(s string) bool { strings.HasSuffix(s, "\"") || strings.HasSuffix(s, "'") } +// compileCORSRegexString compiles given string and adds anchors +func compileCORSRegexString(s string) (*regexp.Regexp, error) { + r, err := relabel.NewRegexp(s) + if err != nil { + return nil, err + } + return r.Regexp, nil +} + // computeExternalURL computes a sanitized external URL from a raw input. It infers unset // URL parts from the OS and the given listen address. func computeExternalURL(u, listenAddr string) (*url.URL, error) { @@ -680,7 +789,7 @@ func computeExternalURL(u, listenAddr string) (*url.URL, error) { } if startsOrEndsWithQuote(u) { - return nil, fmt.Errorf("URL must not begin or end with quotes") + return nil, errors.New("URL must not begin or end with quotes") } eu, err := url.Parse(u) @@ -697,8 +806,12 @@ func computeExternalURL(u, listenAddr string) (*url.URL, error) { return eu, nil } +type sender interface { + Send(alerts ...*notifier.Alert) +} + // sendAlerts implements the rules.NotifyFunc for a Notifier. -func sendAlerts(n *notifier.Manager, externalURL string) rules.NotifyFunc { +func sendAlerts(s sender, externalURL string) rules.NotifyFunc { return func(ctx context.Context, expr string, alerts ...*rules.Alert) { var res []*notifier.Alert @@ -718,7 +831,7 @@ func sendAlerts(n *notifier.Manager, externalURL string) rules.NotifyFunc { } if len(alerts) > 0 { - n.Send(res...) + s.Send(res...) } } } diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index 605ba816eb..e82e55bc32 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -14,6 +14,7 @@ package main import ( + "context" "flag" "fmt" "net/http" @@ -24,6 +25,9 @@ import ( "testing" "time" + "github.com/prometheus/prometheus/notifier" + "github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/util/testutil" ) @@ -159,6 +163,10 @@ func TestComputeExternalURL(t *testing.T) { // Let's provide an invalid configuration file and verify the exit status indicates the error. 
func TestFailedStartupExitCode(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + fakeInputFile := "fake-input-file" expectedExitStatus := 1 @@ -173,3 +181,106 @@ func TestFailedStartupExitCode(t *testing.T) { t.Errorf("unable to retrieve the exit status for prometheus: %v", err) } } + +type senderFunc func(alerts ...*notifier.Alert) + +func (s senderFunc) Send(alerts ...*notifier.Alert) { + s(alerts...) +} + +func TestSendAlerts(t *testing.T) { + testCases := []struct { + in []*rules.Alert + exp []*notifier.Alert + }{ + { + in: []*rules.Alert{ + { + Labels: []labels.Label{{Name: "l1", Value: "v1"}}, + Annotations: []labels.Label{{Name: "a2", Value: "v2"}}, + ActiveAt: time.Unix(1, 0), + FiredAt: time.Unix(2, 0), + ValidUntil: time.Unix(3, 0), + }, + }, + exp: []*notifier.Alert{ + { + Labels: []labels.Label{{Name: "l1", Value: "v1"}}, + Annotations: []labels.Label{{Name: "a2", Value: "v2"}}, + StartsAt: time.Unix(2, 0), + EndsAt: time.Unix(3, 0), + GeneratorURL: "http://localhost:9090/graph?g0.expr=up&g0.tab=1", + }, + }, + }, + { + in: []*rules.Alert{ + { + Labels: []labels.Label{{Name: "l1", Value: "v1"}}, + Annotations: []labels.Label{{Name: "a2", Value: "v2"}}, + ActiveAt: time.Unix(1, 0), + FiredAt: time.Unix(2, 0), + ResolvedAt: time.Unix(4, 0), + }, + }, + exp: []*notifier.Alert{ + { + Labels: []labels.Label{{Name: "l1", Value: "v1"}}, + Annotations: []labels.Label{{Name: "a2", Value: "v2"}}, + StartsAt: time.Unix(2, 0), + EndsAt: time.Unix(4, 0), + GeneratorURL: "http://localhost:9090/graph?g0.expr=up&g0.tab=1", + }, + }, + }, + { + in: []*rules.Alert{}, + }, + } + + for i, tc := range testCases { + tc := tc + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + senderFunc := senderFunc(func(alerts ...*notifier.Alert) { + if len(tc.in) == 0 { + t.Fatalf("sender called with 0 alert") + } + testutil.Equals(t, tc.exp, alerts) + }) + sendAlerts(senderFunc, "http://localhost:9090")(context.TODO(), "up", tc.in...) 
+ }) + } +} + +func TestWALSegmentSizeBounds(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + for size, expectedExitStatus := range map[string]int{"9MB": 1, "257MB": 1, "10": 2, "1GB": 1, "12MB": 0} { + prom := exec.Command(promPath, "--storage.tsdb.wal-segment-size="+size, "--config.file="+promConfig) + err := prom.Start() + testutil.Ok(t, err) + + if expectedExitStatus == 0 { + done := make(chan error, 1) + go func() { done <- prom.Wait() }() + select { + case err := <-done: + t.Errorf("prometheus should be still running: %v", err) + case <-time.After(5 * time.Second): + prom.Process.Signal(os.Interrupt) + } + continue + } + + err = prom.Wait() + testutil.NotOk(t, err, "") + if exitError, ok := err.(*exec.ExitError); ok { + status := exitError.Sys().(syscall.WaitStatus) + testutil.Equals(t, expectedExitStatus, status.ExitStatus()) + } else { + t.Errorf("unable to retrieve the exit status for prometheus: %v", err) + } + } +} diff --git a/cmd/promtool/archive.go b/cmd/promtool/archive.go index b3fc1ac3ee..783d8294f4 100644 --- a/cmd/promtool/archive.go +++ b/cmd/promtool/archive.go @@ -16,18 +16,13 @@ package main import ( "archive/tar" "compress/gzip" - "fmt" "os" + + "github.com/pkg/errors" ) const filePerm = 0644 -type archiver interface { - write(filename string, b []byte) error - close() error - filename() string -} - type tarGzFileWriter struct { tarWriter *tar.Writer gzWriter *gzip.Writer @@ -37,7 +32,7 @@ type tarGzFileWriter struct { func newTarGzFileWriter(archiveName string) (*tarGzFileWriter, error) { file, err := os.Create(archiveName) if err != nil { - return nil, fmt.Errorf("error creating archive %q: %s", archiveName, err) + return nil, errors.Wrapf(err, "error creating archive %q", archiveName) } gzw := gzip.NewWriter(file) tw := tar.NewWriter(gzw) @@ -72,7 +67,3 @@ func (w *tarGzFileWriter) write(filename string, b []byte) error { } return nil } - -func (w *tarGzFileWriter) filename() string { - return w.file.Name() -} diff --git a/cmd/promtool/debug.go b/cmd/promtool/debug.go index 6704fbb573..280a0d44b2 100644 --- a/cmd/promtool/debug.go +++ b/cmd/promtool/debug.go @@ -14,112 +14,56 @@ package main import ( - "bytes" "fmt" + "io/ioutil" "net/http" - "os" - "github.com/google/pprof/profile" + "github.com/pkg/errors" ) type debugWriterConfig struct { serverURL string tarballName string - pathToFileName map[string]string - postProcess func(b []byte) ([]byte, error) + endPointGroups []endpointsGroup } -type debugWriter struct { - archiver - httpClient - requestToFile map[*http.Request]string - postProcess func(b []byte) ([]byte, error) -} - -func newDebugWriter(cfg debugWriterConfig) (*debugWriter, error) { - client, err := newPrometheusHTTPClient(cfg.serverURL) - if err != nil { - return nil, err - } +func debugWrite(cfg debugWriterConfig) error { archiver, err := newTarGzFileWriter(cfg.tarballName) if err != nil { - return nil, err + return errors.Wrap(err, "error creating a new archiver") } - reqs := make(map[*http.Request]string) - for path, filename := range cfg.pathToFileName { - req, err := http.NewRequest(http.MethodGet, client.urlJoin(path), nil) - if err != nil { - return nil, err - } - reqs[req] = filename - } - return &debugWriter{ - archiver, - client, - reqs, - cfg.postProcess, - }, nil -} -func (w *debugWriter) Write() int { - for req, filename := range w.requestToFile { - _, body, err := w.do(req) - if err != nil { - fmt.Fprintln(os.Stderr, "error executing HTTP request:", err) - return 1 + for _, endPointGroup := range 
cfg.endPointGroups { + for url, filename := range endPointGroup.urlToFilename { + url := cfg.serverURL + url + fmt.Println("collecting:", url) + res, err := http.Get(url) + if err != nil { + return errors.Wrap(err, "error executing HTTP request") + } + body, err := ioutil.ReadAll(res.Body) + res.Body.Close() + if err != nil { + return errors.Wrap(err, "error reading the response body") + } + + if endPointGroup.postProcess != nil { + body, err = endPointGroup.postProcess(body) + if err != nil { + return errors.Wrap(err, "error post-processing HTTP response body") + } + } + if err := archiver.write(filename, body); err != nil { + return errors.Wrap(err, "error writing into the archive") + } } - buf, err := w.postProcess(body) - if err != nil { - fmt.Fprintln(os.Stderr, "error post-processing HTTP response body:", err) - return 1 - } - - if err := w.archiver.write(filename, buf); err != nil { - fmt.Fprintln(os.Stderr, "error writing into archive:", err) - return 1 - } } - if err := w.close(); err != nil { - fmt.Fprintln(os.Stderr, "error closing archiver:", err) - return 1 + if err := archiver.close(); err != nil { + return errors.Wrap(err, "error closing archive writer") } - fmt.Printf("Compiling debug information complete, all files written in %q.\n", w.filename()) - return 0 -} - -func validate(b []byte) (*profile.Profile, error) { - p, err := profile.Parse(bytes.NewReader(b)) - if err != nil { - return nil, err - } - return p, nil -} - -var pprofPostProcess = func(b []byte) ([]byte, error) { - p, err := validate(b) - if err != nil { - return nil, err - } - var buf bytes.Buffer - if err := p.WriteUncompressed(&buf); err != nil { - return nil, err - } - fmt.Println(p.String()) - return buf.Bytes(), nil -} - -var metricsPostProcess = func(b []byte) ([]byte, error) { - fmt.Println(string(b)) - return b, nil -} - -var allPostProcess = func(b []byte) ([]byte, error) { - _, err := validate(b) - if err != nil { - return metricsPostProcess(b) - } - return pprofPostProcess(b) + fmt.Printf("Compiling debug information complete, all files written in %q.\n", cfg.tarballName) + return nil } diff --git a/cmd/promtool/http.go b/cmd/promtool/http.go deleted file mode 100644 index 70cee13cec..0000000000 --- a/cmd/promtool/http.go +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2015 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "context" - "fmt" - "net/http" - "time" - - "github.com/prometheus/client_golang/api" -) - -const defaultTimeout = 2 * time.Minute - -type httpClient interface { - do(req *http.Request) (*http.Response, []byte, error) - urlJoin(path string) string -} - -type prometheusHTTPClient struct { - requestTimeout time.Duration - httpClient api.Client -} - -func newPrometheusHTTPClient(serverURL string) (*prometheusHTTPClient, error) { - hc, err := api.NewClient(api.Config{ - Address: serverURL, - }) - if err != nil { - return nil, fmt.Errorf("error creating HTTP client: %s", err) - } - return &prometheusHTTPClient{ - requestTimeout: defaultTimeout, - httpClient: hc, - }, nil -} - -func (c *prometheusHTTPClient) do(req *http.Request) (*http.Response, []byte, error) { - ctx, cancel := context.WithTimeout(context.Background(), c.requestTimeout) - defer cancel() - return c.httpClient.Do(ctx, req) -} - -func (c *prometheusHTTPClient) urlJoin(path string) string { - return c.httpClient.URL(path, nil).String() -} diff --git a/cmd/promtool/http_test.go b/cmd/promtool/http_test.go deleted file mode 100644 index b9783daaf5..0000000000 --- a/cmd/promtool/http_test.go +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2015 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import "testing" - -func TestURLJoin(t *testing.T) { - - testCases := []struct { - inputHost string - inputPath string - expected string - }{ - {"http://host", "path", "http://host/path"}, - {"http://host", "path/", "http://host/path"}, - {"http://host", "/path", "http://host/path"}, - {"http://host", "/path/", "http://host/path"}, - - {"http://host/", "path", "http://host/path"}, - {"http://host/", "path/", "http://host/path"}, - {"http://host/", "/path", "http://host/path"}, - {"http://host/", "/path/", "http://host/path"}, - - {"https://host", "path", "https://host/path"}, - {"https://host", "path/", "https://host/path"}, - {"https://host", "/path", "https://host/path"}, - {"https://host", "/path/", "https://host/path"}, - - {"https://host/", "path", "https://host/path"}, - {"https://host/", "path/", "https://host/path"}, - {"https://host/", "/path", "https://host/path"}, - {"https://host/", "/path/", "https://host/path"}, - } - for i, c := range testCases { - client, err := newPrometheusHTTPClient(c.inputHost) - if err != nil { - panic(err) - } - actual := client.urlJoin(c.inputPath) - if actual != c.expected { - t.Errorf("Error on case %d: %v(actual) != %v(expected)", i, actual, c.expected) - } - t.Logf("Case %d: %v(actual) == %v(expected)", i, actual, c.expected) - } -} diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 6033219933..51d5a6ca62 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -14,6 +14,7 @@ package main import ( + "bytes" "context" "encoding/json" "fmt" @@ -25,13 +26,15 @@ import ( "strings" "time" - "gopkg.in/alecthomas/kingpin.v2" - + "github.com/google/pprof/profile" + "github.com/pkg/errors" "github.com/prometheus/client_golang/api" - "github.com/prometheus/client_golang/api/prometheus/v1" + v1 "github.com/prometheus/client_golang/api/prometheus/v1" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/common/version" + kingpin "gopkg.in/alecthomas/kingpin.v2" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/pkg/rulefmt" "github.com/prometheus/prometheus/util/promlint" @@ -199,10 +202,10 @@ func checkConfig(filename string) ([]string, error) { // If an explicit file was given, error if it is not accessible. if !strings.Contains(rf, "*") { if len(rfs) == 0 { - return nil, fmt.Errorf("%q does not point to an existing file", rf) + return nil, errors.Errorf("%q does not point to an existing file", rf) } if err := checkFileExists(rfs[0]); err != nil { - return nil, fmt.Errorf("error checking rule file %q: %s", rfs[0], err) + return nil, errors.Wrapf(err, "error checking rule file %q", rfs[0]) } } ruleFiles = append(ruleFiles, rfs...) 
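[Note, not part of the patch: the promtool hunks above, like the cmd/prometheus ones earlier, replace fmt.Errorf with the github.com/pkg/errors helpers. A minimal standalone sketch of what that buys; the file name is made up, and the helper only mirrors the checkFileExists-style checks in checkConfig. Wrapping keeps the original error reachable as a cause instead of flattening it into a string:]

    package main

    import (
    	"fmt"
    	"os"

    	"github.com/pkg/errors"
    )

    // checkFile annotates the underlying os error with context,
    // in the same style as the "error checking rule file %q" message in the patch.
    func checkFile(path string) error {
    	if _, err := os.Stat(path); err != nil {
    		return errors.Wrapf(err, "error checking rule file %q", path)
    	}
    	return nil
    }

    func main() {
    	err := checkFile("no-such-rules.yml")
    	fmt.Println(err)               // error checking rule file "no-such-rules.yml": stat no-such-rules.yml: ...
    	fmt.Println(errors.Cause(err)) // the original *os.PathError is still available to callers
    }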
@@ -210,7 +213,7 @@ func checkConfig(filename string) ([]string, error) { for _, scfg := range cfg.ScrapeConfigs { if err := checkFileExists(scfg.HTTPClientConfig.BearerTokenFile); err != nil { - return nil, fmt.Errorf("error checking bearer token file %q: %s", scfg.HTTPClientConfig.BearerTokenFile, err) + return nil, errors.Wrapf(err, "error checking bearer token file %q", scfg.HTTPClientConfig.BearerTokenFile) } if err := checkTLSConfig(scfg.HTTPClientConfig.TLSConfig); err != nil { @@ -218,7 +221,7 @@ func checkConfig(filename string) ([]string, error) { } for _, kd := range scfg.ServiceDiscoveryConfig.KubernetesSDConfigs { - if err := checkTLSConfig(kd.TLSConfig); err != nil { + if err := checkTLSConfig(kd.HTTPClientConfig.TLSConfig); err != nil { return nil, err } } @@ -244,17 +247,17 @@ func checkConfig(filename string) ([]string, error) { func checkTLSConfig(tlsConfig config_util.TLSConfig) error { if err := checkFileExists(tlsConfig.CertFile); err != nil { - return fmt.Errorf("error checking client cert file %q: %s", tlsConfig.CertFile, err) + return errors.Wrapf(err, "error checking client cert file %q", tlsConfig.CertFile) } if err := checkFileExists(tlsConfig.KeyFile); err != nil { - return fmt.Errorf("error checking client key file %q: %s", tlsConfig.KeyFile, err) + return errors.Wrapf(err, "error checking client key file %q", tlsConfig.KeyFile) } if len(tlsConfig.CertFile) > 0 && len(tlsConfig.KeyFile) == 0 { - return fmt.Errorf("client cert file %q specified without client key file", tlsConfig.CertFile) + return errors.Errorf("client cert file %q specified without client key file", tlsConfig.CertFile) } if len(tlsConfig.KeyFile) > 0 && len(tlsConfig.CertFile) == 0 { - return fmt.Errorf("client key file %q specified without client cert file", tlsConfig.KeyFile) + return errors.Errorf("client key file %q specified without client cert file", tlsConfig.KeyFile) } return nil @@ -507,64 +510,88 @@ func parseTime(s string) (time.Time, error) { if t, err := time.Parse(time.RFC3339Nano, s); err == nil { return t, nil } - return time.Time{}, fmt.Errorf("cannot parse %q to a valid timestamp", s) + return time.Time{}, errors.Errorf("cannot parse %q to a valid timestamp", s) } -func debugPprof(url string) int { - w, err := newDebugWriter(debugWriterConfig{ - serverURL: url, - tarballName: "debug.tar.gz", - pathToFileName: map[string]string{ - "/debug/pprof/block": "block.pb", - "/debug/pprof/goroutine": "goroutine.pb", - "/debug/pprof/heap": "heap.pb", - "/debug/pprof/mutex": "mutex.pb", - "/debug/pprof/threadcreate": "threadcreate.pb", +type endpointsGroup struct { + urlToFilename map[string]string + postProcess func(b []byte) ([]byte, error) +} + +var ( + pprofEndpoints = []endpointsGroup{ + { + urlToFilename: map[string]string{ + "/debug/pprof/profile?seconds=30": "cpu.pb", + "/debug/pprof/block": "block.pb", + "/debug/pprof/goroutine": "goroutine.pb", + "/debug/pprof/heap": "heap.pb", + "/debug/pprof/mutex": "mutex.pb", + "/debug/pprof/threadcreate": "threadcreate.pb", + }, + postProcess: func(b []byte) ([]byte, error) { + p, err := profile.Parse(bytes.NewReader(b)) + if err != nil { + return nil, err + } + var buf bytes.Buffer + if err := p.WriteUncompressed(&buf); err != nil { + return nil, errors.Wrap(err, "writing the profile to the buffer") + } + + return buf.Bytes(), nil + }, }, - postProcess: pprofPostProcess, - }) - if err != nil { - fmt.Fprintln(os.Stderr, "error creating debug writer:", err) + { + urlToFilename: map[string]string{ + "/debug/pprof/trace?seconds=30": 
"trace.pb", + }, + }, + } + metricsEndpoints = []endpointsGroup{ + { + urlToFilename: map[string]string{ + "/metrics": "metrics.txt", + }, + }, + } + allEndpoints = append(pprofEndpoints, metricsEndpoints...) +) + +func debugPprof(url string) int { + if err := debugWrite(debugWriterConfig{ + serverURL: url, + tarballName: "debug.tar.gz", + endPointGroups: pprofEndpoints, + }); err != nil { + fmt.Fprintln(os.Stderr, "error completing debug command:", err) return 1 } - return w.Write() + return 0 } func debugMetrics(url string) int { - w, err := newDebugWriter(debugWriterConfig{ - serverURL: url, - tarballName: "debug.tar.gz", - pathToFileName: map[string]string{ - "/metrics": "metrics.txt", - }, - postProcess: metricsPostProcess, - }) - if err != nil { - fmt.Fprintln(os.Stderr, "error creating debug writer:", err) + if err := debugWrite(debugWriterConfig{ + serverURL: url, + tarballName: "debug.tar.gz", + endPointGroups: metricsEndpoints, + }); err != nil { + fmt.Fprintln(os.Stderr, "error completing debug command:", err) return 1 } - return w.Write() + return 0 } func debugAll(url string) int { - w, err := newDebugWriter(debugWriterConfig{ - serverURL: url, - tarballName: "debug.tar.gz", - pathToFileName: map[string]string{ - "/debug/pprof/block": "block.pb", - "/debug/pprof/goroutine": "goroutine.pb", - "/debug/pprof/heap": "heap.pb", - "/debug/pprof/mutex": "mutex.pb", - "/debug/pprof/threadcreate": "threadcreate.pb", - "/metrics": "metrics.txt", - }, - postProcess: allPostProcess, - }) - if err != nil { - fmt.Fprintln(os.Stderr, "error creating debug writer:", err) + if err := debugWrite(debugWriterConfig{ + serverURL: url, + tarballName: "debug.tar.gz", + endPointGroups: allEndpoints, + }); err != nil { + fmt.Fprintln(os.Stderr, "error completing debug command:", err) return 1 } - return w.Write() + return 0 } type printer interface { @@ -583,7 +610,7 @@ func (p *promqlPrinter) printSeries(val []model.LabelSet) { fmt.Println(v) } } -func (j *promqlPrinter) printLabelValues(val model.LabelValues) { +func (p *promqlPrinter) printLabelValues(val model.LabelValues) { for _, v := range val { fmt.Println(v) } @@ -592,11 +619,14 @@ func (j *promqlPrinter) printLabelValues(val model.LabelValues) { type jsonPrinter struct{} func (j *jsonPrinter) printValue(v model.Value) { + //nolint:errcheck json.NewEncoder(os.Stdout).Encode(v) } func (j *jsonPrinter) printSeries(v []model.LabelSet) { + //nolint:errcheck json.NewEncoder(os.Stdout).Encode(v) } func (j *jsonPrinter) printLabelValues(v model.LabelValues) { + //nolint:errcheck json.NewEncoder(os.Stdout).Encode(v) } diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index 8824fba7be..84ff006db4 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -17,26 +17,27 @@ import ( "fmt" "net/http" "net/http/httptest" - "net/url" "testing" "time" ) func TestQueryRange(t *testing.T) { - s, getURL := mockServer(200, `{"status": "success", "data": {"resultType": "matrix", "result": []}}`) + s, getRequest := mockServer(200, `{"status": "success", "data": {"resultType": "matrix", "result": []}}`) defer s.Close() p := &promqlPrinter{} exitCode := QueryRange(s.URL, "up", "0", "300", 0, p) expectedPath := "/api/v1/query_range" - if getURL().Path != expectedPath { - t.Errorf("unexpected URL path %s (wanted %s)", getURL().Path, expectedPath) + gotPath := getRequest().URL.Path + if gotPath != expectedPath { + t.Errorf("unexpected URL path %s (wanted %s)", gotPath, expectedPath) } - actual := getURL().Query().Get("query") + form := 
getRequest().Form + actual := form.Get("query") if actual != "up" { t.Errorf("unexpected value %s for query", actual) } - actual = getURL().Query().Get("step") + actual = form.Get("step") if actual != "1.000" { t.Errorf("unexpected value %s for step", actual) } @@ -45,14 +46,16 @@ func TestQueryRange(t *testing.T) { } exitCode = QueryRange(s.URL, "up", "0", "300", 10*time.Millisecond, p) - if getURL().Path != expectedPath { - t.Errorf("unexpected URL path %s (wanted %s)", getURL().Path, expectedPath) + gotPath = getRequest().URL.Path + if gotPath != expectedPath { + t.Errorf("unexpected URL path %s (wanted %s)", gotPath, expectedPath) } - actual = getURL().Query().Get("query") + form = getRequest().Form + actual = form.Get("query") if actual != "up" { t.Errorf("unexpected value %s for query", actual) } - actual = getURL().Query().Get("step") + actual = form.Get("step") if actual != "0.010" { t.Errorf("unexpected value %s for step", actual) } @@ -61,16 +64,17 @@ func TestQueryRange(t *testing.T) { } } -func mockServer(code int, body string) (*httptest.Server, func() *url.URL) { - var u *url.URL +func mockServer(code int, body string) (*httptest.Server, func() *http.Request) { + var req *http.Request server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - u = r.URL + r.ParseForm() + req = r w.WriteHeader(code) fmt.Fprintln(w, body) })) - f := func() *url.URL { - return u + f := func() *http.Request { + return req } return server, f } diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index eeb2358021..8b19d80394 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -18,13 +18,16 @@ import ( "fmt" "io/ioutil" "os" + "path/filepath" "reflect" "sort" "strconv" "strings" "time" - "gopkg.in/yaml.v2" + "github.com/go-kit/kit/log" + "github.com/pkg/errors" + yaml "gopkg.in/yaml.v2" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/promql" @@ -67,6 +70,9 @@ func ruleUnitTest(filename string) []error { if err := yaml.UnmarshalStrict(b, &unitTestInp); err != nil { return []error{err} } + if err := resolveAndGlobFilepaths(filepath.Dir(filename), &unitTestInp); err != nil { + return []error{err} + } if unitTestInp.EvaluationInterval == 0 { unitTestInp.EvaluationInterval = 1 * time.Minute @@ -84,7 +90,7 @@ func ruleUnitTest(filename string) []error { groupOrderMap := make(map[string]int) for i, gn := range unitTestInp.GroupEvalOrder { if _, ok := groupOrderMap[gn]; ok { - return []error{fmt.Errorf("Group name repeated in evaluation order: %s", gn)} + return []error{errors.Errorf("group name repeated in evaluation order: %s", gn)} } groupOrderMap[gn] = i } @@ -124,6 +130,27 @@ func (utf *unitTestFile) maxEvalTime() time.Duration { return maxd } +// resolveAndGlobFilepaths joins all relative paths in a configuration +// with a given base directory and replaces all globs with matching files. +func resolveAndGlobFilepaths(baseDir string, utf *unitTestFile) error { + for i, rf := range utf.RuleFiles { + if rf != "" && !filepath.IsAbs(rf) { + utf.RuleFiles[i] = filepath.Join(baseDir, rf) + } + } + + var globbedFiles []string + for _, rf := range utf.RuleFiles { + m, err := filepath.Glob(rf) + if err != nil { + return err + } + globbedFiles = append(globbedFiles, m...) + } + utf.RuleFiles = globbedFiles + return nil +} + // testGroup is a group of input series and tests associated with it. 
type testGroup struct { Interval time.Duration `yaml:"interval"` @@ -135,27 +162,23 @@ type testGroup struct { // test performs the unit tests. func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, groupOrderMap map[string]int, ruleFiles ...string) []error { // Setup testing suite. - suite, err := promql.NewTest(nil, tg.seriesLoadingString()) + suite, err := promql.NewLazyLoader(nil, tg.seriesLoadingString()) if err != nil { return []error{err} } defer suite.Close() - err = suite.Run() - if err != nil { - return []error{err} - } - // Load the rule files. opts := &rules.ManagerOptions{ QueryFunc: rules.EngineQueryFunc(suite.QueryEngine(), suite.Storage()), Appendable: suite.Storage(), Context: context.Background(), NotifyFunc: func(ctx context.Context, expr string, alerts ...*rules.Alert) {}, - Logger: &dummyLogger{}, + Logger: log.NewNopLogger(), } m := rules.NewManager(opts) - groupsMap, ers := m.LoadGroups(tg.Interval, ruleFiles...) + // TODO(beorn7): Provide a way to pass in external labels. + groupsMap, ers := m.LoadGroups(tg.Interval, nil, ruleFiles...) if ers != nil { return ers } @@ -165,14 +188,14 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou // All this preparation is so that we can test alerts as we evaluate the rules. // This avoids storing them in memory, as the number of evals might be high. - // All the `eval_time` for which we have unit tests. - var alertEvalTimes []time.Duration + // All the `eval_time` for which we have unit tests for alerts. + alertEvalTimesMap := map[time.Duration]struct{}{} // Map of all the eval_time+alertname combination present in the unit tests. alertsInTest := make(map[time.Duration]map[string]struct{}) // Map of all the unit tests for given eval_time. alertTests := make(map[time.Duration][]alertTestCase) for _, alert := range tg.AlertRuleTests { - alertEvalTimes = append(alertEvalTimes, alert.EvalTime) + alertEvalTimesMap[alert.EvalTime] = struct{}{} if _, ok := alertsInTest[alert.EvalTime]; !ok { alertsInTest[alert.EvalTime] = make(map[string]struct{}) @@ -181,6 +204,10 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou alertTests[alert.EvalTime] = append(alertTests[alert.EvalTime], alert) } + alertEvalTimes := make([]time.Duration, 0, len(alertEvalTimesMap)) + for k := range alertEvalTimesMap { + alertEvalTimes = append(alertEvalTimes, k) + } sort.Slice(alertEvalTimes, func(i, j int) bool { return alertEvalTimes[i] < alertEvalTimes[j] }) @@ -191,8 +218,23 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou var errs []error for ts := mint; ts.Before(maxt); ts = ts.Add(evalInterval) { // Collects the alerts asked for unit testing. 
- for _, g := range groups { - g.Eval(suite.Context(), ts) + suite.WithSamplesTill(ts, func(err error) { + if err != nil { + errs = append(errs, err) + return + } + for _, g := range groups { + g.Eval(suite.Context(), ts) + for _, r := range g.Rules() { + if r.LastError() != nil { + errs = append(errs, errors.Errorf(" rule: %s, time: %s, err: %v", + r.Name(), ts.Sub(time.Unix(0, 0)), r.LastError())) + } + } + } + }) + if len(errs) > 0 { + return errs } for { @@ -253,14 +295,14 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou } if gotAlerts.Len() != expAlerts.Len() { - errs = append(errs, fmt.Errorf(" alertname:%s, time:%s, \n exp:%#v, \n got:%#v", + errs = append(errs, errors.Errorf(" alertname:%s, time:%s, \n exp:%#v, \n got:%#v", testcase.Alertname, testcase.EvalTime.String(), expAlerts.String(), gotAlerts.String())) } else { sort.Sort(gotAlerts) sort.Sort(expAlerts) if !reflect.DeepEqual(expAlerts, gotAlerts) { - errs = append(errs, fmt.Errorf(" alertname:%s, time:%s, \n exp:%#v, \n got:%#v", + errs = append(errs, errors.Errorf(" alertname:%s, time:%s, \n exp:%#v, \n got:%#v", testcase.Alertname, testcase.EvalTime.String(), expAlerts.String(), gotAlerts.String())) } } @@ -276,7 +318,7 @@ Outer: got, err := query(suite.Context(), testCase.Expr, mint.Add(testCase.EvalTime), suite.QueryEngine(), suite.Queryable()) if err != nil { - errs = append(errs, fmt.Errorf(" expr:'%s', time:%s, err:%s", testCase.Expr, + errs = append(errs, errors.Errorf(" expr:'%s', time:%s, err:%s", testCase.Expr, testCase.EvalTime.String(), err.Error())) continue } @@ -293,7 +335,7 @@ Outer: for _, s := range testCase.ExpSamples { lb, err := promql.ParseMetric(s.Labels) if err != nil { - errs = append(errs, fmt.Errorf(" expr:'%s', time:%s, err:%s", testCase.Expr, + errs = append(errs, errors.Errorf(" expr:'%s', time:%s, err:%s", testCase.Expr, testCase.EvalTime.String(), err.Error())) continue Outer } @@ -303,8 +345,14 @@ Outer: }) } + sort.Slice(expSamples, func(i, j int) bool { + return labels.Compare(expSamples[i].Labels, expSamples[j].Labels) <= 0 + }) + sort.Slice(gotSamples, func(i, j int) bool { + return labels.Compare(gotSamples[i].Labels, gotSamples[j].Labels) <= 0 + }) if !reflect.DeepEqual(expSamples, gotSamples) { - errs = append(errs, fmt.Errorf(" expr:'%s', time:%s, \n exp:%#v, \n got:%#v", testCase.Expr, + errs = append(errs, errors.Errorf(" expr:'%s', time:%s, \n exp:%#v, \n got:%#v", testCase.Expr, testCase.EvalTime.String(), parsedSamplesString(expSamples), parsedSamplesString(gotSamples))) } } @@ -383,7 +431,7 @@ func query(ctx context.Context, qs string, t time.Time, engine *promql.Engine, q Metric: labels.Labels{}, }}, nil default: - return nil, fmt.Errorf("rule result is not a vector or scalar") + return nil, errors.New("rule result is not a vector or scalar") } } @@ -468,9 +516,3 @@ func parsedSamplesString(pss []parsedSample) string { func (ps *parsedSample) String() string { return ps.Labels.String() + " " + strconv.FormatFloat(ps.Value, 'E', -1, 64) } - -type dummyLogger struct{} - -func (l *dummyLogger) Log(keyvals ...interface{}) error { - return nil -} diff --git a/config/config.go b/config/config.go index 0881d9b298..1ddcfdd8a0 100644 --- a/config/config.go +++ b/config/config.go @@ -22,15 +22,18 @@ import ( "strings" "time" + "github.com/pkg/errors" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" + yaml "gopkg.in/yaml.v2" + sd_config "github.com/prometheus/prometheus/discovery/config" - "gopkg.in/yaml.v2" + 
"github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/pkg/relabel" ) var ( - patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`) - relabelTarget = regexp.MustCompile(`^(?:(?:[a-zA-Z_]|\$(?:\{\w+\}|\w+))+\w*)+$`) + patRulePath = regexp.MustCompile(`^[^*]*(\*[^/]*)?$`) ) // Load parses the YAML input s into a Config. @@ -57,7 +60,7 @@ func LoadFile(filename string) (*Config, error) { } cfg, err := Load(string(content)) if err != nil { - return nil, fmt.Errorf("parsing YAML file %s: %v", filename, err) + return nil, errors.Wrapf(err, "parsing YAML file %s", filename) } resolveFilepaths(filepath.Dir(filename), cfg) return cfg, nil @@ -81,9 +84,10 @@ var ( DefaultScrapeConfig = ScrapeConfig{ // ScrapeTimeout and ScrapeInterval default to the // configured globals. - MetricsPath: "/metrics", - Scheme: "http", - HonorLabels: false, + MetricsPath: "/metrics", + Scheme: "http", + HonorLabels: false, + HonorTimestamps: true, } // DefaultAlertmanagerConfig is the default alertmanager configuration. @@ -92,14 +96,6 @@ var ( Timeout: model.Duration(10 * time.Second), } - // DefaultRelabelConfig is the default Relabel configuration. - DefaultRelabelConfig = RelabelConfig{ - Action: RelabelReplace, - Separator: ";", - Regex: MustNewRegexp("(.*)"), - Replacement: "$1", - } - // DefaultRemoteWriteConfig is the default remote write configuration. DefaultRemoteWriteConfig = RemoteWriteConfig{ RemoteTimeout: model.Duration(30 * time.Second), @@ -111,15 +107,16 @@ var ( // With a maximum of 1000 shards, assuming an average of 100ms remote write // time and 100 samples per batch, we will be able to push 1M samples/s. MaxShards: 1000, + MinShards: 1, MaxSamplesPerSend: 100, - // By default, buffer 100 batches, which at 100ms per batch is 10s. At - // 1000 shards, this will buffer 10M samples total. - Capacity: 100 * 100, + // Each shard will have a max of 10 samples pending in it's channel, plus the pending + // samples that have been enqueued. Theoretically we should only ever have about 110 samples + // per shard pending. At 1000 shards that's 110k. + Capacity: 10, BatchSendDeadline: model.Duration(5 * time.Second), - // Max number of times to retry a batch on recoverable errors. - MaxRetries: 3, + // Backoff times for retrying a batch of samples on recoverable errors. 
MinBackoff: model.Duration(30 * time.Millisecond), MaxBackoff: model.Duration(100 * time.Millisecond), } @@ -158,30 +155,34 @@ func resolveFilepaths(baseDir string, cfg *Config) { cfg.RuleFiles[i] = join(rf) } + tlsPaths := func(cfg *config_util.TLSConfig) { + cfg.CAFile = join(cfg.CAFile) + cfg.CertFile = join(cfg.CertFile) + cfg.KeyFile = join(cfg.KeyFile) + } clientPaths := func(scfg *config_util.HTTPClientConfig) { + if scfg.BasicAuth != nil { + scfg.BasicAuth.PasswordFile = join(scfg.BasicAuth.PasswordFile) + } scfg.BearerTokenFile = join(scfg.BearerTokenFile) - scfg.TLSConfig.CAFile = join(scfg.TLSConfig.CAFile) - scfg.TLSConfig.CertFile = join(scfg.TLSConfig.CertFile) - scfg.TLSConfig.KeyFile = join(scfg.TLSConfig.KeyFile) + tlsPaths(&scfg.TLSConfig) } sdPaths := func(cfg *sd_config.ServiceDiscoveryConfig) { for _, kcfg := range cfg.KubernetesSDConfigs { - kcfg.BearerTokenFile = join(kcfg.BearerTokenFile) - kcfg.TLSConfig.CAFile = join(kcfg.TLSConfig.CAFile) - kcfg.TLSConfig.CertFile = join(kcfg.TLSConfig.CertFile) - kcfg.TLSConfig.KeyFile = join(kcfg.TLSConfig.KeyFile) + clientPaths(&kcfg.HTTPClientConfig) } for _, mcfg := range cfg.MarathonSDConfigs { mcfg.AuthTokenFile = join(mcfg.AuthTokenFile) - mcfg.HTTPClientConfig.BearerTokenFile = join(mcfg.HTTPClientConfig.BearerTokenFile) - mcfg.HTTPClientConfig.TLSConfig.CAFile = join(mcfg.HTTPClientConfig.TLSConfig.CAFile) - mcfg.HTTPClientConfig.TLSConfig.CertFile = join(mcfg.HTTPClientConfig.TLSConfig.CertFile) - mcfg.HTTPClientConfig.TLSConfig.KeyFile = join(mcfg.HTTPClientConfig.TLSConfig.KeyFile) + clientPaths(&mcfg.HTTPClientConfig) } for _, consulcfg := range cfg.ConsulSDConfigs { - consulcfg.TLSConfig.CAFile = join(consulcfg.TLSConfig.CAFile) - consulcfg.TLSConfig.CertFile = join(consulcfg.TLSConfig.CertFile) - consulcfg.TLSConfig.KeyFile = join(consulcfg.TLSConfig.KeyFile) + tlsPaths(&consulcfg.TLSConfig) + } + for _, cfg := range cfg.OpenstackSDConfigs { + tlsPaths(&cfg.TLSConfig) + } + for _, cfg := range cfg.TritonSDConfigs { + tlsPaths(&cfg.TLSConfig) } for _, filecfg := range cfg.FileSDConfigs { for i, fn := range filecfg.Files { @@ -198,6 +199,12 @@ func resolveFilepaths(baseDir string, cfg *Config) { clientPaths(&cfg.HTTPClientConfig) sdPaths(&cfg.ServiceDiscoveryConfig) } + for _, cfg := range cfg.RemoteReadConfigs { + clientPaths(&cfg.HTTPClientConfig) + } + for _, cfg := range cfg.RemoteWriteConfigs { + clientPaths(&cfg.HTTPClientConfig) + } } func (c Config) String() string { @@ -227,19 +234,22 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error { for _, rf := range c.RuleFiles { if !patRulePath.MatchString(rf) { - return fmt.Errorf("invalid rule file path %q", rf) + return errors.Errorf("invalid rule file path %q", rf) } } // Do global overrides and validate unique names. jobNames := map[string]struct{}{} for _, scfg := range c.ScrapeConfigs { + if scfg == nil { + return errors.New("empty or null scrape config section") + } // First set the correct scrape interval, then check that the timeout // (inferred or explicit) is not greater than that. 
if scfg.ScrapeInterval == 0 { scfg.ScrapeInterval = c.GlobalConfig.ScrapeInterval } if scfg.ScrapeTimeout > scfg.ScrapeInterval { - return fmt.Errorf("scrape timeout greater than scrape interval for scrape config with job name %q", scfg.JobName) + return errors.Errorf("scrape timeout greater than scrape interval for scrape config with job name %q", scfg.JobName) } if scfg.ScrapeTimeout == 0 { if c.GlobalConfig.ScrapeTimeout > scfg.ScrapeInterval { @@ -250,10 +260,20 @@ func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error { } if _, ok := jobNames[scfg.JobName]; ok { - return fmt.Errorf("found multiple scrape configs with job name %q", scfg.JobName) + return errors.Errorf("found multiple scrape configs with job name %q", scfg.JobName) } jobNames[scfg.JobName] = struct{}{} } + for _, rwcfg := range c.RemoteWriteConfigs { + if rwcfg == nil { + return errors.New("empty or null remote write config section") + } + } + for _, rrcfg := range c.RemoteReadConfigs { + if rrcfg == nil { + return errors.New("empty or null remote read config section") + } + } return nil } @@ -267,7 +287,7 @@ type GlobalConfig struct { // How frequently to evaluate rules by default. EvaluationInterval model.Duration `yaml:"evaluation_interval,omitempty"` // The labels to add to any timeseries that this Prometheus instance scrapes. - ExternalLabels model.LabelSet `yaml:"external_labels,omitempty"` + ExternalLabels labels.Labels `yaml:"external_labels,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -280,13 +300,22 @@ func (c *GlobalConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { return err } + for _, l := range gc.ExternalLabels { + if !model.LabelName(l.Name).IsValid() { + return errors.Errorf("%q is not a valid label name", l.Name) + } + if !model.LabelValue(l.Value).IsValid() { + return errors.Errorf("%q is not a valid label value", l.Value) + } + } + // First set the correct scrape interval, then check that the timeout // (inferred or explicit) is not greater than that. if gc.ScrapeInterval == 0 { gc.ScrapeInterval = DefaultGlobalConfig.ScrapeInterval } if gc.ScrapeTimeout > gc.ScrapeInterval { - return fmt.Errorf("global scrape timeout greater than scrape interval") + return errors.New("global scrape timeout greater than scrape interval") } if gc.ScrapeTimeout == 0 { if DefaultGlobalConfig.ScrapeTimeout > gc.ScrapeInterval { @@ -316,6 +345,8 @@ type ScrapeConfig struct { JobName string `yaml:"job_name"` // Indicator whether the scraped metrics should remain unmodified. HonorLabels bool `yaml:"honor_labels,omitempty"` + // Indicator whether the scraped timestamps should be respected. + HonorTimestamps bool `yaml:"honor_timestamps"` // A set of query parameters with which the target is scraped. Params url.Values `yaml:"params,omitempty"` // How frequently to scrape the targets of this scrape config. @@ -336,9 +367,9 @@ type ScrapeConfig struct { HTTPClientConfig config_util.HTTPClientConfig `yaml:",inline"` // List of target relabel configurations. - RelabelConfigs []*RelabelConfig `yaml:"relabel_configs,omitempty"` + RelabelConfigs []*relabel.Config `yaml:"relabel_configs,omitempty"` // List of metric relabel configurations. - MetricRelabelConfigs []*RelabelConfig `yaml:"metric_relabel_configs,omitempty"` + MetricRelabelConfigs []*relabel.Config `yaml:"metric_relabel_configs,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. 
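[Note, not part of the patch: the ScrapeConfig hunks above introduce the honor_timestamps field and, per the DefaultScrapeConfig change earlier in this file, default it to true. A tiny illustrative sketch of the resulting defaults, assuming only the exported names visible in this diff:]

    package main

    import (
    	"fmt"

    	"github.com/prometheus/prometheus/config"
    )

    func main() {
    	def := config.DefaultScrapeConfig
    	// HonorTimestamps now defaults to true, so target-supplied timestamps are kept
    	// unless a job sets honor_timestamps: false; HonorLabels keeps its old default.
    	fmt.Printf("honor_labels=%v honor_timestamps=%v\n", def.HonorLabels, def.HonorTimestamps)
    	// expected: honor_labels=false honor_timestamps=true
    }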
@@ -350,7 +381,7 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { return err } if len(c.JobName) == 0 { - return fmt.Errorf("job_name is empty") + return errors.New("job_name is empty") } // The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer. @@ -360,6 +391,13 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { return err } + // The UnmarshalYAML method of ServiceDiscoveryConfig is not being called because it's not a pointer. + // We cannot make it a pointer as the parser panics for inlined pointer structs. + // Thus we just do its validation here. + if err := c.ServiceDiscoveryConfig.Validate(); err != nil { + return err + } + // Check for users putting URLs in target groups. if len(c.RelabelConfigs) == 0 { for _, tg := range c.ServiceDiscoveryConfig.StaticConfigs { @@ -371,6 +409,17 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { } } + for _, rlcfg := range c.RelabelConfigs { + if rlcfg == nil { + return errors.New("empty or null target relabeling rule in scrape config") + } + } + for _, rlcfg := range c.MetricRelabelConfigs { + if rlcfg == nil { + return errors.New("empty or null metric relabeling rule in scrape config") + } + } + // Add index to the static config target groups for unique identification // within scrape pool. for i, tg := range c.ServiceDiscoveryConfig.StaticConfigs { @@ -382,7 +431,7 @@ func (c *ScrapeConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { // AlertingConfig configures alerting and alertmanager related configs. type AlertingConfig struct { - AlertRelabelConfigs []*RelabelConfig `yaml:"alert_relabel_configs,omitempty"` + AlertRelabelConfigs []*relabel.Config `yaml:"alert_relabel_configs,omitempty"` AlertmanagerConfigs []*AlertmanagerConfig `yaml:"alertmanagers,omitempty"` } @@ -392,7 +441,16 @@ func (c *AlertingConfig) UnmarshalYAML(unmarshal func(interface{}) error) error // by the default due to the YAML parser behavior for empty blocks. *c = AlertingConfig{} type plain AlertingConfig - return unmarshal((*plain)(c)) + if err := unmarshal((*plain)(c)); err != nil { + return err + } + + for _, rlcfg := range c.AlertRelabelConfigs { + if rlcfg == nil { + return errors.New("empty or null alert relabeling rule") + } + } + return nil } // AlertmanagerConfig configures how Alertmanagers can be discovered and communicated with. @@ -411,7 +469,7 @@ type AlertmanagerConfig struct { Timeout model.Duration `yaml:"timeout,omitempty"` // List of Alertmanager relabel configurations. - RelabelConfigs []*RelabelConfig `yaml:"relabel_configs,omitempty"` + RelabelConfigs []*relabel.Config `yaml:"relabel_configs,omitempty"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. @@ -429,6 +487,13 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er return err } + // The UnmarshalYAML method of ServiceDiscoveryConfig is not being called because it's not a pointer. + // We cannot make it a pointer as the parser panics for inlined pointer structs. + // Thus we just do its validation here. + if err := c.ServiceDiscoveryConfig.Validate(); err != nil { + return err + } + // Check for users putting URLs in target groups. 
if len(c.RelabelConfigs) == 0 { for _, tg := range c.ServiceDiscoveryConfig.StaticConfigs { @@ -440,6 +505,12 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er } } + for _, rlcfg := range c.RelabelConfigs { + if rlcfg == nil { + return errors.New("empty or null Alertmanager target relabeling rule") + } + } + // Add index to the static config target groups for unique identification // within scrape pool. for i, tg := range c.ServiceDiscoveryConfig.StaticConfigs { @@ -453,7 +524,7 @@ func (c *AlertmanagerConfig) UnmarshalYAML(unmarshal func(interface{}) error) er func CheckTargetAddress(address model.LabelValue) error { // For now check for a URL, we may want to expand this later. if strings.Contains(string(address), "/") { - return fmt.Errorf("%q is not a valid hostname", address) + return errors.Errorf("%q is not a valid hostname", address) } return nil } @@ -470,151 +541,11 @@ type FileSDConfig struct { RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"` } -// RelabelAction is the action to be performed on relabeling. -type RelabelAction string - -const ( - // RelabelReplace performs a regex replacement. - RelabelReplace RelabelAction = "replace" - // RelabelKeep drops targets for which the input does not match the regex. - RelabelKeep RelabelAction = "keep" - // RelabelDrop drops targets for which the input does match the regex. - RelabelDrop RelabelAction = "drop" - // RelabelHashMod sets a label to the modulus of a hash of labels. - RelabelHashMod RelabelAction = "hashmod" - // RelabelLabelMap copies labels to other labelnames based on a regex. - RelabelLabelMap RelabelAction = "labelmap" - // RelabelLabelDrop drops any label matching the regex. - RelabelLabelDrop RelabelAction = "labeldrop" - // RelabelLabelKeep drops any label not matching the regex. - RelabelLabelKeep RelabelAction = "labelkeep" -) - -// UnmarshalYAML implements the yaml.Unmarshaler interface. -func (a *RelabelAction) UnmarshalYAML(unmarshal func(interface{}) error) error { - var s string - if err := unmarshal(&s); err != nil { - return err - } - switch act := RelabelAction(strings.ToLower(s)); act { - case RelabelReplace, RelabelKeep, RelabelDrop, RelabelHashMod, RelabelLabelMap, RelabelLabelDrop, RelabelLabelKeep: - *a = act - return nil - } - return fmt.Errorf("unknown relabel action %q", s) -} - -// RelabelConfig is the configuration for relabeling of target label sets. -type RelabelConfig struct { - // A list of labels from which values are taken and concatenated - // with the configured separator in order. - SourceLabels model.LabelNames `yaml:"source_labels,flow,omitempty"` - // Separator is the string between concatenated values from the source labels. - Separator string `yaml:"separator,omitempty"` - // Regex against which the concatenation is matched. - Regex Regexp `yaml:"regex,omitempty"` - // Modulus to take of the hash of concatenated values from the source labels. - Modulus uint64 `yaml:"modulus,omitempty"` - // TargetLabel is the label to which the resulting string is written in a replacement. - // Regexp interpolation is allowed for the replace action. - TargetLabel string `yaml:"target_label,omitempty"` - // Replacement is the regex replacement pattern to be used. - Replacement string `yaml:"replacement,omitempty"` - // Action is the action to be performed for the relabeling. - Action RelabelAction `yaml:"action,omitempty"` -} - -// UnmarshalYAML implements the yaml.Unmarshaler interface. 
-func (c *RelabelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { - *c = DefaultRelabelConfig - type plain RelabelConfig - if err := unmarshal((*plain)(c)); err != nil { - return err - } - if c.Regex.Regexp == nil { - c.Regex = MustNewRegexp("") - } - if c.Modulus == 0 && c.Action == RelabelHashMod { - return fmt.Errorf("relabel configuration for hashmod requires non-zero modulus") - } - if (c.Action == RelabelReplace || c.Action == RelabelHashMod) && c.TargetLabel == "" { - return fmt.Errorf("relabel configuration for %s action requires 'target_label' value", c.Action) - } - if c.Action == RelabelReplace && !relabelTarget.MatchString(c.TargetLabel) { - return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action) - } - if c.Action == RelabelLabelMap && !relabelTarget.MatchString(c.Replacement) { - return fmt.Errorf("%q is invalid 'replacement' for %s action", c.Replacement, c.Action) - } - if c.Action == RelabelHashMod && !model.LabelName(c.TargetLabel).IsValid() { - return fmt.Errorf("%q is invalid 'target_label' for %s action", c.TargetLabel, c.Action) - } - - if c.Action == RelabelLabelDrop || c.Action == RelabelLabelKeep { - if c.SourceLabels != nil || - c.TargetLabel != DefaultRelabelConfig.TargetLabel || - c.Modulus != DefaultRelabelConfig.Modulus || - c.Separator != DefaultRelabelConfig.Separator || - c.Replacement != DefaultRelabelConfig.Replacement { - return fmt.Errorf("%s action requires only 'regex', and no other fields", c.Action) - } - } - - return nil -} - -// Regexp encapsulates a regexp.Regexp and makes it YAML marshallable. -type Regexp struct { - *regexp.Regexp - original string -} - -// NewRegexp creates a new anchored Regexp and returns an error if the -// passed-in regular expression does not compile. -func NewRegexp(s string) (Regexp, error) { - regex, err := regexp.Compile("^(?:" + s + ")$") - return Regexp{ - Regexp: regex, - original: s, - }, err -} - -// MustNewRegexp works like NewRegexp, but panics if the regular expression does not compile. -func MustNewRegexp(s string) Regexp { - re, err := NewRegexp(s) - if err != nil { - panic(err) - } - return re -} - -// UnmarshalYAML implements the yaml.Unmarshaler interface. -func (re *Regexp) UnmarshalYAML(unmarshal func(interface{}) error) error { - var s string - if err := unmarshal(&s); err != nil { - return err - } - r, err := NewRegexp(s) - if err != nil { - return err - } - *re = r - return nil -} - -// MarshalYAML implements the yaml.Marshaler interface. -func (re Regexp) MarshalYAML() (interface{}, error) { - if re.original != "" { - return re.original, nil - } - return nil, nil -} - // RemoteWriteConfig is the configuration for writing to remote storage. type RemoteWriteConfig struct { - URL *config_util.URL `yaml:"url"` - RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"` - WriteRelabelConfigs []*RelabelConfig `yaml:"write_relabel_configs,omitempty"` + URL *config_util.URL `yaml:"url"` + RemoteTimeout model.Duration `yaml:"remote_timeout,omitempty"` + WriteRelabelConfigs []*relabel.Config `yaml:"write_relabel_configs,omitempty"` // We cannot do proper Go type embedding below as the parser will then parse // values arbitrarily into the overflow maps of further-down types. 
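Note on the relabel move: the struct above now takes its rules from pkg/relabel instead of the config package's removed RelabelConfig type. The short Go sketch below is illustrative only — it builds and applies the same expensive.* drop rule asserted in config_test.go further down; the Prometheus package paths are the ones imported by this diff, while the main wrapper, the sample label set, and the assumption that pkg/relabel exposes a Process helper with the signature used elsewhere in the tree are mine, not part of the change.

package main

import (
	"fmt"

	"github.com/prometheus/common/model"

	"github.com/prometheus/prometheus/pkg/labels"
	"github.com/prometheus/prometheus/pkg/relabel"
)

func main() {
	// Drop any series whose metric name matches "expensive.*" before it is
	// sent to remote storage — the rule expectedConf encodes in the tests below.
	rule := &relabel.Config{
		SourceLabels: model.LabelNames{"__name__"},
		Separator:    ";",
		Regex:        relabel.MustNewRegexp("expensive.*"),
		Replacement:  "$1",
		Action:       relabel.Drop,
	}

	// A hypothetical series; Process returns nil when a drop rule matches.
	lset := labels.FromStrings("__name__", "expensive_metric_total", "job", "app")
	if out := relabel.Process(lset, rule); out == nil {
		fmt.Println("series dropped by write_relabel_configs")
	}
}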
@@ -630,7 +561,12 @@ func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) err return err } if c.URL == nil { - return fmt.Errorf("url for remote_write is empty") + return errors.New("url for remote_write is empty") + } + for _, rlcfg := range c.WriteRelabelConfigs { + if rlcfg == nil { + return errors.New("empty or null relabeling rule in remote write config") + } } // The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer. @@ -648,15 +584,15 @@ type QueueConfig struct { // Max number of shards, i.e. amount of concurrency. MaxShards int `yaml:"max_shards,omitempty"` + // Min number of shards, i.e. amount of concurrency. + MinShards int `yaml:"min_shards,omitempty"` + // Maximum number of samples per send. MaxSamplesPerSend int `yaml:"max_samples_per_send,omitempty"` // Maximum time sample will wait in buffer. BatchSendDeadline model.Duration `yaml:"batch_send_deadline,omitempty"` - // Max number of times to retry a batch on recoverable errors. - MaxRetries int `yaml:"max_retries,omitempty"` - // On recoverable errors, backoff exponentially. MinBackoff model.Duration `yaml:"min_backoff,omitempty"` MaxBackoff model.Duration `yaml:"max_backoff,omitempty"` @@ -684,7 +620,7 @@ func (c *RemoteReadConfig) UnmarshalYAML(unmarshal func(interface{}) error) erro return err } if c.URL == nil { - return fmt.Errorf("url for remote_read is empty") + return errors.New("url for remote_read is empty") } // The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer. // We cannot make it a pointer as the parser panics for inlined pointer structs. diff --git a/config/config_test.go b/config/config_test.go index bd992bedbd..9d35e9e533 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -23,7 +23,13 @@ import ( "testing" "time" + config_util "github.com/prometheus/common/config" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/assert" + "gopkg.in/yaml.v2" + "github.com/prometheus/prometheus/discovery/azure" + sd_config "github.com/prometheus/prometheus/discovery/config" "github.com/prometheus/prometheus/discovery/consul" "github.com/prometheus/prometheus/discovery/dns" "github.com/prometheus/prometheus/discovery/ec2" @@ -34,12 +40,9 @@ import ( "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/triton" "github.com/prometheus/prometheus/discovery/zookeeper" - - config_util "github.com/prometheus/common/config" - "github.com/prometheus/common/model" - sd_config "github.com/prometheus/prometheus/discovery/config" + "github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/pkg/relabel" "github.com/prometheus/prometheus/util/testutil" - "gopkg.in/yaml.v2" ) func mustParseURL(u string) *config_util.URL { @@ -56,9 +59,9 @@ var expectedConf = &Config{ ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, EvaluationInterval: model.Duration(30 * time.Second), - ExternalLabels: model.LabelSet{ - "monitor": "codelab", - "foo": "bar", + ExternalLabels: labels.Labels{ + {Name: "foo", Value: "bar"}, + {Name: "monitor", Value: "codelab"}, }, }, @@ -71,13 +74,13 @@ var expectedConf = &Config{ { URL: mustParseURL("http://remote1/push"), RemoteTimeout: model.Duration(30 * time.Second), - WriteRelabelConfigs: []*RelabelConfig{ + WriteRelabelConfigs: []*relabel.Config{ { SourceLabels: model.LabelNames{"__name__"}, Separator: ";", - Regex: MustNewRegexp("expensive.*"), + Regex: relabel.MustNewRegexp("expensive.*"), Replacement: "$1", - 
Action: RelabelDrop, + Action: relabel.Drop, }, }, QueueConfig: DefaultQueueConfig, @@ -86,6 +89,12 @@ var expectedConf = &Config{ URL: mustParseURL("http://remote2/push"), RemoteTimeout: model.Duration(30 * time.Second), QueueConfig: DefaultQueueConfig, + HTTPClientConfig: config_util.HTTPClientConfig{ + TLSConfig: config_util.TLSConfig{ + CertFile: filepath.FromSlash("testdata/valid_cert_file"), + KeyFile: filepath.FromSlash("testdata/valid_key_file"), + }, + }, }, }, @@ -100,6 +109,12 @@ var expectedConf = &Config{ RemoteTimeout: model.Duration(1 * time.Minute), ReadRecent: false, RequiredMatchers: model.LabelSet{"job": "special"}, + HTTPClientConfig: config_util.HTTPClientConfig{ + TLSConfig: config_util.TLSConfig{ + CertFile: filepath.FromSlash("testdata/valid_cert_file"), + KeyFile: filepath.FromSlash("testdata/valid_key_file"), + }, + }, }, }, @@ -107,9 +122,10 @@ var expectedConf = &Config{ { JobName: "prometheus", - HonorLabels: true, - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + HonorLabels: true, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -145,33 +161,33 @@ var expectedConf = &Config{ }, }, - RelabelConfigs: []*RelabelConfig{ + RelabelConfigs: []*relabel.Config{ { SourceLabels: model.LabelNames{"job", "__meta_dns_name"}, TargetLabel: "job", Separator: ";", - Regex: MustNewRegexp("(.*)some-[regex]"), + Regex: relabel.MustNewRegexp("(.*)some-[regex]"), Replacement: "foo-${1}", - Action: RelabelReplace, + Action: relabel.Replace, }, { SourceLabels: model.LabelNames{"abc"}, TargetLabel: "cde", Separator: ";", - Regex: DefaultRelabelConfig.Regex, - Replacement: DefaultRelabelConfig.Replacement, - Action: RelabelReplace, + Regex: relabel.DefaultRelabelConfig.Regex, + Replacement: relabel.DefaultRelabelConfig.Replacement, + Action: relabel.Replace, }, { TargetLabel: "abc", Separator: ";", - Regex: DefaultRelabelConfig.Regex, + Regex: relabel.DefaultRelabelConfig.Regex, Replacement: "static", - Action: RelabelReplace, + Action: relabel.Replace, }, { TargetLabel: "abc", Separator: ";", - Regex: MustNewRegexp(""), + Regex: relabel.MustNewRegexp(""), Replacement: "static", - Action: RelabelReplace, + Action: relabel.Replace, }, }, }, @@ -179,9 +195,10 @@ var expectedConf = &Config{ JobName: "service-x", - ScrapeInterval: model.Duration(50 * time.Second), - ScrapeTimeout: model.Duration(5 * time.Second), - SampleLimit: 1000, + HonorTimestamps: true, + ScrapeInterval: model.Duration(50 * time.Second), + ScrapeTimeout: model.Duration(5 * time.Second), + SampleLimit: 1000, HTTPClientConfig: config_util.HTTPClientConfig{ BasicAuth: &config_util.BasicAuth{ @@ -212,64 +229,65 @@ var expectedConf = &Config{ }, }, - RelabelConfigs: []*RelabelConfig{ + RelabelConfigs: []*relabel.Config{ { SourceLabels: model.LabelNames{"job"}, - Regex: MustNewRegexp("(.*)some-[regex]"), + Regex: relabel.MustNewRegexp("(.*)some-[regex]"), Separator: ";", - Replacement: DefaultRelabelConfig.Replacement, - Action: RelabelDrop, + Replacement: relabel.DefaultRelabelConfig.Replacement, + Action: relabel.Drop, }, { SourceLabels: model.LabelNames{"__address__"}, TargetLabel: "__tmp_hash", - Regex: DefaultRelabelConfig.Regex, - Replacement: DefaultRelabelConfig.Replacement, + Regex: relabel.DefaultRelabelConfig.Regex, + Replacement: relabel.DefaultRelabelConfig.Replacement, Modulus: 8, Separator: 
";", - Action: RelabelHashMod, + Action: relabel.HashMod, }, { SourceLabels: model.LabelNames{"__tmp_hash"}, - Regex: MustNewRegexp("1"), + Regex: relabel.MustNewRegexp("1"), Separator: ";", - Replacement: DefaultRelabelConfig.Replacement, - Action: RelabelKeep, + Replacement: relabel.DefaultRelabelConfig.Replacement, + Action: relabel.Keep, }, { - Regex: MustNewRegexp("1"), + Regex: relabel.MustNewRegexp("1"), Separator: ";", - Replacement: DefaultRelabelConfig.Replacement, - Action: RelabelLabelMap, + Replacement: relabel.DefaultRelabelConfig.Replacement, + Action: relabel.LabelMap, }, { - Regex: MustNewRegexp("d"), + Regex: relabel.MustNewRegexp("d"), Separator: ";", - Replacement: DefaultRelabelConfig.Replacement, - Action: RelabelLabelDrop, + Replacement: relabel.DefaultRelabelConfig.Replacement, + Action: relabel.LabelDrop, }, { - Regex: MustNewRegexp("k"), + Regex: relabel.MustNewRegexp("k"), Separator: ";", - Replacement: DefaultRelabelConfig.Replacement, - Action: RelabelLabelKeep, + Replacement: relabel.DefaultRelabelConfig.Replacement, + Action: relabel.LabelKeep, }, }, - MetricRelabelConfigs: []*RelabelConfig{ + MetricRelabelConfigs: []*relabel.Config{ { SourceLabels: model.LabelNames{"__name__"}, - Regex: MustNewRegexp("expensive_metric.*"), + Regex: relabel.MustNewRegexp("expensive_metric.*"), Separator: ";", - Replacement: DefaultRelabelConfig.Replacement, - Action: RelabelDrop, + Replacement: relabel.DefaultRelabelConfig.Replacement, + Action: relabel.Drop, }, }, }, { JobName: "service-y", - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -280,7 +298,7 @@ var expectedConf = &Config{ Server: "localhost:1234", Token: "mysecret", Services: []string{"nginx", "cache", "mysql"}, - ServiceTag: "canary", + ServiceTags: []string{"canary", "v1"}, NodeMeta: map[string]string{"rack": "123"}, TagSeparator: consul.DefaultSDConfig.TagSeparator, Scheme: "https", @@ -296,22 +314,23 @@ var expectedConf = &Config{ }, }, - RelabelConfigs: []*RelabelConfig{ + RelabelConfigs: []*relabel.Config{ { SourceLabels: model.LabelNames{"__meta_sd_consul_tags"}, - Regex: MustNewRegexp("label:([^=]+)=([^,]+)"), + Regex: relabel.MustNewRegexp("label:([^=]+)=([^,]+)"), Separator: ",", TargetLabel: "${1}", Replacement: "${2}", - Action: RelabelReplace, + Action: relabel.Replace, }, }, }, { JobName: "service-z", - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: model.Duration(10 * time.Second), + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: model.Duration(10 * time.Second), MetricsPath: "/metrics", Scheme: "http", @@ -328,8 +347,9 @@ var expectedConf = &Config{ { JobName: "service-kubernetes", - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -339,9 +359,15 @@ var expectedConf = &Config{ { APIServer: kubernetesSDHostURL(), Role: kubernetes.RoleEndpoint, - BasicAuth: &config_util.BasicAuth{ - Username: "myusername", - Password: "mysecret", + HTTPClientConfig: config_util.HTTPClientConfig{ + BasicAuth: 
&config_util.BasicAuth{ + Username: "myusername", + Password: "mysecret", + }, + TLSConfig: config_util.TLSConfig{ + CertFile: filepath.FromSlash("testdata/valid_cert_file"), + KeyFile: filepath.FromSlash("testdata/valid_key_file"), + }, }, NamespaceDiscovery: kubernetes.NamespaceDiscovery{}, }, @@ -351,11 +377,18 @@ var expectedConf = &Config{ { JobName: "service-kubernetes-namespaces", - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, + HTTPClientConfig: config_util.HTTPClientConfig{ + BasicAuth: &config_util.BasicAuth{ + Username: "myusername", + PasswordFile: filepath.FromSlash("testdata/valid_password_file"), + }, + }, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ KubernetesSDConfigs: []*kubernetes.SDConfig{ @@ -374,8 +407,9 @@ var expectedConf = &Config{ { JobName: "service-marathon", - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -401,8 +435,9 @@ var expectedConf = &Config{ { JobName: "service-ec2", - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -433,8 +468,9 @@ var expectedConf = &Config{ { JobName: "service-azure", - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -442,13 +478,14 @@ var expectedConf = &Config{ ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ AzureSDConfigs: []*azure.SDConfig{ { - Environment: "AzurePublicCloud", - SubscriptionID: "11AAAA11-A11A-111A-A111-1111A1111A11", - TenantID: "BBBB222B-B2B2-2B22-B222-2BB2222BB2B2", - ClientID: "333333CC-3C33-3333-CCC3-33C3CCCCC33C", - ClientSecret: "mysecret", - RefreshInterval: model.Duration(5 * time.Minute), - Port: 9100, + Environment: "AzurePublicCloud", + SubscriptionID: "11AAAA11-A11A-111A-A111-1111A1111A11", + TenantID: "BBBB222B-B2B2-2B22-B222-2BB2222BB2B2", + ClientID: "333333CC-3C33-3333-CCC3-33C3CCCCC33C", + ClientSecret: "mysecret", + AuthenticationMethod: "OAuth", + RefreshInterval: model.Duration(5 * time.Minute), + Port: 9100, }, }, }, @@ -456,8 +493,9 @@ var expectedConf = &Config{ { JobName: "service-nerve", - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -475,8 +513,9 @@ var expectedConf = &Config{ { JobName: "0123service-xxx", - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + HonorTimestamps: true, + 
ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -492,11 +531,33 @@ var expectedConf = &Config{ }, }, }, + { + JobName: "badfederation", + + HonorTimestamps: false, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + + MetricsPath: "/federate", + Scheme: DefaultScrapeConfig.Scheme, + + ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ + StaticConfigs: []*targetgroup.Group{ + { + Targets: []model.LabelSet{ + {model.AddressLabel: "localhost:9090"}, + }, + Source: "0", + }, + }, + }, + }, { JobName: "測試", - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -515,8 +576,9 @@ var expectedConf = &Config{ { JobName: "service-triton", - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -542,8 +604,9 @@ var expectedConf = &Config{ { JobName: "service-openstack", - ScrapeInterval: model.Duration(15 * time.Second), - ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, + HonorTimestamps: true, + ScrapeInterval: model.Duration(15 * time.Second), + ScrapeTimeout: DefaultGlobalConfig.ScrapeTimeout, MetricsPath: DefaultScrapeConfig.MetricsPath, Scheme: DefaultScrapeConfig.Scheme, @@ -556,9 +619,9 @@ var expectedConf = &Config{ Port: 80, RefreshInterval: model.Duration(60 * time.Second), TLSConfig: config_util.TLSConfig{ - CAFile: "valid_ca_file", - CertFile: "valid_cert_file", - KeyFile: "valid_key_file", + CAFile: "testdata/valid_ca_file", + CertFile: "testdata/valid_cert_file", + KeyFile: "testdata/valid_key_file", }, }, }, @@ -598,10 +661,10 @@ func TestLoadConfig(t *testing.T) { testutil.Ok(t, err) expectedConf.original = c.original - testutil.Equals(t, expectedConf, c) + assert.Equal(t, expectedConf, c) } -// YAML marshalling must not reveal authentication credentials. +// YAML marshaling must not reveal authentication credentials. 
func TestElideSecrets(t *testing.T) { c, err := LoadFile("testdata/conf.good.yml") testutil.Ok(t, err) @@ -627,6 +690,11 @@ func TestLoadConfigRuleFilesAbsolutePath(t *testing.T) { testutil.Equals(t, ruleFilesExpectedConf, c) } +func TestKubernetesEmptyAPIServer(t *testing.T) { + _, err := LoadFile("testdata/kubernetes_empty_apiserver.good.yml") + testutil.Ok(t, err) +} + var expectedErrors = []struct { filename string errMsg string @@ -646,6 +714,9 @@ var expectedErrors = []struct { }, { filename: "labelname2.bad.yml", errMsg: `"not:allowed" is not a valid label name`, + }, { + filename: "labelvalue.bad.yml", + errMsg: `"\xff" is not a valid label value`, }, { filename: "regex.bad.yml", errMsg: "error parsing regexp", @@ -697,6 +768,9 @@ var expectedErrors = []struct { }, { filename: "bearertoken_basicauth.bad.yml", errMsg: "at most one of basic_auth, bearer_token & bearer_token_file must be configured", + }, { + filename: "kubernetes_http_config_without_api_server.bad.yml", + errMsg: "to use custom HTTP client configuration please provide the 'api_server' URL explicitly", }, { filename: "kubernetes_bearertoken.bad.yml", errMsg: "at most one of bearer_token & bearer_token_file must be configured", @@ -751,6 +825,62 @@ var expectedErrors = []struct { filename: "section_key_dup.bad.yml", errMsg: "field scrape_configs already set in type config.plain", }, + { + filename: "azure_client_id_missing.bad.yml", + errMsg: "azure SD configuration requires a client_id", + }, + { + filename: "azure_client_secret_missing.bad.yml", + errMsg: "azure SD configuration requires a client_secret", + }, + { + filename: "azure_subscription_id_missing.bad.yml", + errMsg: "azure SD configuration requires a subscription_id", + }, + { + filename: "azure_tenant_id_missing.bad.yml", + errMsg: "azure SD configuration requires a tenant_id", + }, + { + filename: "azure_authentication_method.bad.yml", + errMsg: "unknown authentication_type \"invalid\". 
Supported types are \"OAuth\" or \"ManagedIdentity\"", + }, + { + filename: "empty_scrape_config.bad.yml", + errMsg: "empty or null scrape config section", + }, + { + filename: "empty_rw_config.bad.yml", + errMsg: "empty or null remote write config section", + }, + { + filename: "empty_rr_config.bad.yml", + errMsg: "empty or null remote read config section", + }, + { + filename: "empty_target_relabel_config.bad.yml", + errMsg: "empty or null target relabeling rule", + }, + { + filename: "empty_metric_relabel_config.bad.yml", + errMsg: "empty or null metric relabeling rule", + }, + { + filename: "empty_alert_relabel_config.bad.yml", + errMsg: "empty or null alert relabeling rule", + }, + { + filename: "empty_alertmanager_relabel_config.bad.yml", + errMsg: "empty or null Alertmanager target relabeling rule", + }, + { + filename: "empty_rw_relabel_config.bad.yml", + errMsg: "empty or null relabeling rule in remote write config", + }, + { + filename: "empty_static_config.bad.yml", + errMsg: "empty or null section in static_configs", + }, } func TestBadConfigs(t *testing.T) { @@ -793,33 +923,6 @@ func TestEmptyGlobalBlock(t *testing.T) { testutil.Equals(t, exp, *c) } -func TestTargetLabelValidity(t *testing.T) { - tests := []struct { - str string - valid bool - }{ - {"-label", false}, - {"label", true}, - {"label${1}", true}, - {"${1}label", true}, - {"${1}", true}, - {"${1}label", true}, - {"${", false}, - {"$", false}, - {"${}", false}, - {"foo${", false}, - {"$1", true}, - {"asd$2asd", true}, - {"-foo${1}bar-", false}, - {"_${1}_", true}, - {"foo${bar}foo", true}, - } - for _, test := range tests { - testutil.Assert(t, relabelTarget.Match([]byte(test.str)) == test.valid, - "Expected %q to be %v", test.str, test.valid) - } -} - func kubernetesSDHostURL() config_util.URL { tURL, _ := url.Parse("https://localhost:1234") return config_util.URL{URL: tURL} diff --git a/config/testdata/azure_authentication_method.bad.yml b/config/testdata/azure_authentication_method.bad.yml new file mode 100644 index 0000000000..b05fc474a6 --- /dev/null +++ b/config/testdata/azure_authentication_method.bad.yml @@ -0,0 +1,4 @@ +scrape_configs: +- azure_sd_configs: + - authentication_method: invalid + subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11 diff --git a/config/testdata/azure_client_id_missing.bad.yml b/config/testdata/azure_client_id_missing.bad.yml new file mode 100644 index 0000000000..f8da2ff9c9 --- /dev/null +++ b/config/testdata/azure_client_id_missing.bad.yml @@ -0,0 +1,7 @@ +scrape_configs: + - job_name: azure + azure_sd_configs: + - subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11 + tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2 + client_id: + client_secret: mysecret \ No newline at end of file diff --git a/config/testdata/azure_client_secret_missing.bad.yml b/config/testdata/azure_client_secret_missing.bad.yml new file mode 100644 index 0000000000..1295c8ad57 --- /dev/null +++ b/config/testdata/azure_client_secret_missing.bad.yml @@ -0,0 +1,7 @@ +scrape_configs: + - job_name: azure + azure_sd_configs: + - subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11 + tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2 + client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C + client_secret: \ No newline at end of file diff --git a/config/testdata/azure_subscription_id_missing.bad.yml b/config/testdata/azure_subscription_id_missing.bad.yml new file mode 100644 index 0000000000..9976138823 --- /dev/null +++ b/config/testdata/azure_subscription_id_missing.bad.yml @@ -0,0 +1,7 @@ +scrape_configs: + - job_name: 
azure + azure_sd_configs: + - subscription_id: + tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2 + client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C + client_secret: mysecret \ No newline at end of file diff --git a/config/testdata/azure_tenant_id_missing.bad.yml b/config/testdata/azure_tenant_id_missing.bad.yml new file mode 100644 index 0000000000..ac714d9b52 --- /dev/null +++ b/config/testdata/azure_tenant_id_missing.bad.yml @@ -0,0 +1,7 @@ +scrape_configs: + - job_name: azure + azure_sd_configs: + - subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11 + tenant_id: + client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C + client_secret: mysecret \ No newline at end of file diff --git a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml index 5abadc212f..6993217805 100644 --- a/config/testdata/conf.good.yml +++ b/config/testdata/conf.good.yml @@ -19,6 +19,9 @@ remote_write: regex: expensive.* action: drop - url: http://remote2/push + tls_config: + cert_file: valid_cert_file + key_file: valid_key_file remote_read: - url: http://remote1/read @@ -27,6 +30,9 @@ remote_read: read_recent: false required_matchers: job: special + tls_config: + cert_file: valid_cert_file + key_file: valid_key_file scrape_configs: - job_name: prometheus @@ -122,7 +128,7 @@ scrape_configs: - server: 'localhost:1234' token: mysecret services: ['nginx', 'cache', 'mysql'] - tag: "canary" + tags: ["canary", "v1"] node_meta: rack: "123" allow_stale: true @@ -153,6 +159,9 @@ scrape_configs: kubernetes_sd_configs: - role: endpoints api_server: 'https://localhost:1234' + tls_config: + cert_file: valid_cert_file + key_file: valid_key_file basic_auth: username: 'myusername' @@ -167,6 +176,11 @@ scrape_configs: names: - default + basic_auth: + username: 'myusername' + password_file: valid_password_file + + - job_name: service-marathon marathon_sd_configs: - servers: @@ -196,6 +210,7 @@ scrape_configs: - job_name: service-azure azure_sd_configs: - environment: AzurePublicCloud + authentication_method: OAuth subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11 tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2 client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C @@ -215,6 +230,13 @@ scrape_configs: - targets: - localhost:9090 +- job_name: badfederation + honor_timestamps: false + metrics_path: /federate + static_configs: + - targets: + - localhost:9090 + - job_name: 測試 metrics_path: /metrics static_configs: @@ -230,8 +252,8 @@ scrape_configs: refresh_interval: 1m version: 1 tls_config: - cert_file: testdata/valid_cert_file - key_file: testdata/valid_key_file + cert_file: valid_cert_file + key_file: valid_key_file - job_name: service-openstack openstack_sd_configs: diff --git a/config/testdata/empty_alert_relabel_config.bad.yml b/config/testdata/empty_alert_relabel_config.bad.yml new file mode 100644 index 0000000000..b863bf23a0 --- /dev/null +++ b/config/testdata/empty_alert_relabel_config.bad.yml @@ -0,0 +1,3 @@ +alerting: + alert_relabel_configs: + - diff --git a/config/testdata/empty_alertmanager_relabel_config.bad.yml b/config/testdata/empty_alertmanager_relabel_config.bad.yml new file mode 100644 index 0000000000..6d99ac4dc6 --- /dev/null +++ b/config/testdata/empty_alertmanager_relabel_config.bad.yml @@ -0,0 +1,4 @@ +alerting: + alertmanagers: + - relabel_configs: + - diff --git a/config/testdata/empty_metric_relabel_config.bad.yml b/config/testdata/empty_metric_relabel_config.bad.yml new file mode 100644 index 0000000000..d2485e3527 --- /dev/null +++ b/config/testdata/empty_metric_relabel_config.bad.yml @@ -0,0 +1,4 @@ 
+scrape_configs: +- job_name: "test" + metric_relabel_configs: + - diff --git a/config/testdata/empty_rr_config.bad.yml b/config/testdata/empty_rr_config.bad.yml new file mode 100644 index 0000000000..e3bcca598c --- /dev/null +++ b/config/testdata/empty_rr_config.bad.yml @@ -0,0 +1,2 @@ +remote_read: +- diff --git a/config/testdata/empty_rw_config.bad.yml b/config/testdata/empty_rw_config.bad.yml new file mode 100644 index 0000000000..6f16030e65 --- /dev/null +++ b/config/testdata/empty_rw_config.bad.yml @@ -0,0 +1,2 @@ +remote_write: +- diff --git a/config/testdata/empty_rw_relabel_config.bad.yml b/config/testdata/empty_rw_relabel_config.bad.yml new file mode 100644 index 0000000000..6d5418290c --- /dev/null +++ b/config/testdata/empty_rw_relabel_config.bad.yml @@ -0,0 +1,4 @@ +remote_write: + - url: "foo" + write_relabel_configs: + - \ No newline at end of file diff --git a/config/testdata/empty_scrape_config.bad.yml b/config/testdata/empty_scrape_config.bad.yml new file mode 100644 index 0000000000..8c300deaab --- /dev/null +++ b/config/testdata/empty_scrape_config.bad.yml @@ -0,0 +1,2 @@ +scrape_configs: +- \ No newline at end of file diff --git a/config/testdata/empty_static_config.bad.yml b/config/testdata/empty_static_config.bad.yml new file mode 100644 index 0000000000..464a0a6fbe --- /dev/null +++ b/config/testdata/empty_static_config.bad.yml @@ -0,0 +1,4 @@ +scrape_configs: +- job_name: "test" + static_configs: + - diff --git a/config/testdata/empty_target_relabel_config.bad.yml b/config/testdata/empty_target_relabel_config.bad.yml new file mode 100644 index 0000000000..7324b10411 --- /dev/null +++ b/config/testdata/empty_target_relabel_config.bad.yml @@ -0,0 +1,4 @@ +scrape_configs: +- job_name: "test" + relabel_configs: + - diff --git a/config/testdata/kubernetes_empty_apiserver.good.yml b/config/testdata/kubernetes_empty_apiserver.good.yml new file mode 100644 index 0000000000..12b428eb84 --- /dev/null +++ b/config/testdata/kubernetes_empty_apiserver.good.yml @@ -0,0 +1,4 @@ +scrape_configs: +- job_name: prometheus + kubernetes_sd_configs: + - role: endpoints diff --git a/config/testdata/kubernetes_http_config_without_api_server.bad.yml b/config/testdata/kubernetes_http_config_without_api_server.bad.yml new file mode 100644 index 0000000000..db442c3bd1 --- /dev/null +++ b/config/testdata/kubernetes_http_config_without_api_server.bad.yml @@ -0,0 +1,5 @@ +scrape_configs: + - job_name: prometheus + kubernetes_sd_configs: + - role: pod + bearer_token: 1234 diff --git a/config/testdata/labelvalue.bad.yml b/config/testdata/labelvalue.bad.yml new file mode 100644 index 0000000000..7873eb1743 --- /dev/null +++ b/config/testdata/labelvalue.bad.yml @@ -0,0 +1,3 @@ +global: + external_labels: + name: !!binary "/w==" \ No newline at end of file diff --git a/console_libraries/menu.lib b/console_libraries/menu.lib index 929dc362a3..199ebf9f48 100644 --- a/console_libraries/menu.lib +++ b/console_libraries/menu.lib @@ -2,33 +2,37 @@ {{/* Navbar, should be passed . */}} {{ define "navbar" }} -