diff --git a/.promu.yml b/.promu.yml index 9fc1271d09..11a8c19903 100644 --- a/.promu.yml +++ b/.promu.yml @@ -42,13 +42,12 @@ crossbuild: - linux/arm - linux/arm64 - freebsd/arm + - openbsd/arm # Temporarily deactivated as golang.org/x/sys does not have syscalls # implemented for that os/platform combination. - #- openbsd/arm #- linux/mips64 #- linux/mips64le - netbsd/arm - linux/ppc64 - linux/ppc64le - linux/s390x - diff --git a/CHANGELOG.md b/CHANGELOG.md index 88b8144c92..bb932a3495 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,33 @@ +## 2.11.0-rc.0 / 2019-07-04 + +* [CHANGE] Remove `max_retries` from queue_config (it has been unused since rewriting remote-write to utilize the write-ahead-log). #5649 +* [CHANGE] The meta file `BlockStats` no longer holds size information. This is now dynamically calculated and kept in memory. It also includes the meta file size which was not included before. tsdb#637 +* [CHANGE] Renamed metric from `prometheus_tsdb_wal_reader_corruption_errors` to `prometheus_tsdb_wal_reader_corruption_errors_total`. tsdb#622 +* [FEATURE] Add option to use Alertmanager API v2. #5482 +* [FEATURE] Added `humanizePercentage` function for templates. #5670 +* [FEATURE] Include InitContainers in Kubernetes Service Discovery. #5598 +* [FEATURE] Provide option to compress WAL records using Snappy. [#609](https://github.com/prometheus/tsdb/pull/609) +* [ENHANCEMENT] Create new clean segment when starting the WAL. tsdb#608 +* [ENHANCEMENT] Reduce allocations in PromQL aggregations. #5641 +* [ENHANCEMENT] Add storage warnings to LabelValues and LabelNames API results. #5673 +* [ENHANCEMENT] Add prometheus_http_requests_total metric. #5640 +* [ENHANCEMENT] Enable openbsd/arm build. #5696 +* [ENHANCEMENT] Remote-write allocation improvements. #5614 +* [ENHANCEMENT] Query performance improvement: Efficient iteration and search in HashForLabels and HashWithoutLabels. #5707 +* [ENHANCEMENT] Allow injection of arbitrary headers in promtool. 
#4389 +* [ENHANCEMENT] Allow passing `external_labels` in alert unit test groups. #5608 +* [ENHANCEMENT] Allow globs for rules when unit testing. #5595 +* [ENHANCEMENT] Improved postings intersection matching. tsdb#616 +* [ENHANCEMENT] Reduced disk usage for WAL for small setups. tsdb#605 +* [ENHANCEMENT] Optimize queries using regexp for set lookups. tsdb#602 +* [BUGFIX] Resolve race condition in maxGauge. #5647 +* [BUGFIX] Fix ZooKeeper connection leak. #5675 +* [BUGFIX] Improved atomicity of .tmp block replacement during compaction for usual case. tsdb#636 +* [BUGFIX] Fix "unknown series references" after clean shutdown. tsdb#623 +* [BUGFIX] Re-calculate block size when calling `block.Delete`. tsdb#637 +* [BUGFIX] Fix unsafe snapshots with head block. tsdb#641 +* [BUGFIX] `prometheus_tsdb_compactions_failed_total` is now incremented on any compaction failure. tsdb#613 + ## 2.10.0 / 2019-05-25 * [CHANGE/BUGFIX] API: Encode alert values as string to correctly represent Inf/NaN. #5582 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e2277a7888..a965a07f14 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -40,7 +40,7 @@ go build ./cmd/prometheus/ make test # Make sure all the tests pass before you commit and push :) ``` -We use `golangci-lint`[https://github.com/golangci/golangci-lint] for linting the code. If it reports an issue and you think that the warning needs to be disregarded or is a false-positive, you can add a special comment `//nolint:linter1[,linter2,...]` before the offending line. Use this sparingly though, fixing the code to comply with the linter's recommendation is in general the preferred course of action. +We use [`golangci-lint`](https://github.com/golangci/golangci-lint) for linting the code. If it reports an issue and you think that the warning needs to be disregarded or is a false-positive, you can add a special comment `//nolint:linter1[,linter2,...]` before the offending line. 
Use this sparingly though, fixing the code to comply with the linter's recommendation is in general the preferred course of action. All our issues are regularly tagged so that you can also filter down the issues involving the components you want to work on. For our labeling policy refer [the wiki page](https://github.com/prometheus/prometheus/wiki/Label-Names-and-Descriptions). diff --git a/Makefile.common b/Makefile.common index d7aea1b86f..48d2ff84e9 100644 --- a/Makefile.common +++ b/Makefile.common @@ -69,7 +69,7 @@ else GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH) endif -PROMU_VERSION ?= 0.4.0 +PROMU_VERSION ?= 0.5.0 PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz GOLANGCI_LINT := @@ -86,7 +86,8 @@ endif PREFIX ?= $(shell pwd) BIN_DIR ?= $(shell pwd) DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD)) -DOCKERFILE_PATH ?= ./ +DOCKERFILE_PATH ?= ./Dockerfile +DOCKERBUILD_CONTEXT ?= ./ DOCKER_REPO ?= prom DOCKER_ARCHS ?= amd64 @@ -211,9 +212,10 @@ common-tarball: promu common-docker: $(BUILD_DOCKER_ARCHS) $(BUILD_DOCKER_ARCHS): common-docker-%: docker build -t "$(DOCKER_REPO)/$(DOCKER_IMAGE_NAME)-linux-$*:$(DOCKER_IMAGE_TAG)" \ + -f $(DOCKERFILE_PATH) \ --build-arg ARCH="$*" \ --build-arg OS="linux" \ - $(DOCKERFILE_PATH) + $(DOCKERBUILD_CONTEXT) .PHONY: common-docker-publish $(PUBLISH_DOCKER_ARCHS) common-docker-publish: $(PUBLISH_DOCKER_ARCHS) diff --git a/RELEASE.md b/RELEASE.md index 2eaf656c84..3c1917ba8f 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -80,22 +80,17 @@ $ git push --tags Signing a tag with a GPG key is appreciated, but in case you can't add a GPG key to your Github account using the following [procedure](https://help.github.com/articles/generating-a-gpg-key/), you can replace the `-s` flag by `-a` flag of the `git tag` command to only annotate the tag without signing. 
-Once a tag is created, the release process through CircleCI will be triggered for this tag. -You must create a Github Release using the UI for this tag, as otherwise CircleCI will not be able to upload tarballs for this tag. __Also, you must create the Github Release using a Github user that has granted access rights to CircleCI.__ If you did not or cannot grant those rights to your personal account, you can log in as `prombot` in an anonymous browser tab. (This will, however, prevent verified releases signed with your GPG key. For verified releases, the signing identity must be the same as the one creating the release.) +Once a tag is created, the release process through CircleCI will be triggered for this tag and CircleCI will draft the GitHub release using the `prombot` account. -Go to the releases page of the project, click on the _Draft a new release_ button and select the tag you just pushed. The title of the release is formatted `x.y.z / YYYY-MM-DD`. Add the relevant part of `CHANGELOG.md` as description. Click _Save draft_ rather than _Publish release_ at this time. (This will prevent the release being visible before it has got the binaries attached to it.) - -You can also create the tag and the Github release in one go through the Github UI by going to the releases page and then click on the `Draft a new release` button and enter your tag version. - -Now all you can do is to wait for tarballs to be uploaded to the Github release and Docker images to be pushed to the Docker Hub and Quay.io. Once that has happened, click _Publish release_, which will make the release publicly visible and create a GitHub notification. +Now all you can do is to wait for tarballs to be uploaded to the GitHub release and the container images to be pushed to the Docker Hub and Quay.io. Once that has happened, click _Publish release_, which will make the release publicly visible and create a GitHub notification. 
### Wrapping up If the release has happened in the latest release branch, merge the changes into master. -To update the docs, a PR needs to be created to `prometheus/docs`. See [this PR](https://github.com/prometheus/docs/pull/952/files) for inspiration. +To update the docs, a PR needs to be created to `prometheus/docs`. See [this PR](https://github.com/prometheus/docs/pull/952/files) for inspiration (note: only actually merge this for final releases, not for pre-releases like a release candidate). -Once the binaries have been uploaded, announce the release on `prometheus-announce@googlegroups.com`. (Please do not use `prometheus-users@googlegroups.com` for announcements anymore.) Check out previous announcement mails for inspiration. +Once the binaries have been uploaded, announce the release on `prometheus-announce@googlegroups.com`. (Please do not use `prometheus-users@googlegroups.com` for announcements anymore.) Check out previous announcement mails for inspiration. ### Pre-releases @@ -104,4 +99,5 @@ The following changes to the above procedures apply: * In line with [Semantic Versioning](https://semver.org/), append something like `-rc.0` to the version (with the corresponding changes to the tag name, the release name etc.). * Tick the _This is a pre-release_ box when drafting the release in the Github UI. * Still update `CHANGELOG.md`, but when you cut the final release later, merge all the changes from the pre-releases into the one final update. +* Run the benchmark for 3 days using the `/benchmark x.y.z` command, `x.y.z` being the latest stable patch release of the previous minor release series. 
diff --git a/VERSION b/VERSION index 10c2c0c3d6..ed612dd65c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.10.0 +2.11.0-rc.0 diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 5529f0dcb9..7b68003da7 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -123,6 +123,10 @@ func main() { notifier: notifier.Options{ Registerer: prometheus.DefaultRegisterer, }, + web: web.Options{ + Registerer: prometheus.DefaultRegisterer, + Gatherer: prometheus.DefaultGatherer, + }, promlogConfig: promlog.Config{}, } @@ -203,6 +207,9 @@ func main() { a.Flag("storage.tsdb.allow-overlapping-blocks", "[EXPERIMENTAL] Allow overlapping blocks, which in turn enables vertical compaction and vertical query merge."). Default("false").BoolVar(&cfg.tsdb.AllowOverlappingBlocks) + a.Flag("storage.tsdb.wal-compression", "Compress the tsdb WAL."). + Default("false").BoolVar(&cfg.tsdb.WALCompression) + a.Flag("storage.remote.flush-deadline", "How long to wait flushing sample on shutdown or config reload."). 
Default("1m").PlaceHolder("").SetValue(&cfg.RemoteFlushDeadline) @@ -667,6 +674,7 @@ func main() { "RetentionDuration", cfg.tsdb.RetentionDuration, "WALSegmentSize", cfg.tsdb.WALSegmentSize, "AllowOverlappingBlocks", cfg.tsdb.AllowOverlappingBlocks, + "WALCompression", cfg.tsdb.WALCompression, ) startTimeMargin := int64(2 * time.Duration(cfg.tsdb.MinBlockDuration).Seconds() * 1000) diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 51d5a6ca62..ffae6991d3 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -19,6 +19,7 @@ import ( "encoding/json" "fmt" "math" + "net/http" "net/url" "os" "path/filepath" @@ -30,6 +31,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/api" v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/client_golang/prometheus/promhttp" config_util "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/common/version" @@ -70,6 +72,7 @@ func main() { queryRangeCmd := queryCmd.Command("range", "Run range query.") queryRangeServer := queryRangeCmd.Arg("server", "Prometheus server to query.").Required().String() queryRangeExpr := queryRangeCmd.Arg("expr", "PromQL query expression.").Required().String() + queryRangeHeaders := queryRangeCmd.Flag("header", "Extra headers to send to server.").StringMap() queryRangeBegin := queryRangeCmd.Flag("start", "Query range start time (RFC3339 or Unix timestamp).").String() queryRangeEnd := queryRangeCmd.Flag("end", "Query range end time (RFC3339 or Unix timestamp).").String() queryRangeStep := queryRangeCmd.Flag("step", "Query step size (duration).").Duration() @@ -123,7 +126,7 @@ func main() { os.Exit(QueryInstant(*queryServer, *queryExpr, p)) case queryRangeCmd.FullCommand(): - os.Exit(QueryRange(*queryRangeServer, *queryRangeExpr, *queryRangeBegin, *queryRangeEnd, *queryRangeStep, p)) + os.Exit(QueryRange(*queryRangeServer, *queryRangeHeaders, *queryRangeExpr, *queryRangeBegin, 
*queryRangeEnd, *queryRangeStep, p)) case querySeriesCmd.FullCommand(): os.Exit(QuerySeries(*querySeriesServer, *querySeriesMatch, *querySeriesBegin, *querySeriesEnd, p)) @@ -143,7 +146,6 @@ func main() { case testRulesCmd.FullCommand(): os.Exit(RulesUnitTest(*testRulesFiles...)) } - } // CheckConfig validates configuration files. @@ -348,7 +350,7 @@ func QueryInstant(url, query string, p printer) int { api := v1.NewAPI(c) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - val, err := api.Query(ctx, query, time.Now()) + val, _, err := api.Query(ctx, query, time.Now()) // Ignoring warnings for now. cancel() if err != nil { fmt.Fprintln(os.Stderr, "query error:", err) @@ -361,11 +363,20 @@ func QueryInstant(url, query string, p printer) int { } // QueryRange performs a range query against a Prometheus server. -func QueryRange(url, query, start, end string, step time.Duration, p printer) int { +func QueryRange(url string, headers map[string]string, query, start, end string, step time.Duration, p printer) int { config := api.Config{ Address: url, } + if len(headers) > 0 { + config.RoundTripper = promhttp.RoundTripperFunc(func(req *http.Request) (*http.Response, error) { + for key, value := range headers { + req.Header.Add(key, value) + } + return http.DefaultTransport.RoundTrip(req) + }) + } + // Create new client. c, err := api.NewClient(config) if err != nil { @@ -408,7 +419,7 @@ func QueryRange(url, query, start, end string, step time.Duration, p printer) in api := v1.NewAPI(c) r := v1.Range{Start: stime, End: etime, Step: step} ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - val, err := api.QueryRange(ctx, query, r) + val, _, err := api.QueryRange(ctx, query, r) // Ignoring warnings for now. cancel() if err != nil { @@ -462,7 +473,7 @@ func QuerySeries(url *url.URL, matchers []string, start, end string, p printer) // Run query against client. 
api := v1.NewAPI(c) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) - val, err := api.Series(ctx, matchers, stime, etime) + val, _, err := api.Series(ctx, matchers, stime, etime) // Ignoring warnings for now. cancel() if err != nil { diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index 84ff006db4..80b5c14267 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -26,7 +26,7 @@ func TestQueryRange(t *testing.T) { defer s.Close() p := &promqlPrinter{} - exitCode := QueryRange(s.URL, "up", "0", "300", 0, p) + exitCode := QueryRange(s.URL, map[string]string{}, "up", "0", "300", 0, p) expectedPath := "/api/v1/query_range" gotPath := getRequest().URL.Path if gotPath != expectedPath { @@ -45,7 +45,7 @@ func TestQueryRange(t *testing.T) { t.Error() } - exitCode = QueryRange(s.URL, "up", "0", "300", 10*time.Millisecond, p) + exitCode = QueryRange(s.URL, map[string]string{}, "up", "0", "300", 10*time.Millisecond, p) gotPath = getRequest().URL.Path if gotPath != expectedPath { t.Errorf("unexpected URL path %s (wanted %s)", gotPath, expectedPath) diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index 8b19d80394..798cc40e10 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -157,6 +157,7 @@ type testGroup struct { InputSeries []series `yaml:"input_series"` AlertRuleTests []alertTestCase `yaml:"alert_rule_test,omitempty"` PromqlExprTests []promqlTestCase `yaml:"promql_expr_test,omitempty"` + ExternalLabels labels.Labels `yaml:"external_labels,omitempty"` } // test performs the unit tests. @@ -177,8 +178,7 @@ func (tg *testGroup) test(mint, maxt time.Time, evalInterval time.Duration, grou Logger: log.NewNopLogger(), } m := rules.NewManager(opts) - // TODO(beorn7): Provide a way to pass in external labels. - groupsMap, ers := m.LoadGroups(tg.Interval, nil, ruleFiles...) + groupsMap, ers := m.LoadGroups(tg.Interval, tg.ExternalLabels, ruleFiles...) 
if ers != nil { return ers } diff --git a/config/config.go b/config/config.go index f4d8d470de..2716c8cf37 100644 --- a/config/config.go +++ b/config/config.go @@ -92,8 +92,9 @@ var ( // DefaultAlertmanagerConfig is the default alertmanager configuration. DefaultAlertmanagerConfig = AlertmanagerConfig{ - Scheme: "http", - Timeout: model.Duration(10 * time.Second), + Scheme: "http", + Timeout: model.Duration(10 * time.Second), + APIVersion: AlertmanagerAPIVersionV1, } // DefaultRemoteWriteConfig is the default remote write configuration. @@ -116,8 +117,7 @@ var ( Capacity: 10, BatchSendDeadline: model.Duration(5 * time.Second), - // Max number of times to retry a batch on recoverable errors. - MaxRetries: 3, + // Backoff times for retrying a batch of samples on recoverable errors. MinBackoff: model.Duration(30 * time.Millisecond), MaxBackoff: model.Duration(100 * time.Millisecond), } @@ -454,6 +454,40 @@ func (c *AlertingConfig) UnmarshalYAML(unmarshal func(interface{}) error) error return nil } +// AlertmanagerAPIVersion represents a version of the +// github.com/prometheus/alertmanager/api, e.g. 'v1' or 'v2'. +type AlertmanagerAPIVersion string + +// UnmarshalYAML implements the yaml.Unmarshaler interface. +func (v *AlertmanagerAPIVersion) UnmarshalYAML(unmarshal func(interface{}) error) error { + *v = AlertmanagerAPIVersion("") + type plain AlertmanagerAPIVersion + if err := unmarshal((*plain)(v)); err != nil { + return err + } + + for _, supportedVersion := range SupportedAlertmanagerAPIVersions { + if *v == supportedVersion { + return nil + } + } + + return fmt.Errorf("expected Alertmanager api version to be one of %v but got %v", SupportedAlertmanagerAPIVersions, *v) +} + +const ( + // AlertmanagerAPIVersionV1 represents + // github.com/prometheus/alertmanager/api/v1. + AlertmanagerAPIVersionV1 AlertmanagerAPIVersion = "v1" + // AlertmanagerAPIVersionV2 represents + // github.com/prometheus/alertmanager/api/v2. 
+ AlertmanagerAPIVersionV2 AlertmanagerAPIVersion = "v2" +) + +var SupportedAlertmanagerAPIVersions = []AlertmanagerAPIVersion{ + AlertmanagerAPIVersionV1, AlertmanagerAPIVersionV2, +} + // AlertmanagerConfig configures how Alertmanagers can be discovered and communicated with. type AlertmanagerConfig struct { // We cannot do proper Go type embedding below as the parser will then parse @@ -469,6 +503,9 @@ type AlertmanagerConfig struct { // The timeout used when sending alerts. Timeout model.Duration `yaml:"timeout,omitempty"` + // The api version of Alertmanager. + APIVersion AlertmanagerAPIVersion `yaml:"api_version"` + // List of Alertmanager relabel configurations. RelabelConfigs []*relabel.Config `yaml:"relabel_configs,omitempty"` } @@ -594,9 +631,6 @@ type QueueConfig struct { // Maximum time sample will wait in buffer. BatchSendDeadline model.Duration `yaml:"batch_send_deadline,omitempty"` - // Max number of times to retry a batch on recoverable errors. - MaxRetries int `yaml:"max_retries,omitempty"` - // On recoverable errors, backoff exponentially. 
MinBackoff model.Duration `yaml:"min_backoff,omitempty"` MaxBackoff model.Duration `yaml:"max_backoff,omitempty"` diff --git a/config/config_test.go b/config/config_test.go index 9d35e9e533..9cfb9107c7 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -631,8 +631,9 @@ var expectedConf = &Config{ AlertingConfig: AlertingConfig{ AlertmanagerConfigs: []*AlertmanagerConfig{ { - Scheme: "https", - Timeout: model.Duration(10 * time.Second), + Scheme: "https", + Timeout: model.Duration(10 * time.Second), + APIVersion: AlertmanagerAPIVersionV1, ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{ StaticConfigs: []*targetgroup.Group{ { diff --git a/console_libraries/prom.lib b/console_libraries/prom.lib index ca1c9617c6..1edf475e1a 100644 --- a/console_libraries/prom.lib +++ b/console_libraries/prom.lib @@ -36,6 +36,7 @@ var PATH_PREFIX = "{{ pathPrefix }}"; {{ define "humanizeNoSmallPrefix" }}{{ if and (lt . 1.0) (gt . -1.0) }}{{ printf "%.3g" . }}{{ else }}{{ humanize . }}{{ end }}{{ end }} {{ define "humanize1024" }}{{ humanize1024 . }}{{ end }} {{ define "humanizeDuration" }}{{ humanizeDuration . }}{{ end }} +{{ define "humanizePercentage" }}{{ humanizePercentage . }}{{ end }} {{ define "humanizeTimestamp" }}{{ humanizeTimestamp . }}{{ end }} {{ define "printf.1f" }}{{ printf "%.1f" . }}{{ end }} {{ define "printf.3g" }}{{ printf "%.3g" . 
}}{{ end }} diff --git a/consoles/prometheus-overview.html b/consoles/prometheus-overview.html index 83e44b2624..08e027de06 100644 --- a/consoles/prometheus-overview.html +++ b/consoles/prometheus-overview.html @@ -50,10 +50,10 @@ HTTP Server -{{ range printf "http_request_duration_microseconds_count{job='prometheus',instance='%s',handler=~'^(query.*|federate|consoles)$'}" .Params.instance | query | sortByLabel "handler" }} +{{ range printf "prometheus_http_request_duration_seconds_count{job='prometheus',instance='%s'}" .Params.instance | query | sortByLabel "handler" }} {{ .Labels.handler }} - {{ template "prom_query_drilldown" (args (printf "irate(http_request_duration_microseconds_count{job='prometheus',instance='%s',handler='%s'}[5m])" .Labels.instance .Labels.handler) "/s" "humanizeNoSmallPrefix") }} + {{ template "prom_query_drilldown" (args (printf "irate(prometheus_http_request_duration_seconds_count{job='prometheus',instance='%s',handler='%s'}[5m])" .Labels.instance .Labels.handler) "/s" "humanizeNoSmallPrefix") }} {{ end }} @@ -82,7 +82,7 @@ -{{end}} +{{end}} {{define "content"}}
@@ -19,29 +19,24 @@

Service Discovery

    - {{range $i, $job := .Index}} -
  • - {{$job}} ({{ index $.Active $i }}/{{ index $.Total $i }} active targets) -
  • - {{end}} + {{- range $i, $job := .Index}} +
  • {{$job}} ({{ index $.Active $i }}/{{ index $.Total $i }} active targets)
  • + {{- end}}
- {{$targets := .Targets}} - {{range $i, $job := .Index}} + {{- $targets := .Targets}} + {{- range $i, $job := .Index}}

- {{$job}} - + {{$job}}

- {{with index $.Dropped $i}} - {{if gt . 100 }} - - {{end}} - {{end}} + {{- with index $.Dropped $i}} + {{- if gt . 100 }} + + {{- end}} + {{- end}} @@ -51,42 +46,34 @@ - {{range index $targets $job}} + {{- range index $targets $job}} - {{end}} + {{- end}}
- {{ end }} + {{- end }}
{{end}} diff --git a/web/ui/templates/targets.html b/web/ui/templates/targets.html index b4bef875b6..813f858f74 100644 --- a/web/ui/templates/targets.html +++ b/web/ui/templates/targets.html @@ -14,12 +14,12 @@ -
+
- {{range $job, $pool := .TargetPools}} - {{$healthy := numHealthy $pool}} - {{$total := len $pool}} + {{- range $job, $pool := .TargetPools}} + {{- $healthy := numHealthy $pool}} + {{- $total := len $pool}}

@@ -38,46 +38,36 @@ - {{range $pool}} + {{- range $pool}} {{.URL.Scheme}}://{{.URL.Host}}{{.URL.Path}}
- {{range $label, $values := .URL.Query }} - {{range $i, $value := $values}} + {{- range $label, $values := .URL.Query }} + {{- range $i, $value := $values}} {{$label}}="{{$value}}" - {{end}} - {{end}} + {{- end}} + {{- end}} - - {{.Health}} - + {{.Health}} - {{range $label, $value := .Labels.Map}} + {{- range $label, $value := .Labels.Map}} {{$label}}="{{$value}}" - {{else}} + {{- else -}} none - {{end}} + {{- end}} - - {{if .LastScrape.IsZero}}Never{{else}}{{since .LastScrape}} ago{{end}} - - - {{humanizeDuration .LastScrapeDuration.Seconds}} - - - {{if .LastError}} - {{.LastError}} - {{end}} - + {{- if .LastScrape.IsZero}}Never{{else}}{{since .LastScrape}} ago{{end}} + {{- humanizeDuration .LastScrapeDuration.Seconds}} + {{- if .LastError}}{{.LastError}}{{end}} - {{end}} + {{- end}}

- {{ end }} + {{- end }} {{end}} diff --git a/web/web.go b/web/web.go index 4b1f8ef6fe..a7ee559f01 100644 --- a/web/web.go +++ b/web/web.go @@ -89,33 +89,71 @@ func withStackTracer(h http.Handler, l log.Logger) http.Handler { }) } -var ( - requestDuration = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Name: "prometheus_http_request_duration_seconds", - Help: "Histogram of latencies for HTTP requests.", - Buckets: []float64{.1, .2, .4, 1, 3, 8, 20, 60, 120}, - }, - []string{"handler"}, - ) - responseSize = prometheus.NewHistogramVec( - prometheus.HistogramOpts{ - Name: "prometheus_http_response_size_bytes", - Help: "Histogram of response size for HTTP requests.", - Buckets: prometheus.ExponentialBuckets(100, 10, 8), - }, - []string{"handler"}, - ) -) +type metrics struct { + requestCounter *prometheus.CounterVec + requestDuration *prometheus.HistogramVec + responseSize *prometheus.HistogramVec +} -func init() { - prometheus.MustRegister(requestDuration, responseSize) +func newMetrics(r prometheus.Registerer) *metrics { + m := &metrics{ + requestCounter: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "prometheus_http_requests_total", + Help: "Counter of HTTP requests.", + }, + []string{"handler", "code"}, + ), + requestDuration: prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "prometheus_http_request_duration_seconds", + Help: "Histogram of latencies for HTTP requests.", + Buckets: []float64{.1, .2, .4, 1, 3, 8, 20, 60, 120}, + }, + []string{"handler"}, + ), + responseSize: prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "prometheus_http_response_size_bytes", + Help: "Histogram of response size for HTTP requests.", + Buckets: prometheus.ExponentialBuckets(100, 10, 8), + }, + []string{"handler"}, + ), + } + + if r != nil { + r.MustRegister(m.requestCounter, m.requestDuration, m.responseSize) + } + return m +} + +func (m *metrics) instrumentHandlerWithPrefix(prefix string) func(handlerName string, handler 
http.HandlerFunc) http.HandlerFunc { + return func(handlerName string, handler http.HandlerFunc) http.HandlerFunc { + return m.instrumentHandler(prefix+handlerName, handler) + } +} + +func (m *metrics) instrumentHandler(handlerName string, handler http.HandlerFunc) http.HandlerFunc { + return promhttp.InstrumentHandlerCounter( + m.requestCounter.MustCurryWith(prometheus.Labels{"handler": handlerName}), + promhttp.InstrumentHandlerDuration( + m.requestDuration.MustCurryWith(prometheus.Labels{"handler": handlerName}), + promhttp.InstrumentHandlerResponseSize( + m.responseSize.MustCurryWith(prometheus.Labels{"handler": handlerName}), + handler, + ), + ), + ) } // Handler serves various HTTP endpoints of the Prometheus server type Handler struct { logger log.Logger + gatherer prometheus.Gatherer + metrics *metrics + scrapeManager *scrape.Manager ruleManager *rules.Manager queryEngine *promql.Engine @@ -190,38 +228,31 @@ type Options struct { PageTitle string RemoteReadSampleLimit int RemoteReadConcurrencyLimit int -} -func instrumentHandlerWithPrefix(prefix string) func(handlerName string, handler http.HandlerFunc) http.HandlerFunc { - return func(handlerName string, handler http.HandlerFunc) http.HandlerFunc { - return instrumentHandler(prefix+handlerName, handler) - } -} - -func instrumentHandler(handlerName string, handler http.HandlerFunc) http.HandlerFunc { - return promhttp.InstrumentHandlerDuration( - requestDuration.MustCurryWith(prometheus.Labels{"handler": handlerName}), - promhttp.InstrumentHandlerResponseSize( - responseSize.MustCurryWith(prometheus.Labels{"handler": handlerName}), - handler, - ), - ) + Gatherer prometheus.Gatherer + Registerer prometheus.Registerer } // New initializes a new web Handler. 
func New(logger log.Logger, o *Options) *Handler { - router := route.New().WithInstrumentation(instrumentHandler) - cwd, err := os.Getwd() - - if err != nil { - cwd = "" - } if logger == nil { logger = log.NewNopLogger() } + m := newMetrics(o.Registerer) + router := route.New().WithInstrumentation(m.instrumentHandler) + + cwd, err := os.Getwd() + if err != nil { + cwd = "" + } + h := &Handler{ - logger: logger, + logger: logger, + + gatherer: o.Gatherer, + metrics: m, + router: router, quitCh: make(chan struct{}), reloadCh: make(chan chan error), @@ -453,7 +484,7 @@ func (h *Handler) Run(ctx context.Context) error { mux := http.NewServeMux() mux.Handle("/", h.router) - av1 := route.New().WithInstrumentation(instrumentHandlerWithPrefix("/api/v1")) + av1 := route.New().WithInstrumentation(h.metrics.instrumentHandlerWithPrefix("/api/v1")) h.apiV1.Register(av1) apiPath := "/api" if h.options.RoutePrefix != "/" { @@ -639,7 +670,7 @@ func (h *Handler) status(w http.ResponseWriter, r *http.Request) { status.StorageRetention = status.StorageRetention + h.options.TSDBCfg.MaxBytes.String() } - metrics, err := prometheus.DefaultGatherer.Gather() + metrics, err := h.gatherer.Gather() if err != nil { http.Error(w, fmt.Sprintf("error gathering runtime status: %s", err), http.StatusInternalServerError) return diff --git a/web/web_test.go b/web/web_test.go index 06115aaa04..d93ec058d3 100644 --- a/web/web_test.go +++ b/web/web_test.go @@ -21,10 +21,14 @@ import ( "net/http/httptest" "net/url" "os" + "strconv" "strings" "testing" "time" + "github.com/prometheus/client_golang/prometheus" + prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/rules" @@ -116,7 +120,8 @@ func TestReadyAndHealthy(t *testing.T) { Host: "localhost:9090", Path: "/", }, - Version: &PrometheusVersion{}, + Version: &PrometheusVersion{}, + Gatherer: 
prometheus.DefaultGatherer, } opts.Flags = map[string]string{} @@ -402,3 +407,32 @@ func TestDebugHandler(t *testing.T) { testutil.Equals(t, tc.code, w.Code) } } + +func TestHTTPMetrics(t *testing.T) { + t.Parallel() + + handler := New(nil, &Options{RoutePrefix: "/"}) + getReady := func() int { + t.Helper() + w := httptest.NewRecorder() + + req, err := http.NewRequest("GET", "/-/ready", nil) + testutil.Ok(t, err) + + handler.router.ServeHTTP(w, req) + return w.Code + } + + code := getReady() + testutil.Equals(t, http.StatusServiceUnavailable, code) + counter := handler.metrics.requestCounter + testutil.Equals(t, 1, int(prom_testutil.ToFloat64(counter.WithLabelValues("/-/ready", strconv.Itoa(http.StatusServiceUnavailable))))) + + handler.Ready() + for range [2]int{} { + code = getReady() + testutil.Equals(t, http.StatusOK, code) + } + testutil.Equals(t, 2, int(prom_testutil.ToFloat64(counter.WithLabelValues("/-/ready", strconv.Itoa(http.StatusOK))))) + testutil.Equals(t, 1, int(prom_testutil.ToFloat64(counter.WithLabelValues("/-/ready", strconv.Itoa(http.StatusServiceUnavailable))))) +}