diff --git a/.build/Makefile b/.build/Makefile index 943bb87b29..6e9533967e 100644 --- a/.build/Makefile +++ b/.build/Makefile @@ -15,123 +15,7 @@ include ../Makefile.INCLUDE -all: dependencies-stamp - -bison-stamp: bison-implementation-$(UNAME)-stamp - [ -x "$$(which bison)" ] || { echo "bison not found." ; false ; } - touch $@ - -bison-implementation-Darwin-stamp: - [ -x "$$(which bison)" ] || $(BREW_INSTALL) bison - touch $@ - -bison-implementation-Linux-stamp: - [ -x "$$(which bison)" ] || $(APT_GET_INSTALL) bison - touch $@ - -cache-stamp: - $(MAKE) -C cache - touch $@ - -cc-stamp: cc-implementation-$(UNAME)-stamp - [ -x "$$(which cc)" ] || { echo "cc not found." ; false ; } - touch $@ - -cc-implementation-Darwin-stamp: - [ -x "$$(which cc)" ] || { echo "Install XCode?" ; false ; } - touch $@ - -cc-implementation-Linux-stamp: - [ -x "$$(which cc)" ] || $(APT_GET_INSTALL) build-essential - touch $@ - -dependencies-stamp: cache-stamp cc-stamp leveldb-stamp snappy-stamp godns-stamp - touch $@ - -goprotobuf-protoc-gen-go-stamp: protoc-stamp goprotobuf-stamp - $(GO_GET) code.google.com/p/goprotobuf/protoc-gen-go $(THIRD_PARTY_BUILD_OUTPUT) - touch $@ - -goprotobuf-stamp: protoc-stamp - $(GO_GET) code.google.com/p/goprotobuf/proto $(THIRD_PARTY_BUILD_OUTPUT) - touch $@ - -godns-stamp: - $(GO_GET) github.com/miekg/dns $(THIRD_PARTY_BUILD_OUTPUT) - touch $@ - -leveldb-stamp: cache-stamp cache/leveldb-$(LEVELDB_VERSION).tar.gz cc-stamp rsync-stamp snappy-stamp - tar xzvf cache/leveldb-$(LEVELDB_VERSION).tar.gz -C dirty $(THIRD_PARTY_BUILD_OUTPUT) - cd dirty/leveldb-$(LEVELDB_VERSION) && CFLAGS="$(CFLAGS) -lsnappy" CXXFLAGS="$(CXXFLAGS) -lsnappy $(LDFLAGS)" LDFLAGS="-lsnappy $(LDFLAGS)" bash -x ./build_detect_platform build_config.mk ./ - # The test that LevelDB uses to test for Snappy is naive and - # does not respect LDFLAGS. 
:-( - CFLAGS="$(CFLAGS) -lsnappy" CXXFLAGS="$(CXXFLAGS) -lsnappy $(LDFLAGS)" LDFLAGS="-lsnappy $(LDFLAGS)" $(MAKE) -C dirty/leveldb-$(LEVELDB_VERSION) $(THIRD_PARTY_BUILD_OUTPUT) - rsync -av "dirty/leveldb-$(LEVELDB_VERSION)/include/" "$(PREFIX)/include/" $(THIRD_PARTY_BUILD_OUTPUT) - -[ "$(UNAME)" = "Linux" ] && { rsync -av "dirty/leveldb-$(LEVELDB_VERSION)/"*.*so* "$(PREFIX)/lib/" ; } $(THIRD_PARTY_BUILD_OUTPUT) $(THIRD_PARTY_BUILD_OUTPUT) - -[ "$(UNAME)" = "Darwin" ] && { rsync -av "dirty/leveldb-$(LEVELDB_VERSION)/"*.*dylib* "$(PREFIX)/lib/" ; } $(THIRD_PARTY_BUILD_OUTPUT) - rsync -av "dirty/leveldb-$(LEVELDB_VERSION)/"*.a "$(PREFIX)/lib/" $(THIRD_PARTY_BUILD_OUTPUT) - touch $@ - -libunwind-stamp: - $(APT_GET_INSTALL) libunwind7 - $(APT_GET_INSTALL) libunwind7-dev - touch $@ - -noop-target-stamp: - echo "Not doing anything." - touch $@ - -protoc-stamp: cache-stamp cache/protobuf-$(PROTOCOL_BUFFERS_VERSION).tar.bz2 cc-stamp - tar xjvf cache/protobuf-$(PROTOCOL_BUFFERS_VERSION).tar.bz2 -C dirty $(THIRD_PARTY_BUILD_OUTPUT) - cd dirty/protobuf-$(PROTOCOL_BUFFERS_VERSION) && ./configure --prefix="$(PREFIX)" $(THIRD_PARTY_BUILD_OUTPUT) - $(MAKE) -C dirty/protobuf-$(PROTOCOL_BUFFERS_VERSION) $(THIRD_PARTY_BUILD_OUTPUT) - $(MAKE) -C dirty/protobuf-$(PROTOCOL_BUFFERS_VERSION) install $(THIRD_PARTY_BUILD_OUTPUT) - [ -x "$$(which protoc)" ] || { echo "protoc not found." ; false ; } - touch $@ - -rsync-implementation-Darwin-stamp: - [ -x "$$(which rsync)" ] || $(BREW_INSTALL) rsync - touch $@ - -rsync-implementation-Linux-stamp: - [ -x "$$(which rsync)" ] || $(APT_GET_INSTALL) rsync - -rsync-stamp: rsync-implementation-$(UNAME)-stamp - [ -x "$$(which rsync)" ] || { echo "rsync not found." 
; false ; } - touch $@ - -snappy-stamp: cache-stamp cache/snappy-$(SNAPPY_VERSION).tar.gz cc-stamp - tar xzvf cache/snappy-$(SNAPPY_VERSION).tar.gz -C dirty $(THIRD_PARTY_BUILD_OUTPUT) - cd dirty/snappy-$(SNAPPY_VERSION) && ./configure --prefix="$(PREFIX)" $(THIRD_PARTY_BUILD_OUTPUT) - $(MAKE) -C dirty/snappy-$(SNAPPY_VERSION) $(THIRD_PARTY_BUILD_OUTPUT) - $(MAKE) -C dirty/snappy-$(SNAPPY_VERSION) install $(THIRD_PARTY_BUILD_OUTPUT) - touch $@ - -ifeq ($(UNAME), Linux) -stack-unwind-support-stamp: libunwind-stamp - touch $@ -else -stack-unwind-support-stamp: noop-target-stamp - touch $@ -endif - -vim-implementation-Darwin-stamp: - [ -x "$$(which vim)" ] || $(BREW_INSTALL) vim - touch $@ - -vim-implementation-Linux-stamp: - [ -x "$$(which vim)" ] || $(APT_GET_INSTALL) vim - touch $@ - -vim-stamp: vim-implementation-$(UNAME)-stamp - touch $@ +all: clean: - $(MAKE) -C cache clean - $(MAKE) -C dirty clean $(MAKE) -C root clean - $(MAKE) -C package clean - rm -rf *-stamp - - -.PHONY: clean diff --git a/.build/cache/Makefile b/.build/cache/Makefile deleted file mode 100644 index 0e83487c08..0000000000 --- a/.build/cache/Makefile +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright 2013 Prometheus Team -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -.SUFFIXES: - -include ../../Makefile.INCLUDE - -all: populate - -populate: leveldb-$(LEVELDB_VERSION).tar.gz protobuf-$(PROTOCOL_BUFFERS_VERSION).tar.bz2 snappy-$(SNAPPY_VERSION).tar.gz - -leveldb-$(LEVELDB_VERSION).tar.gz: wget-stamp - $(WGET) http://leveldb.googlecode.com/files/leveldb-$(LEVELDB_VERSION).tar.gz - -protobuf-$(PROTOCOL_BUFFERS_VERSION).tar.bz2: wget-stamp - $(WGET) http://protobuf.googlecode.com/files/$@ - -snappy-$(SNAPPY_VERSION).tar.gz: wget-stamp - $(WGET) http://snappy.googlecode.com/files/snappy-$(SNAPPY_VERSION).tar.gz - -wget-implementation-Darwin-stamp: - [ -x "$$(which wget)" ] || $(BREW_INSTALL) wget - touch $@ - -wget-implementation-Linux-stamp: - [ -x "$$(which wget)" ] || $(APT_GET_INSTALL) wget - touch $@ - -wget-stamp: wget-implementation-$(UNAME)-stamp - [ -x "$$(which wget)" ] || { echo "wget not found." ; false ; } - touch $@ - -clean: - -[ -n "$(REALLY_CLEAN)" ] && rm -rf *.bz2 - -[ -n "$(REALLY_CLEAN)" ] && rm -rf *.gz - rm -rf *-stamp - -.PHONY: clean populate diff --git a/.build/dirty/.gitignore b/.build/dirty/.gitignore deleted file mode 100644 index f59ec20aab..0000000000 --- a/.build/dirty/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* \ No newline at end of file diff --git a/.build/dirty/Makefile b/.build/dirty/Makefile deleted file mode 100644 index d76a091065..0000000000 --- a/.build/dirty/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2013 Prometheus Team -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -.SUFFIXES: - -include ../../Makefile.INCLUDE - -all: - -clean: - rm -rf * - git checkout . diff --git a/.build/package/.gitignore b/.build/package/.gitignore deleted file mode 100644 index f59ec20aab..0000000000 --- a/.build/package/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* \ No newline at end of file diff --git a/.build/package/Makefile b/.build/package/Makefile deleted file mode 100644 index d76a091065..0000000000 --- a/.build/package/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2013 Prometheus Team -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -.SUFFIXES: - -include ../../Makefile.INCLUDE - -all: - -clean: - rm -rf * - git checkout . diff --git a/.build/package/lib/.gitignore b/.build/package/lib/.gitignore deleted file mode 100644 index f59ec20aab..0000000000 --- a/.build/package/lib/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* \ No newline at end of file diff --git a/.build/package/run_prometheus.sh b/.build/package/run_prometheus.sh deleted file mode 100755 index 12fbee2f4c..0000000000 --- a/.build/package/run_prometheus.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash - -# If either of the two tests below fail, you may need to install GNU coreutils -# in your environment. - -if [ ! -x "$(which readlink)" ]; then - echo "readlink tool cannot be found." > /dev/stderr - exit 1 -fi - -if [ ! -x "$(which dirname)" ]; then - echo "dirname tool cannot be found." 
> /dev/stderr - exit 1 -fi - -readonly binary="${0}" -readonly binary_path="$(readlink -f ${binary})" -readonly binary_directory="$(dirname ${binary_path})" - -readonly platform=$(uname | tr '[:upper:]' '[:lower:]') - - -export LD_LIBRARY_PATH="${binary_directory}/lib:${LD_LIBRARY_PATH}" - -if [[ "${platform}" == "darwin" ]]; then - export DYLD_LIBRARY_PATH="${binary_directory}/lib:${DYLD_LIBRARY_PATH}" -fi - -exec "${binary_directory}/prometheus" "${@}" diff --git a/Makefile b/Makefile index 7591d243d1..6ab81874a0 100644 --- a/Makefile +++ b/Makefile @@ -22,14 +22,12 @@ $(GOCC): $(BUILD_PATH)/cache/$(GOPKG) $(FULL_GOPATH) touch $@ advice: - $(GO) tool vet . + $(GO) vet ./... binary: build -build: config dependencies model preparation tools web +build: config dependencies tools web $(GO) build -o prometheus $(BUILDFLAGS) . - cp prometheus $(BUILD_PATH)/package/prometheus - rsync -av --delete $(BUILD_PATH)/root/lib/ $(BUILD_PATH)/package/lib/ docker: build docker build -t prometheus:$(REV) . @@ -37,7 +35,7 @@ docker: build tarball: $(ARCHIVE) $(ARCHIVE): build - tar -C $(BUILD_PATH)/package -czf $(ARCHIVE) . + tar -czf $(ARCHIVE) prometheus release: REMOTE ?= $(error "can't upload, REMOTE not set") release: REMOTE_DIR ?= $(error "can't upload, REMOTE_DIR not set") @@ -49,7 +47,7 @@ tag: git push --tags $(BUILD_PATH)/cache/$(GOPKG): - curl -o $@ -L $(GOURL)/$(GOPKG) + $(CURL) -o $@ -L $(GOURL)/$(GOPKG) benchmark: test $(GO) test $(GO_TEST_FLAGS) -test.bench='Benchmark' ./... @@ -59,15 +57,15 @@ clean: $(MAKE) -C tools clean $(MAKE) -C web clean rm -rf $(TEST_ARTIFACTS) - -rm prometheus.tar.gz - -find . -type f -iname '*~' -exec rm '{}' ';' - -find . -type f -iname '*#' -exec rm '{}' ';' - -find . -type f -iname '.#*' -exec rm '{}' ';' + -rm $(ARCHIVE) + -find . -type f -name '*~' -exec rm '{}' ';' + -find . -type f -name '*#' -exec rm '{}' ';' + -find . 
-type f -name '.#*' -exec rm '{}' ';' -config: dependencies preparation +config: dependencies $(MAKE) -C config -dependencies: preparation +dependencies: $(GOCC) $(FULL_GOPATH) $(GO) get -d documentation: search_index @@ -76,14 +74,8 @@ documentation: search_index format: find . -iname '*.go' | egrep -v "^\./\.build|./generated|\.(l|y)\.go" | xargs -n1 $(GOFMT) -w -s=true -model: dependencies preparation - $(MAKE) -C model - -preparation: $(GOCC) $(FULL_GOPATH) - $(MAKE) -C $(BUILD_PATH) - race_condition_binary: build - CGO_CFLAGS="-I$(BUILD_PATH)/root/include" CGO_LDFLAGS="-L$(BUILD_PATH)/root/lib" $(GO) build -race -o prometheus.race $(BUILDFLAGS) . + $(GO) build -race -o prometheus.race $(BUILDFLAGS) . race_condition_run: race_condition_binary ./prometheus.race $(ARGUMENTS) @@ -94,7 +86,7 @@ run: binary search_index: godoc -index -write_index -index_files='search_index' -server: config dependencies model preparation +server: config dependencies $(MAKE) -C server # $(FULL_GOPATH) is responsible for ensuring that the builder has not done anything @@ -103,16 +95,13 @@ $(FULL_GOPATH): -[ -d "$(FULL_GOPATH)" ] || { mkdir -vp $(FULL_GOPATH_BASE) ; ln -s "$(PWD)" "$(FULL_GOPATH)" ; } [ -d "$(FULL_GOPATH)" ] -test: config dependencies model preparation tools web +test: config dependencies tools web $(GO) test $(GO_TEST_FLAGS) ./... 
-tools: dependencies preparation +tools: dependencies $(MAKE) -C tools -update: - $(GO) get -d - -web: config dependencies model preparation +web: config dependencies $(MAKE) -C web -.PHONY: advice binary build clean config dependencies documentation format model preparation race_condition_binary race_condition_run release run search_index tag tarball test tools update +.PHONY: advice binary build clean config dependencies documentation format race_condition_binary race_condition_run release run search_index tag tarball test tools diff --git a/Makefile.INCLUDE b/Makefile.INCLUDE index 58d802bbaf..5f9be26fcc 100644 --- a/Makefile.INCLUDE +++ b/Makefile.INCLUDE @@ -15,15 +15,7 @@ .SUFFIXES: -# Set this to "false" to provide verbose builds of third-party components, -# namely C and C++ dependencies. -export SILENCE_THIRD_PARTY_BUILDS := true - -ifeq ($(SILENCE_THIRD_PARTY_BUILDS), true) -export THIRD_PARTY_BUILD_OUTPUT := >/dev/null 2>&1 -else -export THIRD_PARTY_BUILD_OUTPUT := -endif +VERSION=0.8.0 OS=$(shell uname) ARCH=$(shell uname -m) @@ -34,7 +26,7 @@ MAC_OS_X_VERSION ?= 10.8 BUILD_PATH = $(PWD)/.build -GO_VERSION := 1.3 +GO_VERSION := 1.3.3 GOOS = $(subst Darwin,darwin,$(subst Linux,linux,$(OS))) ifeq ($(GOOS),darwin) @@ -54,42 +46,18 @@ GOENV = TMPDIR=$(TMPDIR) GOROOT=$(GOROOT) GOPATH=$(GOPATH) GO = $(GOENV) $(GOCC) GOFMT = $(GOROOT)/bin/gofmt -LEVELDB_VERSION := 1.14.0 -PROTOCOL_BUFFERS_VERSION := 2.5.0 -SNAPPY_VERSION := 1.1.0 - UNAME := $(shell uname) FULL_GOPATH := $(GOPATH)/src/github.com/prometheus/prometheus FULL_GOPATH_BASE := $(GOPATH)/src/github.com/prometheus export PREFIX=$(BUILD_PATH)/root -export LOCAL_BINARIES=$(PREFIX)/bin +export PATH := $(GOPATH)/bin:$(PATH) -export PATH := $(LOCAL_BINARIES):$(GOPATH)/bin:$(PATH) -export LD_LIBRARY_PATH := $(PREFIX)/lib:$(LD_LIBRARY_PATH) - -export CFLAGS := $(CFLAGS) -I$(PREFIX)/include -O3 -export CXXFLAGS := $(CXXFLAGS) -I$(PREFIX)/include -O3 -export CPPFLAGS := $(CPPFLAGS) -I$(PREFIX)/include -O3 
-export LDFLAGS := $(LDFLAGS) -L$(PREFIX)/lib -export PKG_CONFIG_PATH := $(PREFIX)/lib/pkgconfig:$(PKG_CONFIG_PATH) - -export CGO_CFLAGS = $(CFLAGS) -export CGO_LDFLAGS = $(LDFLAGS) - -export GO_TEST_FLAGS ?= "-v" +export GO_TEST_FLAGS ?= "-v -short" GO_GET := $(GO) get -u -v -x -APT_GET_INSTALL := sudo apt-get install -y -BREW_INSTALL := brew install -# By default, wget sets the creation time to match the server's, which throws -# off Make. :-( -# -# Set WGET_OPTIONS to include ``--no-use-server-timestamps`` to alleviate this. -WGET := wget $(WGET_OPTIONS) -c -VERSION := $(shell cat VERSION) REV := $(shell git rev-parse --short HEAD) BRANCH := $(shell git rev-parse --abbrev-ref HEAD) HOSTNAME := $(shell hostname -f) @@ -100,11 +68,8 @@ BUILDFLAGS := -ldflags \ -X main.buildBranch $(BRANCH)\ -X main.buildUser $(USER)@$(HOSTNAME)\ -X main.buildDate $(BUILD_DATE)\ - -X main.goVersion $(GO_VERSION)\ - -X main.leveldbVersion $(LEVELDB_VERSION)\ - -X main.protobufVersion $(PROTOCOL_BUFFERS_VERSION)\ - -X main.snappyVersion $(SNAPPY_VERSION)" - -PROTOC := $(LOCAL_BINARIES)/protoc + -X main.goVersion $(GO_VERSION)" +PROTOC := protoc +CURL := curl ARCHIVE := prometheus-$(VERSION).$(GOOS)-$(GOARCH).tar.gz diff --git a/README.md b/README.md index 6abc31cd76..be44109f7e 100644 --- a/README.md +++ b/README.md @@ -13,88 +13,67 @@ The system is designed to collect telemetry from named targets on given intervals, evaluate rule expressions, display the results, and trigger an action if some condition is observed to be true. -## Prerequisites -If you read below in the _Getting Started_ section, the build infrastructure -will take care of the following things for you in most cases: +TODO: The above description is somewhat esoteric. Rephrase it into +somethith that tells normal people how they will usually benefit from +using Prometheus. - 1. Go 1.1. - 2. LevelDB: [https://code.google.com/p/leveldb/](https://code.google.com/p/leveldb/). - 3. 
Protocol Buffers Compiler: [http://code.google.com/p/protobuf/](http://code.google.com/p/protobuf/). - 4. goprotobuf: the code generator and runtime library: [http://code.google.com/p/goprotobuf/](http://code.google.com/p/goprotobuf/). - 5. Levigo, a Go-wrapper around LevelDB's C library: [https://github.com/jmhodges/levigo](https://github.com/jmhodges/levigo). - 6. GoRest, a RESTful style web-services framework: [http://code.google.com/p/gorest/](http://code.google.com/p/gorest/). - 7. Prometheus Client, Prometheus in Prometheus [https://github.com/prometheus/client_golang](https://github.com/prometheus/client_golang). - 8. Snappy, a compression library for LevelDB and Levigo [http://code.google.com/p/snappy/](http://code.google.com/p/snappy/). +## Install -## Getting Started +There are various ways of installing Prometheus. -For basic help how to get started: +### Precompiled packages - * The source code is periodically indexed: [Prometheus Core](http://godoc.org/github.com/prometheus/prometheus). - * For UNIX-like environment users, please consult the Travis CI configuration in _.travis.yml_ and _Makefile_. - * All of the core developers are accessible via the [Prometheus Developers Mailinglist](https://groups.google.com/forum/?fromgroups#!forum/prometheus-developers). +We plan to provide precompiled binaries for various platforms and even +packages for common Linux distribution soon. Once those are offered, +it will be the recommended way of installing Prometheus. -### General +### Use `make` -For first time users, simply run the following: +In most cirumstances, the following should work: $ make - $ ARGUMENTS="-configFile=documentation/examples/prometheus.conf" make run + $ ARGUMENTS="-config.file=documentation/examples/prometheus.conf" make run -``${ARGUMENTS}`` is passed verbatim into the makefile and thusly Prometheus as -``$(ARGUMENTS)``. This is useful for quick one-off invocations and smoke -testing. 
+``${ARGUMENTS}`` is passed verbatim to the commandline starting the Prometheus binary. +This is useful for quick one-off invocations and smoke testing. -If you run into problems, try the following: +The above requires a number of common tools to be installed, namely +`curl`, `git`, `gzip`, `hg` (Mercurial CLI), `sed`, `xxd`. Should you +need to change any of the protocol buffer definition files +(`*.proto`), you also need the protocol buffer compiler +[`protoc`](http://code.google.com/p/protobuf/](http://code.google.com/p/protobuf/), +v2.5.0 or higher, in your `$PATH`. - $ SILENCE_THIRD_PARTY_BUILDS=false make +Everything else will be downloaded and installed into a staging +environment in the `.build` sub-directory. That includes a Go +development environment of the appropriate version. -Upon having a satisfactory build, it's possible to create an artifact for -end-user distribution: +The `Makefile` offers a number of useful targets. Some examples: - $ make package - $ find build/package +* `make test` runs tests. +* `make tarball` creates a tar ball with the binary for distribution. +* `make race_condition_run` compiles and runs a binary with the race detector enabled. -``build/package`` will be sufficient for whatever archiving mechanism you -choose. The important thing to note is that Go presently does not -staticly link against C dependency libraries, so including the ``lib`` -directory is paramount. Providing ``LD_LIBRARY_PATH`` or -``DYLD_LIBRARY_PATH`` in a scaffolding shell script is advised. 
+### Use your own Go development environment +Using your own Go development environment with the usual tooling is +possible, too, but you have to take care of various generated files +(usually by running `make` in the respective sub-directory): -### Problems -If at any point you run into an error with the ``make`` build system in terms of -its not properly scaffolding things on a given environment, please file a bug or -open a pull request with your changes if you can fix it yourself. +* Compiling the protocol buffer definitions in `config` (only if you have changed them). +* Generating the parser and lexer code in `rules` (only if you have changed `parser.y` or `lexer.l`). +* The `files.go` blob in `web/blob`, which embeds the static web content into the binary. -Please note that we're explicitly shooting for stable runtime environments and -not the latest-whiz bang releases; thusly, we ask you to provide ample -architecture and release identification remarks for us. +Furthermore, the build info (see `build_info.go`) will not be +populated if you simply run `go build`. You have to pass in command +line flags as defined in `Makefile.INCLUDE` (see `${BUILDFLAGS}`) to +do that. -## Testing +## More information - $ make test - -## Packaging - - $ make package - -### Race Detector - -Go 1.1 includes a [race detector](http://tip.golang.org/doc/articles/race_detector.html) -which can be enabled at build time. Here's how to use it with Prometheus -(assumes that you've already run a successful build). - -To run the tests with race detection: - - $ GORACE="log_path=/tmp/foo" go test -race ./... - -To run the server with race detection: - - $ go build -race . - $ GORACE="log_path=/tmp/foo" ./prometheus - -[![Build Status](https://travis-ci.org/prometheus/prometheus.png)](https://travis-ci.org/prometheus/prometheus) + * The source code is periodically indexed: [Prometheus Core](http://godoc.org/github.com/prometheus/prometheus). 
+ * You will find a Travis CI configuration in `.travis.yml`. + * All of the core developers are accessible via the [Prometheus Developers Mailinglist](https://groups.google.com/forum/?fromgroups#!forum/prometheus-developers). ## Contributing @@ -102,4 +81,4 @@ Refer to [CONTRIBUTING.md](CONTRIBUTING.md) ## License -Apache License 2.0 +Apache License 2.0, see [LICENSE](LICENSE). diff --git a/VERSION b/VERSION deleted file mode 100644 index a3df0a6959..0000000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.8.0 diff --git a/build_info.go b/build_info.go index 3d3fb3e45d..2548029320 100644 --- a/build_info.go +++ b/build_info.go @@ -19,29 +19,23 @@ import ( // Build information. Populated by Makefile. var ( - buildVersion string - buildRevision string - buildBranch string - buildUser string - buildDate string - goVersion string - leveldbVersion string - protobufVersion string - snappyVersion string + buildVersion string + buildRevision string + buildBranch string + buildUser string + buildDate string + goVersion string ) // BuildInfo encapsulates compile-time metadata about Prometheus made available // via go tool ld such that this can be reported on-demand. 
var BuildInfo = map[string]string{ - "version": buildVersion, - "revision": buildRevision, - "branch": buildBranch, - "user": buildUser, - "date": buildDate, - "go_version": goVersion, - "leveldb_version": leveldbVersion, - "protobuf_version": protobufVersion, - "snappy_version": snappyVersion, + "version": buildVersion, + "revision": buildRevision, + "branch": buildBranch, + "user": buildUser, + "date": buildDate, + "go_version": goVersion, } var versionInfoTmpl = template.Must(template.New("version").Parse( @@ -49,7 +43,4 @@ var versionInfoTmpl = template.Must(template.New("version").Parse( build user: {{.user}} build date: {{.date}} go version: {{.go_version}} - leveldb version: {{.leveldb_version}} - protobuf version: {{.protobuf_version}} - snappy version: {{.snappy_version}} `)) diff --git a/coding/indexable/time.go b/coding/indexable/time.go deleted file mode 100644 index 912b6a2ab1..0000000000 --- a/coding/indexable/time.go +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2013 Prometheus Team -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package indexable - -import ( - "encoding/binary" - - clientmodel "github.com/prometheus/client_golang/model" -) - -// EncodeTimeInto writes the provided time into the specified buffer subject -// to the LevelDB big endian key sort order requirement. 
-func EncodeTimeInto(dst []byte, t clientmodel.Timestamp) { - binary.BigEndian.PutUint64(dst, uint64(t.Unix())) -} - -// EncodeTime converts the provided time into a byte buffer subject to the -// LevelDB big endian key sort order requirement. -func EncodeTime(t clientmodel.Timestamp) []byte { - buffer := make([]byte, 8) - - EncodeTimeInto(buffer, t) - - return buffer -} - -// DecodeTime deserializes a big endian byte array into a Unix time in UTC, -// omitting granularity precision less than a second. -func DecodeTime(src []byte) clientmodel.Timestamp { - return clientmodel.TimestampFromUnix(int64(binary.BigEndian.Uint64(src))) -} diff --git a/coding/indexable/time_test.go b/coding/indexable/time_test.go deleted file mode 100644 index 881fe87a3f..0000000000 --- a/coding/indexable/time_test.go +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2013 Prometheus Team -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package indexable - -import ( - "math/rand" - "testing" - "testing/quick" - - clientmodel "github.com/prometheus/client_golang/model" -) - -func TestTimeEndToEnd(t *testing.T) { - tester := func(x int) bool { - random := rand.New(rand.NewSource(int64(x))) - buffer := make([]byte, 8) - incoming := clientmodel.TimestampFromUnix(random.Int63()) - - EncodeTimeInto(buffer, incoming) - outgoing := DecodeTime(buffer) - - return incoming.Equal(outgoing) && incoming.Unix() == outgoing.Unix() - } - - if err := quick.Check(tester, nil); err != nil { - t.Error(err) - } -} diff --git a/config/Makefile b/config/Makefile index b01d50c08c..f41c461713 100644 --- a/config/Makefile +++ b/config/Makefile @@ -17,12 +17,6 @@ SUFFIXES: include ../Makefile.INCLUDE -# In order to build the generated targets in this directory, run the -# following: -# -# make -C build goprotobuf-protoc-gen-go-stamp - - generated/config.pb.go: config.proto - $(MAKE) -C ../.build goprotobuf-protoc-gen-go-stamp + $(GO_GET) code.google.com/p/goprotobuf/protoc-gen-go $(PROTOC) --proto_path=$(PREFIX)/include:. --go_out=generated/ config.proto diff --git a/main.go b/main.go index 01cb8bfb69..bbe51aa09f 100644 --- a/main.go +++ b/main.go @@ -26,12 +26,13 @@ import ( registry "github.com/prometheus/client_golang/prometheus" clientmodel "github.com/prometheus/client_golang/model" + registry "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/notification" "github.com/prometheus/prometheus/retrieval" "github.com/prometheus/prometheus/rules/manager" - "github.com/prometheus/prometheus/storage/metric/tiered" + "github.com/prometheus/prometheus/storage/local" "github.com/prometheus/prometheus/storage/remote" "github.com/prometheus/prometheus/storage/remote/opentsdb" "github.com/prometheus/prometheus/web" @@ -42,208 +43,99 @@ const deletionBatchSize = 100 // Commandline flags. 
var ( - configFile = flag.String("configFile", "prometheus.conf", "Prometheus configuration file name.") - metricsStoragePath = flag.String("metricsStoragePath", "/tmp/metrics", "Base path for metrics storage.") + configFile = flag.String("config.file", "prometheus.conf", "Prometheus configuration file name.") - alertmanagerUrl = flag.String("alertmanager.url", "", "The URL of the alert manager to send notifications to.") + alertmanagerURL = flag.String("alertmanager.url", "", "The URL of the alert manager to send notifications to.") + notificationQueueCapacity = flag.Int("alertmanager.notification-queue-capacity", 100, "The capacity of the queue for pending alert manager notifications.") + + metricsStoragePath = flag.String("storage.local.path", "/tmp/metrics", "Base path for metrics storage.") remoteTSDBUrl = flag.String("storage.remote.url", "", "The URL of the OpenTSDB instance to send samples to.") remoteTSDBTimeout = flag.Duration("storage.remote.timeout", 30*time.Second, "The timeout to use when sending samples to OpenTSDB.") - samplesQueueCapacity = flag.Int("storage.queue.samplesCapacity", 4096, "The size of the unwritten samples queue.") - diskAppendQueueCapacity = flag.Int("storage.queue.diskAppendCapacity", 1000000, "The size of the queue for items that are pending writing to disk.") - memoryAppendQueueCapacity = flag.Int("storage.queue.memoryAppendCapacity", 10000, "The size of the queue for items that are pending writing to memory.") + samplesQueueCapacity = flag.Int("storage.incoming-samples-queue-capacity", 4096, "The capacity of the queue of samples to be stored.") - compactInterval = flag.Duration("compact.interval", 3*time.Hour, "The amount of time between compactions.") - compactGroupSize = flag.Int("compact.groupSize", 500, "The minimum group size for compacted samples.") - compactAgeInclusiveness = flag.Duration("compact.ageInclusiveness", 5*time.Minute, "The age beyond which samples should be compacted.") + numMemoryChunks = 
flag.Int("storage.local.memory-chunks", 1024*1024, "How many chunks to keep in memory. While the size of a chunk is 1kiB, the total memory usage will be significantly higher than this value * 1kiB. Furthermore, for various reasons, more chunks might have to be kept in memory temporarily.") - deleteInterval = flag.Duration("delete.interval", 11*time.Hour, "The amount of time between deletion of old values.") + storageRetentionPeriod = flag.Duration("storage.local.retention", 15*24*time.Hour, "How long to retain samples in the local storage.") - deleteAge = flag.Duration("delete.ageMaximum", 15*24*time.Hour, "The relative maximum age for values before they are deleted.") + checkpointInterval = flag.Duration("storage.local.checkpoint-interval", 5*time.Minute, "The period at which the in-memory index of time series is checkpointed.") - arenaFlushInterval = flag.Duration("arena.flushInterval", 15*time.Minute, "The period at which the in-memory arena is flushed to disk.") - arenaTTL = flag.Duration("arena.ttl", 10*time.Minute, "The relative age of values to purge to disk from memory.") + storageDirty = flag.Bool("storage.local.dirty", false, "If set, the local storage layer will perform crash recovery even if the last shutdown appears to be clean.") - notificationQueueCapacity = flag.Int("alertmanager.notificationQueueCapacity", 100, "The size of the queue for pending alert manager notifications.") + printVersion = flag.Bool("version", false, "Print version information.") +) - printVersion = flag.Bool("version", false, "print version information") - - shutdownTimeout = flag.Duration("shutdownGracePeriod", 0*time.Second, "The amount of time Prometheus gives background services to finish running when shutdown is requested.") +// Instrumentation. 
+var ( + samplesQueueCapDesc = registry.NewDesc( + "prometheus_samples_queue_capacity", + "Capacity of the queue for unwritten samples.", + nil, nil, + ) + samplesQueueLenDesc = registry.NewDesc( + "prometheus_samples_queue_length", + "Current number of items in the queue for unwritten samples. Each item comprises all samples exposed by one target as one metric family (i.e. metrics of the same name).", + nil, nil, + ) ) type prometheus struct { - compactionTimer *time.Ticker - deletionTimer *time.Ticker - - curationSema chan struct{} - stopBackgroundOperations chan struct{} - unwrittenSamples chan *extraction.Result - ruleManager manager.RuleManager - targetManager retrieval.TargetManager - notifications chan notification.NotificationReqs - storage *tiered.TieredStorage - remoteTSDBQueue *remote.TSDBQueueManager + ruleManager manager.RuleManager + targetManager retrieval.TargetManager + notificationHandler *notification.NotificationHandler + storage local.Storage + remoteTSDBQueue *remote.TSDBQueueManager - curationState tiered.CurationStateUpdater + webService *web.WebService closeOnce sync.Once } -func (p *prometheus) interruptHandler() { - notifier := make(chan os.Signal) - signal.Notify(notifier, os.Interrupt, syscall.SIGTERM) - - <-notifier - - glog.Warning("Received SIGINT/SIGTERM; Exiting gracefully...") - - p.Close() - - os.Exit(0) -} - -func (p *prometheus) compact(olderThan time.Duration, groupSize int) error { - select { - case s, ok := <-p.curationSema: - if !ok { - glog.Warning("Prometheus is shutting down; no more curation runs are allowed.") - return nil - } - - defer func() { - p.curationSema <- s - }() - - default: - glog.Warningf("Deferred compaction for %s and %s due to existing operation.", olderThan, groupSize) - - return nil - } - - processor := tiered.NewCompactionProcessor(&tiered.CompactionProcessorOptions{ - MaximumMutationPoolBatch: groupSize * 3, - MinimumGroupSize: groupSize, - }) - defer processor.Close() - - curator := 
tiered.NewCurator(&tiered.CuratorOptions{ - Stop: p.stopBackgroundOperations, - - ViewQueue: p.storage.ViewQueue, - }) - defer curator.Close() - - return curator.Run(olderThan, clientmodel.Now(), processor, p.storage.DiskStorage.CurationRemarks, p.storage.DiskStorage.MetricSamples, p.storage.DiskStorage.MetricHighWatermarks, p.curationState) -} - -func (p *prometheus) delete(olderThan time.Duration, batchSize int) error { - select { - case s, ok := <-p.curationSema: - if !ok { - glog.Warning("Prometheus is shutting down; no more curation runs are allowed.") - return nil - } - - defer func() { - p.curationSema <- s - }() - - default: - glog.Warningf("Deferred deletion for %s due to existing operation.", olderThan) - - return nil - } - - processor := tiered.NewDeletionProcessor(&tiered.DeletionProcessorOptions{ - MaximumMutationPoolBatch: batchSize, - }) - defer processor.Close() - - curator := tiered.NewCurator(&tiered.CuratorOptions{ - Stop: p.stopBackgroundOperations, - - ViewQueue: p.storage.ViewQueue, - }) - defer curator.Close() - - return curator.Run(olderThan, clientmodel.Now(), processor, p.storage.DiskStorage.CurationRemarks, p.storage.DiskStorage.MetricSamples, p.storage.DiskStorage.MetricHighWatermarks, p.curationState) -} - -func (p *prometheus) Close() { - p.closeOnce.Do(p.close) -} - -func (p *prometheus) close() { - // The "Done" remarks are a misnomer for some subsystems due to lack of - // blocking and synchronization. - glog.Info("Shutdown has been requested; subsytems are closing:") - p.targetManager.Stop() - glog.Info("Remote Target Manager: Done") - p.ruleManager.Stop() - glog.Info("Rule Executor: Done") - - // Stop any currently active curation (deletion or compaction). - close(p.stopBackgroundOperations) - glog.Info("Current Curation Workers: Requested") - - // Disallow further curation work. - close(p.curationSema) - - // Stop curation timers. 
- if p.compactionTimer != nil { - p.compactionTimer.Stop() - } - if p.deletionTimer != nil { - p.deletionTimer.Stop() - } - glog.Info("Future Curation Workers: Done") - - glog.Infof("Waiting %s for background systems to exit and flush before finalizing (DO NOT INTERRUPT THE PROCESS) ...", *shutdownTimeout) - - // Wart: We should have a concrete form of synchronization for this, not a - // hokey sleep statement. - time.Sleep(*shutdownTimeout) - - close(p.unwrittenSamples) - - p.storage.Close() - glog.Info("Local Storage: Done") - - if p.remoteTSDBQueue != nil { - p.remoteTSDBQueue.Close() - glog.Info("Remote Storage: Done") - } - - close(p.notifications) - glog.Info("Sundry Queues: Done") - glog.Info("See you next time!") -} - -func main() { - // TODO(all): Future additions to main should be, where applicable, glumped - // into the prometheus struct above---at least where the scoping of the entire - // server is concerned. - flag.Parse() - - versionInfoTmpl.Execute(os.Stdout, BuildInfo) - - if *printVersion { - os.Exit(0) - } - +// NewPrometheus creates a new prometheus object based on flag values. +// Call Serve() to start serving and Close() for clean shutdown. 
+func NewPrometheus() *prometheus { conf, err := config.LoadFromFile(*configFile) if err != nil { glog.Fatalf("Error loading configuration from %s: %v", *configFile, err) } - ts, err := tiered.NewTieredStorage(uint(*diskAppendQueueCapacity), 100, *arenaFlushInterval, *arenaTTL, *metricsStoragePath) - if err != nil { - glog.Fatal("Error opening storage: ", err) + unwrittenSamples := make(chan *extraction.Result, *samplesQueueCapacity) + + ingester := &retrieval.MergeLabelsIngester{ + Labels: conf.GlobalLabels(), + CollisionPrefix: clientmodel.ExporterLabelPrefix, + Ingester: retrieval.ChannelIngester(unwrittenSamples), + } + targetManager := retrieval.NewTargetManager(ingester) + targetManager.AddTargetsFromConfig(conf) + + notificationHandler := notification.NewNotificationHandler(*alertmanagerURL, *notificationQueueCapacity) + + o := &local.MemorySeriesStorageOptions{ + MemoryChunks: *numMemoryChunks, + PersistenceStoragePath: *metricsStoragePath, + PersistenceRetentionPeriod: *storageRetentionPeriod, + CheckpointInterval: *checkpointInterval, + Dirty: *storageDirty, + } + memStorage, err := local.NewMemorySeriesStorage(o) + if err != nil { + glog.Fatal("Error opening memory series storage: ", err) + } + + ruleManager := manager.NewRuleManager(&manager.RuleManagerOptions{ + Results: unwrittenSamples, + NotificationHandler: notificationHandler, + EvaluationInterval: conf.EvaluationInterval(), + Storage: memStorage, + PrometheusUrl: web.MustBuildServerUrl(), + }) + if err := ruleManager.AddRulesFromConfig(conf); err != nil { + glog.Fatal("Error loading rule files: ", err) } - registry.MustRegister(ts) var remoteTSDBQueue *remote.TSDBQueueManager if *remoteTSDBUrl == "" { @@ -251,50 +143,12 @@ func main() { } else { openTSDB := opentsdb.NewClient(*remoteTSDBUrl, *remoteTSDBTimeout) remoteTSDBQueue = remote.NewTSDBQueueManager(openTSDB, 512) - registry.MustRegister(remoteTSDBQueue) - go remoteTSDBQueue.Run() } - unwrittenSamples := make(chan *extraction.Result, 
*samplesQueueCapacity) - ingester := &retrieval.MergeLabelsIngester{ - Labels: conf.GlobalLabels(), - CollisionPrefix: clientmodel.ExporterLabelPrefix, - - Ingester: retrieval.ChannelIngester(unwrittenSamples), - } - - compactionTimer := time.NewTicker(*compactInterval) - deletionTimer := time.NewTicker(*deleteInterval) - - // Queue depth will need to be exposed - targetManager := retrieval.NewTargetManager(ingester) - targetManager.AddTargetsFromConfig(conf) - - notifications := make(chan notification.NotificationReqs, *notificationQueueCapacity) - - // Queue depth will need to be exposed - ruleManager := manager.NewRuleManager(&manager.RuleManagerOptions{ - Results: unwrittenSamples, - Notifications: notifications, - EvaluationInterval: conf.EvaluationInterval(), - Storage: ts, - PrometheusUrl: web.MustBuildServerUrl(), - }) - if err := ruleManager.AddRulesFromConfig(conf); err != nil { - glog.Fatal("Error loading rule files: ", err) - } - go ruleManager.Run() - - notificationHandler := notification.NewNotificationHandler(*alertmanagerUrl, notifications) - registry.MustRegister(notificationHandler) - go notificationHandler.Run() - flags := map[string]string{} - flag.VisitAll(func(f *flag.Flag) { flags[f.Name] = f.Value.String() }) - prometheusStatus := &web.PrometheusStatusHandler{ BuildInfo: BuildInfo, Config: conf.String(), @@ -309,96 +163,144 @@ func main() { } consolesHandler := &web.ConsolesHandler{ - Storage: ts, - } - - databasesHandler := &web.DatabasesHandler{ - Provider: ts.DiskStorage, - RefreshInterval: 5 * time.Minute, + Storage: memStorage, } metricsService := &api.MetricsService{ Config: &conf, TargetManager: targetManager, - Storage: ts, + Storage: memStorage, } - prometheus := &prometheus{ - compactionTimer: compactionTimer, - - deletionTimer: deletionTimer, - - curationState: prometheusStatus, - curationSema: make(chan struct{}, 1), - - unwrittenSamples: unwrittenSamples, - - stopBackgroundOperations: make(chan struct{}), - - ruleManager: 
ruleManager, - targetManager: targetManager, - notifications: notifications, - storage: ts, - remoteTSDBQueue: remoteTSDBQueue, - } - defer prometheus.Close() - webService := &web.WebService{ - StatusHandler: prometheusStatus, - MetricsHandler: metricsService, - DatabasesHandler: databasesHandler, - ConsolesHandler: consolesHandler, - AlertsHandler: alertsHandler, - - QuitDelegate: prometheus.Close, + StatusHandler: prometheusStatus, + MetricsHandler: metricsService, + ConsolesHandler: consolesHandler, + AlertsHandler: alertsHandler, } - prometheus.curationSema <- struct{}{} + p := &prometheus{ + unwrittenSamples: unwrittenSamples, - storageStarted := make(chan bool) - go ts.Serve(storageStarted) - <-storageStarted + ruleManager: ruleManager, + targetManager: targetManager, + notificationHandler: notificationHandler, + storage: memStorage, + remoteTSDBQueue: remoteTSDBQueue, - go prometheus.interruptHandler() + webService: webService, + } + webService.QuitDelegate = p.Close + return p +} + +// Serve starts the Prometheus server. It returns after the server has been shut +// down. The method installs an interrupt handler, allowing to trigger a +// shutdown by sending SIGTERM to the process. 
+func (p *prometheus) Serve() { + if p.remoteTSDBQueue != nil { + go p.remoteTSDBQueue.Run() + } + go p.ruleManager.Run() + go p.notificationHandler.Run() + go p.interruptHandler() + + p.storage.Start() go func() { - for _ = range prometheus.compactionTimer.C { - glog.Info("Starting compaction...") - err := prometheus.compact(*compactAgeInclusiveness, *compactGroupSize) - - if err != nil { - glog.Error("could not compact: ", err) - } - glog.Info("Done") - } - }() - - go func() { - for _ = range prometheus.deletionTimer.C { - glog.Info("Starting deletion of stale values...") - err := prometheus.delete(*deleteAge, deletionBatchSize) - - if err != nil { - glog.Error("could not delete: ", err) - } - glog.Info("Done") - } - }() - - go func() { - err := webService.ServeForever() + err := p.webService.ServeForever() if err != nil { glog.Fatal(err) } }() - // TODO(all): Migrate this into prometheus.serve(). - for block := range unwrittenSamples { + for block := range p.unwrittenSamples { if block.Err == nil && len(block.Samples) > 0 { - ts.AppendSamples(block.Samples) - if remoteTSDBQueue != nil { - remoteTSDBQueue.Queue(block.Samples) + p.storage.AppendSamples(block.Samples) + if p.remoteTSDBQueue != nil { + p.remoteTSDBQueue.Queue(block.Samples) } } } + + // The following shut-down operations have to happen after + // unwrittenSamples is drained. So do not move them into close(). + if err := p.storage.Stop(); err != nil { + glog.Error("Error stopping local storage: ", err) + } + + if p.remoteTSDBQueue != nil { + p.remoteTSDBQueue.Stop() + } + + p.notificationHandler.Stop() + glog.Info("See you next time!") +} + +// Close cleanly shuts down the Prometheus server. 
+func (p *prometheus) Close() { + p.closeOnce.Do(p.close) +} + +func (p *prometheus) interruptHandler() { + notifier := make(chan os.Signal) + signal.Notify(notifier, os.Interrupt, syscall.SIGTERM) + <-notifier + + glog.Warning("Received SIGTERM, exiting gracefully...") + p.Close() +} + +func (p *prometheus) close() { + glog.Info("Shutdown has been requested; subsytems are closing:") + p.targetManager.Stop() + p.ruleManager.Stop() + + close(p.unwrittenSamples) + // Note: Before closing the remaining subsystems (storage, ...), we have + // to wait until p.unwrittenSamples is actually drained. Therefore, + // remaining shut-downs happen in Serve(). +} + +// Describe implements registry.Collector. +func (p *prometheus) Describe(ch chan<- *registry.Desc) { + ch <- samplesQueueCapDesc + ch <- samplesQueueLenDesc + p.notificationHandler.Describe(ch) + p.storage.Describe(ch) + if p.remoteTSDBQueue != nil { + p.remoteTSDBQueue.Describe(ch) + } +} + +// Collect implements registry.Collector. +func (p *prometheus) Collect(ch chan<- registry.Metric) { + ch <- registry.MustNewConstMetric( + samplesQueueCapDesc, + registry.GaugeValue, + float64(cap(p.unwrittenSamples)), + ) + ch <- registry.MustNewConstMetric( + samplesQueueLenDesc, + registry.GaugeValue, + float64(len(p.unwrittenSamples)), + ) + p.notificationHandler.Collect(ch) + p.storage.Collect(ch) + if p.remoteTSDBQueue != nil { + p.remoteTSDBQueue.Collect(ch) + } +} + +func main() { + flag.Parse() + versionInfoTmpl.Execute(os.Stdout, BuildInfo) + + if *printVersion { + os.Exit(0) + } + + p := NewPrometheus() + registry.MustRegister(p) + p.Serve() } diff --git a/model/Makefile b/model/Makefile deleted file mode 100644 index 70893dbac5..0000000000 --- a/model/Makefile +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright 2013 Prometheus Team -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -all: generated/data.pb.go generated/descriptor.blob - -SUFFIXES: - -include ../Makefile.INCLUDE - -# In order to build the generated targets in this directory, run the -# following: -# -# make -C .build goprotobuf-protoc-gen-go-stamp - -generated/data.pb.go: data.proto - $(MAKE) -C ../.build goprotobuf-protoc-gen-go-stamp - $(PROTOC) --proto_path=$(PREFIX)/include:. --include_imports --go_out=generated/ --descriptor_set_out=generated/descriptor.blob data.proto - -generated/descriptor.blob: data.proto - $(MAKE) -C ../.build goprotobuf-protoc-gen-go-stamp - $(PROTOC) --proto_path=$(PREFIX)/include:. --include_imports --go_out=generated/ --descriptor_set_out=generated/descriptor.blob data.proto diff --git a/model/data.proto b/model/data.proto deleted file mode 100644 index 4e43bd977d..0000000000 --- a/model/data.proto +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright 2013 Prometheus Team -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package io.prometheus; - -import "google/protobuf/descriptor.proto"; - -message LabelPair { - optional string name = 1; - optional string value = 2; -} - -message LabelName { - optional string name = 1; -} - -message LabelValueCollection { - repeated string member = 1; -} - -message Metric { - repeated LabelPair label_pair = 1; -} - -message Fingerprint { - optional string signature = 1; -} - -message FingerprintCollection { - repeated Fingerprint member = 1; -} - -message LabelSet { - repeated LabelPair member = 1; -} - -// The default LevelDB comparator sorts not only lexicographically, but also by -// key length (which takes precedence). Thus, no variable-length fields may be -// introduced into the key definition below. -message SampleKey { - optional Fingerprint fingerprint = 1; - optional bytes timestamp = 2; - optional sfixed64 last_timestamp = 3; - optional fixed32 sample_count = 4; -} - -message MembershipIndexValue { -} - -message MetricHighWatermark { - optional int64 timestamp = 1; -} - -// CompactionProcessorDefinition models a curation process across the sample -// corpus that ensures that sparse samples. -message CompactionProcessorDefinition { - // minimum_group_size identifies how minimally samples should be grouped - // together to write a new samples chunk. - optional uint32 minimum_group_size = 1; -} - -// CurationKey models the state of curation for a given metric fingerprint and -// its associated samples. The time series database only knows about compaction -// and resampling behaviors that are explicitly defined to it in its runtime -// configuration, meaning it never scans on-disk tables for CurationKey -// policies; rather, it looks up via the CurationKey tuple to find out what the -// effectuation state for a given metric fingerprint is. -// -// For instance, how far along as a rule for (Fingerprint A, Samples Older Than -// B, and Curation Processor) has been effectuated on-disk. 
-message CurationKey { - // fingerprint identifies the fingerprint for the given policy. - optional Fingerprint fingerprint = 1; - - // processor_message_type_name identifies the underlying message type that - // was used to encode processor_message_raw. - optional string processor_message_type_name = 2; - - // processor_message_raw identifies the serialized ProcessorSignature for this - // operation. - optional bytes processor_message_raw = 3; - - // ignore_younger_than represents in seconds relative to when the curation - // cycle start when the curator should stop operating. For instance, if - // the curation cycle starts at time T and the curation remark dictates that - // the curation should starts processing samples at time S, the curator should - // work from S until ignore_younger_than seconds before T: - // - // PAST NOW FUTURE - // - // S--------------->|----------T - // |---IYT----| - // - // [Curation Resumption Time (S), T - IYT) - optional int64 ignore_younger_than = 4; - - // This could be populated by decoding the generated descriptor file into a - // FileDescriptorSet message and extracting the type definition for the given - // message schema that describes processor_message_type_name. - // - // optional google.protobuf.DescriptorProto processor_message_type_descriptor_raw = 5; -} - -// CurationValue models the progress for a given CurationKey. -message CurationValue { - // last_completion_timestamp represents the seconds since the epoch UTC at - // which the curator last completed its duty cycle for a given metric - // fingerprint. - optional int64 last_completion_timestamp = 1; -} - -// DeletionProcessorDefinition models a curation process across the sample -// corpus that deletes old values. 
-message DeletionProcessorDefinition { -} diff --git a/model/generated/data.pb.go b/model/generated/data.pb.go deleted file mode 100644 index a887d924cc..0000000000 --- a/model/generated/data.pb.go +++ /dev/null @@ -1,344 +0,0 @@ -// Code generated by protoc-gen-go. -// source: data.proto -// DO NOT EDIT! - -/* -Package io_prometheus is a generated protocol buffer package. - -It is generated from these files: - data.proto - -It has these top-level messages: - LabelPair - LabelName - LabelValueCollection - Metric - Fingerprint - FingerprintCollection - LabelSet - SampleKey - MembershipIndexValue - MetricHighWatermark - CompactionProcessorDefinition - CurationKey - CurationValue - DeletionProcessorDefinition -*/ -package io_prometheus - -import proto "code.google.com/p/goprotobuf/proto" -import json "encoding/json" -import math "math" - -// discarding unused import google_protobuf "google/protobuf/descriptor.pb" - -// Reference proto, json, and math imports to suppress error if they are not otherwise used. 
-var _ = proto.Marshal -var _ = &json.SyntaxError{} -var _ = math.Inf - -type LabelPair struct { - Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` - Value *string `protobuf:"bytes,2,opt,name=value" json:"value,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *LabelPair) Reset() { *m = LabelPair{} } -func (m *LabelPair) String() string { return proto.CompactTextString(m) } -func (*LabelPair) ProtoMessage() {} - -func (m *LabelPair) GetName() string { - if m != nil && m.Name != nil { - return *m.Name - } - return "" -} - -func (m *LabelPair) GetValue() string { - if m != nil && m.Value != nil { - return *m.Value - } - return "" -} - -type LabelName struct { - Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *LabelName) Reset() { *m = LabelName{} } -func (m *LabelName) String() string { return proto.CompactTextString(m) } -func (*LabelName) ProtoMessage() {} - -func (m *LabelName) GetName() string { - if m != nil && m.Name != nil { - return *m.Name - } - return "" -} - -type LabelValueCollection struct { - Member []string `protobuf:"bytes,1,rep,name=member" json:"member,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *LabelValueCollection) Reset() { *m = LabelValueCollection{} } -func (m *LabelValueCollection) String() string { return proto.CompactTextString(m) } -func (*LabelValueCollection) ProtoMessage() {} - -func (m *LabelValueCollection) GetMember() []string { - if m != nil { - return m.Member - } - return nil -} - -type Metric struct { - LabelPair []*LabelPair `protobuf:"bytes,1,rep,name=label_pair" json:"label_pair,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *Metric) Reset() { *m = Metric{} } -func (m *Metric) String() string { return proto.CompactTextString(m) } -func (*Metric) ProtoMessage() {} - -func (m *Metric) GetLabelPair() []*LabelPair { - if m != nil { - return m.LabelPair - } - return nil -} - -type Fingerprint 
struct { - Signature *string `protobuf:"bytes,1,opt,name=signature" json:"signature,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *Fingerprint) Reset() { *m = Fingerprint{} } -func (m *Fingerprint) String() string { return proto.CompactTextString(m) } -func (*Fingerprint) ProtoMessage() {} - -func (m *Fingerprint) GetSignature() string { - if m != nil && m.Signature != nil { - return *m.Signature - } - return "" -} - -type FingerprintCollection struct { - Member []*Fingerprint `protobuf:"bytes,1,rep,name=member" json:"member,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *FingerprintCollection) Reset() { *m = FingerprintCollection{} } -func (m *FingerprintCollection) String() string { return proto.CompactTextString(m) } -func (*FingerprintCollection) ProtoMessage() {} - -func (m *FingerprintCollection) GetMember() []*Fingerprint { - if m != nil { - return m.Member - } - return nil -} - -type LabelSet struct { - Member []*LabelPair `protobuf:"bytes,1,rep,name=member" json:"member,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *LabelSet) Reset() { *m = LabelSet{} } -func (m *LabelSet) String() string { return proto.CompactTextString(m) } -func (*LabelSet) ProtoMessage() {} - -func (m *LabelSet) GetMember() []*LabelPair { - if m != nil { - return m.Member - } - return nil -} - -// The default LevelDB comparator sorts not only lexicographically, but also by -// key length (which takes precedence). Thus, no variable-length fields may be -// introduced into the key definition below. 
-type SampleKey struct { - Fingerprint *Fingerprint `protobuf:"bytes,1,opt,name=fingerprint" json:"fingerprint,omitempty"` - Timestamp []byte `protobuf:"bytes,2,opt,name=timestamp" json:"timestamp,omitempty"` - LastTimestamp *int64 `protobuf:"fixed64,3,opt,name=last_timestamp" json:"last_timestamp,omitempty"` - SampleCount *uint32 `protobuf:"fixed32,4,opt,name=sample_count" json:"sample_count,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *SampleKey) Reset() { *m = SampleKey{} } -func (m *SampleKey) String() string { return proto.CompactTextString(m) } -func (*SampleKey) ProtoMessage() {} - -func (m *SampleKey) GetFingerprint() *Fingerprint { - if m != nil { - return m.Fingerprint - } - return nil -} - -func (m *SampleKey) GetTimestamp() []byte { - if m != nil { - return m.Timestamp - } - return nil -} - -func (m *SampleKey) GetLastTimestamp() int64 { - if m != nil && m.LastTimestamp != nil { - return *m.LastTimestamp - } - return 0 -} - -func (m *SampleKey) GetSampleCount() uint32 { - if m != nil && m.SampleCount != nil { - return *m.SampleCount - } - return 0 -} - -type MembershipIndexValue struct { - XXX_unrecognized []byte `json:"-"` -} - -func (m *MembershipIndexValue) Reset() { *m = MembershipIndexValue{} } -func (m *MembershipIndexValue) String() string { return proto.CompactTextString(m) } -func (*MembershipIndexValue) ProtoMessage() {} - -type MetricHighWatermark struct { - Timestamp *int64 `protobuf:"varint,1,opt,name=timestamp" json:"timestamp,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *MetricHighWatermark) Reset() { *m = MetricHighWatermark{} } -func (m *MetricHighWatermark) String() string { return proto.CompactTextString(m) } -func (*MetricHighWatermark) ProtoMessage() {} - -func (m *MetricHighWatermark) GetTimestamp() int64 { - if m != nil && m.Timestamp != nil { - return *m.Timestamp - } - return 0 -} - -// CompactionProcessorDefinition models a curation process across the sample -// corpus that ensures that 
sparse samples. -type CompactionProcessorDefinition struct { - // minimum_group_size identifies how minimally samples should be grouped - // together to write a new samples chunk. - MinimumGroupSize *uint32 `protobuf:"varint,1,opt,name=minimum_group_size" json:"minimum_group_size,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *CompactionProcessorDefinition) Reset() { *m = CompactionProcessorDefinition{} } -func (m *CompactionProcessorDefinition) String() string { return proto.CompactTextString(m) } -func (*CompactionProcessorDefinition) ProtoMessage() {} - -func (m *CompactionProcessorDefinition) GetMinimumGroupSize() uint32 { - if m != nil && m.MinimumGroupSize != nil { - return *m.MinimumGroupSize - } - return 0 -} - -// CurationKey models the state of curation for a given metric fingerprint and -// its associated samples. The time series database only knows about compaction -// and resampling behaviors that are explicitly defined to it in its runtime -// configuration, meaning it never scans on-disk tables for CurationKey -// policies; rather, it looks up via the CurationKey tuple to find out what the -// effectuation state for a given metric fingerprint is. -// -// For instance, how far along as a rule for (Fingerprint A, Samples Older Than -// B, and Curation Processor) has been effectuated on-disk. -type CurationKey struct { - // fingerprint identifies the fingerprint for the given policy. - Fingerprint *Fingerprint `protobuf:"bytes,1,opt,name=fingerprint" json:"fingerprint,omitempty"` - // processor_message_type_name identifies the underlying message type that - // was used to encode processor_message_raw. - ProcessorMessageTypeName *string `protobuf:"bytes,2,opt,name=processor_message_type_name" json:"processor_message_type_name,omitempty"` - // processor_message_raw identifies the serialized ProcessorSignature for this - // operation. 
- ProcessorMessageRaw []byte `protobuf:"bytes,3,opt,name=processor_message_raw" json:"processor_message_raw,omitempty"` - // ignore_younger_than represents in seconds relative to when the curation - // cycle start when the curator should stop operating. For instance, if - // the curation cycle starts at time T and the curation remark dictates that - // the curation should starts processing samples at time S, the curator should - // work from S until ignore_younger_than seconds before T: - // - // PAST NOW FUTURE - // - // S--------------->|----------T - // |---IYT----| - // - // [Curation Resumption Time (S), T - IYT) - IgnoreYoungerThan *int64 `protobuf:"varint,4,opt,name=ignore_younger_than" json:"ignore_younger_than,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *CurationKey) Reset() { *m = CurationKey{} } -func (m *CurationKey) String() string { return proto.CompactTextString(m) } -func (*CurationKey) ProtoMessage() {} - -func (m *CurationKey) GetFingerprint() *Fingerprint { - if m != nil { - return m.Fingerprint - } - return nil -} - -func (m *CurationKey) GetProcessorMessageTypeName() string { - if m != nil && m.ProcessorMessageTypeName != nil { - return *m.ProcessorMessageTypeName - } - return "" -} - -func (m *CurationKey) GetProcessorMessageRaw() []byte { - if m != nil { - return m.ProcessorMessageRaw - } - return nil -} - -func (m *CurationKey) GetIgnoreYoungerThan() int64 { - if m != nil && m.IgnoreYoungerThan != nil { - return *m.IgnoreYoungerThan - } - return 0 -} - -// CurationValue models the progress for a given CurationKey. -type CurationValue struct { - // last_completion_timestamp represents the seconds since the epoch UTC at - // which the curator last completed its duty cycle for a given metric - // fingerprint. 
- LastCompletionTimestamp *int64 `protobuf:"varint,1,opt,name=last_completion_timestamp" json:"last_completion_timestamp,omitempty"` - XXX_unrecognized []byte `json:"-"` -} - -func (m *CurationValue) Reset() { *m = CurationValue{} } -func (m *CurationValue) String() string { return proto.CompactTextString(m) } -func (*CurationValue) ProtoMessage() {} - -func (m *CurationValue) GetLastCompletionTimestamp() int64 { - if m != nil && m.LastCompletionTimestamp != nil { - return *m.LastCompletionTimestamp - } - return 0 -} - -// DeletionProcessorDefinition models a curation process across the sample -// corpus that deletes old values. -type DeletionProcessorDefinition struct { - XXX_unrecognized []byte `json:"-"` -} - -func (m *DeletionProcessorDefinition) Reset() { *m = DeletionProcessorDefinition{} } -func (m *DeletionProcessorDefinition) String() string { return proto.CompactTextString(m) } -func (*DeletionProcessorDefinition) ProtoMessage() {} - -func init() { -} diff --git a/model/generated/descriptor.blob b/model/generated/descriptor.blob deleted file mode 100644 index 9c88a52fdb..0000000000 Binary files a/model/generated/descriptor.blob and /dev/null differ diff --git a/notification/notification.go b/notification/notification.go index 75dbe8abbf..bf700ffe10 100644 --- a/notification/notification.go +++ b/notification/notification.go @@ -23,9 +23,9 @@ import ( "time" "github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" clientmodel "github.com/prometheus/client_golang/model" - "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/utility" ) @@ -47,7 +47,7 @@ const ( ) var ( - deadline = flag.Duration("alertmanager.httpDeadline", 10*time.Second, "Alert manager HTTP API timeout.") + deadline = flag.Duration("alertmanager.http-deadline", 10*time.Second, "Alert manager HTTP API timeout.") ) // A request for sending a notification to the alert manager for a single alert @@ -81,21 +81,24 @@ type NotificationHandler 
struct { // The URL of the alert manager to send notifications to. alertmanagerUrl string // Buffer of notifications that have not yet been sent. - pendingNotifications <-chan NotificationReqs + pendingNotifications chan NotificationReqs // HTTP client with custom timeout settings. httpClient httpPoster notificationLatency *prometheus.SummaryVec notificationsQueueLength prometheus.Gauge notificationsQueueCapacity prometheus.Metric + + stopped chan struct{} } // Construct a new NotificationHandler. -func NewNotificationHandler(alertmanagerUrl string, notificationReqs <-chan NotificationReqs) *NotificationHandler { +func NewNotificationHandler(alertmanagerUrl string, notificationQueueCapacity int) *NotificationHandler { return &NotificationHandler{ alertmanagerUrl: alertmanagerUrl, - pendingNotifications: notificationReqs, - httpClient: utility.NewDeadlineClient(*deadline), + pendingNotifications: make(chan NotificationReqs, notificationQueueCapacity), + + httpClient: utility.NewDeadlineClient(*deadline), notificationLatency: prometheus.NewSummaryVec( prometheus.SummaryOpts{ @@ -119,8 +122,9 @@ func NewNotificationHandler(alertmanagerUrl string, notificationReqs <-chan Noti nil, nil, ), prometheus.GaugeValue, - float64(cap(notificationReqs)), + float64(notificationQueueCapacity), ), + stopped: make(chan struct{}), } } @@ -163,7 +167,7 @@ func (n *NotificationHandler) sendNotifications(reqs NotificationReqs) error { return nil } -// Continuously dispatch notifications. +// Run dispatches notifications continuously. func (n *NotificationHandler) Run() { for reqs := range n.pendingNotifications { if n.alertmanagerUrl == "" { @@ -185,6 +189,35 @@ func (n *NotificationHandler) Run() { float64(time.Since(begin) / time.Millisecond), ) } + close(n.stopped) +} + +// SubmitReqs queues the given notification requests for processing. 
+func (n *NotificationHandler) SubmitReqs(reqs NotificationReqs) { + n.pendingNotifications <- reqs +} + +// Stop shuts down the notification handler. +func (n *NotificationHandler) Stop() { + glog.Info("Stopping notification handler...") + close(n.pendingNotifications) + <-n.stopped + glog.Info("Notification handler stopped.") +} + +// Describe implements prometheus.Collector. +func (n *NotificationHandler) Describe(ch chan<- *prometheus.Desc) { + n.notificationLatency.Describe(ch) + ch <- n.notificationsQueueLength.Desc() + ch <- n.notificationsQueueCapacity.Desc() +} + +// Collect implements prometheus.Collector. +func (n *NotificationHandler) Collect(ch chan<- prometheus.Metric) { + n.notificationLatency.Collect(ch) + n.notificationsQueueLength.Set(float64(len(n.pendingNotifications))) + ch <- n.notificationsQueueLength + ch <- n.notificationsQueueCapacity } // Describe implements prometheus.Collector. diff --git a/notification/notification_test.go b/notification/notification_test.go index 48424f2e5c..e01e4d3168 100644 --- a/notification/notification_test.go +++ b/notification/notification_test.go @@ -46,9 +46,8 @@ type testNotificationScenario struct { } func (s *testNotificationScenario) test(i int, t *testing.T) { - notifications := make(chan NotificationReqs) - defer close(notifications) - h := NewNotificationHandler("alertmanager_url", notifications) + h := NewNotificationHandler("alertmanager_url", 0) + defer h.Stop() receivedPost := make(chan bool, 1) poster := testHttpPoster{receivedPost: receivedPost} @@ -56,7 +55,7 @@ func (s *testNotificationScenario) test(i int, t *testing.T) { go h.Run() - notifications <- NotificationReqs{ + h.SubmitReqs(NotificationReqs{ { Summary: s.summary, Description: s.description, @@ -68,7 +67,7 @@ func (s *testNotificationScenario) test(i int, t *testing.T) { RuleString: "Test rule string", GeneratorURL: "prometheus_url", }, - } + }) <-receivedPost if poster.message != s.message { diff --git a/retrieval/target.go 
b/retrieval/target.go index 2d5d4e282b..11ec36d816 100644 --- a/retrieval/target.go +++ b/retrieval/target.go @@ -126,10 +126,12 @@ type Target interface { GlobalAddress() string // Return the target's base labels. BaseLabels() clientmodel.LabelSet - // Merge a new externally supplied target definition (e.g. with changed base - // labels) into an old target definition for the same endpoint. Preserve - // remaining information - like health state - from the old target. - Merge(newTarget Target) + // SetBaseLabelsFrom queues a replacement of the current base labels by + // the labels of the given target. The method returns immediately after + // queuing. The actual replacement of the base labels happens + // asynchronously (but most likely before the next scrape for the target + // begins). + SetBaseLabelsFrom(Target) // Scrape target at the specified interval. RunScraper(extraction.Ingester, time.Duration) // Stop scraping, synchronous. @@ -139,6 +141,9 @@ type Target interface { } // target is a Target that refers to a singular HTTP or HTTPS endpoint. +// +// TODO: The implementation is not yet goroutine safe, but for the web status, +// methods are called concurrently. type target struct { // The current health state of the target. state TargetState @@ -146,9 +151,10 @@ type target struct { lastError error // The last time a scrape was attempted. lastScrape time.Time - // Channel to signal RunScraper should stop, holds a channel - // to notify once stopped. - stopScraper chan bool + // Closing stopScraper signals that scraping should stop. + stopScraper chan struct{} + // Channel to queue base labels to be replaced. + newBaseLabels chan clientmodel.LabelSet address string // What is the deadline for the HTTP or HTTPS against this endpoint. @@ -162,11 +168,12 @@ type target struct { // Furnish a reasonably configured target for querying. 
func NewTarget(address string, deadline time.Duration, baseLabels clientmodel.LabelSet) Target { target := &target{ - address: address, - Deadline: deadline, - baseLabels: baseLabels, - httpClient: utility.NewDeadlineClient(deadline), - stopScraper: make(chan bool), + address: address, + Deadline: deadline, + baseLabels: baseLabels, + httpClient: utility.NewDeadlineClient(deadline), + stopScraper: make(chan struct{}), + newBaseLabels: make(chan clientmodel.LabelSet, 1), } return target @@ -197,11 +204,25 @@ func (t *target) recordScrapeHealth(ingester extraction.Ingester, timestamp clie }) } +// RunScraper implements Target. func (t *target) RunScraper(ingester extraction.Ingester, interval time.Duration) { + defer func() { + // Need to drain t.newBaseLabels to not make senders block during shutdown. + for { + select { + case <-t.newBaseLabels: + // Do nothing. + default: + return + } + } + }() + jitterTimer := time.NewTimer(time.Duration(float64(interval) * rand.Float64())) select { case <-jitterTimer.C: case <-t.stopScraper: + jitterTimer.Stop() return } jitterTimer.Stop() @@ -211,20 +232,39 @@ func (t *target) RunScraper(ingester extraction.Ingester, interval time.Duration t.lastScrape = time.Now() t.scrape(ingester) + + // Explanation of the contraption below: + // + // In case t.newBaseLabels or t.stopScraper have something to receive, + // we want to read from those channels rather than starting a new scrape + // (which might take very long). That's why the outer select has no + // ticker.C. Should neither t.newBaseLabels nor t.stopScraper have + // anything to receive, we go into the inner select, where ticker.C is + // in the mix. 
for { select { - case <-ticker.C: - targetIntervalLength.WithLabelValues(interval.String()).Observe(float64(time.Since(t.lastScrape) / time.Second)) - t.lastScrape = time.Now() - t.scrape(ingester) + case newBaseLabels := <-t.newBaseLabels: + t.baseLabels = newBaseLabels case <-t.stopScraper: return + default: + select { + case newBaseLabels := <-t.newBaseLabels: + t.baseLabels = newBaseLabels + case <-t.stopScraper: + return + case <-ticker.C: + targetIntervalLength.WithLabelValues(interval.String()).Observe(float64(time.Since(t.lastScrape) / time.Second)) + t.lastScrape = time.Now() + t.scrape(ingester) + } } } } +// StopScraper implements Target. func (t *target) StopScraper() { - t.stopScraper <- true + close(t.stopScraper) } const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3,application/json;schema="prometheus/telemetry";version=0.0.2;q=0.2,*/*;q=0.1` @@ -270,8 +310,8 @@ func (t *target) scrape(ingester extraction.Ingester) (err error) { return err } - // XXX: This is a wart; we need to handle this more gracefully down the - // road, especially once we have service discovery support. + // TODO: This is a wart; we need to handle this more gracefully down the + // road, especially once we have service discovery support. baseLabels := clientmodel.LabelSet{InstanceLabel: clientmodel.LabelValue(t.Address())} for baseLabel, baseValue := range t.baseLabels { baseLabels[baseLabel] = baseValue @@ -289,22 +329,27 @@ func (t *target) scrape(ingester extraction.Ingester) (err error) { return processor.ProcessSingle(resp.Body, i, processOptions) } +// LastError implements Target. func (t *target) LastError() error { return t.lastError } +// State implements Target. func (t *target) State() TargetState { return t.state } +// LastScrape implements Target. func (t *target) LastScrape() time.Time { return t.lastScrape } +// Address implements Target. 
func (t *target) Address() string { return t.address } +// GlobalAddress implements Target. func (t *target) GlobalAddress() string { address := t.address hostname, err := os.Hostname() @@ -318,18 +363,17 @@ func (t *target) GlobalAddress() string { return address } +// BaseLabels implements Target. func (t *target) BaseLabels() clientmodel.LabelSet { return t.baseLabels } -// Merge a new externally supplied target definition (e.g. with changed base -// labels) into an old target definition for the same endpoint. Preserve -// remaining information - like health state - from the old target. -func (t *target) Merge(newTarget Target) { +// SetBaseLabelsFrom implements Target. +func (t *target) SetBaseLabelsFrom(newTarget Target) { if t.Address() != newTarget.Address() { panic("targets don't refer to the same endpoint") } - t.baseLabels = newTarget.BaseLabels() + t.newBaseLabels <- newTarget.BaseLabels() } type targets []Target diff --git a/retrieval/target_test.go b/retrieval/target_test.go index 93238489ff..de43dc9635 100644 --- a/retrieval/target_test.go +++ b/retrieval/target_test.go @@ -149,7 +149,7 @@ func TestTargetRunScraperScrapes(t *testing.T) { state: UNKNOWN, address: "bad schema", httpClient: utility.NewDeadlineClient(0), - stopScraper: make(chan bool, 1), + stopScraper: make(chan struct{}), } go testTarget.RunScraper(nopIngester{}, time.Duration(time.Millisecond)) diff --git a/retrieval/targetmanager.go b/retrieval/targetmanager.go index eda6bef814..882b5364b1 100644 --- a/retrieval/targetmanager.go +++ b/retrieval/targetmanager.go @@ -14,6 +14,7 @@ package retrieval import ( + "sync" "github.com/golang/glog" "github.com/prometheus/client_golang/extraction" @@ -57,7 +58,7 @@ func (m *targetManager) TargetPoolForJob(job config.JobConfig) *TargetPool { glog.Infof("Pool for job %s does not exist; creating and starting...", job.GetName()) m.poolsByJob[job.GetName()] = targetPool - // BUG(all): Investigate whether this auto-goroutine creation is desired. 
+ // TODO: Investigate whether this auto-goroutine creation is desired. go targetPool.Run() } @@ -105,13 +106,22 @@ func (m *targetManager) AddTargetsFromConfig(config config.Config) { } func (m *targetManager) Stop() { - glog.Info("Target manager exiting...") - for _, p := range m.poolsByJob { - p.Stop() + glog.Info("Stopping target manager...") + var wg sync.WaitGroup + for j, p := range m.poolsByJob { + wg.Add(1) + go func(j string, p *TargetPool) { + defer wg.Done() + glog.Infof("Stopping target pool %q...", j) + p.Stop() + glog.Infof("Target pool %q stopped.", j) + }(j, p) } + wg.Wait() + glog.Info("Target manager stopped.") } -// XXX: Not really thread-safe. Only used in /status page for now. +// TODO: Not goroutine-safe. Only used in /status page for now. func (m *targetManager) Pools() map[string]*TargetPool { return m.poolsByJob } diff --git a/retrieval/targetmanager_test.go b/retrieval/targetmanager_test.go index 885e6a69fe..f82cfb2a34 100644 --- a/retrieval/targetmanager_test.go +++ b/retrieval/targetmanager_test.go @@ -76,7 +76,7 @@ func (t fakeTarget) State() TargetState { return ALIVE } -func (t *fakeTarget) Merge(newTarget Target) {} +func (t *fakeTarget) SetBaseLabelsFrom(newTarget Target) {} func testTargetManager(t testing.TB) { targetManager := NewTargetManager(nopIngester{}) diff --git a/retrieval/targetpool.go b/retrieval/targetpool.go index 89cc4d0aed..83bf13a335 100644 --- a/retrieval/targetpool.go +++ b/retrieval/targetpool.go @@ -30,7 +30,7 @@ const ( type TargetPool struct { sync.RWMutex - done chan chan bool + done chan chan struct{} manager TargetManager targetsByAddress map[string]Target interval time.Duration @@ -48,7 +48,7 @@ func NewTargetPool(m TargetManager, p TargetProvider, ing extraction.Ingester, i targetsByAddress: make(map[string]Target), addTargetQueue: make(chan Target, targetAddQueueSize), targetProvider: p, - done: make(chan chan bool), + done: make(chan chan struct{}), } } @@ -71,15 +71,14 @@ func (p *TargetPool) Run() { 
p.addTarget(newTarget) case stopped := <-p.done: p.ReplaceTargets([]Target{}) - glog.Info("TargetPool exiting...") - stopped <- true + close(stopped) return } } } -func (p TargetPool) Stop() { - stopped := make(chan bool) +func (p *TargetPool) Stop() { + stopped := make(chan struct{}) p.done <- stopped <-stopped } @@ -108,20 +107,27 @@ func (p *TargetPool) ReplaceTargets(newTargets []Target) { newTargetAddresses.Add(newTarget.Address()) oldTarget, ok := p.targetsByAddress[newTarget.Address()] if ok { - oldTarget.Merge(newTarget) + oldTarget.SetBaseLabelsFrom(newTarget) } else { p.targetsByAddress[newTarget.Address()] = newTarget go newTarget.RunScraper(p.ingester, p.interval) } } // Stop any targets no longer present. + var wg sync.WaitGroup for k, oldTarget := range p.targetsByAddress { if !newTargetAddresses.Has(k) { - glog.V(1).Info("Stopping scraper for target ", k) - oldTarget.StopScraper() - delete(p.targetsByAddress, k) + wg.Add(1) + go func(k string, oldTarget Target) { + defer wg.Done() + glog.V(1).Infof("Stopping scraper for target %s...", k) + oldTarget.StopScraper() + delete(p.targetsByAddress, k) + glog.V(1).Infof("Scraper for target %s stopped.", k) + }(k, oldTarget) } } + wg.Wait() } func (p *TargetPool) Targets() []Target { diff --git a/retrieval/targetpool_test.go b/retrieval/targetpool_test.go index 1e8005f27b..636600add2 100644 --- a/retrieval/targetpool_test.go +++ b/retrieval/targetpool_test.go @@ -14,8 +14,11 @@ package retrieval import ( + "net/http" "testing" "time" + + clientmodel "github.com/prometheus/client_golang/model" ) func testTargetPool(t testing.TB) { @@ -46,12 +49,12 @@ func testTargetPool(t testing.TB) { name: "single element", inputs: []input{ { - address: "http://single.com", + address: "single1", }, }, outputs: []output{ { - address: "http://single.com", + address: "single1", }, }, }, @@ -59,18 +62,18 @@ func testTargetPool(t testing.TB) { name: "plural schedules", inputs: []input{ { - address: "http://plural.net", + address: 
"plural1", }, { - address: "http://plural.com", + address: "plural2", }, }, outputs: []output{ { - address: "http://plural.net", + address: "plural1", }, { - address: "http://plural.com", + address: "plural2", }, }, }, @@ -81,9 +84,10 @@ func testTargetPool(t testing.TB) { for _, input := range scenario.inputs { target := target{ - address: input.address, + address: input.address, + newBaseLabels: make(chan clientmodel.LabelSet, 1), + httpClient: &http.Client{}, } - pool.addTarget(&target) } @@ -91,11 +95,8 @@ func testTargetPool(t testing.TB) { t.Errorf("%s %d. expected TargetPool size to be %d but was %d", scenario.name, i, len(scenario.outputs), len(pool.targetsByAddress)) } else { for j, output := range scenario.outputs { - target := pool.Targets()[j] - - if target.Address() != output.address { + if target, ok := pool.targetsByAddress[output.address]; !ok { t.Errorf("%s %d.%d. expected Target address to be %s but was %s", scenario.name, i, j, output.address, target.Address()) - } } @@ -113,30 +114,34 @@ func TestTargetPool(t *testing.T) { func TestTargetPoolReplaceTargets(t *testing.T) { pool := NewTargetPool(nil, nil, nopIngester{}, time.Duration(1)) oldTarget1 := &target{ - address: "example1", - state: UNREACHABLE, - stopScraper: make(chan bool, 1), + address: "example1", + state: UNREACHABLE, + stopScraper: make(chan struct{}), + newBaseLabels: make(chan clientmodel.LabelSet, 1), + httpClient: &http.Client{}, } oldTarget2 := &target{ - address: "example2", - state: UNREACHABLE, - stopScraper: make(chan bool, 1), + address: "example2", + state: UNREACHABLE, + stopScraper: make(chan struct{}), + newBaseLabels: make(chan clientmodel.LabelSet, 1), + httpClient: &http.Client{}, } newTarget1 := &target{ - address: "example1", - state: ALIVE, - stopScraper: make(chan bool, 1), + address: "example1", + state: ALIVE, + stopScraper: make(chan struct{}), + newBaseLabels: make(chan clientmodel.LabelSet, 1), + httpClient: &http.Client{}, } newTarget2 := &target{ - 
address: "example3", - state: ALIVE, - stopScraper: make(chan bool, 1), + address: "example3", + state: ALIVE, + stopScraper: make(chan struct{}), + newBaseLabels: make(chan clientmodel.LabelSet, 1), + httpClient: &http.Client{}, } - oldTarget1.StopScraper() - oldTarget2.StopScraper() - newTarget2.StopScraper() - pool.addTarget(oldTarget1) pool.addTarget(oldTarget2) diff --git a/rules/Makefile b/rules/Makefile index 489f1e9ecd..e2de40f77b 100644 --- a/rules/Makefile +++ b/rules/Makefile @@ -14,10 +14,11 @@ all: parser.y.go lexer.l.go parser.y.go: parser.y - go tool yacc -o parser.y.go -v "" parser.y + $(GOCC) tool yacc -o parser.y.go -v "" parser.y lexer.l.go: parser.y.go lexer.l # This is golex from https://github.com/cznic/golex. + $(GO_GET) github.com/cznic/golex golex -o="lexer.l.go" lexer.l clean: diff --git a/rules/alerting.go b/rules/alerting.go index 1872c948b6..d8caee7d5e 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -23,7 +23,7 @@ import ( "github.com/prometheus/prometheus/rules/ast" "github.com/prometheus/prometheus/stats" - "github.com/prometheus/prometheus/storage/metric" + "github.com/prometheus/prometheus/storage/local" "github.com/prometheus/prometheus/utility" ) @@ -118,11 +118,11 @@ func (rule *AlertingRule) Name() string { return rule.name } -func (rule *AlertingRule) EvalRaw(timestamp clientmodel.Timestamp, storage metric.PreloadingPersistence) (ast.Vector, error) { +func (rule *AlertingRule) EvalRaw(timestamp clientmodel.Timestamp, storage local.Storage) (ast.Vector, error) { return ast.EvalVectorInstant(rule.Vector, timestamp, storage, stats.NewTimerGroup()) } -func (rule *AlertingRule) Eval(timestamp clientmodel.Timestamp, storage metric.PreloadingPersistence) (ast.Vector, error) { +func (rule *AlertingRule) Eval(timestamp clientmodel.Timestamp, storage local.Storage) (ast.Vector, error) { // Get the raw value of the rule expression. 
exprResult, err := rule.EvalRaw(timestamp, storage) if err != nil { diff --git a/rules/ast/ast.go b/rules/ast/ast.go index c03602db6e..cf720eaed2 100644 --- a/rules/ast/ast.go +++ b/rules/ast/ast.go @@ -15,20 +15,22 @@ package ast import ( "errors" + "flag" "fmt" "hash/fnv" "math" "sort" "time" - "github.com/golang/glog" - clientmodel "github.com/prometheus/client_golang/model" "github.com/prometheus/prometheus/stats" + "github.com/prometheus/prometheus/storage/local" "github.com/prometheus/prometheus/storage/metric" ) +var stalenessDelta = flag.Duration("query.staleness-delta", 300*time.Second, "Staleness delta allowance during expression evaluations.") + // ---------------------------------------------------------------------------- // Raw data value types. @@ -81,6 +83,17 @@ const ( OR ) +// shouldDropMetric indicates whether the metric name should be dropped after +// applying this operator to a vector. +func (opType BinOpType) shouldDropMetric() bool { + switch opType { + case ADD, SUB, MUL, DIV, MOD: + return true + default: + return false + } +} + // AggrType is an enum for aggregation types. type AggrType int @@ -114,7 +127,7 @@ type Node interface { type ScalarNode interface { Node // Eval evaluates and returns the value of the scalar represented by this node. - Eval(timestamp clientmodel.Timestamp, view *viewAdapter) clientmodel.SampleValue + Eval(timestamp clientmodel.Timestamp) clientmodel.SampleValue } // VectorNode is a Node for vector values. @@ -123,17 +136,17 @@ type VectorNode interface { // Eval evaluates the node recursively and returns the result // as a Vector (i.e. a slice of Samples all at the given // Timestamp). - Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Vector + Eval(timestamp clientmodel.Timestamp) Vector } // MatrixNode is a Node for matrix values. type MatrixNode interface { Node // Eval evaluates the node recursively and returns the result as a Matrix. 
- Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Matrix + Eval(timestamp clientmodel.Timestamp) Matrix // Eval evaluates the node recursively and returns the result // as a Matrix that only contains the boundary values. - EvalBoundaries(timestamp clientmodel.Timestamp, view *viewAdapter) Matrix + EvalBoundaries(timestamp clientmodel.Timestamp) Matrix } // StringNode is a Node for string values. @@ -141,7 +154,7 @@ type StringNode interface { Node // Eval evaluates and returns the value of the string // represented by this node. - Eval(timestamp clientmodel.Timestamp, view *viewAdapter) string + Eval(timestamp clientmodel.Timestamp) string } // ---------------------------------------------------------------------------- @@ -176,7 +189,11 @@ type ( // A VectorSelector represents a metric name plus labelset. VectorSelector struct { labelMatchers metric.LabelMatchers + // The series iterators are populated at query analysis time. + iterators map[clientmodel.Fingerprint]local.SeriesIterator + metrics map[clientmodel.Fingerprint]clientmodel.Metric // Fingerprints are populated from label matchers at query analysis time. + // TODO: do we still need these? fingerprints clientmodel.Fingerprints } @@ -213,8 +230,11 @@ type ( // timerange. MatrixSelector struct { labelMatchers metric.LabelMatchers - // Fingerprints are populated from label matchers at query - // analysis time. + // The series iterators are populated at query analysis time. + iterators map[clientmodel.Fingerprint]local.SeriesIterator + metrics map[clientmodel.Fingerprint]clientmodel.Metric + // Fingerprints are populated from label matchers at query analysis time. + // TODO: do we still need these? fingerprints clientmodel.Fingerprints interval time.Duration } @@ -308,22 +328,22 @@ func (node StringFunctionCall) Children() Nodes { return node.args } // Eval implements the ScalarNode interface and returns the selector // value. 
-func (node *ScalarLiteral) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) clientmodel.SampleValue { +func (node *ScalarLiteral) Eval(timestamp clientmodel.Timestamp) clientmodel.SampleValue { return node.value } // Eval implements the ScalarNode interface and returns the result of // the expression. -func (node *ScalarArithExpr) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) clientmodel.SampleValue { - lhs := node.lhs.Eval(timestamp, view) - rhs := node.rhs.Eval(timestamp, view) +func (node *ScalarArithExpr) Eval(timestamp clientmodel.Timestamp) clientmodel.SampleValue { + lhs := node.lhs.Eval(timestamp) + rhs := node.rhs.Eval(timestamp) return evalScalarBinop(node.opType, lhs, rhs) } // Eval implements the ScalarNode interface and returns the result of // the function call. -func (node *ScalarFunctionCall) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) clientmodel.SampleValue { - return node.function.callFn(timestamp, view, node.args).(clientmodel.SampleValue) +func (node *ScalarFunctionCall) Eval(timestamp clientmodel.Timestamp) clientmodel.SampleValue { + return node.function.callFn(timestamp, node.args).(clientmodel.SampleValue) } func (node *VectorAggregation) labelsToGroupingKey(labels clientmodel.Metric) uint64 { @@ -357,33 +377,34 @@ func labelsToKey(labels clientmodel.Metric) uint64 { } // EvalVectorInstant evaluates a VectorNode with an instant query. 
-func EvalVectorInstant(node VectorNode, timestamp clientmodel.Timestamp, storage metric.PreloadingPersistence, queryStats *stats.TimerGroup) (vector Vector, err error) { - viewAdapter, err := viewAdapterForInstantQuery(node, timestamp, storage, queryStats) +func EvalVectorInstant(node VectorNode, timestamp clientmodel.Timestamp, storage local.Storage, queryStats *stats.TimerGroup) (Vector, error) { + closer, err := prepareInstantQuery(node, timestamp, storage, queryStats) if err != nil { - return + return nil, err } - vector = node.Eval(timestamp, viewAdapter) - return + defer closer.Close() + return node.Eval(timestamp), nil } // EvalVectorRange evaluates a VectorNode with a range query. -func EvalVectorRange(node VectorNode, start clientmodel.Timestamp, end clientmodel.Timestamp, interval time.Duration, storage metric.PreloadingPersistence, queryStats *stats.TimerGroup) (Matrix, error) { +func EvalVectorRange(node VectorNode, start clientmodel.Timestamp, end clientmodel.Timestamp, interval time.Duration, storage local.Storage, queryStats *stats.TimerGroup) (Matrix, error) { // Explicitly initialize to an empty matrix since a nil Matrix encodes to // null in JSON. matrix := Matrix{} - viewTimer := queryStats.GetTimer(stats.TotalViewBuildingTime).Start() - viewAdapter, err := viewAdapterForRangeQuery(node, start, end, interval, storage, queryStats) - viewTimer.Stop() + prepareTimer := queryStats.GetTimer(stats.TotalQueryPreparationTime).Start() + closer, err := prepareRangeQuery(node, start, end, interval, storage, queryStats) + prepareTimer.Stop() if err != nil { return nil, err } + defer closer.Close() // TODO implement watchdog timer for long-running queries. 
evalTimer := queryStats.GetTimer(stats.InnerEvalTime).Start() sampleSets := map[uint64]*metric.SampleSet{} for t := start; t.Before(end); t = t.Add(interval) { - vector := node.Eval(t, viewAdapter) + vector := node.Eval(t) for _, sample := range vector { samplePair := metric.SamplePair{ Value: sample.Value, @@ -444,8 +465,8 @@ func (node *VectorAggregation) groupedAggregationsToVector(aggregations map[uint // Eval implements the VectorNode interface and returns the aggregated // Vector. -func (node *VectorAggregation) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Vector { - vector := node.vector.Eval(timestamp, view) +func (node *VectorAggregation) Eval(timestamp clientmodel.Timestamp) Vector { + vector := node.vector.Eval(timestamp) result := map[uint64]*groupedAggregation{} for _, sample := range vector { groupingKey := node.labelsToGroupingKey(sample.Metric) @@ -477,8 +498,8 @@ func (node *VectorAggregation) Eval(timestamp clientmodel.Timestamp, view *viewA m := clientmodel.Metric{} if node.keepExtraLabels { m = sample.Metric + delete(m, clientmodel.MetricNameLabel) } else { - m[clientmodel.MetricNameLabel] = sample.Metric[clientmodel.MetricNameLabel] for _, l := range node.groupBy { if v, ok := sample.Metric[l]; ok { m[l] = v @@ -498,19 +519,91 @@ func (node *VectorAggregation) Eval(timestamp clientmodel.Timestamp, view *viewA // Eval implements the VectorNode interface and returns the value of // the selector. 
-func (node *VectorSelector) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Vector { - values, err := view.GetValueAtTime(node.fingerprints, timestamp) - if err != nil { - glog.Error("Unable to get vector values: ", err) - return Vector{} +func (node *VectorSelector) Eval(timestamp clientmodel.Timestamp) Vector { + //// timer := v.stats.GetTimer(stats.GetValueAtTimeTime).Start() + samples := Vector{} + for fp, it := range node.iterators { + sampleCandidates := it.GetValueAtTime(timestamp) + samplePair := chooseClosestSample(sampleCandidates, timestamp) + if samplePair != nil { + samples = append(samples, &clientmodel.Sample{ + Metric: node.metrics[fp], // TODO: need copy here because downstream can modify! + Value: samplePair.Value, + Timestamp: timestamp, + }) + } + } + //// timer.Stop() + return samples +} + +// chooseClosestSample chooses the closest sample of a list of samples +// surrounding a given target time. If samples are found both before and after +// the target time, the sample value is interpolated between these. Otherwise, +// the single closest sample is returned verbatim. +func chooseClosestSample(samples metric.Values, timestamp clientmodel.Timestamp) *metric.SamplePair { + var closestBefore *metric.SamplePair + var closestAfter *metric.SamplePair + for _, candidate := range samples { + delta := candidate.Timestamp.Sub(timestamp) + // Samples before target time. + if delta < 0 { + // Ignore samples outside of staleness policy window. + if -delta > *stalenessDelta { + continue + } + // Ignore samples that are farther away than what we've seen before. + if closestBefore != nil && candidate.Timestamp.Before(closestBefore.Timestamp) { + continue + } + sample := candidate + closestBefore = &sample + } + + // Samples after target time. + if delta >= 0 { + // Ignore samples outside of staleness policy window. + if delta > *stalenessDelta { + continue + } + // Ignore samples that are farther away than samples we've seen before. 
+ if closestAfter != nil && candidate.Timestamp.After(closestAfter.Timestamp) { + continue + } + sample := candidate + closestAfter = &sample + } + } + + switch { + case closestBefore != nil && closestAfter != nil: + return interpolateSamples(closestBefore, closestAfter, timestamp) + case closestBefore != nil: + return closestBefore + default: + return closestAfter + } +} + +// interpolateSamples interpolates a value at a target time between two +// provided sample pairs. +func interpolateSamples(first, second *metric.SamplePair, timestamp clientmodel.Timestamp) *metric.SamplePair { + dv := second.Value - first.Value + dt := second.Timestamp.Sub(first.Timestamp) + + dDt := dv / clientmodel.SampleValue(dt) + offset := clientmodel.SampleValue(timestamp.Sub(first.Timestamp)) + + return &metric.SamplePair{ + Value: first.Value + (offset * dDt), + Timestamp: timestamp, } - return values } // Eval implements the VectorNode interface and returns the result of // the function call. -func (node *VectorFunctionCall) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Vector { - return node.function.callFn(timestamp, view, node.args).(Vector) +func (node *VectorFunctionCall) Eval(timestamp clientmodel.Timestamp) Vector { + return node.function.callFn(timestamp, node.args).(Vector) } func evalScalarBinop(opType BinOpType, @@ -626,9 +719,6 @@ func evalVectorBinop(opType BinOpType, } func labelsEqual(labels1, labels2 clientmodel.Metric) bool { - if len(labels1) != len(labels2) { - return false - } for label, value := range labels1 { if labels2[label] != value && label != clientmodel.MetricNameLabel { return false @@ -639,39 +729,48 @@ func labelsEqual(labels1, labels2 clientmodel.Metric) bool { // Eval implements the VectorNode interface and returns the result of // the expression. 
-func (node *VectorArithExpr) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Vector { +func (node *VectorArithExpr) Eval(timestamp clientmodel.Timestamp) Vector { result := Vector{} if node.lhs.Type() == SCALAR && node.rhs.Type() == VECTOR { - lhs := node.lhs.(ScalarNode).Eval(timestamp, view) - rhs := node.rhs.(VectorNode).Eval(timestamp, view) + lhs := node.lhs.(ScalarNode).Eval(timestamp) + rhs := node.rhs.(VectorNode).Eval(timestamp) for _, rhsSample := range rhs { value, keep := evalVectorBinop(node.opType, lhs, rhsSample.Value) if keep { rhsSample.Value = value + if node.opType.shouldDropMetric() { + delete(rhsSample.Metric, clientmodel.MetricNameLabel) + } result = append(result, rhsSample) } } return result } else if node.lhs.Type() == VECTOR && node.rhs.Type() == SCALAR { - lhs := node.lhs.(VectorNode).Eval(timestamp, view) - rhs := node.rhs.(ScalarNode).Eval(timestamp, view) + lhs := node.lhs.(VectorNode).Eval(timestamp) + rhs := node.rhs.(ScalarNode).Eval(timestamp) for _, lhsSample := range lhs { value, keep := evalVectorBinop(node.opType, lhsSample.Value, rhs) if keep { lhsSample.Value = value + if node.opType.shouldDropMetric() { + delete(lhsSample.Metric, clientmodel.MetricNameLabel) + } result = append(result, lhsSample) } } return result } else if node.lhs.Type() == VECTOR && node.rhs.Type() == VECTOR { - lhs := node.lhs.(VectorNode).Eval(timestamp, view) - rhs := node.rhs.(VectorNode).Eval(timestamp, view) + lhs := node.lhs.(VectorNode).Eval(timestamp) + rhs := node.rhs.(VectorNode).Eval(timestamp) for _, lhsSample := range lhs { for _, rhsSample := range rhs { if labelsEqual(lhsSample.Metric, rhsSample.Metric) { value, keep := evalVectorBinop(node.opType, lhsSample.Value, rhsSample.Value) if keep { lhsSample.Value = value + if node.opType.shouldDropMetric() { + delete(lhsSample.Metric, clientmodel.MetricNameLabel) + } result = append(result, lhsSample) } } @@ -684,32 +783,54 @@ func (node *VectorArithExpr) Eval(timestamp 
clientmodel.Timestamp, view *viewAda // Eval implements the MatrixNode interface and returns the value of // the selector. -func (node *MatrixSelector) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Matrix { +func (node *MatrixSelector) Eval(timestamp clientmodel.Timestamp) Matrix { interval := &metric.Interval{ OldestInclusive: timestamp.Add(-node.interval), NewestInclusive: timestamp, } - values, err := view.GetRangeValues(node.fingerprints, interval) - if err != nil { - glog.Error("Unable to get values for vector interval: ", err) - return Matrix{} + + //// timer := v.stats.GetTimer(stats.GetRangeValuesTime).Start() + sampleSets := []metric.SampleSet{} + for fp, it := range node.iterators { + samplePairs := it.GetRangeValues(*interval) + if len(samplePairs) == 0 { + continue + } + + sampleSet := metric.SampleSet{ + Metric: node.metrics[fp], // TODO: need copy here because downstream can modify! + Values: samplePairs, + } + sampleSets = append(sampleSets, sampleSet) } - return values + //// timer.Stop() + return sampleSets } // EvalBoundaries implements the MatrixNode interface and returns the // boundary values of the selector. -func (node *MatrixSelector) EvalBoundaries(timestamp clientmodel.Timestamp, view *viewAdapter) Matrix { +func (node *MatrixSelector) EvalBoundaries(timestamp clientmodel.Timestamp) Matrix { interval := &metric.Interval{ OldestInclusive: timestamp.Add(-node.interval), NewestInclusive: timestamp, } - values, err := view.GetBoundaryValues(node.fingerprints, interval) - if err != nil { - glog.Error("Unable to get boundary values for vector interval: ", err) - return Matrix{} + + //// timer := v.stats.GetTimer(stats.GetBoundaryValuesTime).Start() + sampleSets := []metric.SampleSet{} + for fp, it := range node.iterators { + samplePairs := it.GetBoundaryValues(*interval) + if len(samplePairs) == 0 { + continue + } + + sampleSet := metric.SampleSet{ + Metric: node.metrics[fp], // TODO: make copy of metric. 
+ Values: samplePairs, + } + sampleSets = append(sampleSets, sampleSet) } - return values + //// timer.Stop() + return sampleSets } // Len implements sort.Interface. @@ -729,14 +850,14 @@ func (matrix Matrix) Swap(i, j int) { // Eval implements the StringNode interface and returns the value of // the selector. -func (node *StringLiteral) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) string { +func (node *StringLiteral) Eval(timestamp clientmodel.Timestamp) string { return node.str } // Eval implements the StringNode interface and returns the result of // the function call. -func (node *StringFunctionCall) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) string { - return node.function.callFn(timestamp, view, node.args).(string) +func (node *StringFunctionCall) Eval(timestamp clientmodel.Timestamp) string { + return node.function.callFn(timestamp, node.args).(string) } // ---------------------------------------------------------------------------- @@ -754,6 +875,8 @@ func NewScalarLiteral(value clientmodel.SampleValue) *ScalarLiteral { func NewVectorSelector(m metric.LabelMatchers) *VectorSelector { return &VectorSelector{ labelMatchers: m, + iterators: map[clientmodel.Fingerprint]local.SeriesIterator{}, + metrics: map[clientmodel.Fingerprint]clientmodel.Metric{}, } } @@ -845,6 +968,8 @@ func NewMatrixSelector(vector *VectorSelector, interval time.Duration) *MatrixSe return &MatrixSelector{ labelMatchers: vector.labelMatchers, interval: interval, + iterators: map[clientmodel.Fingerprint]local.SeriesIterator{}, + metrics: map[clientmodel.Fingerprint]clientmodel.Metric{}, } } diff --git a/rules/ast/functions.go b/rules/ast/functions.go index ab0cce47c4..9671be49bd 100644 --- a/rules/ast/functions.go +++ b/rules/ast/functions.go @@ -31,7 +31,7 @@ type Function struct { name string argTypes []ExprType returnType ExprType - callFn func(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} + callFn func(timestamp 
clientmodel.Timestamp, args []Node) interface{} } // CheckArgTypes returns a non-nil error if the number or types of @@ -74,14 +74,14 @@ func (function *Function) CheckArgTypes(args []Node) error { } // === time() clientmodel.SampleValue === -func timeImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { +func timeImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { return clientmodel.SampleValue(timestamp.Unix()) } // === delta(matrix MatrixNode, isCounter ScalarNode) Vector === -func deltaImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { +func deltaImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { matrixNode := args[0].(MatrixNode) - isCounter := args[1].(ScalarNode).Eval(timestamp, view) > 0 + isCounter := args[1].(ScalarNode).Eval(timestamp) > 0 resultVector := Vector{} // If we treat these metrics as counters, we need to fetch all values @@ -89,9 +89,9 @@ func deltaImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) // I.e. if a counter resets, we want to ignore that reset. var matrixValue Matrix if isCounter { - matrixValue = matrixNode.Eval(timestamp, view) + matrixValue = matrixNode.Eval(timestamp) } else { - matrixValue = matrixNode.EvalBoundaries(timestamp, view) + matrixValue = matrixNode.EvalBoundaries(timestamp) } for _, samples := range matrixValue { // No sense in trying to compute a delta without at least two points. 
Drop @@ -133,15 +133,16 @@ func deltaImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) Value: resultValue, Timestamp: timestamp, } + delete(resultSample.Metric, clientmodel.MetricNameLabel) resultVector = append(resultVector, resultSample) } return resultVector } // === rate(node MatrixNode) Vector === -func rateImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { +func rateImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { args = append(args, &ScalarLiteral{value: 1}) - vector := deltaImpl(timestamp, view, args).(Vector) + vector := deltaImpl(timestamp, args).(Vector) // TODO: could be other type of MatrixNode in the future (right now, only // MatrixSelector exists). Find a better way of getting the duration of a @@ -188,28 +189,28 @@ func (s reverseHeap) Less(i, j int) bool { } // === sort(node VectorNode) Vector === -func sortImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { - byValueSorter := vectorByValueHeap(args[0].(VectorNode).Eval(timestamp, view)) +func sortImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { + byValueSorter := vectorByValueHeap(args[0].(VectorNode).Eval(timestamp)) sort.Sort(byValueSorter) return Vector(byValueSorter) } // === sortDesc(node VectorNode) Vector === -func sortDescImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { - byValueSorter := vectorByValueHeap(args[0].(VectorNode).Eval(timestamp, view)) +func sortDescImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { + byValueSorter := vectorByValueHeap(args[0].(VectorNode).Eval(timestamp)) sort.Sort(sort.Reverse(byValueSorter)) return Vector(byValueSorter) } // === topk(k ScalarNode, node VectorNode) Vector === -func topkImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { - k := int(args[0].(ScalarNode).Eval(timestamp, view)) +func topkImpl(timestamp clientmodel.Timestamp, args []Node) interface{} 
{ + k := int(args[0].(ScalarNode).Eval(timestamp)) if k < 1 { return Vector{} } topk := make(vectorByValueHeap, 0, k) - vector := args[1].(VectorNode).Eval(timestamp, view) + vector := args[1].(VectorNode).Eval(timestamp) for _, el := range vector { if len(topk) < k || topk[0].Value < el.Value { @@ -224,15 +225,15 @@ func topkImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) i } // === bottomk(k ScalarNode, node VectorNode) Vector === -func bottomkImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { - k := int(args[0].(ScalarNode).Eval(timestamp, view)) +func bottomkImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { + k := int(args[0].(ScalarNode).Eval(timestamp)) if k < 1 { return Vector{} } bottomk := make(vectorByValueHeap, 0, k) bkHeap := reverseHeap{Interface: &bottomk} - vector := args[1].(VectorNode).Eval(timestamp, view) + vector := args[1].(VectorNode).Eval(timestamp) for _, el := range vector { if len(bottomk) < k || bottomk[0].Value > el.Value { @@ -247,8 +248,8 @@ func bottomkImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node } // === drop_common_labels(node VectorNode) Vector === -func dropCommonLabelsImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { - vector := args[0].(VectorNode).Eval(timestamp, view) +func dropCommonLabelsImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { + vector := args[0].(VectorNode).Eval(timestamp) if len(vector) < 1 { return Vector{} } @@ -285,7 +286,7 @@ func dropCommonLabelsImpl(timestamp clientmodel.Timestamp, view *viewAdapter, ar } // === sampleVectorImpl() Vector === -func sampleVectorImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { +func sampleVectorImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { return Vector{ &clientmodel.Sample{ Metric: clientmodel.Metric{ @@ -358,8 +359,8 @@ func sampleVectorImpl(timestamp clientmodel.Timestamp, view 
*viewAdapter, args [ } // === scalar(node VectorNode) Scalar === -func scalarImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { - v := args[0].(VectorNode).Eval(timestamp, view) +func scalarImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { + v := args[0].(VectorNode).Eval(timestamp) if len(v) != 1 { return clientmodel.SampleValue(math.NaN()) } @@ -367,13 +368,13 @@ func scalarImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) } // === count_scalar(vector VectorNode) model.SampleValue === -func countScalarImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { - return clientmodel.SampleValue(len(args[0].(VectorNode).Eval(timestamp, view))) +func countScalarImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { + return clientmodel.SampleValue(len(args[0].(VectorNode).Eval(timestamp))) } -func aggrOverTime(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node, aggrFn func(metric.Values) clientmodel.SampleValue) interface{} { +func aggrOverTime(timestamp clientmodel.Timestamp, args []Node, aggrFn func(metric.Values) clientmodel.SampleValue) interface{} { n := args[0].(MatrixNode) - matrixVal := n.Eval(timestamp, view) + matrixVal := n.Eval(timestamp) resultVector := Vector{} for _, el := range matrixVal { @@ -381,6 +382,7 @@ func aggrOverTime(timestamp clientmodel.Timestamp, view *viewAdapter, args []Nod continue } + delete(el.Metric, clientmodel.MetricNameLabel) resultVector = append(resultVector, &clientmodel.Sample{ Metric: el.Metric, Value: aggrFn(el.Values), @@ -391,8 +393,8 @@ func aggrOverTime(timestamp clientmodel.Timestamp, view *viewAdapter, args []Nod } // === avg_over_time(matrix MatrixNode) Vector === -func avgOverTimeImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { - return aggrOverTime(timestamp, view, args, func(values metric.Values) clientmodel.SampleValue { +func avgOverTimeImpl(timestamp 
clientmodel.Timestamp, args []Node) interface{} { + return aggrOverTime(timestamp, args, func(values metric.Values) clientmodel.SampleValue { var sum clientmodel.SampleValue for _, v := range values { sum += v.Value @@ -402,15 +404,15 @@ func avgOverTimeImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args [] } // === count_over_time(matrix MatrixNode) Vector === -func countOverTimeImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { - return aggrOverTime(timestamp, view, args, func(values metric.Values) clientmodel.SampleValue { +func countOverTimeImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { + return aggrOverTime(timestamp, args, func(values metric.Values) clientmodel.SampleValue { return clientmodel.SampleValue(len(values)) }) } // === max_over_time(matrix MatrixNode) Vector === -func maxOverTimeImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { - return aggrOverTime(timestamp, view, args, func(values metric.Values) clientmodel.SampleValue { +func maxOverTimeImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { + return aggrOverTime(timestamp, args, func(values metric.Values) clientmodel.SampleValue { max := math.Inf(-1) for _, v := range values { max = math.Max(max, float64(v.Value)) @@ -420,8 +422,8 @@ func maxOverTimeImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args [] } // === min_over_time(matrix MatrixNode) Vector === -func minOverTimeImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { - return aggrOverTime(timestamp, view, args, func(values metric.Values) clientmodel.SampleValue { +func minOverTimeImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { + return aggrOverTime(timestamp, args, func(values metric.Values) clientmodel.SampleValue { min := math.Inf(1) for _, v := range values { min = math.Min(min, float64(v.Value)) @@ -431,8 +433,8 @@ func minOverTimeImpl(timestamp clientmodel.Timestamp, view 
*viewAdapter, args [] } // === sum_over_time(matrix MatrixNode) Vector === -func sumOverTimeImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { - return aggrOverTime(timestamp, view, args, func(values metric.Values) clientmodel.SampleValue { +func sumOverTimeImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { + return aggrOverTime(timestamp, args, func(values metric.Values) clientmodel.SampleValue { var sum clientmodel.SampleValue for _, v := range values { sum += v.Value @@ -442,15 +444,39 @@ func sumOverTimeImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args [] } // === abs(vector VectorNode) Vector === -func absImpl(timestamp clientmodel.Timestamp, view *viewAdapter, args []Node) interface{} { +func absImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { n := args[0].(VectorNode) - vector := n.Eval(timestamp, view) + vector := n.Eval(timestamp) for _, el := range vector { + delete(el.Metric, clientmodel.MetricNameLabel) el.Value = clientmodel.SampleValue(math.Abs(float64(el.Value))) } return vector } +// === absent(vector VectorNode) Vector === +func absentImpl(timestamp clientmodel.Timestamp, args []Node) interface{} { + n := args[0].(VectorNode) + if len(n.Eval(timestamp)) > 0 { + return Vector{} + } + m := clientmodel.Metric{} + if vs, ok := n.(*VectorSelector); ok { + for _, matcher := range vs.labelMatchers { + if matcher.Type == metric.Equal && matcher.Name != clientmodel.MetricNameLabel { + m[matcher.Name] = matcher.Value + } + } + } + return Vector{ + &clientmodel.Sample{ + Metric: m, + Value: 1, + Timestamp: timestamp, + }, + } +} + var functions = map[string]*Function{ "abs": { name: "abs", @@ -458,6 +484,12 @@ var functions = map[string]*Function{ returnType: VECTOR, callFn: absImpl, }, + "absent": { + name: "absent", + argTypes: []ExprType{VECTOR}, + returnType: VECTOR, + callFn: absentImpl, + }, "avg_over_time": { name: "avg_over_time", argTypes: []ExprType{MATRIX}, diff --git 
a/rules/ast/functions_test.go b/rules/ast/functions_test.go index 3392f63b75..fa4d1da717 100644 --- a/rules/ast/functions_test.go +++ b/rules/ast/functions_test.go @@ -28,7 +28,7 @@ func (node emptyRangeNode) NodeTreeToDotGraph() string { return "" } func (node emptyRangeNode) String() string { return "" } func (node emptyRangeNode) Children() Nodes { return Nodes{} } -func (node emptyRangeNode) Eval(timestamp clientmodel.Timestamp, view *viewAdapter) Matrix { +func (node emptyRangeNode) Eval(timestamp clientmodel.Timestamp) Matrix { return Matrix{ metric.SampleSet{ Metric: clientmodel.Metric{clientmodel.MetricNameLabel: "empty_metric"}, @@ -37,7 +37,7 @@ func (node emptyRangeNode) Eval(timestamp clientmodel.Timestamp, view *viewAdapt } } -func (node emptyRangeNode) EvalBoundaries(timestamp clientmodel.Timestamp, view *viewAdapter) Matrix { +func (node emptyRangeNode) EvalBoundaries(timestamp clientmodel.Timestamp) Matrix { return Matrix{ metric.SampleSet{ Metric: clientmodel.Metric{clientmodel.MetricNameLabel: "empty_metric"}, @@ -48,11 +48,11 @@ func (node emptyRangeNode) EvalBoundaries(timestamp clientmodel.Timestamp, view func TestDeltaWithEmptyElementDoesNotCrash(t *testing.T) { now := clientmodel.Now() - vector := deltaImpl(now, nil, []Node{emptyRangeNode{}, &ScalarLiteral{value: 0}}).(Vector) + vector := deltaImpl(now, []Node{emptyRangeNode{}, &ScalarLiteral{value: 0}}).(Vector) if len(vector) != 0 { t.Fatalf("Expected empty result vector, got: %v", vector) } - vector = deltaImpl(now, nil, []Node{emptyRangeNode{}, &ScalarLiteral{value: 1}}).(Vector) + vector = deltaImpl(now, []Node{emptyRangeNode{}, &ScalarLiteral{value: 1}}).(Vector) if len(vector) != 0 { t.Fatalf("Expected empty result vector, got: %v", vector) } diff --git a/rules/ast/printer.go b/rules/ast/printer.go index 0fefcdd1cc..b32f0b10f4 100644 --- a/rules/ast/printer.go +++ b/rules/ast/printer.go @@ -23,7 +23,7 @@ import ( clientmodel "github.com/prometheus/client_golang/model" 
"github.com/prometheus/prometheus/stats" - "github.com/prometheus/prometheus/storage/metric" + "github.com/prometheus/prometheus/storage/local" "github.com/prometheus/prometheus/utility" ) @@ -151,18 +151,19 @@ func TypedValueToJSON(data interface{}, typeStr string) string { } // EvalToString evaluates the given node into a string of the given format. -func EvalToString(node Node, timestamp clientmodel.Timestamp, format OutputFormat, storage metric.PreloadingPersistence, queryStats *stats.TimerGroup) string { - viewTimer := queryStats.GetTimer(stats.TotalViewBuildingTime).Start() - viewAdapter, err := viewAdapterForInstantQuery(node, timestamp, storage, queryStats) - viewTimer.Stop() +func EvalToString(node Node, timestamp clientmodel.Timestamp, format OutputFormat, storage local.Storage, queryStats *stats.TimerGroup) string { + prepareTimer := queryStats.GetTimer(stats.TotalQueryPreparationTime).Start() + closer, err := prepareInstantQuery(node, timestamp, storage, queryStats) + prepareTimer.Stop() if err != nil { panic(err) } + defer closer.Close() evalTimer := queryStats.GetTimer(stats.InnerEvalTime).Start() switch node.Type() { case SCALAR: - scalar := node.(ScalarNode).Eval(timestamp, viewAdapter) + scalar := node.(ScalarNode).Eval(timestamp) evalTimer.Stop() switch format { case TEXT: @@ -171,7 +172,7 @@ func EvalToString(node Node, timestamp clientmodel.Timestamp, format OutputForma return TypedValueToJSON(scalar, "scalar") } case VECTOR: - vector := node.(VectorNode).Eval(timestamp, viewAdapter) + vector := node.(VectorNode).Eval(timestamp) evalTimer.Stop() switch format { case TEXT: @@ -180,7 +181,7 @@ func EvalToString(node Node, timestamp clientmodel.Timestamp, format OutputForma return TypedValueToJSON(vector, "vector") } case MATRIX: - matrix := node.(MatrixNode).Eval(timestamp, viewAdapter) + matrix := node.(MatrixNode).Eval(timestamp) evalTimer.Stop() switch format { case TEXT: @@ -189,7 +190,7 @@ func EvalToString(node Node, timestamp 
clientmodel.Timestamp, format OutputForma return TypedValueToJSON(matrix, "matrix") } case STRING: - str := node.(StringNode).Eval(timestamp, viewAdapter) + str := node.(StringNode).Eval(timestamp) evalTimer.Stop() switch format { case TEXT: @@ -202,28 +203,29 @@ func EvalToString(node Node, timestamp clientmodel.Timestamp, format OutputForma } // EvalToVector evaluates the given node into a Vector. Matrices aren't supported. -func EvalToVector(node Node, timestamp clientmodel.Timestamp, storage metric.PreloadingPersistence, queryStats *stats.TimerGroup) (Vector, error) { - viewTimer := queryStats.GetTimer(stats.TotalViewBuildingTime).Start() - viewAdapter, err := viewAdapterForInstantQuery(node, timestamp, storage, queryStats) - viewTimer.Stop() +func EvalToVector(node Node, timestamp clientmodel.Timestamp, storage local.Storage, queryStats *stats.TimerGroup) (Vector, error) { + prepareTimer := queryStats.GetTimer(stats.TotalQueryPreparationTime).Start() + closer, err := prepareInstantQuery(node, timestamp, storage, queryStats) + prepareTimer.Stop() if err != nil { panic(err) } + defer closer.Close() evalTimer := queryStats.GetTimer(stats.InnerEvalTime).Start() switch node.Type() { case SCALAR: - scalar := node.(ScalarNode).Eval(timestamp, viewAdapter) + scalar := node.(ScalarNode).Eval(timestamp) evalTimer.Stop() return Vector{&clientmodel.Sample{Value: scalar}}, nil case VECTOR: - vector := node.(VectorNode).Eval(timestamp, viewAdapter) + vector := node.(VectorNode).Eval(timestamp) evalTimer.Stop() return vector, nil case MATRIX: return nil, errors.New("Matrices not supported by EvalToVector") case STRING: - str := node.(StringNode).Eval(timestamp, viewAdapter) + str := node.(StringNode).Eval(timestamp) evalTimer.Stop() return Vector{&clientmodel.Sample{ Metric: clientmodel.Metric{"__value__": clientmodel.LabelValue(str)}}}, nil diff --git a/rules/ast/query_analyzer.go b/rules/ast/query_analyzer.go index 8d909ba595..78a282997d 100644 --- 
a/rules/ast/query_analyzer.go +++ b/rules/ast/query_analyzer.go @@ -16,12 +16,10 @@ package ast import ( "time" - "github.com/golang/glog" - clientmodel "github.com/prometheus/client_golang/model" "github.com/prometheus/prometheus/stats" - "github.com/prometheus/prometheus/storage/metric" + "github.com/prometheus/prometheus/storage/local" ) // FullRangeMap maps the fingerprint of a full range to the duration @@ -48,13 +46,13 @@ type QueryAnalyzer struct { IntervalRanges IntervalRangeMap // The underlying storage to which the query will be applied. Needed for // extracting timeseries fingerprint information during query analysis. - storage metric.Persistence + storage local.Storage } // NewQueryAnalyzer returns a pointer to a newly instantiated // QueryAnalyzer. The storage is needed to extract timeseries // fingerprint information during query analysis. -func NewQueryAnalyzer(storage metric.Persistence) *QueryAnalyzer { +func NewQueryAnalyzer(storage local.Storage) *QueryAnalyzer { return &QueryAnalyzer{ FullRanges: FullRangeMap{}, IntervalRanges: IntervalRangeMap{}, @@ -66,94 +64,122 @@ func NewQueryAnalyzer(storage metric.Persistence) *QueryAnalyzer { func (analyzer *QueryAnalyzer) Visit(node Node) { switch n := node.(type) { case *VectorSelector: - fingerprints, err := analyzer.storage.GetFingerprintsForLabelMatchers(n.labelMatchers) - if err != nil { - glog.Errorf("Error getting fingerprints for label matchers %v: %v", n.labelMatchers, err) - return - } + fingerprints := analyzer.storage.GetFingerprintsForLabelMatchers(n.labelMatchers) n.fingerprints = fingerprints - for _, fingerprint := range fingerprints { + for _, fp := range fingerprints { // Only add the fingerprint to IntervalRanges if not yet present in FullRanges. // Full ranges always contain more points and span more time than interval ranges. 
- if _, alreadyInFullRanges := analyzer.FullRanges[*fingerprint]; !alreadyInFullRanges { - analyzer.IntervalRanges[*fingerprint] = true + if _, alreadyInFullRanges := analyzer.FullRanges[fp]; !alreadyInFullRanges { + analyzer.IntervalRanges[fp] = true } + + n.metrics[fp] = analyzer.storage.GetMetricForFingerprint(fp) } case *MatrixSelector: - fingerprints, err := analyzer.storage.GetFingerprintsForLabelMatchers(n.labelMatchers) - if err != nil { - glog.Errorf("Error getting fingerprints for label matchers %v: %v", n.labelMatchers, err) - return - } + fingerprints := analyzer.storage.GetFingerprintsForLabelMatchers(n.labelMatchers) n.fingerprints = fingerprints - for _, fingerprint := range fingerprints { - if analyzer.FullRanges[*fingerprint] < n.interval { - analyzer.FullRanges[*fingerprint] = n.interval + for _, fp := range fingerprints { + if analyzer.FullRanges[fp] < n.interval { + analyzer.FullRanges[fp] = n.interval // Delete the fingerprint from IntervalRanges. Full ranges always contain // more points and span more time than interval ranges, so we don't need // an interval range for the same fingerprint, should we have one. - delete(analyzer.IntervalRanges, *fingerprint) + delete(analyzer.IntervalRanges, fp) } + + n.metrics[fp] = analyzer.storage.GetMetricForFingerprint(fp) } } } -// AnalyzeQueries walks the AST, starting at node, calling Visit on -// each node to collect fingerprints. 
-func (analyzer *QueryAnalyzer) AnalyzeQueries(node Node) { +type iteratorInitializer struct { + storage local.Storage +} + +func (i *iteratorInitializer) Visit(node Node) { + switch n := node.(type) { + case *VectorSelector: + for _, fp := range n.fingerprints { + n.iterators[fp] = i.storage.NewIterator(fp) + } + case *MatrixSelector: + for _, fp := range n.fingerprints { + n.iterators[fp] = i.storage.NewIterator(fp) + } + } +} + +func prepareInstantQuery(node Node, timestamp clientmodel.Timestamp, storage local.Storage, queryStats *stats.TimerGroup) (local.Preloader, error) { + analyzeTimer := queryStats.GetTimer(stats.QueryAnalysisTime).Start() + analyzer := NewQueryAnalyzer(storage) Walk(analyzer, node) -} - -func viewAdapterForInstantQuery(node Node, timestamp clientmodel.Timestamp, storage metric.PreloadingPersistence, queryStats *stats.TimerGroup) (*viewAdapter, error) { - analyzeTimer := queryStats.GetTimer(stats.QueryAnalysisTime).Start() - analyzer := NewQueryAnalyzer(storage) - analyzer.AnalyzeQueries(node) analyzeTimer.Stop() - requestBuildTimer := queryStats.GetTimer(stats.ViewRequestBuildTime).Start() - viewBuilder := storage.NewViewRequestBuilder() - for fingerprint, rangeDuration := range analyzer.FullRanges { - viewBuilder.GetMetricRange(&fingerprint, timestamp.Add(-rangeDuration), timestamp) - } - for fingerprint := range analyzer.IntervalRanges { - viewBuilder.GetMetricAtTime(&fingerprint, timestamp) - } - requestBuildTimer.Stop() - - buildTimer := queryStats.GetTimer(stats.InnerViewBuildingTime).Start() - view, err := viewBuilder.Execute(60*time.Second, queryStats) - buildTimer.Stop() - if err != nil { - return nil, err - } - return NewViewAdapter(view, storage, queryStats), nil -} - -func viewAdapterForRangeQuery(node Node, start clientmodel.Timestamp, end clientmodel.Timestamp, interval time.Duration, storage metric.PreloadingPersistence, queryStats *stats.TimerGroup) (*viewAdapter, error) { - analyzeTimer := 
queryStats.GetTimer(stats.QueryAnalysisTime).Start() - analyzer := NewQueryAnalyzer(storage) - analyzer.AnalyzeQueries(node) - analyzeTimer.Stop() - - requestBuildTimer := queryStats.GetTimer(stats.ViewRequestBuildTime).Start() - viewBuilder := storage.NewViewRequestBuilder() - for fingerprint, rangeDuration := range analyzer.FullRanges { - if interval < rangeDuration { - viewBuilder.GetMetricRange(&fingerprint, start.Add(-rangeDuration), end) - } else { - viewBuilder.GetMetricRangeAtInterval(&fingerprint, start.Add(-rangeDuration), end, interval, rangeDuration) + // TODO: Preloading should time out after a given duration. + preloadTimer := queryStats.GetTimer(stats.PreloadTime).Start() + p := storage.NewPreloader() + for fp, rangeDuration := range analyzer.FullRanges { + if err := p.PreloadRange(fp, timestamp.Add(-rangeDuration), timestamp, *stalenessDelta); err != nil { + p.Close() + return nil, err } } - for fingerprint := range analyzer.IntervalRanges { - viewBuilder.GetMetricAtInterval(&fingerprint, start, end, interval) + for fp := range analyzer.IntervalRanges { + if err := p.PreloadRange(fp, timestamp, timestamp, *stalenessDelta); err != nil { + p.Close() + return nil, err + } } - requestBuildTimer.Stop() + preloadTimer.Stop() - buildTimer := queryStats.GetTimer(stats.InnerViewBuildingTime).Start() - view, err := viewBuilder.Execute(time.Duration(60)*time.Second, queryStats) - buildTimer.Stop() - if err != nil { - return nil, err + ii := &iteratorInitializer{ + storage: storage, } - return NewViewAdapter(view, storage, queryStats), nil + Walk(ii, node) + + return p, nil +} + +func prepareRangeQuery(node Node, start clientmodel.Timestamp, end clientmodel.Timestamp, interval time.Duration, storage local.Storage, queryStats *stats.TimerGroup) (local.Preloader, error) { + analyzeTimer := queryStats.GetTimer(stats.QueryAnalysisTime).Start() + analyzer := NewQueryAnalyzer(storage) + Walk(analyzer, node) + analyzeTimer.Stop() + + // TODO: Preloading should time 
out after a given duration. + preloadTimer := queryStats.GetTimer(stats.PreloadTime).Start() + p := storage.NewPreloader() + for fp, rangeDuration := range analyzer.FullRanges { + if err := p.PreloadRange(fp, start.Add(-rangeDuration), end, *stalenessDelta); err != nil { + p.Close() + return nil, err + } + /* + if interval < rangeDuration { + if err := p.GetMetricRange(fp, end, end.Sub(start)+rangeDuration); err != nil { + p.Close() + return nil, err + } + } else { + if err := p.GetMetricRangeAtInterval(fp, start, end, interval, rangeDuration); err != nil { + p.Close() + return nil, err + } + } + */ + } + for fp := range analyzer.IntervalRanges { + if err := p.PreloadRange(fp, start, end, *stalenessDelta); err != nil { + p.Close() + return nil, err + } + } + preloadTimer.Stop() + + ii := &iteratorInitializer{ + storage: storage, + } + Walk(ii, node) + + return p, nil } diff --git a/rules/ast/view_adapter.go b/rules/ast/view_adapter.go index ca16c490a3..dc18611e8a 100644 --- a/rules/ast/view_adapter.go +++ b/rules/ast/view_adapter.go @@ -13,186 +13,4 @@ package ast -import ( - "flag" - "time" - - clientmodel "github.com/prometheus/client_golang/model" - - "github.com/prometheus/prometheus/stats" - "github.com/prometheus/prometheus/storage/metric" -) - -var defaultStalenessDelta = flag.Int("defaultStalenessDelta", 300, "Default staleness delta allowance in seconds during expression evaluations.") - -// StalenessPolicy describes the lenience limits to apply to values -// from the materialized view. -type StalenessPolicy struct { - // Describes the inclusive limit at which individual points if requested will - // be matched and subject to interpolation. - DeltaAllowance time.Duration -} - -type viewAdapter struct { - // Policy that dictates when sample values around an evaluation time are to - // be interpreted as stale. - stalenessPolicy StalenessPolicy - // AST-global storage to use for operations that are not supported by views - // (i.e. 
fingerprint->metric lookups). - storage metric.Persistence - // The materialized view which contains all timeseries data required for - // executing a query. - view metric.View - // The TimerGroup object in which to capture query timing statistics. - stats *stats.TimerGroup -} - -// interpolateSamples interpolates a value at a target time between two -// provided sample pairs. -func interpolateSamples(first, second *metric.SamplePair, timestamp clientmodel.Timestamp) *metric.SamplePair { - dv := second.Value - first.Value - dt := second.Timestamp.Sub(first.Timestamp) - - dDt := dv / clientmodel.SampleValue(dt) - offset := clientmodel.SampleValue(timestamp.Sub(first.Timestamp)) - - return &metric.SamplePair{ - Value: first.Value + (offset * dDt), - Timestamp: timestamp, - } -} - -// chooseClosestSample chooses the closest sample of a list of samples -// surrounding a given target time. If samples are found both before and after -// the target time, the sample value is interpolated between these. Otherwise, -// the single closest sample is returned verbatim. -func (v *viewAdapter) chooseClosestSample(samples metric.Values, timestamp clientmodel.Timestamp) *metric.SamplePair { - var closestBefore *metric.SamplePair - var closestAfter *metric.SamplePair - for _, candidate := range samples { - delta := candidate.Timestamp.Sub(timestamp) - // Samples before target time. - if delta < 0 { - // Ignore samples outside of staleness policy window. - if -delta > v.stalenessPolicy.DeltaAllowance { - continue - } - // Ignore samples that are farther away than what we've seen before. - if closestBefore != nil && candidate.Timestamp.Before(closestBefore.Timestamp) { - continue - } - sample := candidate - closestBefore = &sample - } - - // Samples after target time. - if delta >= 0 { - // Ignore samples outside of staleness policy window. - if delta > v.stalenessPolicy.DeltaAllowance { - continue - } - // Ignore samples that are farther away than samples we've seen before. 
- if closestAfter != nil && candidate.Timestamp.After(closestAfter.Timestamp) { - continue - } - sample := candidate - closestAfter = &sample - } - } - - switch { - case closestBefore != nil && closestAfter != nil: - return interpolateSamples(closestBefore, closestAfter, timestamp) - case closestBefore != nil: - return closestBefore - default: - return closestAfter - } -} - -func (v *viewAdapter) GetValueAtTime(fingerprints clientmodel.Fingerprints, timestamp clientmodel.Timestamp) (Vector, error) { - timer := v.stats.GetTimer(stats.GetValueAtTimeTime).Start() - samples := Vector{} - for _, fingerprint := range fingerprints { - sampleCandidates := v.view.GetValueAtTime(fingerprint, timestamp) - samplePair := v.chooseClosestSample(sampleCandidates, timestamp) - m, err := v.storage.GetMetricForFingerprint(fingerprint) - if err != nil { - return nil, err - } - if samplePair != nil { - samples = append(samples, &clientmodel.Sample{ - Metric: m, - Value: samplePair.Value, - Timestamp: timestamp, - }) - } - } - timer.Stop() - return samples, nil -} - -func (v *viewAdapter) GetBoundaryValues(fingerprints clientmodel.Fingerprints, interval *metric.Interval) ([]metric.SampleSet, error) { - timer := v.stats.GetTimer(stats.GetBoundaryValuesTime).Start() - sampleSets := []metric.SampleSet{} - for _, fingerprint := range fingerprints { - samplePairs := v.view.GetBoundaryValues(fingerprint, *interval) - if len(samplePairs) == 0 { - continue - } - - // TODO: memoize/cache this. 
- m, err := v.storage.GetMetricForFingerprint(fingerprint) - if err != nil { - return nil, err - } - - sampleSet := metric.SampleSet{ - Metric: m, - Values: samplePairs, - } - sampleSets = append(sampleSets, sampleSet) - } - timer.Stop() - return sampleSets, nil -} - -func (v *viewAdapter) GetRangeValues(fingerprints clientmodel.Fingerprints, interval *metric.Interval) ([]metric.SampleSet, error) { - timer := v.stats.GetTimer(stats.GetRangeValuesTime).Start() - sampleSets := []metric.SampleSet{} - for _, fingerprint := range fingerprints { - samplePairs := v.view.GetRangeValues(fingerprint, *interval) - if len(samplePairs) == 0 { - continue - } - - // TODO: memoize/cache this. - m, err := v.storage.GetMetricForFingerprint(fingerprint) - if err != nil { - return nil, err - } - - sampleSet := metric.SampleSet{ - Metric: m, - Values: samplePairs, - } - sampleSets = append(sampleSets, sampleSet) - } - timer.Stop() - return sampleSets, nil -} - -// NewViewAdapter returns an initialized view adapter with a default -// staleness policy (based on the --defaultStalenessDelta flag). -func NewViewAdapter(view metric.View, storage metric.Persistence, queryStats *stats.TimerGroup) *viewAdapter { - stalenessPolicy := StalenessPolicy{ - DeltaAllowance: time.Duration(*defaultStalenessDelta) * time.Second, - } - - return &viewAdapter{ - stalenessPolicy: stalenessPolicy, - storage: storage, - view: view, - stats: queryStats, - } -} +// TODO: remove file. 
diff --git a/rules/helpers_test.go b/rules/helpers_test.go index 02d7aca9a0..aaf249056a 100644 --- a/rules/helpers_test.go +++ b/rules/helpers_test.go @@ -19,6 +19,7 @@ import ( clientmodel "github.com/prometheus/client_golang/model" "github.com/prometheus/prometheus/rules/ast" + "github.com/prometheus/prometheus/storage/local" "github.com/prometheus/prometheus/storage/metric" ) @@ -51,7 +52,7 @@ func getTestVectorFromTestMatrix(matrix ast.Matrix) ast.Vector { return vector } -func storeMatrix(storage metric.Persistence, matrix ast.Matrix) (err error) { +func storeMatrix(storage local.Storage, matrix ast.Matrix) { pendingSamples := clientmodel.Samples{} for _, sampleSet := range matrix { for _, sample := range sampleSet.Values { @@ -62,8 +63,8 @@ func storeMatrix(storage metric.Persistence, matrix ast.Matrix) (err error) { }) } } - err = storage.AppendSamples(pendingSamples) - return + storage.AppendSamples(pendingSamples) + storage.WaitForIndexing() } var testMatrix = ast.Matrix{ diff --git a/rules/manager/manager.go b/rules/manager/manager.go index 0b8130dc94..ecff523ffb 100644 --- a/rules/manager/manager.go +++ b/rules/manager/manager.go @@ -27,7 +27,7 @@ import ( "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/notification" "github.com/prometheus/prometheus/rules" - "github.com/prometheus/prometheus/storage/metric" + "github.com/prometheus/prometheus/storage/local" "github.com/prometheus/prometheus/templates" ) @@ -83,20 +83,20 @@ type ruleManager struct { done chan bool interval time.Duration - storage metric.PreloadingPersistence + storage local.Storage - results chan<- *extraction.Result - notifications chan<- notification.NotificationReqs + results chan<- *extraction.Result + notificationHandler *notification.NotificationHandler prometheusUrl string } type RuleManagerOptions struct { EvaluationInterval time.Duration - Storage metric.PreloadingPersistence + Storage local.Storage - Notifications chan<- notification.NotificationReqs 
- Results chan<- *extraction.Result + NotificationHandler *notification.NotificationHandler + Results chan<- *extraction.Result PrometheusUrl string } @@ -106,11 +106,11 @@ func NewRuleManager(o *RuleManagerOptions) RuleManager { rules: []rules.Rule{}, done: make(chan bool), - interval: o.EvaluationInterval, - storage: o.Storage, - results: o.Results, - notifications: o.Notifications, - prometheusUrl: o.PrometheusUrl, + interval: o.EvaluationInterval, + storage: o.Storage, + results: o.Results, + notificationHandler: o.NotificationHandler, + prometheusUrl: o.PrometheusUrl, } return manager } @@ -126,17 +126,15 @@ func (m *ruleManager) Run() { m.runIteration(m.results) iterationDuration.Observe(float64(time.Since(start) / time.Millisecond)) case <-m.done: - glog.Info("rules.Rule manager exiting...") + glog.Info("Rule manager stopped.") return } } } func (m *ruleManager) Stop() { - select { - case m.done <- true: - default: - } + glog.Info("Stopping rule manager...") + m.done <- true } func (m *ruleManager) queueAlertNotifications(rule *rules.AlertingRule, timestamp clientmodel.Timestamp) { @@ -190,7 +188,7 @@ func (m *ruleManager) queueAlertNotifications(rule *rules.AlertingRule, timestam GeneratorURL: m.prometheusUrl + rules.GraphLinkForExpression(rule.Vector.String()), }) } - m.notifications <- notifications + m.notificationHandler.SubmitReqs(notifications) } func (m *ruleManager) runIteration(results chan<- *extraction.Result) { diff --git a/rules/recording.go b/rules/recording.go index 9f5695892c..f6e5f39f41 100644 --- a/rules/recording.go +++ b/rules/recording.go @@ -21,7 +21,7 @@ import ( "github.com/prometheus/prometheus/rules/ast" "github.com/prometheus/prometheus/stats" - "github.com/prometheus/prometheus/storage/metric" + "github.com/prometheus/prometheus/storage/local" ) // A RecordingRule records its vector expression into new timeseries. 
@@ -34,11 +34,11 @@ type RecordingRule struct { func (rule RecordingRule) Name() string { return rule.name } -func (rule RecordingRule) EvalRaw(timestamp clientmodel.Timestamp, storage metric.PreloadingPersistence) (ast.Vector, error) { +func (rule RecordingRule) EvalRaw(timestamp clientmodel.Timestamp, storage local.Storage) (ast.Vector, error) { return ast.EvalVectorInstant(rule.vector, timestamp, storage, stats.NewTimerGroup()) } -func (rule RecordingRule) Eval(timestamp clientmodel.Timestamp, storage metric.PreloadingPersistence) (ast.Vector, error) { +func (rule RecordingRule) Eval(timestamp clientmodel.Timestamp, storage local.Storage) (ast.Vector, error) { // Get the raw value of the rule expression. vector, err := rule.EvalRaw(timestamp, storage) if err != nil { diff --git a/rules/rules.go b/rules/rules.go index 0c624df30c..54be96a11a 100644 --- a/rules/rules.go +++ b/rules/rules.go @@ -19,7 +19,7 @@ import ( clientmodel "github.com/prometheus/client_golang/model" "github.com/prometheus/prometheus/rules/ast" - "github.com/prometheus/prometheus/storage/metric" + "github.com/prometheus/prometheus/storage/local" ) // A Rule encapsulates a vector expression which is evaluated at a specified @@ -29,9 +29,9 @@ type Rule interface { Name() string // EvalRaw evaluates the rule's vector expression without triggering any // other actions, like recording or alerting. - EvalRaw(timestamp clientmodel.Timestamp, storage metric.PreloadingPersistence) (ast.Vector, error) + EvalRaw(timestamp clientmodel.Timestamp, storage local.Storage) (ast.Vector, error) // Eval evaluates the rule, including any associated recording or alerting actions. - Eval(timestamp clientmodel.Timestamp, storage metric.PreloadingPersistence) (ast.Vector, error) + Eval(timestamp clientmodel.Timestamp, storage local.Storage) (ast.Vector, error) // ToDotGraph returns a Graphviz dot graph of the rule. ToDotGraph() string // String returns a human-readable string representation of the rule. 
diff --git a/rules/rules_test.go b/rules/rules_test.go index 5010e0160f..094965255e 100644 --- a/rules/rules_test.go +++ b/rules/rules_test.go @@ -24,7 +24,7 @@ import ( "github.com/prometheus/prometheus/rules/ast" "github.com/prometheus/prometheus/stats" - "github.com/prometheus/prometheus/storage/metric/tiered" + "github.com/prometheus/prometheus/storage/local" "github.com/prometheus/prometheus/utility/test" ) @@ -52,23 +52,10 @@ func vectorComparisonString(expected []string, actual []string) string { separator) } -type testTieredStorageCloser struct { - storage *tiered.TieredStorage - directory test.Closer -} - -func (t testTieredStorageCloser) Close() { - t.storage.Close() - t.directory.Close() -} - -func newTestStorage(t testing.TB) (storage *tiered.TieredStorage, closer test.Closer) { - storage, closer = tiered.NewTestTieredStorage(t) - if storage == nil { - t.Fatal("storage == nil") - } +func newTestStorage(t testing.TB) (storage local.Storage, closer test.Closer) { + storage, closer = local.NewTestStorage(t) storeMatrix(storage, testMatrix) - return + return storage, closer } func TestExpressions(t *testing.T) { @@ -83,30 +70,30 @@ func TestExpressions(t *testing.T) { }{ { expr: `SUM(http_requests)`, - output: []string{`http_requests => 3600 @[%v]`}, + output: []string{`{} => 3600 @[%v]`}, fullRanges: 0, intervalRanges: 8, }, { expr: `SUM(http_requests{instance="0"}) BY(job)`, output: []string{ - `http_requests{job="api-server"} => 400 @[%v]`, - `http_requests{job="app-server"} => 1200 @[%v]`, + `{job="api-server"} => 400 @[%v]`, + `{job="app-server"} => 1200 @[%v]`, }, fullRanges: 0, intervalRanges: 4, }, { expr: `SUM(http_requests{instance="0"}) BY(job) KEEPING_EXTRA`, output: []string{ - `http_requests{instance="0", job="api-server"} => 400 @[%v]`, - `http_requests{instance="0", job="app-server"} => 1200 @[%v]`, + `{instance="0", job="api-server"} => 400 @[%v]`, + `{instance="0", job="app-server"} => 1200 @[%v]`, }, fullRanges: 0, intervalRanges: 4, }, { 
expr: `SUM(http_requests) BY (job)`, output: []string{ - `http_requests{job="api-server"} => 1000 @[%v]`, - `http_requests{job="app-server"} => 2600 @[%v]`, + `{job="api-server"} => 1000 @[%v]`, + `{job="app-server"} => 2600 @[%v]`, }, fullRanges: 0, intervalRanges: 8, @@ -114,8 +101,8 @@ func TestExpressions(t *testing.T) { // Non-existent labels mentioned in BY-clauses shouldn't propagate to output. expr: `SUM(http_requests) BY (job, nonexistent)`, output: []string{ - `http_requests{job="api-server"} => 1000 @[%v]`, - `http_requests{job="app-server"} => 2600 @[%v]`, + `{job="api-server"} => 1000 @[%v]`, + `{job="app-server"} => 2600 @[%v]`, }, fullRanges: 0, intervalRanges: 8, @@ -125,141 +112,141 @@ func TestExpressions(t *testing.T) { SUM(http_requests) BY /* comments shouldn't have any effect */ (job) // another comment`, output: []string{ - `http_requests{job="api-server"} => 1000 @[%v]`, - `http_requests{job="app-server"} => 2600 @[%v]`, + `{job="api-server"} => 1000 @[%v]`, + `{job="app-server"} => 2600 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `COUNT(http_requests) BY (job)`, output: []string{ - `http_requests{job="api-server"} => 4 @[%v]`, - `http_requests{job="app-server"} => 4 @[%v]`, + `{job="api-server"} => 4 @[%v]`, + `{job="app-server"} => 4 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `SUM(http_requests) BY (job, group)`, output: []string{ - `http_requests{group="canary", job="api-server"} => 700 @[%v]`, - `http_requests{group="canary", job="app-server"} => 1500 @[%v]`, - `http_requests{group="production", job="api-server"} => 300 @[%v]`, - `http_requests{group="production", job="app-server"} => 1100 @[%v]`, + `{group="canary", job="api-server"} => 700 @[%v]`, + `{group="canary", job="app-server"} => 1500 @[%v]`, + `{group="production", job="api-server"} => 300 @[%v]`, + `{group="production", job="app-server"} => 1100 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `AVG(http_requests) BY (job)`, output: []string{ - 
`http_requests{job="api-server"} => 250 @[%v]`, - `http_requests{job="app-server"} => 650 @[%v]`, + `{job="api-server"} => 250 @[%v]`, + `{job="app-server"} => 650 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `MIN(http_requests) BY (job)`, output: []string{ - `http_requests{job="api-server"} => 100 @[%v]`, - `http_requests{job="app-server"} => 500 @[%v]`, + `{job="api-server"} => 100 @[%v]`, + `{job="app-server"} => 500 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `MAX(http_requests) BY (job)`, output: []string{ - `http_requests{job="api-server"} => 400 @[%v]`, - `http_requests{job="app-server"} => 800 @[%v]`, + `{job="api-server"} => 400 @[%v]`, + `{job="app-server"} => 800 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `SUM(http_requests) BY (job) - COUNT(http_requests) BY (job)`, output: []string{ - `http_requests{job="api-server"} => 996 @[%v]`, - `http_requests{job="app-server"} => 2596 @[%v]`, + `{job="api-server"} => 996 @[%v]`, + `{job="app-server"} => 2596 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `2 - SUM(http_requests) BY (job)`, output: []string{ - `http_requests{job="api-server"} => -998 @[%v]`, - `http_requests{job="app-server"} => -2598 @[%v]`, + `{job="api-server"} => -998 @[%v]`, + `{job="app-server"} => -2598 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `1000 / SUM(http_requests) BY (job)`, output: []string{ - `http_requests{job="api-server"} => 1 @[%v]`, - `http_requests{job="app-server"} => 0.38461538461538464 @[%v]`, + `{job="api-server"} => 1 @[%v]`, + `{job="app-server"} => 0.38461538461538464 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `SUM(http_requests) BY (job) - 2`, output: []string{ - `http_requests{job="api-server"} => 998 @[%v]`, - `http_requests{job="app-server"} => 2598 @[%v]`, + `{job="api-server"} => 998 @[%v]`, + `{job="app-server"} => 2598 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `SUM(http_requests) BY (job) % 3`, output: []string{ - 
`http_requests{job="api-server"} => 1 @[%v]`, - `http_requests{job="app-server"} => 2 @[%v]`, + `{job="api-server"} => 1 @[%v]`, + `{job="app-server"} => 2 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `SUM(http_requests) BY (job) / 0`, output: []string{ - `http_requests{job="api-server"} => +Inf @[%v]`, - `http_requests{job="app-server"} => +Inf @[%v]`, + `{job="api-server"} => +Inf @[%v]`, + `{job="app-server"} => +Inf @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `SUM(http_requests) BY (job) > 1000`, output: []string{ - `http_requests{job="app-server"} => 2600 @[%v]`, + `{job="app-server"} => 2600 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `1000 < SUM(http_requests) BY (job)`, output: []string{ - `http_requests{job="app-server"} => 1000 @[%v]`, + `{job="app-server"} => 1000 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `SUM(http_requests) BY (job) <= 1000`, output: []string{ - `http_requests{job="api-server"} => 1000 @[%v]`, + `{job="api-server"} => 1000 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `SUM(http_requests) BY (job) != 1000`, output: []string{ - `http_requests{job="app-server"} => 2600 @[%v]`, + `{job="app-server"} => 2600 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `SUM(http_requests) BY (job) == 1000`, output: []string{ - `http_requests{job="api-server"} => 1000 @[%v]`, + `{job="api-server"} => 1000 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { expr: `SUM(http_requests) BY (job) + SUM(http_requests) BY (job)`, output: []string{ - `http_requests{job="api-server"} => 2000 @[%v]`, - `http_requests{job="app-server"} => 5200 @[%v]`, + `{job="api-server"} => 2000 @[%v]`, + `{job="app-server"} => 5200 @[%v]`, }, fullRanges: 0, intervalRanges: 8, @@ -274,22 +261,22 @@ func TestExpressions(t *testing.T) { }, { expr: `http_requests{job="api-server", group="canary"} + delta(http_requests{job="api-server"}[5m], 1)`, output: []string{ - `http_requests{group="canary", instance="0", 
job="api-server"} => 330 @[%v]`, - `http_requests{group="canary", instance="1", job="api-server"} => 440 @[%v]`, + `{group="canary", instance="0", job="api-server"} => 330 @[%v]`, + `{group="canary", instance="1", job="api-server"} => 440 @[%v]`, }, fullRanges: 4, intervalRanges: 0, }, { expr: `delta(http_requests[25m], 1)`, output: []string{ - `http_requests{group="canary", instance="0", job="api-server"} => 150 @[%v]`, - `http_requests{group="canary", instance="0", job="app-server"} => 350 @[%v]`, - `http_requests{group="canary", instance="1", job="api-server"} => 200 @[%v]`, - `http_requests{group="canary", instance="1", job="app-server"} => 400 @[%v]`, - `http_requests{group="production", instance="0", job="api-server"} => 50 @[%v]`, - `http_requests{group="production", instance="0", job="app-server"} => 250 @[%v]`, - `http_requests{group="production", instance="1", job="api-server"} => 100 @[%v]`, - `http_requests{group="production", instance="1", job="app-server"} => 300 @[%v]`, + `{group="canary", instance="0", job="api-server"} => 150 @[%v]`, + `{group="canary", instance="0", job="app-server"} => 350 @[%v]`, + `{group="canary", instance="1", job="api-server"} => 200 @[%v]`, + `{group="canary", instance="1", job="app-server"} => 400 @[%v]`, + `{group="production", instance="0", job="api-server"} => 50 @[%v]`, + `{group="production", instance="0", job="app-server"} => 250 @[%v]`, + `{group="production", instance="1", job="api-server"} => 100 @[%v]`, + `{group="production", instance="1", job="app-server"} => 300 @[%v]`, }, fullRanges: 8, intervalRanges: 0, @@ -373,45 +360,45 @@ func TestExpressions(t *testing.T) { // Lower-cased aggregation operators should work too. 
expr: `sum(http_requests) by (job) + min(http_requests) by (job) + max(http_requests) by (job) + avg(http_requests) by (job)`, output: []string{ - `http_requests{job="app-server"} => 4550 @[%v]`, - `http_requests{job="api-server"} => 1750 @[%v]`, + `{job="app-server"} => 4550 @[%v]`, + `{job="api-server"} => 1750 @[%v]`, }, fullRanges: 0, intervalRanges: 8, }, { // Deltas should be adjusted for target interval vs. samples under target interval. expr: `delta(http_requests{group="canary", instance="1", job="app-server"}[18m], 1)`, - output: []string{`http_requests{group="canary", instance="1", job="app-server"} => 288 @[%v]`}, + output: []string{`{group="canary", instance="1", job="app-server"} => 288 @[%v]`}, fullRanges: 1, intervalRanges: 0, }, { // Rates should transform per-interval deltas to per-second rates. expr: `rate(http_requests{group="canary", instance="1", job="app-server"}[10m])`, - output: []string{`http_requests{group="canary", instance="1", job="app-server"} => 0.26666666666666666 @[%v]`}, + output: []string{`{group="canary", instance="1", job="app-server"} => 0.26666666666666666 @[%v]`}, fullRanges: 1, intervalRanges: 0, }, { // Counter resets in middle of range are ignored by delta() if counter == 1. expr: `delta(testcounter_reset_middle[50m], 1)`, - output: []string{`testcounter_reset_middle => 90 @[%v]`}, + output: []string{`{} => 90 @[%v]`}, fullRanges: 1, intervalRanges: 0, }, { // Counter resets in middle of range are not ignored by delta() if counter == 0. expr: `delta(testcounter_reset_middle[50m], 0)`, - output: []string{`testcounter_reset_middle => 50 @[%v]`}, + output: []string{`{} => 50 @[%v]`}, fullRanges: 1, intervalRanges: 0, }, { // Counter resets at end of range are ignored by delta() if counter == 1. 
expr: `delta(testcounter_reset_end[5m], 1)`, - output: []string{`testcounter_reset_end => 0 @[%v]`}, + output: []string{`{} => 0 @[%v]`}, fullRanges: 1, intervalRanges: 0, }, { // Counter resets at end of range are not ignored by delta() if counter == 0. expr: `delta(testcounter_reset_end[5m], 0)`, - output: []string{`testcounter_reset_end => -90 @[%v]`}, + output: []string{`{} => -90 @[%v]`}, fullRanges: 1, intervalRanges: 0, }, { @@ -483,8 +470,8 @@ func TestExpressions(t *testing.T) { { expr: `abs(-1 * http_requests{group="production",job="api-server"})`, output: []string{ - `http_requests{group="production", instance="0", job="api-server"} => 100 @[%v]`, - `http_requests{group="production", instance="1", job="api-server"} => 200 @[%v]`, + `{group="production", instance="0", job="api-server"} => 100 @[%v]`, + `{group="production", instance="1", job="api-server"} => 200 @[%v]`, }, fullRanges: 0, intervalRanges: 2, @@ -492,8 +479,8 @@ func TestExpressions(t *testing.T) { { expr: `avg_over_time(http_requests{group="production",job="api-server"}[1h])`, output: []string{ - `http_requests{group="production", instance="0", job="api-server"} => 50 @[%v]`, - `http_requests{group="production", instance="1", job="api-server"} => 100 @[%v]`, + `{group="production", instance="0", job="api-server"} => 50 @[%v]`, + `{group="production", instance="1", job="api-server"} => 100 @[%v]`, }, fullRanges: 2, intervalRanges: 0, @@ -501,8 +488,8 @@ func TestExpressions(t *testing.T) { { expr: `count_over_time(http_requests{group="production",job="api-server"}[1h])`, output: []string{ - `http_requests{group="production", instance="0", job="api-server"} => 11 @[%v]`, - `http_requests{group="production", instance="1", job="api-server"} => 11 @[%v]`, + `{group="production", instance="0", job="api-server"} => 11 @[%v]`, + `{group="production", instance="1", job="api-server"} => 11 @[%v]`, }, fullRanges: 2, intervalRanges: 0, @@ -510,8 +497,8 @@ func TestExpressions(t *testing.T) { { expr: 
`max_over_time(http_requests{group="production",job="api-server"}[1h])`, output: []string{ - `http_requests{group="production", instance="0", job="api-server"} => 100 @[%v]`, - `http_requests{group="production", instance="1", job="api-server"} => 200 @[%v]`, + `{group="production", instance="0", job="api-server"} => 100 @[%v]`, + `{group="production", instance="1", job="api-server"} => 200 @[%v]`, }, fullRanges: 2, intervalRanges: 0, @@ -519,8 +506,8 @@ func TestExpressions(t *testing.T) { { expr: `min_over_time(http_requests{group="production",job="api-server"}[1h])`, output: []string{ - `http_requests{group="production", instance="0", job="api-server"} => 0 @[%v]`, - `http_requests{group="production", instance="1", job="api-server"} => 0 @[%v]`, + `{group="production", instance="0", job="api-server"} => 0 @[%v]`, + `{group="production", instance="1", job="api-server"} => 0 @[%v]`, }, fullRanges: 2, intervalRanges: 0, @@ -528,8 +515,8 @@ func TestExpressions(t *testing.T) { { expr: `sum_over_time(http_requests{group="production",job="api-server"}[1h])`, output: []string{ - `http_requests{group="production", instance="0", job="api-server"} => 550 @[%v]`, - `http_requests{group="production", instance="1", job="api-server"} => 1100 @[%v]`, + `{group="production", instance="0", job="api-server"} => 550 @[%v]`, + `{group="production", instance="1", job="api-server"} => 1100 @[%v]`, }, fullRanges: 2, intervalRanges: 0, @@ -582,8 +569,8 @@ func TestExpressions(t *testing.T) { // Test alternative "by"-clause order. expr: `sum by (group) (http_requests{job="api-server"})`, output: []string{ - `http_requests{group="canary"} => 700 @[%v]`, - `http_requests{group="production"} => 300 @[%v]`, + `{group="canary"} => 700 @[%v]`, + `{group="production"} => 300 @[%v]`, }, fullRanges: 0, intervalRanges: 4, @@ -592,8 +579,8 @@ func TestExpressions(t *testing.T) { // Test alternative "by"-clause order with "keeping_extra". 
expr: `sum by (group) keeping_extra (http_requests{job="api-server"})`, output: []string{ - `http_requests{group="canary", job="api-server"} => 700 @[%v]`, - `http_requests{group="production", job="api-server"} => 300 @[%v]`, + `{group="canary", job="api-server"} => 700 @[%v]`, + `{group="production", job="api-server"} => 300 @[%v]`, }, fullRanges: 0, intervalRanges: 4, @@ -604,16 +591,55 @@ func TestExpressions(t *testing.T) { // in an organization), or risk serious user confusion. expr: `sum(sum by (group) keeping_extra (http_requests{job="api-server"})) by (job)`, output: []string{ - `http_requests{job="api-server"} => 1000 @[%v]`, + `{job="api-server"} => 1000 @[%v]`, }, fullRanges: 0, intervalRanges: 4, }, + { + expr: `absent(nonexistent)`, + output: []string{ + `{} => 1 @[%v]`, + }, + fullRanges: 0, + intervalRanges: 0, + }, + { + expr: `absent(nonexistent{job="testjob", instance="testinstance", method=~".*"})`, + output: []string{ + `{instance="testinstance", job="testjob"} => 1 @[%v]`, + }, + fullRanges: 0, + intervalRanges: 0, + }, + { + expr: `count_scalar(absent(http_requests))`, + output: []string{ + `scalar: 0 @[%v]`, + }, + fullRanges: 0, + intervalRanges: 8, + }, + { + expr: `count_scalar(absent(sum(http_requests)))`, + output: []string{ + `scalar: 0 @[%v]`, + }, + fullRanges: 0, + intervalRanges: 8, + }, + { + expr: `absent(sum(nonexistent{job="testjob", instance="testinstance"}))`, + output: []string{ + `{} => 1 @[%v]`, + }, + fullRanges: 0, + intervalRanges: 0, + }, } - tieredStorage, closer := newTestStorage(t) + storage, closer := newTestStorage(t) defer closer.Close() - tieredStorage.Flush() for i, exprTest := range expressionTests { expectedLines := annotateWithTime(exprTest.output, testEvalTime) @@ -631,7 +657,7 @@ func TestExpressions(t *testing.T) { t.Errorf("%d. 
Test should fail, but didn't", i) } failed := false - resultStr := ast.EvalToString(testExpr, testEvalTime, ast.TEXT, tieredStorage, stats.NewTimerGroup()) + resultStr := ast.EvalToString(testExpr, testEvalTime, ast.TEXT, storage, stats.NewTimerGroup()) resultLines := strings.Split(resultStr, "\n") if len(exprTest.output) != len(resultLines) { @@ -661,8 +687,8 @@ func TestExpressions(t *testing.T) { } } - analyzer := ast.NewQueryAnalyzer(tieredStorage) - analyzer.AnalyzeQueries(testExpr) + analyzer := ast.NewQueryAnalyzer(storage) + ast.Walk(analyzer, testExpr) if exprTest.fullRanges != len(analyzer.FullRanges) { t.Errorf("%d. Count of full ranges didn't match: %v vs %v", i, exprTest.fullRanges, len(analyzer.FullRanges)) failed = true @@ -771,9 +797,8 @@ func TestAlertingRule(t *testing.T) { }, } - tieredStorage, closer := newTestStorage(t) + storage, closer := newTestStorage(t) defer closer.Close() - tieredStorage.Flush() alertExpr, err := LoadExprFromString(`http_requests{group="canary", job="app-server"} < 100`) if err != nil { @@ -787,7 +812,7 @@ func TestAlertingRule(t *testing.T) { for i, expected := range evalOutputs { evalTime := testStartTime.Add(testSampleInterval * time.Duration(i)) - actual, err := rule.Eval(evalTime, tieredStorage) + actual, err := rule.Eval(evalTime, storage) if err != nil { t.Fatalf("Error during alerting rule evaluation: %s", err) } diff --git a/stats/query_stats.go b/stats/query_stats.go index 4fa73cac09..b838a2b7d7 100644 --- a/stats/query_stats.go +++ b/stats/query_stats.go @@ -22,8 +22,8 @@ const ( TotalEvalTime QueryTiming = iota ResultSortTime JsonEncodeTime - TotalViewBuildingTime - ViewRequestBuildTime + PreloadTime + TotalQueryPreparationTime InnerViewBuildingTime InnerEvalTime ResultAppendTime @@ -46,10 +46,10 @@ func (s QueryTiming) String() string { return "Result sorting time" case JsonEncodeTime: return "JSON encoding time" - case TotalViewBuildingTime: - return "Total view building time" - case ViewRequestBuildTime: - 
return "View request building time" + case PreloadTime: + return "Query preloading time" + case TotalQueryPreparationTime: + return "Total query preparation time" case InnerViewBuildingTime: return "Inner view building time" case InnerEvalTime: diff --git a/storage/interface.go b/storage/interface.go deleted file mode 100644 index 36dca39dab..0000000000 --- a/storage/interface.go +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2013 Prometheus Team -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package storage - -// RecordDecoder decodes each key-value pair in the database. The protocol -// around it makes the assumption that the underlying implementation is -// concurrency safe. -type RecordDecoder interface { - DecodeKey(in interface{}) (out interface{}, err error) - DecodeValue(in interface{}) (out interface{}, err error) -} - -// FilterResult describes the record matching and scanning behavior for the -// database. -type FilterResult int - -const ( - // Stop scanning the database. - Stop FilterResult = iota - // Skip this record but continue scanning. - Skip - // Accept this record for the Operator. - Accept -) - -func (f FilterResult) String() string { - switch f { - case Stop: - return "STOP" - case Skip: - return "SKIP" - case Accept: - return "ACCEPT" - } - - panic("unknown") -} - -// OperatorError is used for storage operations upon errors that may or may not -// be continuable. 
-type OperatorError struct { - Error error - Continuable bool -} - -// RecordFilter is responsible for controlling the behavior of the database scan -// process and determines the disposition of various records. -// -// The protocol around it makes the assumption that the underlying -// implementation is concurrency safe. -type RecordFilter interface { - // Filter receives the key and value as decoded from the RecordDecoder type. - Filter(key, value interface{}) (filterResult FilterResult) -} - -// RecordOperator is responsible for taking action upon each entity that is -// passed to it. -// -// The protocol around it makes the assumption that the underlying -// implementation is concurrency safe. -type RecordOperator interface { - // Take action on a given record. If the action returns an error, the entire - // scan process stops. - Operate(key, value interface{}) (err *OperatorError) -} diff --git a/storage/local/chunk.go b/storage/local/chunk.go new file mode 100644 index 0000000000..c0f47d4679 --- /dev/null +++ b/storage/local/chunk.go @@ -0,0 +1,233 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "container/list" + "io" + "sync" + "sync/atomic" + + clientmodel "github.com/prometheus/client_golang/model" + + "github.com/prometheus/prometheus/storage/metric" +) + +// chunkDesc contains meta-data for a chunk. Many of its methods are +// goroutine-safe proxies for chunk methods. 
+type chunkDesc struct { + sync.Mutex + chunk chunk // nil if chunk is evicted. + refCount int + chunkFirstTime clientmodel.Timestamp // Used if chunk is evicted. + chunkLastTime clientmodel.Timestamp // Used if chunk is evicted. + + // evictListElement is nil if the chunk is not in the evict list. + // evictListElement is _not_ protected by the chunkDesc mutex. + // It must only be touched by the evict list handler in memorySeriesStorage. + evictListElement *list.Element +} + +// newChunkDesc creates a new chunkDesc pointing to the provided chunk. The +// provided chunk is assumed to be not persisted yet. Therefore, the refCount of +// the new chunkDesc is 1 (preventing eviction prior to persisting). +func newChunkDesc(c chunk) *chunkDesc { + chunkOps.WithLabelValues(createAndPin).Inc() + atomic.AddInt64(&numMemChunks, 1) + // TODO: numMemChunkDescs is actually never read except during metrics + // collection. Turn it into a real metric. + atomic.AddInt64(&numMemChunkDescs, 1) + return &chunkDesc{chunk: c, refCount: 1} +} + +func (cd *chunkDesc) add(s *metric.SamplePair) []chunk { + cd.Lock() + defer cd.Unlock() + + return cd.chunk.add(s) +} + +// pin increments the refCount by one. Upon increment from 0 to 1, this +// chunkDesc is removed from the evict list. To enable the latter, the +// evictRequests channel has to be provided. +func (cd *chunkDesc) pin(evictRequests chan<- evictRequest) { + cd.Lock() + defer cd.Unlock() + + if cd.refCount == 0 { + // Remove ourselves from the evict list. + evictRequests <- evictRequest{cd, false} + } + cd.refCount++ +} + +// unpin decrements the refCount by one. Upon decrement from 1 to 0, this +// chunkDesc is added to the evict list. To enable the latter, the evictRequests +// channel has to be provided. 
+func (cd *chunkDesc) unpin(evictRequests chan<- evictRequest) { + cd.Lock() + defer cd.Unlock() + + if cd.refCount == 0 { + panic("cannot unpin already unpinned chunk") + } + cd.refCount-- + if cd.refCount == 0 { + // Add ourselves to the back of the evict list. + evictRequests <- evictRequest{cd, true} + } +} + +func (cd *chunkDesc) getRefCount() int { + cd.Lock() + defer cd.Unlock() + + return cd.refCount +} + +func (cd *chunkDesc) firstTime() clientmodel.Timestamp { + cd.Lock() + defer cd.Unlock() + + if cd.chunk == nil { + return cd.chunkFirstTime + } + return cd.chunk.firstTime() +} + +func (cd *chunkDesc) lastTime() clientmodel.Timestamp { + cd.Lock() + defer cd.Unlock() + + if cd.chunk == nil { + return cd.chunkLastTime + } + return cd.chunk.lastTime() +} + +func (cd *chunkDesc) isEvicted() bool { + cd.Lock() + defer cd.Unlock() + + return cd.chunk == nil +} + +func (cd *chunkDesc) contains(t clientmodel.Timestamp) bool { + return !t.Before(cd.firstTime()) && !t.After(cd.lastTime()) +} + +func (cd *chunkDesc) setChunk(c chunk) { + cd.Lock() + defer cd.Unlock() + + if cd.chunk != nil { + panic("chunk already set") + } + cd.chunk = c +} + +// maybeEvict evicts the chunk if the refCount is 0. It returns whether the chunk +// is now evicted, which includes the case that the chunk was evicted even +// before this method was called. +func (cd *chunkDesc) maybeEvict() bool { + cd.Lock() + defer cd.Unlock() + + if cd.chunk == nil { + return true + } + if cd.refCount != 0 { + return false + } + cd.chunkFirstTime = cd.chunk.firstTime() + cd.chunkLastTime = cd.chunk.lastTime() + cd.chunk = nil + chunkOps.WithLabelValues(evict).Inc() + atomic.AddInt64(&numMemChunks, -1) + return true +} + +// chunk is the interface for all chunks. Chunks are generally not +// goroutine-safe. +type chunk interface { + // add adds a SamplePair to the chunks, performs any necessary + // re-encoding, and adds any necessary overflow chunks. 
It returns the
+	// new version of the original chunk, followed by overflow chunks, if
+	// any. The first chunk returned might be the same as the original one
+	// or a newly allocated version. In any case, take the returned chunk as
+	// the relevant one and discard the original chunk.
+	add(*metric.SamplePair) []chunk
+	clone() chunk
+	firstTime() clientmodel.Timestamp
+	lastTime() clientmodel.Timestamp
+	newIterator() chunkIterator
+	marshal(io.Writer) error
+	unmarshal(io.Reader) error
+	// values returns a channel, from which all sample values in the chunk
+	// can be received in order. The channel is closed after the last
+	// one. It is generally not safe to mutate the chunk while the channel
+	// is still open.
+	values() <-chan *metric.SamplePair
+}
+
+// A chunkIterator enables efficient access to the content of a chunk. It is
+// generally not safe to use a chunkIterator concurrently with or after chunk
+// mutation.
+type chunkIterator interface {
+	// Gets the two values that are immediately adjacent to a given time. In
+	// case a value exists at precisely the given time, only that single
+	// value is returned. Only the first or last value is returned (as a
+	// single value), if the given time is before or after the first or last
+	// value, respectively.
+	getValueAtTime(clientmodel.Timestamp) metric.Values
+	// Gets all values contained within a given interval.
+	getRangeValues(metric.Interval) metric.Values
+	// Whether a given timestamp is contained between first and last value
+	// in the chunk.
+	contains(clientmodel.Timestamp) bool
+}
+
+func transcodeAndAdd(dst chunk, src chunk, s *metric.SamplePair) []chunk {
+	chunkOps.WithLabelValues(transcode).Inc()
+
+	head := dst
+	body := []chunk{}
+	for v := range src.values() {
+		newChunks := head.add(v)
+		body = append(body, newChunks[:len(newChunks)-1]...)
+		head = newChunks[len(newChunks)-1]
+	}
+	newChunks := head.add(s)
+	body = append(body, newChunks[:len(newChunks)-1]...)
+ head = newChunks[len(newChunks)-1] + return append(body, head) +} + +func chunkType(c chunk) byte { + switch c.(type) { + case *deltaEncodedChunk: + return 0 + default: + panic("unknown chunk type") + } +} + +func chunkForType(chunkType byte) chunk { + switch chunkType { + case 0: + return newDeltaEncodedChunk(d1, d0, true) + default: + panic("unknown chunk type") + } +} diff --git a/storage/local/codable/codable.go b/storage/local/codable/codable.go new file mode 100644 index 0000000000..e8ff209aad --- /dev/null +++ b/storage/local/codable/codable.go @@ -0,0 +1,436 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package codable provides types that implement encoding.BinaryMarshaler and +// encoding.BinaryUnmarshaler and functions that help to encode and decode +// primitives. The Prometheus storage backend uses them to persist objects to +// files and to save objects in LevelDB. +// +// The encodings used in this package are designed in a way that objects can be +// unmarshaled from a continuous byte stream, i.e. the information when to stop +// reading is determined by the format. No separate termination information is +// needed. +// +// Strings are encoded as the length of their bytes as a varint followed by +// their bytes. +// +// Slices are encoded as their length as a varint followed by their elements. 
+// +// Maps are encoded as the number of mappings as a varint, followed by the +// mappings, each of which consists of the key followed by the value. +package codable + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "sync" + + clientmodel "github.com/prometheus/client_golang/model" + + "github.com/prometheus/prometheus/storage/metric" +) + +// A byteReader is an io.ByteReader that also implements the vanilla io.Reader +// interface. +type byteReader interface { + io.Reader + io.ByteReader +} + +// bufPool is a pool for staging buffers. Using a pool allows concurrency-safe +// reuse of buffers +var bufPool sync.Pool + +// getBuf returns a buffer from the pool. The length of the returned slice is l. +func getBuf(l int) []byte { + x := bufPool.Get() + if x == nil { + return make([]byte, l) + } + buf := x.([]byte) + if cap(buf) < l { + return make([]byte, l) + } + return buf[:l] +} + +// putBuf returns a buffer to the pool. +func putBuf(buf []byte) { + bufPool.Put(buf) +} + +// EncodeVarint encodes an int64 as a varint and writes it to an io.Writer. +// It returns the number of bytes written. +// This is a GC-friendly implementation that takes the required staging buffer +// from a buffer pool. +func EncodeVarint(w io.Writer, i int64) (int, error) { + buf := getBuf(binary.MaxVarintLen64) + defer putBuf(buf) + + bytesWritten := binary.PutVarint(buf, i) + _, err := w.Write(buf[:bytesWritten]) + return bytesWritten, err +} + +// EncodeUint64 writes an uint64 to an io.Writer in big-endian byte-order. +// This is a GC-friendly implementation that takes the required staging buffer +// from a buffer pool. +func EncodeUint64(w io.Writer, u uint64) error { + buf := getBuf(8) + defer putBuf(buf) + + binary.BigEndian.PutUint64(buf, u) + _, err := w.Write(buf) + return err +} + +// DecodeUint64 reads an uint64 from an io.Reader in big-endian byte-order. +// This is a GC-friendly implementation that takes the required staging buffer +// from a buffer pool. 
+func DecodeUint64(r io.Reader) (uint64, error) { + buf := getBuf(8) + defer putBuf(buf) + + if _, err := io.ReadFull(r, buf); err != nil { + return 0, err + } + return binary.BigEndian.Uint64(buf), nil +} + +// encodeString writes the varint encoded length followed by the bytes of s to +// b. +func encodeString(b *bytes.Buffer, s string) error { + if _, err := EncodeVarint(b, int64(len(s))); err != nil { + return err + } + if _, err := b.WriteString(s); err != nil { + return err + } + return nil +} + +// decodeString decodes a string encoded by encodeString. +func decodeString(b byteReader) (string, error) { + length, err := binary.ReadVarint(b) + if err != nil { + return "", err + } + + buf := getBuf(int(length)) + defer putBuf(buf) + + if _, err := io.ReadFull(b, buf); err != nil { + return "", err + } + return string(buf), nil +} + +// A Metric is a clientmodel.Metric that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. +type Metric clientmodel.Metric + +// MarshalBinary implements encoding.BinaryMarshaler. +func (m Metric) MarshalBinary() ([]byte, error) { + buf := &bytes.Buffer{} + if _, err := EncodeVarint(buf, int64(len(m))); err != nil { + return nil, err + } + for l, v := range m { + if err := encodeString(buf, string(l)); err != nil { + return nil, err + } + if err := encodeString(buf, string(v)); err != nil { + return nil, err + } + } + return buf.Bytes(), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. It can be used with the +// zero value of Metric. +func (m *Metric) UnmarshalBinary(buf []byte) error { + return m.UnmarshalFromReader(bytes.NewReader(buf)) +} + +// UnmarshalFromReader unmarshals a Metric from a reader that implements +// both, io.Reader and io.ByteReader. It can be used with the zero value of +// Metric. 
+func (m *Metric) UnmarshalFromReader(r byteReader) error { + numLabelPairs, err := binary.ReadVarint(r) + if err != nil { + return err + } + *m = make(Metric, numLabelPairs) + + for ; numLabelPairs > 0; numLabelPairs-- { + ln, err := decodeString(r) + if err != nil { + return err + } + lv, err := decodeString(r) + if err != nil { + return err + } + (*m)[clientmodel.LabelName(ln)] = clientmodel.LabelValue(lv) + } + return nil +} + +// A Fingerprint is a clientmodel.Fingerprint that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. The implementation +// depends on clientmodel.Fingerprint to be convertible to uint64. It encodes +// the fingerprint as a big-endian uint64. +type Fingerprint clientmodel.Fingerprint + +// MarshalBinary implements encoding.BinaryMarshaler. +func (fp Fingerprint) MarshalBinary() ([]byte, error) { + b := make([]byte, 8) + binary.BigEndian.PutUint64(b, uint64(fp)) + return b, nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (fp *Fingerprint) UnmarshalBinary(buf []byte) error { + *fp = Fingerprint(binary.BigEndian.Uint64(buf)) + return nil +} + +// FingerprintSet is a map[clientmodel.Fingerprint]struct{} that +// implements encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. Its +// binary form is identical to that of Fingerprints. +type FingerprintSet map[clientmodel.Fingerprint]struct{} + +// MarshalBinary implements encoding.BinaryMarshaler. +func (fps FingerprintSet) MarshalBinary() ([]byte, error) { + b := make([]byte, binary.MaxVarintLen64+len(fps)*8) + lenBytes := binary.PutVarint(b, int64(len(fps))) + offset := lenBytes + + for fp := range fps { + binary.BigEndian.PutUint64(b[offset:], uint64(fp)) + offset += 8 + } + return b[:len(fps)*8+lenBytes], nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. 
+func (fps *FingerprintSet) UnmarshalBinary(buf []byte) error { + numFPs, offset := binary.Varint(buf) + if offset <= 0 { + return fmt.Errorf("could not decode length of Fingerprints, varint decoding returned %d", offset) + } + *fps = make(FingerprintSet, numFPs) + + for i := 0; i < int(numFPs); i++ { + (*fps)[clientmodel.Fingerprint(binary.BigEndian.Uint64(buf[offset+i*8:]))] = struct{}{} + } + return nil +} + +// Fingerprints is a clientmodel.Fingerprints that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. Its binary form is +// identical to that of FingerprintSet. +type Fingerprints clientmodel.Fingerprints + +// MarshalBinary implements encoding.BinaryMarshaler. +func (fps Fingerprints) MarshalBinary() ([]byte, error) { + b := make([]byte, binary.MaxVarintLen64+len(fps)*8) + lenBytes := binary.PutVarint(b, int64(len(fps))) + + for i, fp := range fps { + binary.BigEndian.PutUint64(b[i*8+lenBytes:], uint64(fp)) + } + return b[:len(fps)*8+lenBytes], nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (fps *Fingerprints) UnmarshalBinary(buf []byte) error { + numFPs, offset := binary.Varint(buf) + if offset <= 0 { + return fmt.Errorf("could not decode length of Fingerprints, varint decoding returned %d", offset) + } + *fps = make(Fingerprints, numFPs) + + for i := range *fps { + (*fps)[i] = clientmodel.Fingerprint(binary.BigEndian.Uint64(buf[offset+i*8:])) + } + return nil +} + +// LabelPair is a metric.LabelPair that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. +type LabelPair metric.LabelPair + +// MarshalBinary implements encoding.BinaryMarshaler. +func (lp LabelPair) MarshalBinary() ([]byte, error) { + buf := &bytes.Buffer{} + if err := encodeString(buf, string(lp.Name)); err != nil { + return nil, err + } + if err := encodeString(buf, string(lp.Value)); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. 
+func (lp *LabelPair) UnmarshalBinary(buf []byte) error { + r := bytes.NewReader(buf) + n, err := decodeString(r) + if err != nil { + return err + } + v, err := decodeString(r) + if err != nil { + return err + } + lp.Name = clientmodel.LabelName(n) + lp.Value = clientmodel.LabelValue(v) + return nil +} + +// LabelName is a clientmodel.LabelName that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. +type LabelName clientmodel.LabelName + +// MarshalBinary implements encoding.BinaryMarshaler. +func (l LabelName) MarshalBinary() ([]byte, error) { + buf := &bytes.Buffer{} + if err := encodeString(buf, string(l)); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (l *LabelName) UnmarshalBinary(buf []byte) error { + r := bytes.NewReader(buf) + n, err := decodeString(r) + if err != nil { + return err + } + *l = LabelName(n) + return nil +} + +// LabelValueSet is a map[clientmodel.LabelValue]struct{} that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. Its binary form is +// identical to that of LabelValues. +type LabelValueSet map[clientmodel.LabelValue]struct{} + +// MarshalBinary implements encoding.BinaryMarshaler. +func (vs LabelValueSet) MarshalBinary() ([]byte, error) { + buf := &bytes.Buffer{} + if _, err := EncodeVarint(buf, int64(len(vs))); err != nil { + return nil, err + } + for v := range vs { + if err := encodeString(buf, string(v)); err != nil { + return nil, err + } + } + return buf.Bytes(), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. 
+func (vs *LabelValueSet) UnmarshalBinary(buf []byte) error { + r := bytes.NewReader(buf) + numValues, err := binary.ReadVarint(r) + if err != nil { + return err + } + *vs = make(LabelValueSet, numValues) + + for i := int64(0); i < numValues; i++ { + v, err := decodeString(r) + if err != nil { + return err + } + (*vs)[clientmodel.LabelValue(v)] = struct{}{} + } + return nil +} + +// LabelValues is a clientmodel.LabelValues that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. Its binary form is +// identical to that of LabelValueSet. +type LabelValues clientmodel.LabelValues + +// MarshalBinary implements encoding.BinaryMarshaler. +func (vs LabelValues) MarshalBinary() ([]byte, error) { + buf := &bytes.Buffer{} + if _, err := EncodeVarint(buf, int64(len(vs))); err != nil { + return nil, err + } + for _, v := range vs { + if err := encodeString(buf, string(v)); err != nil { + return nil, err + } + } + return buf.Bytes(), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (vs *LabelValues) UnmarshalBinary(buf []byte) error { + r := bytes.NewReader(buf) + numValues, err := binary.ReadVarint(r) + if err != nil { + return err + } + *vs = make(LabelValues, numValues) + + for i := range *vs { + v, err := decodeString(r) + if err != nil { + return err + } + (*vs)[i] = clientmodel.LabelValue(v) + } + return nil +} + +// TimeRange is used to define a time range and implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. +type TimeRange struct { + First, Last clientmodel.Timestamp +} + +// MarshalBinary implements encoding.BinaryMarshaler. +func (tr TimeRange) MarshalBinary() ([]byte, error) { + buf := &bytes.Buffer{} + if _, err := EncodeVarint(buf, int64(tr.First)); err != nil { + return nil, err + } + if _, err := EncodeVarint(buf, int64(tr.Last)); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. 
+func (tr *TimeRange) UnmarshalBinary(buf []byte) error { + r := bytes.NewReader(buf) + first, err := binary.ReadVarint(r) + if err != nil { + return err + } + last, err := binary.ReadVarint(r) + if err != nil { + return err + } + tr.First = clientmodel.Timestamp(first) + tr.Last = clientmodel.Timestamp(last) + return nil +} diff --git a/storage/local/codable/codable_test.go b/storage/local/codable/codable_test.go new file mode 100644 index 0000000000..14ee8c15a4 --- /dev/null +++ b/storage/local/codable/codable_test.go @@ -0,0 +1,165 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package codable + +import ( + "bytes" + "encoding" + "reflect" + "testing" +) + +func newFingerprint(fp int64) *Fingerprint { + cfp := Fingerprint(fp) + return &cfp +} + +func newLabelName(ln string) *LabelName { + cln := LabelName(ln) + return &cln +} + +func TestUint64(t *testing.T) { + var b bytes.Buffer + const n = 422010471112345 + if err := EncodeUint64(&b, n); err != nil { + t.Fatal(err) + } + got, err := DecodeUint64(&b) + if err != nil { + t.Fatal(err) + } + if got != n { + t.Errorf("want %d, got %d", n, got) + } +} + +var scenarios = []struct { + in encoding.BinaryMarshaler + out encoding.BinaryUnmarshaler + equal func(in, out interface{}) bool +}{ + { + in: &Metric{ + "label_1": "value_2", + "label_2": "value_2", + "label_3": "value_3", + }, + out: &Metric{}, + }, { + in: newFingerprint(12345), + out: newFingerprint(0), + }, { + in: &Fingerprints{1, 2, 56, 1234}, + out: &Fingerprints{}, + }, { + in: &Fingerprints{1, 2, 56, 1234}, + out: &FingerprintSet{}, + equal: func(in, out interface{}) bool { + inSet := FingerprintSet{} + for _, fp := range *(in.(*Fingerprints)) { + inSet[fp] = struct{}{} + } + return reflect.DeepEqual(inSet, *(out.(*FingerprintSet))) + }, + }, { + in: &FingerprintSet{ + 1: struct{}{}, + 2: struct{}{}, + 56: struct{}{}, + 1234: struct{}{}, + }, + out: &FingerprintSet{}, + }, { + in: &FingerprintSet{ + 1: struct{}{}, + 2: struct{}{}, + 56: struct{}{}, + 1234: struct{}{}, + }, + out: &Fingerprints{}, + equal: func(in, out interface{}) bool { + outSet := FingerprintSet{} + for _, fp := range *(out.(*Fingerprints)) { + outSet[fp] = struct{}{} + } + return reflect.DeepEqual(outSet, *(in.(*FingerprintSet))) + }, + }, { + in: &LabelPair{ + Name: "label_name", + Value: "label_value", + }, + out: &LabelPair{}, + }, { + in: newLabelName("label_name"), + out: newLabelName(""), + }, { + in: &LabelValues{"value_1", "value_2", "value_3"}, + out: &LabelValues{}, + }, { + in: &LabelValues{"value_1", "value_2", "value_3"}, + out: &LabelValueSet{}, 
+ equal: func(in, out interface{}) bool { + inSet := LabelValueSet{} + for _, lv := range *(in.(*LabelValues)) { + inSet[lv] = struct{}{} + } + return reflect.DeepEqual(inSet, *(out.(*LabelValueSet))) + }, + }, { + in: &LabelValueSet{ + "value_1": struct{}{}, + "value_2": struct{}{}, + "value_3": struct{}{}, + }, + out: &LabelValueSet{}, + }, { + in: &LabelValueSet{ + "value_1": struct{}{}, + "value_2": struct{}{}, + "value_3": struct{}{}, + }, + out: &LabelValues{}, + equal: func(in, out interface{}) bool { + outSet := LabelValueSet{} + for _, lv := range *(out.(*LabelValues)) { + outSet[lv] = struct{}{} + } + return reflect.DeepEqual(outSet, *(in.(*LabelValueSet))) + }, + }, { + in: &TimeRange{42, 2001}, + out: &TimeRange{}, + }, +} + +func TestCodec(t *testing.T) { + for i, s := range scenarios { + encoded, err := s.in.MarshalBinary() + if err != nil { + t.Fatal(err) + } + if err := s.out.UnmarshalBinary(encoded); err != nil { + t.Fatal(err) + } + equal := s.equal + if equal == nil { + equal = reflect.DeepEqual + } + if !equal(s.in, s.out) { + t.Errorf("%d. Got: %v; want %v; encoded bytes are: %v", i, s.out, s.in, encoded) + } + } +} diff --git a/storage/local/delta.go b/storage/local/delta.go new file mode 100644 index 0000000000..15d8d6e3ae --- /dev/null +++ b/storage/local/delta.go @@ -0,0 +1,423 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package local + +import ( + "encoding/binary" + "fmt" + "io" + "math" + "sort" + + clientmodel "github.com/prometheus/client_golang/model" + + "github.com/prometheus/prometheus/storage/metric" +) + +type deltaBytes byte + +const ( + d0 deltaBytes = 0 + d1 = 1 + d2 = 2 + d4 = 4 + d8 = 8 +) + +// The 21-byte header of a delta-encoded chunk looks like: +// +// - time delta bytes: 1 bytes +// - value delta bytes: 1 bytes +// - is integer: 1 byte +// - base time: 8 bytes +// - base value: 8 bytes +// - used buf bytes: 2 bytes +const ( + deltaHeaderBytes = 21 + + deltaHeaderTimeBytesOffset = 0 + deltaHeaderValueBytesOffset = 1 + deltaHeaderIsIntOffset = 2 + deltaHeaderBaseTimeOffset = 3 + deltaHeaderBaseValueOffset = 11 + deltaHeaderBufLenOffset = 19 +) + +// A deltaEncodedChunk adaptively stores sample timestamps and values with a +// delta encoding of various types (int, float) and bit width. However, once 8 +// bytes would be needed to encode a delta value, a fall-back to the absolute +// numbers happens (so that timestamps are saved directly as int64 and values as +// float64). It implements the chunk interface. +type deltaEncodedChunk struct { + buf []byte +} + +// newDeltaEncodedChunk returns a newly allocated deltaEncodedChunk. +func newDeltaEncodedChunk(tb, vb deltaBytes, isInt bool) *deltaEncodedChunk { + buf := make([]byte, deltaHeaderIsIntOffset+1, 1024) + + buf[deltaHeaderTimeBytesOffset] = byte(tb) + buf[deltaHeaderValueBytesOffset] = byte(vb) + if vb < d8 && isInt { // Only use int for fewer than 8 value delta bytes. + buf[deltaHeaderIsIntOffset] = 1 + } else { + buf[deltaHeaderIsIntOffset] = 0 + } + + return &deltaEncodedChunk{ + buf: buf, + } +} + +func (c *deltaEncodedChunk) newFollowupChunk() chunk { + return newDeltaEncodedChunk(d1, d0, true) +} + +// clone implements chunk. 
+func (c *deltaEncodedChunk) clone() chunk { + buf := make([]byte, len(c.buf), 1024) + copy(buf, c.buf) + return &deltaEncodedChunk{ + buf: buf, + } +} + +func neededDeltaBytes(deltaT clientmodel.Timestamp, deltaV clientmodel.SampleValue, isInt bool) (dtb, dvb deltaBytes) { + dtb = d1 + if deltaT > math.MaxUint8 { + dtb = d2 + } + if deltaT > math.MaxUint16 { + dtb = d4 + } + if deltaT > math.MaxUint32 { + dtb = d8 + } + + if isInt { + dvb = d0 + if deltaV != 0 { + dvb = d1 + } + if deltaV < math.MinInt8 || deltaV > math.MaxInt8 { + dvb = d2 + } + if deltaV < math.MinInt16 || deltaV > math.MaxInt16 { + dvb = d4 + } + if deltaV < math.MinInt32 || deltaV > math.MaxInt32 { + dvb = d8 + } + } else { + dvb = d4 + if clientmodel.SampleValue(float32(deltaV)) != deltaV { + dvb = d8 + } + } + return dtb, dvb +} + +func max(a, b deltaBytes) deltaBytes { + if a > b { + return a + } + return b +} + +func (c *deltaEncodedChunk) timeBytes() deltaBytes { + return deltaBytes(c.buf[deltaHeaderTimeBytesOffset]) +} + +func (c *deltaEncodedChunk) valueBytes() deltaBytes { + return deltaBytes(c.buf[deltaHeaderValueBytesOffset]) +} + +func (c *deltaEncodedChunk) isInt() bool { + return c.buf[deltaHeaderIsIntOffset] == 1 +} + +func (c *deltaEncodedChunk) baseTime() clientmodel.Timestamp { + return clientmodel.Timestamp(binary.LittleEndian.Uint64(c.buf[deltaHeaderBaseTimeOffset:])) +} + +func (c *deltaEncodedChunk) baseValue() clientmodel.SampleValue { + return clientmodel.SampleValue(math.Float64frombits(binary.LittleEndian.Uint64(c.buf[deltaHeaderBaseValueOffset:]))) +} + +// add implements chunk. 
+func (c *deltaEncodedChunk) add(s *metric.SamplePair) []chunk { + if len(c.buf) < deltaHeaderBytes { + c.buf = c.buf[:deltaHeaderBytes] + binary.LittleEndian.PutUint64(c.buf[deltaHeaderBaseTimeOffset:], uint64(s.Timestamp)) + binary.LittleEndian.PutUint64(c.buf[deltaHeaderBaseValueOffset:], math.Float64bits(float64(s.Value))) + } + + remainingBytes := cap(c.buf) - len(c.buf) + sampleSize := c.sampleSize() + + // Do we generally have space for another sample in this chunk? If not, + // overflow into a new one. + if remainingBytes < sampleSize { + overflowChunks := c.newFollowupChunk().add(s) + return []chunk{c, overflowChunks[0]} + } + + dt := s.Timestamp - c.baseTime() + dv := s.Value - c.baseValue() + tb := c.timeBytes() + vb := c.valueBytes() + + // If the new sample is incompatible with the current encoding, reencode the + // existing chunk data into new chunk(s). + // + // int->float. + // Note: Using math.Modf is slower than the conversion approach below. + if c.isInt() && clientmodel.SampleValue(int64(dv)) != dv { + return transcodeAndAdd(newDeltaEncodedChunk(tb, d4, false), c, s) + } + // float32->float64. + if !c.isInt() && vb == d4 && clientmodel.SampleValue(float32(dv)) != dv { + return transcodeAndAdd(newDeltaEncodedChunk(tb, d8, false), c, s) + } + if tb < d8 || vb < d8 { + // Maybe more bytes per sample. + if ntb, nvb := neededDeltaBytes(dt, dv, c.isInt()); ntb > tb || nvb > vb { + ntb = max(ntb, tb) + nvb = max(nvb, vb) + return transcodeAndAdd(newDeltaEncodedChunk(ntb, nvb, c.isInt()), c, s) + } + } + offset := len(c.buf) + c.buf = c.buf[:offset+sampleSize] + + switch tb { + case d1: + c.buf[offset] = byte(dt) + case d2: + binary.LittleEndian.PutUint16(c.buf[offset:], uint16(dt)) + case d4: + binary.LittleEndian.PutUint32(c.buf[offset:], uint32(dt)) + case d8: + // Store the absolute value (no delta) in case of d8. 
+ binary.LittleEndian.PutUint64(c.buf[offset:], uint64(s.Timestamp)) + default: + panic("invalid number of bytes for time delta") + } + + offset += int(tb) + + if c.isInt() { + switch vb { + case d0: + // No-op. Constant value is stored as base value. + case d1: + c.buf[offset] = byte(dv) + case d2: + binary.LittleEndian.PutUint16(c.buf[offset:], uint16(dv)) + case d4: + binary.LittleEndian.PutUint32(c.buf[offset:], uint32(dv)) + // d8 must not happen. Those samples are encoded as float64. + default: + panic("invalid number of bytes for integer delta") + } + } else { + switch vb { + case d4: + binary.LittleEndian.PutUint32(c.buf[offset:], math.Float32bits(float32(dv))) + case d8: + // Store the absolute value (no delta) in case of d8. + binary.LittleEndian.PutUint64(c.buf[offset:], math.Float64bits(float64(s.Value))) + default: + panic("invalid number of bytes for floating point delta") + } + } + return []chunk{c} +} + +func (c *deltaEncodedChunk) sampleSize() int { + return int(c.timeBytes() + c.valueBytes()) +} + +func (c *deltaEncodedChunk) len() int { + if len(c.buf) < deltaHeaderBytes { + return 0 + } + return (len(c.buf) - deltaHeaderBytes) / c.sampleSize() +} + +// values implements chunk. +func (c *deltaEncodedChunk) values() <-chan *metric.SamplePair { + n := c.len() + valuesChan := make(chan *metric.SamplePair) + go func() { + for i := 0; i < n; i++ { + valuesChan <- c.valueAtIndex(i) + } + close(valuesChan) + }() + return valuesChan +} + +func (c *deltaEncodedChunk) valueAtIndex(idx int) *metric.SamplePair { + offset := deltaHeaderBytes + idx*c.sampleSize() + + var ts clientmodel.Timestamp + switch c.timeBytes() { + case d1: + ts = c.baseTime() + clientmodel.Timestamp(uint8(c.buf[offset])) + case d2: + ts = c.baseTime() + clientmodel.Timestamp(binary.LittleEndian.Uint16(c.buf[offset:])) + case d4: + ts = c.baseTime() + clientmodel.Timestamp(binary.LittleEndian.Uint32(c.buf[offset:])) + case d8: + // Take absolute value for d8. 
+ ts = clientmodel.Timestamp(binary.LittleEndian.Uint64(c.buf[offset:])) + default: + panic("Invalid number of bytes for time delta") + } + + offset += int(c.timeBytes()) + + var v clientmodel.SampleValue + if c.isInt() { + switch c.valueBytes() { + case d0: + v = c.baseValue() + case d1: + v = c.baseValue() + clientmodel.SampleValue(int8(c.buf[offset])) + case d2: + v = c.baseValue() + clientmodel.SampleValue(int16(binary.LittleEndian.Uint16(c.buf[offset:]))) + case d4: + v = c.baseValue() + clientmodel.SampleValue(int32(binary.LittleEndian.Uint32(c.buf[offset:]))) + // No d8 for ints. + default: + panic("Invalid number of bytes for integer delta") + } + } else { + switch c.valueBytes() { + case d4: + v = c.baseValue() + clientmodel.SampleValue(math.Float32frombits(binary.LittleEndian.Uint32(c.buf[offset:]))) + case d8: + // Take absolute value for d8. + v = clientmodel.SampleValue(math.Float64frombits(binary.LittleEndian.Uint64(c.buf[offset:]))) + default: + panic("Invalid number of bytes for floating point delta") + } + } + return &metric.SamplePair{ + Timestamp: ts, + Value: v, + } +} + +// firstTime implements chunk. +func (c *deltaEncodedChunk) firstTime() clientmodel.Timestamp { + return c.valueAtIndex(0).Timestamp +} + +// lastTime implements chunk. +func (c *deltaEncodedChunk) lastTime() clientmodel.Timestamp { + return c.valueAtIndex(c.len() - 1).Timestamp +} + +// marshal implements chunk. +func (c *deltaEncodedChunk) marshal(w io.Writer) error { + if len(c.buf) > math.MaxUint16 { + panic("chunk buffer length would overflow a 16 bit uint.") + } + binary.LittleEndian.PutUint16(c.buf[deltaHeaderBufLenOffset:], uint16(len(c.buf))) + + n, err := w.Write(c.buf[:cap(c.buf)]) + if err != nil { + return err + } + if n != cap(c.buf) { + return fmt.Errorf("wanted to write %d bytes, wrote %d", len(c.buf), n) + } + return nil +} + +// unmarshal implements chunk. 
+func (c *deltaEncodedChunk) unmarshal(r io.Reader) error { + c.buf = c.buf[:cap(c.buf)] + readBytes := 0 + for readBytes < len(c.buf) { + n, err := r.Read(c.buf[readBytes:]) + if err != nil { + return err + } + readBytes += n + } + c.buf = c.buf[:binary.LittleEndian.Uint16(c.buf[deltaHeaderBufLenOffset:])] + return nil +} + +// deltaEncodedChunkIterator implements chunkIterator. +type deltaEncodedChunkIterator struct { + chunk *deltaEncodedChunk + // TODO: add more fields here to keep track of last position. +} + +// newIterator implements chunk. +func (c *deltaEncodedChunk) newIterator() chunkIterator { + return &deltaEncodedChunkIterator{ + chunk: c, + } +} + +// getValueAtTime implements chunkIterator. +func (it *deltaEncodedChunkIterator) getValueAtTime(t clientmodel.Timestamp) metric.Values { + i := sort.Search(it.chunk.len(), func(i int) bool { + return !it.chunk.valueAtIndex(i).Timestamp.Before(t) + }) + + switch i { + case 0: + return metric.Values{*it.chunk.valueAtIndex(0)} + case it.chunk.len(): + return metric.Values{*it.chunk.valueAtIndex(it.chunk.len() - 1)} + default: + v := it.chunk.valueAtIndex(i) + if v.Timestamp.Equal(t) { + return metric.Values{*v} + } + return metric.Values{*it.chunk.valueAtIndex(i - 1), *v} + } +} + +// getRangeValues implements chunkIterator. +func (it *deltaEncodedChunkIterator) getRangeValues(in metric.Interval) metric.Values { + oldest := sort.Search(it.chunk.len(), func(i int) bool { + return !it.chunk.valueAtIndex(i).Timestamp.Before(in.OldestInclusive) + }) + + newest := sort.Search(it.chunk.len(), func(i int) bool { + return it.chunk.valueAtIndex(i).Timestamp.After(in.NewestInclusive) + }) + + if oldest == it.chunk.len() { + return nil + } + + result := make(metric.Values, 0, newest-oldest) + for i := oldest; i < newest; i++ { + result = append(result, *it.chunk.valueAtIndex(i)) + } + return result +} + +// contains implements chunkIterator. 
+func (it *deltaEncodedChunkIterator) contains(t clientmodel.Timestamp) bool { + return !t.Before(it.chunk.firstTime()) && !t.After(it.chunk.lastTime()) +} diff --git a/storage/local/index/index.go b/storage/local/index/index.go new file mode 100644 index 0000000000..1c4c123d68 --- /dev/null +++ b/storage/local/index/index.go @@ -0,0 +1,289 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package index provides a number of indexes backed by persistent key-value +// stores. The only supported implementation of a key-value store is currently +// goleveldb, but other implementations can easily be added. +package index + +import ( + "flag" + "os" + "path" + + clientmodel "github.com/prometheus/client_golang/model" + + "github.com/prometheus/prometheus/storage/local/codable" + "github.com/prometheus/prometheus/storage/metric" +) + +const ( + fingerprintToMetricDir = "archived_fingerprint_to_metric" + fingerprintTimeRangeDir = "archived_fingerprint_to_timerange" + labelNameToLabelValuesDir = "labelname_to_labelvalues" + labelPairToFingerprintsDir = "labelpair_to_fingerprints" +) + +var ( + // TODO: Tweak default values. 
+ fingerprintToMetricCacheSize = flag.Int("storage.local.index-cache-size.fingerprint-to-metric", 10*1024*1024, "The size in bytes for the fingerprint to metric index cache.") + fingerprintTimeRangeCacheSize = flag.Int("storage.local.index-cache-size.fingerprint-to-timerange", 5*1024*1024, "The size in bytes for the metric time range index cache.") + labelNameToLabelValuesCacheSize = flag.Int("storage.local.index-cache-size.label-name-to-label-values", 10*1024*1024, "The size in bytes for the label name to label values index cache.") + labelPairToFingerprintsCacheSize = flag.Int("storage.local.index-cache-size.label-pair-to-fingerprints", 20*1024*1024, "The size in bytes for the label pair to fingerprints index cache.") +) + +// FingerprintMetricMapping is an in-memory map of fingerprints to metrics. +type FingerprintMetricMapping map[clientmodel.Fingerprint]clientmodel.Metric + +// FingerprintMetricIndex models a database mapping fingerprints to metrics. +type FingerprintMetricIndex struct { + KeyValueStore +} + +// IndexBatch indexes a batch of mappings from fingerprints to metrics. +// +// This method is goroutine-safe, but note that no specific order of execution +// can be guaranteed (especially critical if IndexBatch and UnindexBatch are +// called concurrently for the same fingerprint). +func (i *FingerprintMetricIndex) IndexBatch(mapping FingerprintMetricMapping) error { + b := i.NewBatch() + + for fp, m := range mapping { + b.Put(codable.Fingerprint(fp), codable.Metric(m)) + } + + return i.Commit(b) +} + +// UnindexBatch unindexes a batch of mappings from fingerprints to metrics. +// +// This method is goroutine-safe, but note that no specific order of execution +// can be guaranteed (especially critical if IndexBatch and UnindexBatch are +// called concurrently for the same fingerprint). 
+func (i *FingerprintMetricIndex) UnindexBatch(mapping FingerprintMetricMapping) error { + b := i.NewBatch() + + for fp := range mapping { + b.Delete(codable.Fingerprint(fp)) + } + + return i.Commit(b) +} + +// Lookup looks up a metric by fingerprint. Looking up a non-existing +// fingerprint is not an error. In that case, (nil, false, nil) is returned. +// +// This method is goroutine-safe. +func (i *FingerprintMetricIndex) Lookup(fp clientmodel.Fingerprint) (metric clientmodel.Metric, ok bool, err error) { + ok, err = i.Get(codable.Fingerprint(fp), (*codable.Metric)(&metric)) + return +} + +// NewFingerprintMetricIndex returns a LevelDB-backed FingerprintMetricIndex +// ready to use. +func NewFingerprintMetricIndex(basePath string) (*FingerprintMetricIndex, error) { + fingerprintToMetricDB, err := NewLevelDB(LevelDBOptions{ + Path: path.Join(basePath, fingerprintToMetricDir), + CacheSizeBytes: *fingerprintToMetricCacheSize, + }) + if err != nil { + return nil, err + } + return &FingerprintMetricIndex{ + KeyValueStore: fingerprintToMetricDB, + }, nil +} + +// LabelNameLabelValuesMapping is an in-memory map of label names to +// label values. +type LabelNameLabelValuesMapping map[clientmodel.LabelName]codable.LabelValueSet + +// LabelNameLabelValuesIndex is a KeyValueStore that maps existing label names +// to all label values stored for that label name. +type LabelNameLabelValuesIndex struct { + KeyValueStore +} + +// IndexBatch adds a batch of label name to label values mappings to the +// index. A mapping of a label name to an empty slice of label values results in +// a deletion of that mapping from the index. +// +// While this method is fundamentally goroutine-safe, note that the order of +// execution for multiple batches executed concurrently is undefined. 
+func (i *LabelNameLabelValuesIndex) IndexBatch(b LabelNameLabelValuesMapping) error { + batch := i.NewBatch() + + for name, values := range b { + if len(values) == 0 { + if err := batch.Delete(codable.LabelName(name)); err != nil { + return err + } + } else { + if err := batch.Put(codable.LabelName(name), values); err != nil { + return err + } + } + } + + return i.Commit(batch) +} + +// Lookup looks up all label values for a given label name and returns them as +// clientmodel.LabelValues (which is a slice). Looking up a non-existing label +// name is not an error. In that case, (nil, false, nil) is returned. +// +// This method is goroutine-safe. +func (i *LabelNameLabelValuesIndex) Lookup(l clientmodel.LabelName) (values clientmodel.LabelValues, ok bool, err error) { + ok, err = i.Get(codable.LabelName(l), (*codable.LabelValues)(&values)) + return +} + +// LookupSet looks up all label values for a given label name and returns them +// as a set. Looking up a non-existing label name is not an error. In that case, +// (nil, false, nil) is returned. +// +// This method is goroutine-safe. +func (i *LabelNameLabelValuesIndex) LookupSet(l clientmodel.LabelName) (values map[clientmodel.LabelValue]struct{}, ok bool, err error) { + ok, err = i.Get(codable.LabelName(l), (*codable.LabelValueSet)(&values)) + if values == nil { + values = map[clientmodel.LabelValue]struct{}{} + } + return +} + +// NewLabelNameLabelValuesIndex returns a LevelDB-backed +// LabelNameLabelValuesIndex ready to use. 
+func NewLabelNameLabelValuesIndex(basePath string) (*LabelNameLabelValuesIndex, error) { + labelNameToLabelValuesDB, err := NewLevelDB(LevelDBOptions{ + Path: path.Join(basePath, labelNameToLabelValuesDir), + CacheSizeBytes: *labelNameToLabelValuesCacheSize, + }) + if err != nil { + return nil, err + } + return &LabelNameLabelValuesIndex{ + KeyValueStore: labelNameToLabelValuesDB, + }, nil +} + +// DeleteLabelNameLabelValuesIndex deletes the LevelDB-backed +// LabelNameLabelValuesIndex. Use only for a not yet opened index. +func DeleteLabelNameLabelValuesIndex(basePath string) error { + return os.RemoveAll(path.Join(basePath, labelNameToLabelValuesDir)) +} + +// LabelPairFingerprintsMapping is an in-memory map of label pairs to +// fingerprints. +type LabelPairFingerprintsMapping map[metric.LabelPair]codable.FingerprintSet + +// LabelPairFingerprintIndex is a KeyValueStore that maps existing label pairs +// to the fingerprints of all metrics containing those label pairs. +type LabelPairFingerprintIndex struct { + KeyValueStore +} + +// IndexBatch indexes a batch of mappings from label pairs to fingerprints. A +// mapping to an empty slice of fingerprints results in deletion of that mapping +// from the index. +// +// While this method is fundamentally goroutine-safe, note that the order of +// execution for multiple batches executed concurrently is undefined. +func (i *LabelPairFingerprintIndex) IndexBatch(m LabelPairFingerprintsMapping) error { + batch := i.NewBatch() + + for pair, fps := range m { + if len(fps) == 0 { + batch.Delete(codable.LabelPair(pair)) + } else { + batch.Put(codable.LabelPair(pair), fps) + } + } + + return i.Commit(batch) +} + +// Lookup looks up all fingerprints for a given label pair. Looking up a +// non-existing label pair is not an error. In that case, (nil, false, nil) is +// returned. +// +// This method is goroutine-safe. 
+func (i *LabelPairFingerprintIndex) Lookup(p metric.LabelPair) (fps clientmodel.Fingerprints, ok bool, err error) { + ok, err = i.Get((codable.LabelPair)(p), (*codable.Fingerprints)(&fps)) + return +} + +// LookupSet looks up all fingerprints for a given label pair. Looking up a +// non-existing label pair is not an error. In that case, (nil, false, nil) is +// returned. +// +// This method is goroutine-safe. +func (i *LabelPairFingerprintIndex) LookupSet(p metric.LabelPair) (fps map[clientmodel.Fingerprint]struct{}, ok bool, err error) { + ok, err = i.Get((codable.LabelPair)(p), (*codable.FingerprintSet)(&fps)) + if fps == nil { + fps = map[clientmodel.Fingerprint]struct{}{} + } + return +} + +// NewLabelPairFingerprintIndex returns a LevelDB-backed +// LabelPairFingerprintIndex ready to use. +func NewLabelPairFingerprintIndex(basePath string) (*LabelPairFingerprintIndex, error) { + labelPairToFingerprintsDB, err := NewLevelDB(LevelDBOptions{ + Path: path.Join(basePath, labelPairToFingerprintsDir), + CacheSizeBytes: *labelPairToFingerprintsCacheSize, + }) + if err != nil { + return nil, err + } + return &LabelPairFingerprintIndex{ + KeyValueStore: labelPairToFingerprintsDB, + }, nil +} + +// DeleteLabelPairFingerprintIndex deletes the LevelDB-backed +// LabelPairFingerprintIndex. Use only for a not yet opened index. +func DeleteLabelPairFingerprintIndex(basePath string) error { + return os.RemoveAll(path.Join(basePath, labelPairToFingerprintsDir)) +} + +// FingerprintTimeRangeIndex models a database tracking the time ranges +// of metrics by their fingerprints. +type FingerprintTimeRangeIndex struct { + KeyValueStore +} + +// Lookup returns the time range for the given fingerprint. Looking up a +// non-existing fingerprint is not an error. In that case, (0, 0, false, nil) is +// returned. +// +// This method is goroutine-safe. 
+func (i *FingerprintTimeRangeIndex) Lookup(fp clientmodel.Fingerprint) (firstTime, lastTime clientmodel.Timestamp, ok bool, err error) { + var tr codable.TimeRange + ok, err = i.Get(codable.Fingerprint(fp), &tr) + return tr.First, tr.Last, ok, err +} + +// NewFingerprintTimeRangeIndex returns a LevelDB-backed +// FingerprintTimeRangeIndex ready to use. +func NewFingerprintTimeRangeIndex(basePath string) (*FingerprintTimeRangeIndex, error) { + fingerprintTimeRangeDB, err := NewLevelDB(LevelDBOptions{ + Path: path.Join(basePath, fingerprintTimeRangeDir), + CacheSizeBytes: *fingerprintTimeRangeCacheSize, + }) + if err != nil { + return nil, err + } + return &FingerprintTimeRangeIndex{ + KeyValueStore: fingerprintTimeRangeDB, + }, nil +} diff --git a/storage/local/index/interface.go b/storage/local/index/interface.go new file mode 100644 index 0000000000..c475b4d515 --- /dev/null +++ b/storage/local/index/interface.go @@ -0,0 +1,61 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package index + +import "encoding" + +// KeyValueStore persists key/value pairs. Implementations must be fundamentally +// goroutine-safe. However, it is the caller's responsibility that keys and +// values can be safely marshaled and unmarshaled (via the MarshalBinary and +// UnmarshalBinary methods of the keys and values). 
For example, if you call the +// Put method of a KeyValueStore implementation, but the key or the value are +// modified concurrently while being marshaled into its binary representation, +// you obviously have a problem. Methods of KeyValueStore return only after +// (un)marshaling is complete. +type KeyValueStore interface { + Put(key, value encoding.BinaryMarshaler) error + // Get unmarshals the result into value. It returns false if no entry + // could be found for key. If value is nil, Get behaves like Has. + Get(key encoding.BinaryMarshaler, value encoding.BinaryUnmarshaler) (bool, error) + Has(key encoding.BinaryMarshaler) (bool, error) + // Delete returns an error if key does not exist. + Delete(key encoding.BinaryMarshaler) error + + NewBatch() Batch + Commit(b Batch) error + + // ForEach iterates through the complete KeyValueStore and calls the + // supplied function for each mapping. + ForEach(func(kv KeyValueAccessor) error) error + + Close() error +} + +// KeyValueAccessor allows access to the key and value of an entry in a +// KeyValueStore. +type KeyValueAccessor interface { + Key(encoding.BinaryUnmarshaler) error + Value(encoding.BinaryUnmarshaler) error +} + +// Batch allows KeyValueStore mutations to be pooled and committed together. An +// implementation does not have to be goroutine-safe. Never modify a Batch +// concurrently or commit the same batch multiple times concurrently. Marshaling +// of keys and values is guaranteed to be complete when the Put or Delete methods +// have returned. 
+type Batch interface { + Put(key, value encoding.BinaryMarshaler) error + Delete(key encoding.BinaryMarshaler) error + Reset() +} diff --git a/storage/local/index/leveldb.go b/storage/local/index/leveldb.go new file mode 100644 index 0000000000..16c162673d --- /dev/null +++ b/storage/local/index/leveldb.go @@ -0,0 +1,204 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package index + +import ( + "encoding" + + "github.com/syndtr/goleveldb/leveldb" + leveldb_cache "github.com/syndtr/goleveldb/leveldb/cache" + leveldb_filter "github.com/syndtr/goleveldb/leveldb/filter" + leveldb_iterator "github.com/syndtr/goleveldb/leveldb/iterator" + leveldb_opt "github.com/syndtr/goleveldb/leveldb/opt" + leveldb_util "github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + keyspace = &leveldb_util.Range{ + Start: nil, + Limit: nil, + } + + iteratorOpts = &leveldb_opt.ReadOptions{ + DontFillCache: true, + } +) + +// LevelDB is a LevelDB-backed sorted KeyValueStore. +type LevelDB struct { + storage *leveldb.DB + readOpts *leveldb_opt.ReadOptions + writeOpts *leveldb_opt.WriteOptions +} + +// LevelDBOptions provides options for a LevelDB. +type LevelDBOptions struct { + Path string // Base path to store files. + CacheSizeBytes int +} + +// NewLevelDB returns a newly allocated LevelDB-backed KeyValueStore ready to +// use. 
+func NewLevelDB(o LevelDBOptions) (KeyValueStore, error) { + options := &leveldb_opt.Options{ + Compression: leveldb_opt.SnappyCompression, + BlockCache: leveldb_cache.NewLRUCache(o.CacheSizeBytes), + Filter: leveldb_filter.NewBloomFilter(10), + } + + storage, err := leveldb.OpenFile(o.Path, options) + if err != nil { + return nil, err + } + + return &LevelDB{ + storage: storage, + readOpts: &leveldb_opt.ReadOptions{}, + writeOpts: &leveldb_opt.WriteOptions{}, + }, nil +} + +// NewBatch implements KeyValueStore. +func (l *LevelDB) NewBatch() Batch { + return &LevelDBBatch{ + batch: &leveldb.Batch{}, + } +} + +// Close implements KeyValueStore. +func (l *LevelDB) Close() error { + return l.storage.Close() +} + +// Get implements KeyValueStore. +func (l *LevelDB) Get(key encoding.BinaryMarshaler, value encoding.BinaryUnmarshaler) (bool, error) { + k, err := key.MarshalBinary() + if err != nil { + return false, err + } + raw, err := l.storage.Get(k, l.readOpts) + if err == leveldb.ErrNotFound { + return false, nil + } + if err != nil { + return false, err + } + if value == nil { + return true, nil + } + return true, value.UnmarshalBinary(raw) +} + +// Has implements KeyValueStore. +func (l *LevelDB) Has(key encoding.BinaryMarshaler) (has bool, err error) { + return l.Get(key, nil) +} + +// Delete implements KeyValueStore. +func (l *LevelDB) Delete(key encoding.BinaryMarshaler) error { + k, err := key.MarshalBinary() + if err != nil { + return err + } + return l.storage.Delete(k, l.writeOpts) +} + +// Put implements KeyValueStore. +func (l *LevelDB) Put(key, value encoding.BinaryMarshaler) error { + k, err := key.MarshalBinary() + if err != nil { + return err + } + v, err := value.MarshalBinary() + if err != nil { + return err + } + return l.storage.Put(k, v, l.writeOpts) +} + +// Commit implements KeyValueStore. +func (l *LevelDB) Commit(b Batch) error { + return l.storage.Write(b.(*LevelDBBatch).batch, l.writeOpts) +} + +// ForEach implements KeyValueStore. 
+func (l *LevelDB) ForEach(cb func(kv KeyValueAccessor) error) error { + snap, err := l.storage.GetSnapshot() + if err != nil { + return err + } + defer snap.Release() + + iter := snap.NewIterator(keyspace, iteratorOpts) + + kv := &levelDBKeyValueAccessor{it: iter} + + for valid := iter.First(); valid; valid = iter.Next() { + if err = iter.Error(); err != nil { + return err + } + + if err := cb(kv); err != nil { + return err + } + } + return nil +} + +// LevelDBBatch is a Batch implementation for LevelDB. +type LevelDBBatch struct { + batch *leveldb.Batch +} + +// Put implements Batch. +func (b *LevelDBBatch) Put(key, value encoding.BinaryMarshaler) error { + k, err := key.MarshalBinary() + if err != nil { + return err + } + v, err := value.MarshalBinary() + if err != nil { + return err + } + b.batch.Put(k, v) + return nil +} + +// Delete implements Batch. +func (b *LevelDBBatch) Delete(key encoding.BinaryMarshaler) error { + k, err := key.MarshalBinary() + if err != nil { + return err + } + b.batch.Delete(k) + return nil +} + +// Reset implements Batch. +func (b *LevelDBBatch) Reset() { + b.batch.Reset() +} + +// levelDBKeyValueAccessor implements KeyValueAccessor. +type levelDBKeyValueAccessor struct { + it leveldb_iterator.Iterator +} + +func (i *levelDBKeyValueAccessor) Key(key encoding.BinaryUnmarshaler) error { + return key.UnmarshalBinary(i.it.Key()) +} + +func (i *levelDBKeyValueAccessor) Value(value encoding.BinaryUnmarshaler) error { + return value.UnmarshalBinary(i.it.Value()) +} diff --git a/storage/local/instrumentation.go b/storage/local/instrumentation.go new file mode 100644 index 0000000000..58f6125a22 --- /dev/null +++ b/storage/local/instrumentation.go @@ -0,0 +1,92 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import "github.com/prometheus/client_golang/prometheus" + +// Usually, a separate file for instrumentation is frowned upon. Metrics should +// be close to where they are used. However, the metrics below are set all over +// the place, so we go for a separate instrumentation file in this case. +var ( + chunkOps = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "chunk_ops_total", + Help: "The total number of chunk operations by their type.", + }, + []string{opTypeLabel}, + ) + chunkDescOps = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "chunkdesc_ops_total", + Help: "The total number of chunk descriptor operations by their type.", + }, + []string{opTypeLabel}, + ) +) + +const ( + namespace = "prometheus" + subsystem = "local_storage" + + opTypeLabel = "type" + + // Op-types for seriesOps. + create = "create" + archive = "archive" + unarchive = "unarchive" + memoryPurge = "purge_from_memory" + archivePurge = "purge_from_archive" + memoryMaintenance = "maintenance_in_memory" + archiveMaintenance = "maintenance_in_archive" + + // Op-types for chunkOps. + createAndPin = "create" // A chunkDesc creation with refCount=1. + persistAndUnpin = "persist" + pin = "pin" // Excluding the pin on creation. + unpin = "unpin" // Excluding the unpin on persisting. + clone = "clone" + transcode = "transcode" + purge = "purge" + + // Op-types for chunkOps and chunkDescOps. 
+ evict = "evict" + load = "load" +) + +func init() { + prometheus.MustRegister(chunkOps) + prometheus.MustRegister(chunkDescOps) +} + +var ( + // Global counters, also used internally, so not implemented as + // metrics. Collected in memorySeriesStorage.Collect. + numMemChunks, numMemChunkDescs int64 + + // Metric descriptors for the above. + numMemChunksDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "memory_chunks"), + "The current number of chunks in memory, excluding cloned chunks (i.e. chunks without a descriptor).", + nil, nil, + ) + numMemChunkDescsDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "memory_chunkdescs"), + "The current number of chunk descriptors in memory.", + nil, nil, + ) +) diff --git a/storage/local/interface.go b/storage/local/interface.go new file mode 100644 index 0000000000..4b8b8e39ef --- /dev/null +++ b/storage/local/interface.go @@ -0,0 +1,87 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "time" + clientmodel "github.com/prometheus/client_golang/model" + "github.com/prometheus/client_golang/prometheus" + + "github.com/prometheus/prometheus/storage/metric" +) + +// Storage ingests and manages samples, along with various indexes. All methods +// are goroutine-safe. +type Storage interface { + prometheus.Collector + // AppendSamples stores a group of new samples. 
Multiple samples for the same + // fingerprint need to be submitted in chronological order, from oldest to + // newest (both in the same call to AppendSamples and across multiple calls). + AppendSamples(clientmodel.Samples) + // NewPreloader returns a new Preloader which allows preloading and pinning + // series data into memory for use within a query. + NewPreloader() Preloader + // Get all of the metric fingerprints that are associated with the + // provided label matchers. + GetFingerprintsForLabelMatchers(metric.LabelMatchers) clientmodel.Fingerprints + // Get all of the label values that are associated with a given label name. + GetLabelValuesForLabelName(clientmodel.LabelName) clientmodel.LabelValues + // Get the metric associated with the provided fingerprint. + GetMetricForFingerprint(clientmodel.Fingerprint) clientmodel.Metric + // Construct an iterator for a given fingerprint. + NewIterator(clientmodel.Fingerprint) SeriesIterator + // Run the various maintenance loops in goroutines. Returns when the + // storage is ready to use. Keeps everything running in the background + // until Stop is called. + Start() + // Stop shuts down the Storage gracefully, flushes all pending + // operations, stops all maintenance loops,and frees all resources. + Stop() error + // WaitForIndexing returns once all samples in the storage are + // indexed. Indexing is needed for GetFingerprintsForLabelMatchers and + // GetLabelValuesForLabelName and may lag behind. + WaitForIndexing() +} + +// SeriesIterator enables efficient access of sample values in a series. All +// methods are goroutine-safe. A SeriesIterator iterates over a snapshot of a +// series, i.e. it is safe to continue using a SeriesIterator after modifying +// the corresponding series, but the iterator will represent the state of the +// series prior the modification. +type SeriesIterator interface { + // Gets the two values that are immediately adjacent to a given time. 
In + // case a value exist at precisely the given time, only that single + // value is returned. Only the first or last value is returned (as a + // single value), if the given time is before or after the first or last + // value, respectively. + GetValueAtTime(clientmodel.Timestamp) metric.Values + // Gets the boundary values of an interval: the first and last value + // within a given interval. + GetBoundaryValues(metric.Interval) metric.Values + // Gets all values contained within a given interval. + GetRangeValues(metric.Interval) metric.Values +} + +// A Preloader preloads series data necessary for a query into memory and pins +// them until released via Close(). Its methods are generally not +// goroutine-safe. +type Preloader interface { + PreloadRange( + fp clientmodel.Fingerprint, + from clientmodel.Timestamp, through clientmodel.Timestamp, + stalenessDelta time.Duration, + ) error + // Close unpins any previously requested series data from memory. + Close() +} diff --git a/storage/local/locker.go b/storage/local/locker.go new file mode 100644 index 0000000000..c0ac876543 --- /dev/null +++ b/storage/local/locker.go @@ -0,0 +1,43 @@ +package local + +import ( + "sync" + + clientmodel "github.com/prometheus/client_golang/model" +) + +// fingerprintLocker allows locking individual fingerprints. To limit the number +// of mutexes needed for that, only a fixed number of mutexes are +// allocated. Fingerprints to be locked are assigned to those pre-allocated +// mutexes by their value. (Note that fingerprints are calculated by a hash +// function, so that an approximately equal distribution over the mutexes is +// expected, even without additional hashing of the fingerprint value.) +// Collisions are not detected. If two fingerprints get assigned to the same +// mutex, only one of them can be locked at the same time. 
As long as the number +// of pre-allocated mutexes is much larger than the number of goroutines +// requiring a fingerprint lock concurrently, the loss in efficiency is +// small. However, a goroutine must never lock more than one fingerprint at the +// same time. (In that case a collision would try to acquire the same mutex +// twice). +type fingerprintLocker struct { + fpMtxs []sync.Mutex + numFpMtxs uint +} + +// newFingerprintLocker returns a new fingerprintLocker ready for use. +func newFingerprintLocker(preallocatedMutexes int) *fingerprintLocker { + return &fingerprintLocker{ + make([]sync.Mutex, preallocatedMutexes), + uint(preallocatedMutexes), + } +} + +// Lock locks the given fingerprint. +func (l *fingerprintLocker) Lock(fp clientmodel.Fingerprint) { + l.fpMtxs[uint(fp)%l.numFpMtxs].Lock() +} + +// Unlock unlocks the given fingerprint. +func (l *fingerprintLocker) Unlock(fp clientmodel.Fingerprint) { + l.fpMtxs[uint(fp)%l.numFpMtxs].Unlock() +} diff --git a/storage/local/locker_test.go b/storage/local/locker_test.go new file mode 100644 index 0000000000..0a025f2f6a --- /dev/null +++ b/storage/local/locker_test.go @@ -0,0 +1,45 @@ +package local + +import ( + "sync" + "testing" + + clientmodel "github.com/prometheus/client_golang/model" +) + +func BenchmarkFingerprintLockerParallel(b *testing.B) { + numGoroutines := 10 + numFingerprints := 10 + numLockOps := b.N + locker := newFingerprintLocker(100) + + wg := sync.WaitGroup{} + b.ResetTimer() + for i := 0; i < numGoroutines; i++ { + wg.Add(1) + go func(i int) { + for j := 0; j < numLockOps; j++ { + fp1 := clientmodel.Fingerprint(j % numFingerprints) + fp2 := clientmodel.Fingerprint(j%numFingerprints + numFingerprints) + locker.Lock(fp1) + locker.Lock(fp2) + locker.Unlock(fp2) + locker.Unlock(fp1) + } + wg.Done() + }(i) + } + wg.Wait() +} + +func BenchmarkFingerprintLockerSerial(b *testing.B) { + numFingerprints := 10 + locker := newFingerprintLocker(100) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + 
fp := clientmodel.Fingerprint(i % numFingerprints) + locker.Lock(fp) + locker.Unlock(fp) + } +} diff --git a/storage/local/persistence.go b/storage/local/persistence.go new file mode 100644 index 0000000000..513bf726bd --- /dev/null +++ b/storage/local/persistence.go @@ -0,0 +1,1483 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "bufio" + "encoding/binary" + "fmt" + "io" + "math" + "os" + "path" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" + + clientmodel "github.com/prometheus/client_golang/model" + + "github.com/prometheus/prometheus/storage/local/codable" + "github.com/prometheus/prometheus/storage/local/index" + "github.com/prometheus/prometheus/storage/metric" +) + +const ( + seriesFileSuffix = ".db" + seriesTempFileSuffix = ".db.tmp" + seriesDirNameLen = 2 // How many bytes of the fingerprint in dir name. + + headsFileName = "heads.db" + headsTempFileName = "heads.db.tmp" + headsFormatVersion = 1 + headsMagicString = "PrometheusHeads" + + dirtyFileName = "DIRTY" + + fileBufSize = 1 << 16 // 64kiB. + + chunkHeaderLen = 17 + chunkHeaderTypeOffset = 0 + chunkHeaderFirstTimeOffset = 1 + chunkHeaderLastTimeOffset = 9 + + indexingMaxBatchSize = 1024 * 1024 + indexingBatchTimeout = 500 * time.Millisecond // Commit batch when idle for that long. 
+ indexingQueueCapacity = 1024 * 16 +) + +var fpLen = len(clientmodel.Fingerprint(0).String()) // Length of a fingerprint as string. + +const ( + flagHeadChunkPersisted byte = 1 << iota + // Add more flags here like: + // flagFoo + // flagBar +) + +type indexingOpType byte + +const ( + add indexingOpType = iota + remove +) + +type indexingOp struct { + fingerprint clientmodel.Fingerprint + metric clientmodel.Metric + opType indexingOpType +} + +// A Persistence is used by a Storage implementation to store samples +// persistently across restarts. The methods are only goroutine-safe if +// explicitly marked as such below. The chunk-related methods PersistChunk, +// DropChunks, LoadChunks, and LoadChunkDescs can be called concurrently with +// each other if each call refers to a different fingerprint. +type persistence struct { + basePath string + chunkLen int + + archivedFingerprintToMetrics *index.FingerprintMetricIndex + archivedFingerprintToTimeRange *index.FingerprintTimeRangeIndex + labelPairToFingerprints *index.LabelPairFingerprintIndex + labelNameToLabelValues *index.LabelNameLabelValuesIndex + + indexingQueue chan indexingOp + indexingStopped chan struct{} + indexingFlush chan chan int + + indexingQueueLength prometheus.Gauge + indexingQueueCapacity prometheus.Metric + indexingBatchSizes prometheus.Summary + indexingBatchLatency prometheus.Summary + checkpointDuration prometheus.Gauge + + dirtyMtx sync.Mutex // Protects dirty and becameDirty. + dirty bool // true if persistence was started in dirty state. + becameDirty bool // true if an inconsistency came up during runtime. +} + +// newPersistence returns a newly allocated persistence backed by local disk storage, ready to use. 
+func newPersistence(basePath string, chunkLen int, dirty bool) (*persistence, error) { + if err := os.MkdirAll(basePath, 0700); err != nil { + return nil, err + } + archivedFingerprintToMetrics, err := index.NewFingerprintMetricIndex(basePath) + if err != nil { + return nil, err + } + archivedFingerprintToTimeRange, err := index.NewFingerprintTimeRangeIndex(basePath) + if err != nil { + return nil, err + } + + p := &persistence{ + basePath: basePath, + chunkLen: chunkLen, + + archivedFingerprintToMetrics: archivedFingerprintToMetrics, + archivedFingerprintToTimeRange: archivedFingerprintToTimeRange, + + indexingQueue: make(chan indexingOp, indexingQueueCapacity), + indexingStopped: make(chan struct{}), + indexingFlush: make(chan chan int), + + indexingQueueLength: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "indexing_queue_length", + Help: "The number of metrics waiting to be indexed.", + }), + indexingQueueCapacity: prometheus.MustNewConstMetric( + prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "indexing_queue_capacity"), + "The capacity of the indexing queue.", + nil, nil, + ), + prometheus.GaugeValue, + float64(indexingQueueCapacity), + ), + indexingBatchSizes: prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "indexing_batch_sizes", + Help: "Quantiles for indexing batch sizes (number of metrics per batch).", + }, + ), + indexingBatchLatency: prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "indexing_batch_latency_milliseconds", + Help: "Quantiles for batch indexing latencies in milliseconds.", + }, + ), + checkpointDuration: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "checkpoint_duration_milliseconds", + Help: "The duration (in milliseconds) it took to checkpoint in-memory metrics and head chunks.", + }), + dirty: dirty, + } 
+ if dirtyFile, err := os.OpenFile(p.dirtyFileName(), os.O_CREATE|os.O_EXCL, 0666); err == nil { + dirtyFile.Close() + } else if os.IsExist(err) { + p.dirty = true + } else { + return nil, err + } + + if p.dirty { + // Blow away the label indexes. We'll rebuild them later. + if err := index.DeleteLabelPairFingerprintIndex(basePath); err != nil { + return nil, err + } + if err := index.DeleteLabelNameLabelValuesIndex(basePath); err != nil { + return nil, err + } + } + labelPairToFingerprints, err := index.NewLabelPairFingerprintIndex(basePath) + if err != nil { + return nil, err + } + labelNameToLabelValues, err := index.NewLabelNameLabelValuesIndex(basePath) + if err != nil { + return nil, err + } + p.labelPairToFingerprints = labelPairToFingerprints + p.labelNameToLabelValues = labelNameToLabelValues + + go p.processIndexingQueue() + return p, nil +} + +// Describe implements prometheus.Collector. +func (p *persistence) Describe(ch chan<- *prometheus.Desc) { + ch <- p.indexingQueueLength.Desc() + ch <- p.indexingQueueCapacity.Desc() + p.indexingBatchSizes.Describe(ch) + p.indexingBatchLatency.Describe(ch) + ch <- p.checkpointDuration.Desc() +} + +// Collect implements prometheus.Collector. +func (p *persistence) Collect(ch chan<- prometheus.Metric) { + p.indexingQueueLength.Set(float64(len(p.indexingQueue))) + + ch <- p.indexingQueueLength + ch <- p.indexingQueueCapacity + p.indexingBatchSizes.Collect(ch) + p.indexingBatchLatency.Collect(ch) + ch <- p.checkpointDuration +} + +// dirtyFileName returns the name of the (empty) file used to mark the +// persistency layer as dirty. +func (p *persistence) dirtyFileName() string { + return path.Join(p.basePath, dirtyFileName) +} + +// isDirty returns the dirty flag in a goroutine-safe way. +func (p *persistence) isDirty() bool { + p.dirtyMtx.Lock() + defer p.dirtyMtx.Unlock() + return p.dirty +} + +// setDirty sets the dirty flag in a goroutine-safe way. 
Once the dirty flag was +// set to true with this method, it cannot be set to false again. (If we became +// dirty during our runtime, there is no way back. If we were dirty from the +// start, a clean-up might make us clean again.) +func (p *persistence) setDirty(dirty bool) { + p.dirtyMtx.Lock() + defer p.dirtyMtx.Unlock() + if p.becameDirty { + return + } + p.dirty = dirty + if dirty { + p.becameDirty = true + glog.Error("The storage is now inconsistent. Restart Prometheus ASAP to initiate recovery.") + } +} + +// recoverFromCrash is called by loadSeriesMapAndHeads if the persistence +// appears to be dirty after the loading (either because the loading resulted in +// an error or because the persistence was dirty from the start). Not goroutine +// safe. Only call before anything else is running (except index processing +// queue as started by newPersistence). +func (p *persistence) recoverFromCrash(fingerprintToSeries map[clientmodel.Fingerprint]*memorySeries) error { + glog.Warning("Starting crash recovery. Prometheus is inoperational until complete.") + + fpsSeen := map[clientmodel.Fingerprint]struct{}{} + count := 0 + seriesDirNameFmt := fmt.Sprintf("%%0%dx", seriesDirNameLen) + + glog.Info("Scanning files.") + for i := 0; i < 1<<(seriesDirNameLen*4); i++ { + dirname := path.Join(p.basePath, fmt.Sprintf(seriesDirNameFmt, i)) + dir, err := os.Open(dirname) + if os.IsNotExist(err) { + continue + } + if err != nil { + return err + } + defer dir.Close() + for fis := []os.FileInfo{}; err != io.EOF; fis, err = dir.Readdir(1024) { + if err != nil { + return err + } + for _, fi := range fis { + fp, ok := p.sanitizeSeries(dirname, fi, fingerprintToSeries) + if ok { + fpsSeen[fp] = struct{}{} + } + count++ + if count%10000 == 0 { + glog.Infof("%d files scanned.", count) + } + } + } + } + glog.Infof("File scan complete.
%d series found.", len(fpsSeen)) + + glog.Info("Checking for series without series file.") + for fp, s := range fingerprintToSeries { + if _, seen := fpsSeen[fp]; !seen { + // fp exists in fingerprintToSeries, but has no representation on disk. + if s.headChunkPersisted { + // Oops, head chunk was persisted, but nothing on disk. + // Thus, we lost that series completely. Clean up the remnants. + delete(fingerprintToSeries, fp) + if err := p.dropArchivedMetric(fp); err != nil { + // Dropping the archived metric didn't work, so try + // to unindex it, just in case it's in the indexes. + p.unindexMetric(fp, s.metric) + } + glog.Warningf("Lost series detected: fingerprint %v, metric %v.", fp, s.metric) + continue + } + // If we are here, the only chunk we have is the head chunk. + // Adjust things accordingly. + if len(s.chunkDescs) > 1 || s.chunkDescsOffset != 0 { + minLostChunks := len(s.chunkDescs) + s.chunkDescsOffset - 1 + if minLostChunks <= 0 { + glog.Warningf( + "Possible loss of chunks for fingerprint %v, metric %v.", + fp, s.metric, + ) + } else { + glog.Warningf( + "Lost at least %d chunks for fingerprint %v, metric %v.", + minLostChunks, fp, s.metric, + ) + } + s.chunkDescs = s.chunkDescs[len(s.chunkDescs)-1:] + s.chunkDescsOffset = 0 + } + fpsSeen[fp] = struct{}{} // Add so that fpsSeen is complete. + } + } + glog.Info("Check for series without series file complete.") + + if err := p.cleanUpArchiveIndexes(fingerprintToSeries, fpsSeen); err != nil { + return err + } + if err := p.rebuildLabelIndexes(fingerprintToSeries); err != nil { + return err + } + + p.setDirty(false) + glog.Warning("Crash recovery complete.") + return nil +} + +// TODO: Document. 
+func (p *persistence) sanitizeSeries(dirname string, fi os.FileInfo, fingerprintToSeries map[clientmodel.Fingerprint]*memorySeries) (clientmodel.Fingerprint, bool) { + filename := path.Join(dirname, fi.Name()) + purge := func() { + glog.Warningf("Deleting lost series file %s.", filename) // TODO: Move to lost+found directory? + os.Remove(filename) + } + + var fp clientmodel.Fingerprint + if len(fi.Name()) != fpLen-seriesDirNameLen+len(seriesFileSuffix) || + !strings.HasSuffix(fi.Name(), seriesFileSuffix) { + glog.Warningf("Unexpected series file name %s.", filename) + purge() + return fp, false + } + fp.LoadFromString(path.Base(dirname) + fi.Name()[:fpLen-seriesDirNameLen]) // TODO: Panics if that doesn't parse as hex. + + bytesToTrim := fi.Size() % int64(p.chunkLen+chunkHeaderLen) + chunksInFile := int(fi.Size()) / (p.chunkLen + chunkHeaderLen) + if bytesToTrim != 0 { + glog.Warningf( + "Truncating file %s to exactly %d chunks, trimming %d extraneous bytes.", + filename, chunksInFile, bytesToTrim, + ) + f, err := os.OpenFile(filename, os.O_WRONLY, 0640) + if err != nil { + glog.Errorf("Could not open file %s: %s", filename, err) + purge() + return fp, false + } + if err := f.Truncate(fi.Size() - bytesToTrim); err != nil { + glog.Errorf("Failed to truncate file %s: %s", filename, err) + purge() + return fp, false + } + } + if chunksInFile == 0 { + glog.Warningf("No chunks left in file %s.", filename) + purge() + return fp, false + } + + s, ok := fingerprintToSeries[fp] + if ok { // This series is supposed to not be archived. + if s == nil { + panic("fingerprint mapped to nil pointer") + } + if bytesToTrim == 0 && s.chunkDescsOffset != -1 && + ((s.headChunkPersisted && chunksInFile == s.chunkDescsOffset+len(s.chunkDescs)) || + (!s.headChunkPersisted && chunksInFile == s.chunkDescsOffset+len(s.chunkDescs)-1)) { + // Everything is consistent. We are good. + return fp, true + } + // If we are here, something's fishy. 
+ if s.headChunkPersisted { + // This is the easy case as we don't have a head chunk + // in heads.db. Treat this series as a freshly + // unarchived one. No chunks or chunkDescs in memory, no + // current head chunk. + glog.Warningf( + "Treating recovered metric %v, fingerprint %v, as freshly unarchived, with %d chunks in series file.", + s.metric, fp, chunksInFile, + ) + s.chunkDescs = nil + s.chunkDescsOffset = -1 + return fp, true + } + // This is the tricky one: We have a head chunk from heads.db, + // but the very same head chunk might already be in the series + // file. Strategy: Check the first time of both. If it is the + // same or newer, assume the latest chunk in the series file + // is the most recent head chunk. If not, keep the head chunk + // we got from heads.db. + // First, assume the head chunk is not yet persisted. + s.chunkDescs = s.chunkDescs[len(s.chunkDescs)-1:] + s.chunkDescsOffset = -1 + // Load all the chunk descs (which assumes we have none from the future). + cds, err := p.loadChunkDescs(fp, clientmodel.Now()) + if err != nil { + glog.Errorf( + "Failed to load chunk descriptors for metric %v, fingerprint %v: %s", + s.metric, fp, err, + ) + purge() + return fp, false + } + if cds[len(cds)-1].firstTime().Before(s.head().firstTime()) { + s.chunkDescs = append(cds, s.chunkDescs...) + glog.Warningf( + "Recovered metric %v, fingerprint %v: recovered %d chunks from series file, recovered head chunk from checkpoint.", + s.metric, fp, chunksInFile, + ) + } else { + glog.Warningf( + "Recovered metric %v, fingerprint %v: head chunk found among the %d recovered chunks in series file.", + s.metric, fp, chunksInFile, + ) + s.chunkDescs = cds + s.headChunkPersisted = true + } + s.chunkDescsOffset = 0 + return fp, true + } + // This series is supposed to be archived. 
+ metric, err := p.getArchivedMetric(fp) + if err != nil { + glog.Errorf( + "Fingerprint %v assumed archived but couldn't be looked up in archived index: %s", + fp, err, + ) + purge() + return fp, false + } + if metric == nil { + glog.Warningf( + "Fingerprint %v assumed archived but couldn't be found in archived index.", + fp, + ) + purge() + return fp, false + } + // This series looks like a properly archived one. + return fp, true +} + +func (p *persistence) cleanUpArchiveIndexes( + fpToSeries map[clientmodel.Fingerprint]*memorySeries, + fpsSeen map[clientmodel.Fingerprint]struct{}, +) error { + glog.Info("Cleaning up archive indexes.") + var fp codable.Fingerprint + var m codable.Metric + count := 0 + if err := p.archivedFingerprintToMetrics.ForEach(func(kv index.KeyValueAccessor) error { + count++ + if count%10000 == 0 { + glog.Infof("%d archived metrics checked.", count) + } + if err := kv.Key(&fp); err != nil { + return err + } + _, fpSeen := fpsSeen[clientmodel.Fingerprint(fp)] + inMemory := false + if fpSeen { + _, inMemory = fpToSeries[clientmodel.Fingerprint(fp)] + } + if !fpSeen || inMemory { + if inMemory { + glog.Warningf("Archive clean-up: Fingerprint %v is not archived. Purging from archive indexes.", clientmodel.Fingerprint(fp)) + } + if !fpSeen { + glog.Warningf("Archive clean-up: Fingerprint %v is unknown. Purging from archive indexes.", clientmodel.Fingerprint(fp)) + } + if err := p.archivedFingerprintToMetrics.Delete(fp); err != nil { + return err + } + // Delete from timerange index, too. + p.archivedFingerprintToTimeRange.Delete(fp) + // TODO: Ignoring errors here as fp might not be in + // timerange index (which is good) but which would + // return an error. Delete signature could be changed + // like the Get signature to detect a real error. + return nil + } + // fp is legitimately archived. Make sure it is in timerange index, too. 
+ has, err := p.archivedFingerprintToTimeRange.Has(fp) + if err != nil { + return err + } + if has { + return nil // All good. + } + glog.Warningf("Archive clean-up: Fingerprint %v is not in time-range index. Unarchiving it for recovery.", fp) + if err := p.archivedFingerprintToMetrics.Delete(fp); err != nil { + return err + } + if err := kv.Value(&m); err != nil { + return err + } + series := newMemorySeries(clientmodel.Metric(m), false, math.MinInt64) + cds, err := p.loadChunkDescs(clientmodel.Fingerprint(fp), clientmodel.Now()) + if err != nil { + return err + } + series.chunkDescs = cds + series.chunkDescsOffset = 0 + fpToSeries[clientmodel.Fingerprint(fp)] = series + return nil + }); err != nil { + return err + } + count = 0 + if err := p.archivedFingerprintToTimeRange.ForEach(func(kv index.KeyValueAccessor) error { + count++ + if count%10000 == 0 { + glog.Infof("%d archived time ranges checked.", count) + } + if err := kv.Key(&fp); err != nil { + return err + } + has, err := p.archivedFingerprintToMetrics.Has(fp) + if err != nil { + return err + } + if has { + return nil // All good.
+ } + glog.Warningf("Archive clean-up: Purging unknown fingerprint %v in time-range index.", fp) + if err := p.archivedFingerprintToTimeRange.Delete(fp); err != nil { + return err + } + return nil + }); err != nil { + return err + } + glog.Info("Clean-up of archive indexes complete.") + return nil +} + +func (p *persistence) rebuildLabelIndexes( + fpToSeries map[clientmodel.Fingerprint]*memorySeries, +) error { + count := 0 + glog.Info("Rebuilding label indexes.") + glog.Info("Indexing metrics in memory.") + for fp, s := range fpToSeries { + p.indexMetric(fp, s.metric) + count++ + if count%10000 == 0 { + glog.Infof("%d metrics queued for indexing.", count) + } + } + glog.Info("Indexing archived metrics.") + var fp codable.Fingerprint + var m codable.Metric + if err := p.archivedFingerprintToMetrics.ForEach(func(kv index.KeyValueAccessor) error { + if err := kv.Key(&fp); err != nil { + return err + } + if err := kv.Value(&m); err != nil { + return err + } + p.indexMetric(clientmodel.Fingerprint(fp), clientmodel.Metric(m)) + count++ + if count%10000 == 0 { + glog.Infof("%d metrics queued for indexing.", count) + } + return nil + }); err != nil { + return err + } + glog.Info("All requests for rebuilding the label indexes queued. (Actual processing may lag behind.)") + return nil +} + +// getFingerprintsForLabelPair returns the fingerprints for the given label +// pair. This method is goroutine-safe but take into account that metrics queued +// for indexing with IndexMetric might not have made it into the index +// yet. (Same applies correspondingly to UnindexMetric.) +func (p *persistence) getFingerprintsForLabelPair(lp metric.LabelPair) (clientmodel.Fingerprints, error) { + fps, _, err := p.labelPairToFingerprints.Lookup(lp) + if err != nil { + return nil, err + } + return fps, nil +} + +// getLabelValuesForLabelName returns the label values for the given label +// name. 
This method is goroutine-safe but take into account that metrics queued +// for indexing with IndexMetric might not have made it into the index +// yet. (Same applies correspondingly to UnindexMetric.) +func (p *persistence) getLabelValuesForLabelName(ln clientmodel.LabelName) (clientmodel.LabelValues, error) { + lvs, _, err := p.labelNameToLabelValues.Lookup(ln) + if err != nil { + return nil, err + } + return lvs, nil +} + +// persistChunk persists a single chunk of a series. It is the caller's +// responsibility to not modify the chunk concurrently and to not persist or +// drop anything for the same fingerprint concurrently. It returns the +// (zero-based) index of the persisted chunk within the series file. In case of +// an error, the returned index is -1 (to avoid the misconception that the chunk +// was written at position 0). +func (p *persistence) persistChunk(fp clientmodel.Fingerprint, c chunk) (int, error) { + // 1. Open chunk file. + f, err := p.openChunkFileForWriting(fp) + if err != nil { + return -1, err + } + defer f.Close() + + b := bufio.NewWriterSize(f, chunkHeaderLen+p.chunkLen) + + // 2. Write the header (chunk type and first/last times). + err = writeChunkHeader(b, c) + if err != nil { + return -1, err + } + + // 3. Write chunk into file. + err = c.marshal(b) + if err != nil { + return -1, err + } + + // 4. Determine index within the file. + b.Flush() + offset, err := f.Seek(0, os.SEEK_CUR) + if err != nil { + return -1, err + } + index, err := p.chunkIndexForOffset(offset) + if err != nil { + return -1, err + } + + return index - 1, err +} + +// loadChunks loads a group of chunks of a timeseries by their index. The chunk +// with the earliest time will have index 0, the following ones will have +// incrementally larger indexes. The indexOffset denotes the offset to be added to +// each index in indexes. It is the caller's responsibility to not persist or +// drop anything for the same fingerprint concurrently. 
+func (p *persistence) loadChunks(fp clientmodel.Fingerprint, indexes []int, indexOffset int) ([]chunk, error) { + // TODO: we need to verify at some point that file length is a multiple of + // the chunk size. When is the best time to do this, and where to remember + // it? Right now, we only do it when loading chunkDescs. + f, err := p.openChunkFileForReading(fp) + if err != nil { + return nil, err + } + defer f.Close() + + chunks := make([]chunk, 0, len(indexes)) + typeBuf := make([]byte, 1) + for _, idx := range indexes { + _, err := f.Seek(p.offsetForChunkIndex(idx+indexOffset), os.SEEK_SET) + if err != nil { + return nil, err + } + + n, err := f.Read(typeBuf) + if err != nil { + return nil, err + } + if n != 1 { + panic("read returned != 1 bytes") + } + + _, err = f.Seek(chunkHeaderLen-1, os.SEEK_CUR) + if err != nil { + return nil, err + } + chunk := chunkForType(typeBuf[0]) + chunk.unmarshal(f) + chunks = append(chunks, chunk) + } + return chunks, nil +} + +// loadChunkDescs loads chunkDescs for a series up until a given time. It is +// the caller's responsibility to not persist or drop anything for the same +// fingerprint concurrently. +func (p *persistence) loadChunkDescs(fp clientmodel.Fingerprint, beforeTime clientmodel.Timestamp) ([]*chunkDesc, error) { + f, err := p.openChunkFileForReading(fp) + if os.IsNotExist(err) { + return nil, nil + } + if err != nil { + return nil, err + } + defer f.Close() + + fi, err := f.Stat() + if err != nil { + return nil, err + } + totalChunkLen := chunkHeaderLen + p.chunkLen + if fi.Size()%int64(totalChunkLen) != 0 { + // TODO: record number of encountered corrupt series files in a metric? + + // Truncate the file size to the nearest multiple of chunkLen. + truncateTo := fi.Size() - fi.Size()%int64(totalChunkLen) + glog.Infof("Bad series file size for %s: %d bytes (no multiple of %d). Truncating to %d bytes.", fp, fi.Size(), totalChunkLen, truncateTo) + // TODO: this doesn't work, as this is a read-only file handle. 
+ if err := f.Truncate(truncateTo); err != nil { + return nil, err + } + } + + numChunks := int(fi.Size()) / totalChunkLen + cds := make([]*chunkDesc, 0, numChunks) + for i := 0; i < numChunks; i++ { + _, err := f.Seek(p.offsetForChunkIndex(i)+chunkHeaderFirstTimeOffset, os.SEEK_SET) + if err != nil { + return nil, err + } + + chunkTimesBuf := make([]byte, 16) + _, err = io.ReadAtLeast(f, chunkTimesBuf, 16) + if err != nil { + return nil, err + } + cd := &chunkDesc{ + chunkFirstTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf)), + chunkLastTime: clientmodel.Timestamp(binary.LittleEndian.Uint64(chunkTimesBuf[8:])), + } + if !cd.chunkLastTime.Before(beforeTime) { + // From here on, we have chunkDescs in memory already. + break + } + cds = append(cds, cd) + } + chunkDescOps.WithLabelValues(load).Add(float64(len(cds))) + atomic.AddInt64(&numMemChunkDescs, int64(len(cds))) + return cds, nil +} + +// checkpointSeriesMapAndHeads persists the fingerprint to memory-series mapping +// and all open (non-full) head chunks. Do not call concurrently with +// loadSeriesMapAndHeads. 
+func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap, fpLocker *fingerprintLocker) (err error) { + glog.Info("Checkpointing in-memory metrics and head chunks...") + begin := time.Now() + f, err := os.OpenFile(p.headsTempFileName(), os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0640) + if err != nil { + return + } + + defer func() { + closeErr := f.Close() + if err != nil { + return + } + err = closeErr + if err != nil { + return + } + err = os.Rename(p.headsTempFileName(), p.headsFileName()) + duration := time.Since(begin) + p.checkpointDuration.Set(float64(duration) / float64(time.Millisecond)) + glog.Infof("Done checkpointing in-memory metrics and head chunks in %v.", duration) + }() + + w := bufio.NewWriterSize(f, fileBufSize) + + if _, err = w.WriteString(headsMagicString); err != nil { + return + } + var numberOfSeriesOffset int + if numberOfSeriesOffset, err = codable.EncodeVarint(w, headsFormatVersion); err != nil { + return + } + numberOfSeriesOffset += len(headsMagicString) + numberOfSeriesInHeader := uint64(fingerprintToSeries.length()) + // We have to write the number of series as uint64 because we might need + // to overwrite it later, and a varint might change byte width then. + if err = codable.EncodeUint64(w, numberOfSeriesInHeader); err != nil { + return + } + + iter := fingerprintToSeries.iter() + defer func() { + // Consume the iterator in any case to not leak goroutines. + for _ = range iter { + } + }() + + var realNumberOfSeries uint64 + for m := range iter { + func() { // Wrapped in function to use defer for unlocking the fp. + fpLocker.Lock(m.fp) + defer fpLocker.Unlock(m.fp) + + if len(m.series.chunkDescs) == 0 { + // This series was completely purged or archived in the meantime. Ignore. 
+ return + } + realNumberOfSeries++ + var seriesFlags byte + if m.series.headChunkPersisted { + seriesFlags |= flagHeadChunkPersisted + } + if err = w.WriteByte(seriesFlags); err != nil { + return + } + if err = codable.EncodeUint64(w, uint64(m.fp)); err != nil { + return + } + var buf []byte + buf, err = codable.Metric(m.series.metric).MarshalBinary() + if err != nil { + return + } + w.Write(buf) + if _, err = codable.EncodeVarint(w, int64(m.series.chunkDescsOffset)); err != nil { + return + } + if _, err = codable.EncodeVarint(w, int64(m.series.savedFirstTime)); err != nil { + return + } + if _, err = codable.EncodeVarint(w, int64(len(m.series.chunkDescs))); err != nil { + return + } + for i, chunkDesc := range m.series.chunkDescs { + if m.series.headChunkPersisted || i < len(m.series.chunkDescs)-1 { + if _, err = codable.EncodeVarint(w, int64(chunkDesc.firstTime())); err != nil { + return + } + if _, err = codable.EncodeVarint(w, int64(chunkDesc.lastTime())); err != nil { + return + } + } else { + // This is the non-persisted head chunk. Fully marshal it. + if err = w.WriteByte(chunkType(chunkDesc.chunk)); err != nil { + return + } + if err = chunkDesc.chunk.marshal(w); err != nil { + return + } + } + } + }() + if err != nil { + return + } + } + if err = w.Flush(); err != nil { + return + } + if realNumberOfSeries != numberOfSeriesInHeader { + // The number of series has changed in the meantime. + // Rewrite it in the header. + if _, err = f.Seek(int64(numberOfSeriesOffset), os.SEEK_SET); err != nil { + return + } + if err = codable.EncodeUint64(f, realNumberOfSeries); err != nil { + return + } + } + return +} + +// loadSeriesMapAndHeads loads the fingerprint to memory-series mapping and all +// open (non-full) head chunks. If recoverable corruption is detected, or if the +// dirty flag was set from the beginning, crash recovery is run, which might +// take a while. If an unrecoverable error is encountered, it is returned. 
Call +// this method during start-up while nothing else is running in storage +// land. This method is utterly goroutine-unsafe. +func (p *persistence) loadSeriesMapAndHeads() (sm *seriesMap, err error) { + var chunksTotal, chunkDescsTotal int64 + fingerprintToSeries := make(map[clientmodel.Fingerprint]*memorySeries) + sm = &seriesMap{m: fingerprintToSeries} + + defer func() { + if sm != nil && p.dirty { + glog.Warning("Persistence layer appears dirty.") + err = p.recoverFromCrash(fingerprintToSeries) + if err != nil { + sm = nil + } + } + if err == nil { + atomic.AddInt64(&numMemChunks, chunksTotal) + atomic.AddInt64(&numMemChunkDescs, chunkDescsTotal) + } + }() + + f, err := os.Open(p.headsFileName()) + if os.IsNotExist(err) { + return sm, nil + } + if err != nil { + glog.Warning("Could not open heads file:", err) + p.dirty = true + return + } + defer f.Close() + r := bufio.NewReaderSize(f, fileBufSize) + + buf := make([]byte, len(headsMagicString)) + if _, err := io.ReadFull(r, buf); err != nil { + glog.Warning("Could not read from heads file:", err) + p.dirty = true + return sm, nil + } + magic := string(buf) + if magic != headsMagicString { + glog.Warningf( + "unexpected magic string, want %q, got %q", + headsMagicString, magic, + ) + p.dirty = true + return + } + if version, err := binary.ReadVarint(r); version != headsFormatVersion || err != nil { + glog.Warningf("unknown heads format version, want %d", headsFormatVersion) + p.dirty = true + return sm, nil + } + numSeries, err := codable.DecodeUint64(r) + if err != nil { + glog.Warning("Could not decode number of series:", err) + p.dirty = true + return sm, nil + } + + for ; numSeries > 0; numSeries-- { + seriesFlags, err := r.ReadByte() + if err != nil { + glog.Warning("Could not read series flags:", err) + p.dirty = true + return sm, nil + } + headChunkPersisted := seriesFlags&flagHeadChunkPersisted != 0 + fp, err := codable.DecodeUint64(r) + if err != nil { + glog.Warning("Could not decode fingerprint:", 
err) + p.dirty = true + return sm, nil + } + var metric codable.Metric + if err := metric.UnmarshalFromReader(r); err != nil { + glog.Warning("Could not decode metric:", err) + p.dirty = true + return sm, nil + } + chunkDescsOffset, err := binary.ReadVarint(r) + if err != nil { + glog.Warning("Could not decode chunk descriptor offset:", err) + p.dirty = true + return sm, nil + } + savedFirstTime, err := binary.ReadVarint(r) + if err != nil { + glog.Warning("Could not decode saved first time:", err) + p.dirty = true + return sm, nil + } + numChunkDescs, err := binary.ReadVarint(r) + if err != nil { + glog.Warning("Could not decode number of chunk descriptors:", err) + p.dirty = true + return sm, nil + } + chunkDescs := make([]*chunkDesc, numChunkDescs) + chunkDescsTotal += numChunkDescs + + for i := int64(0); i < numChunkDescs; i++ { + if headChunkPersisted || i < numChunkDescs-1 { + firstTime, err := binary.ReadVarint(r) + if err != nil { + glog.Warning("Could not decode first time:", err) + p.dirty = true + return sm, nil + } + lastTime, err := binary.ReadVarint(r) + if err != nil { + glog.Warning("Could not decode last time:", err) + p.dirty = true + return sm, nil + } + chunkDescs[i] = &chunkDesc{ + chunkFirstTime: clientmodel.Timestamp(firstTime), + chunkLastTime: clientmodel.Timestamp(lastTime), + } + } else { + // Non-persisted head chunk. 
+ chunksTotal++ + chunkType, err := r.ReadByte() + if err != nil { + glog.Warning("Could not decode chunk type:", err) + p.dirty = true + return sm, nil + } + chunk := chunkForType(chunkType) + if err := chunk.unmarshal(r); err != nil { + glog.Warning("Could not decode chunk type:", err) + p.dirty = true + return sm, nil + } + chunkDescs[i] = newChunkDesc(chunk) + } + } + + fingerprintToSeries[clientmodel.Fingerprint(fp)] = &memorySeries{ + metric: clientmodel.Metric(metric), + chunkDescs: chunkDescs, + chunkDescsOffset: int(chunkDescsOffset), + savedFirstTime: clientmodel.Timestamp(savedFirstTime), + headChunkPersisted: headChunkPersisted, + } + } + return sm, nil +} + +// dropChunks deletes all chunks from a series whose last sample time is before +// beforeTime. It returns the timestamp of the first sample in the oldest chunk +// _not_ dropped, the number of deleted chunks, and true if all chunks of the +// series have been deleted (in which case the returned timestamp will be 0 and +// must be ignored). It is the caller's responsibility to make sure nothing is +// persisted or loaded for the same fingerprint concurrently. +func (p *persistence) dropChunks(fp clientmodel.Fingerprint, beforeTime clientmodel.Timestamp) ( + firstTimeNotDropped clientmodel.Timestamp, + numDropped int, + allDropped bool, + err error, +) { + defer func() { + if err != nil { + p.setDirty(true) + } + }() + f, err := p.openChunkFileForReading(fp) + if os.IsNotExist(err) { + return 0, 0, true, nil + } + if err != nil { + return 0, 0, false, err + } + defer f.Close() + + // Find the first chunk that should be kept. + var i int + var firstTime clientmodel.Timestamp + for ; ; i++ { + _, err := f.Seek(p.offsetForChunkIndex(i)+chunkHeaderFirstTimeOffset, os.SEEK_SET) + if err != nil { + return 0, 0, false, err + } + timeBuf := make([]byte, 16) + _, err = io.ReadAtLeast(f, timeBuf, 16) + if err == io.EOF { + // We ran into the end of the file without finding any chunks that should + // be kept. 
Remove the whole file. + chunkOps.WithLabelValues(purge).Add(float64(i)) + if err := os.Remove(f.Name()); err != nil { + return 0, 0, true, err + } + return 0, i, true, nil + } + if err != nil { + return 0, 0, false, err + } + lastTime := clientmodel.Timestamp(binary.LittleEndian.Uint64(timeBuf[8:])) + if !lastTime.Before(beforeTime) { + firstTime = clientmodel.Timestamp(binary.LittleEndian.Uint64(timeBuf)) + chunkOps.WithLabelValues(purge).Add(float64(i)) + break + } + } + + // We've found the first chunk that should be kept. Seek backwards to the + // beginning of its header and start copying everything from there into a new + // file. + _, err = f.Seek(-(chunkHeaderFirstTimeOffset + 16), os.SEEK_CUR) + if err != nil { + return 0, 0, false, err + } + + temp, err := os.OpenFile(p.tempFileNameForFingerprint(fp), os.O_WRONLY|os.O_CREATE, 0640) + if err != nil { + return 0, 0, false, err + } + defer temp.Close() + + if _, err := io.Copy(temp, f); err != nil { + return 0, 0, false, err + } + + if err := os.Rename(p.tempFileNameForFingerprint(fp), p.fileNameForFingerprint(fp)); err != nil { + return 0, 0, false, err + } + return firstTime, i, false, nil +} + +// indexMetric queues the given metric for addition to the indexes needed by +// getFingerprintsForLabelPair, getLabelValuesForLabelName, and +// getFingerprintsModifiedBefore. If the queue is full, this method blocks +// until the metric can be queued. This method is goroutine-safe. +func (p *persistence) indexMetric(fp clientmodel.Fingerprint, m clientmodel.Metric) { + p.indexingQueue <- indexingOp{fp, m, add} +} + +// unindexMetric queues references to the given metric for removal from the +// indexes used for getFingerprintsForLabelPair, getLabelValuesForLabelName, and +// getFingerprintsModifiedBefore. The index of fingerprints to archived metrics +// is not affected by this removal. (In fact, never call this method for an +// archived metric. To drop an archived metric, call dropArchivedFingerprint.) 
+// If the queue is full, this method blocks until the metric can be queued. This +// method is goroutine-safe. +func (p *persistence) unindexMetric(fp clientmodel.Fingerprint, m clientmodel.Metric) { + p.indexingQueue <- indexingOp{fp, m, remove} +} + +// waitForIndexing waits until all items in the indexing queue are processed. If +// queue processing is currently on hold (to gather more ops for batching), this +// method will trigger an immediate start of processing. This method is +// goroutine-safe. +func (p *persistence) waitForIndexing() { + wait := make(chan int) + for { + p.indexingFlush <- wait + if <-wait == 0 { + break + } + } +} + +// archiveMetric persists the mapping of the given fingerprint to the given +// metric, together with the first and last timestamp of the series belonging to +// the metric. The caller must have locked the fingerprint. +func (p *persistence) archiveMetric( + fp clientmodel.Fingerprint, m clientmodel.Metric, first, last clientmodel.Timestamp, +) error { + if err := p.archivedFingerprintToMetrics.Put(codable.Fingerprint(fp), codable.Metric(m)); err != nil { + p.setDirty(true) + return err + } + if err := p.archivedFingerprintToTimeRange.Put(codable.Fingerprint(fp), codable.TimeRange{First: first, Last: last}); err != nil { + p.setDirty(true) + return err + } + return nil +} + +// hasArchivedMetric returns whether the archived metric for the given +// fingerprint exists and if yes, what the first and last timestamp in the +// corresponding series is. This method is goroutine-safe. +func (p *persistence) hasArchivedMetric(fp clientmodel.Fingerprint) ( + hasMetric bool, firstTime, lastTime clientmodel.Timestamp, err error, +) { + firstTime, lastTime, hasMetric, err = p.archivedFingerprintToTimeRange.Lookup(fp) + return +} + +// updateArchivedTimeRange updates an archived time range. 
The caller must make +// sure that the fingerprint is currently archived (the time range will +// otherwise be added without the corresponding metric in the archive). +func (p *persistence) updateArchivedTimeRange( + fp clientmodel.Fingerprint, first, last clientmodel.Timestamp, +) error { + return p.archivedFingerprintToTimeRange.Put(codable.Fingerprint(fp), codable.TimeRange{First: first, Last: last}) +} + +// getFingerprintsModifiedBefore returns the fingerprints of archived timeseries +// that have live samples before the provided timestamp. This method is +// goroutine-safe. +func (p *persistence) getFingerprintsModifiedBefore(beforeTime clientmodel.Timestamp) ([]clientmodel.Fingerprint, error) { + var fp codable.Fingerprint + var tr codable.TimeRange + fps := []clientmodel.Fingerprint{} + p.archivedFingerprintToTimeRange.ForEach(func(kv index.KeyValueAccessor) error { + if err := kv.Value(&tr); err != nil { + return err + } + if tr.First.Before(beforeTime) { + if err := kv.Key(&fp); err != nil { + return err + } + fps = append(fps, clientmodel.Fingerprint(fp)) + } + return nil + }) + return fps, nil +} + +// getArchivedMetric retrieves the archived metric with the given +// fingerprint. This method is goroutine-safe. +func (p *persistence) getArchivedMetric(fp clientmodel.Fingerprint) (clientmodel.Metric, error) { + metric, _, err := p.archivedFingerprintToMetrics.Lookup(fp) + return metric, err +} + +// dropArchivedMetric deletes an archived fingerprint and its corresponding +// metric entirely. It also queues the metric for un-indexing (no need to call +// unindexMetric for the deleted metric.) The caller must have locked the +// fingerprint. 
+func (p *persistence) dropArchivedMetric(fp clientmodel.Fingerprint) (err error) { + defer func() { + if err != nil { + p.setDirty(true) + } + }() + + metric, err := p.getArchivedMetric(fp) + if err != nil || metric == nil { + return err + } + if err := p.archivedFingerprintToMetrics.Delete(codable.Fingerprint(fp)); err != nil { + return err + } + if err := p.archivedFingerprintToTimeRange.Delete(codable.Fingerprint(fp)); err != nil { + return err + } + p.unindexMetric(fp, metric) + return nil +} + +// unarchiveMetric deletes an archived fingerprint and its metric, but (in +// contrast to dropArchivedMetric) does not un-index the metric. If a metric +// was actually deleted, the method returns true and the first time of the +// deleted metric. The caller must have locked the fingerprint. +func (p *persistence) unarchiveMetric(fp clientmodel.Fingerprint) ( + deletedAnything bool, + firstDeletedTime clientmodel.Timestamp, + err error, +) { + defer func() { + if err != nil { + p.setDirty(true) + } + }() + + firstTime, _, has, err := p.archivedFingerprintToTimeRange.Lookup(fp) + if err != nil || !has { + return false, firstTime, err + } + if err := p.archivedFingerprintToMetrics.Delete(codable.Fingerprint(fp)); err != nil { + return false, firstTime, err + } + if err := p.archivedFingerprintToTimeRange.Delete(codable.Fingerprint(fp)); err != nil { + return false, firstTime, err + } + return true, firstTime, nil +} + +// close flushes the indexing queue and other buffered data and releases any +// held resources. It also removes the dirty marker file if successful and if +// the persistence is currently not marked as dirty. 
+func (p *persistence) close() error { + close(p.indexingQueue) + <-p.indexingStopped + + var lastError error + if err := p.archivedFingerprintToMetrics.Close(); err != nil { + lastError = err + glog.Error("Error closing archivedFingerprintToMetric index DB: ", err) + } + if err := p.archivedFingerprintToTimeRange.Close(); err != nil { + lastError = err + glog.Error("Error closing archivedFingerprintToTimeRange index DB: ", err) + } + if err := p.labelPairToFingerprints.Close(); err != nil { + lastError = err + glog.Error("Error closing labelPairToFingerprints index DB: ", err) + } + if err := p.labelNameToLabelValues.Close(); err != nil { + lastError = err + glog.Error("Error closing labelNameToLabelValues index DB: ", err) + } + if lastError == nil && !p.isDirty() { + lastError = os.Remove(p.dirtyFileName()) + } + return lastError +} + +func (p *persistence) dirNameForFingerprint(fp clientmodel.Fingerprint) string { + fpStr := fp.String() + return path.Join(p.basePath, fpStr[0:seriesDirNameLen]) +} + +func (p *persistence) fileNameForFingerprint(fp clientmodel.Fingerprint) string { + fpStr := fp.String() + return path.Join(p.basePath, fpStr[0:seriesDirNameLen], fpStr[seriesDirNameLen:]+seriesFileSuffix) +} + +func (p *persistence) tempFileNameForFingerprint(fp clientmodel.Fingerprint) string { + fpStr := fp.String() + return path.Join(p.basePath, fpStr[0:seriesDirNameLen], fpStr[seriesDirNameLen:]+seriesTempFileSuffix) +} + +func (p *persistence) openChunkFileForWriting(fp clientmodel.Fingerprint) (*os.File, error) { + if err := os.MkdirAll(p.dirNameForFingerprint(fp), 0700); err != nil { + return nil, err + } + return os.OpenFile(p.fileNameForFingerprint(fp), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0640) +} + +func (p *persistence) openChunkFileForReading(fp clientmodel.Fingerprint) (*os.File, error) { + return os.Open(p.fileNameForFingerprint(fp)) +} + +func writeChunkHeader(w io.Writer, c chunk) error { + header := make([]byte, chunkHeaderLen) + 
header[chunkHeaderTypeOffset] = chunkType(c) + binary.LittleEndian.PutUint64(header[chunkHeaderFirstTimeOffset:], uint64(c.firstTime())) + binary.LittleEndian.PutUint64(header[chunkHeaderLastTimeOffset:], uint64(c.lastTime())) + _, err := w.Write(header) + return err +} + +func (p *persistence) offsetForChunkIndex(i int) int64 { + return int64(i * (chunkHeaderLen + p.chunkLen)) +} + +func (p *persistence) chunkIndexForOffset(offset int64) (int, error) { + if int(offset)%(chunkHeaderLen+p.chunkLen) != 0 { + return -1, fmt.Errorf( + "offset %d is not a multiple of on-disk chunk length %d", + offset, chunkHeaderLen+p.chunkLen, + ) + } + return int(offset) / (chunkHeaderLen + p.chunkLen), nil +} + +func (p *persistence) headsFileName() string { + return path.Join(p.basePath, headsFileName) +} + +func (p *persistence) headsTempFileName() string { + return path.Join(p.basePath, headsTempFileName) +} + +func (p *persistence) processIndexingQueue() { + batchSize := 0 + nameToValues := index.LabelNameLabelValuesMapping{} + pairToFPs := index.LabelPairFingerprintsMapping{} + batchTimeout := time.NewTimer(indexingBatchTimeout) + defer batchTimeout.Stop() + + commitBatch := func() { + p.indexingBatchSizes.Observe(float64(batchSize)) + defer func(begin time.Time) { + p.indexingBatchLatency.Observe(float64(time.Since(begin) / time.Millisecond)) + }(time.Now()) + + if err := p.labelPairToFingerprints.IndexBatch(pairToFPs); err != nil { + glog.Error("Error indexing label pair to fingerprints batch: ", err) + } + if err := p.labelNameToLabelValues.IndexBatch(nameToValues); err != nil { + glog.Error("Error indexing label name to label values batch: ", err) + } + batchSize = 0 + nameToValues = index.LabelNameLabelValuesMapping{} + pairToFPs = index.LabelPairFingerprintsMapping{} + batchTimeout.Reset(indexingBatchTimeout) + } + + var flush chan chan int +loop: + for { + // Only process flush requests if the queue is currently empty. 
+ if len(p.indexingQueue) == 0 { + flush = p.indexingFlush + } else { + flush = nil + } + select { + case <-batchTimeout.C: + // Only commit if we have something to commit _and_ + // nothing is waiting in the queue to be picked up. That + // prevents a death spiral if the LookupSet calls below + // are slow for some reason. + if batchSize > 0 && len(p.indexingQueue) == 0 { + commitBatch() + } else { + batchTimeout.Reset(indexingBatchTimeout) + } + case r := <-flush: + if batchSize > 0 { + commitBatch() + } + r <- len(p.indexingQueue) + case op, ok := <-p.indexingQueue: + if !ok { + if batchSize > 0 { + commitBatch() + } + break loop + } + + batchSize++ + for ln, lv := range op.metric { + lp := metric.LabelPair{Name: ln, Value: lv} + baseFPs, ok := pairToFPs[lp] + if !ok { + var err error + baseFPs, _, err = p.labelPairToFingerprints.LookupSet(lp) + if err != nil { + glog.Errorf("Error looking up label pair %v: %s", lp, err) + continue + } + pairToFPs[lp] = baseFPs + } + baseValues, ok := nameToValues[ln] + if !ok { + var err error + baseValues, _, err = p.labelNameToLabelValues.LookupSet(ln) + if err != nil { + glog.Errorf("Error looking up label name %v: %s", ln, err) + continue + } + nameToValues[ln] = baseValues + } + switch op.opType { + case add: + baseFPs[op.fingerprint] = struct{}{} + baseValues[lv] = struct{}{} + case remove: + delete(baseFPs, op.fingerprint) + if len(baseFPs) == 0 { + delete(baseValues, lv) + } + default: + panic("unknown op type") + } + } + + if batchSize >= indexingMaxBatchSize { + commitBatch() + } + } + } + close(p.indexingStopped) +} + +// exists returns true when the given file or directory exists. 
+func exists(path string) (bool, error) { + _, err := os.Stat(path) + if err == nil { + return true, nil + } + if os.IsNotExist(err) { + return false, nil + } + + return false, err +} diff --git a/storage/local/persistence_test.go b/storage/local/persistence_test.go new file mode 100644 index 0000000000..a395a63601 --- /dev/null +++ b/storage/local/persistence_test.go @@ -0,0 +1,623 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "reflect" + "testing" + + clientmodel "github.com/prometheus/client_golang/model" + + "github.com/prometheus/prometheus/storage/local/codable" + "github.com/prometheus/prometheus/storage/local/index" + "github.com/prometheus/prometheus/storage/metric" + "github.com/prometheus/prometheus/utility/test" +) + +var ( + m1 = clientmodel.Metric{"label": "value1"} + m2 = clientmodel.Metric{"label": "value2"} + m3 = clientmodel.Metric{"label": "value3"} +) + +func newTestPersistence(t *testing.T) (*persistence, test.Closer) { + dir := test.NewTemporaryDirectory("test_persistence", t) + p, err := newPersistence(dir.Path(), 1024, false) + if err != nil { + dir.Close() + t.Fatal(err) + } + return p, test.NewCallbackCloser(func() { + p.close() + dir.Close() + }) +} + +func buildTestChunks() map[clientmodel.Fingerprint][]chunk { + fps := clientmodel.Fingerprints{ + m1.Fingerprint(), + m2.Fingerprint(), + m3.Fingerprint(), + } + fpToChunks := map[clientmodel.Fingerprint][]chunk{} + 
+ for _, fp := range fps { + fpToChunks[fp] = make([]chunk, 0, 10) + for i := 0; i < 10; i++ { + fpToChunks[fp] = append(fpToChunks[fp], newDeltaEncodedChunk(d1, d1, true).add(&metric.SamplePair{ + Timestamp: clientmodel.Timestamp(i), + Value: clientmodel.SampleValue(fp), + })[0]) + } + } + return fpToChunks +} + +func chunksEqual(c1, c2 chunk) bool { + values2 := c2.values() + for v1 := range c1.values() { + v2 := <-values2 + if !v1.Equal(v2) { + return false + } + } + return true +} + +func TestPersistLoadDropChunks(t *testing.T) { + p, closer := newTestPersistence(t) + defer closer.Close() + + fpToChunks := buildTestChunks() + + for fp, chunks := range fpToChunks { + for i, c := range chunks { + index, err := p.persistChunk(fp, c) + if err != nil { + t.Fatal(err) + } + if i != index { + t.Errorf("Want chunk index %d, got %d.", i, index) + } + } + } + + for fp, expectedChunks := range fpToChunks { + indexes := make([]int, 0, len(expectedChunks)) + for i := range expectedChunks { + indexes = append(indexes, i) + } + actualChunks, err := p.loadChunks(fp, indexes, 0) + if err != nil { + t.Fatal(err) + } + for _, i := range indexes { + if !chunksEqual(expectedChunks[i], actualChunks[i]) { + t.Errorf("%d. Chunks not equal.", i) + } + } + // Load all chunk descs. + actualChunkDescs, err := p.loadChunkDescs(fp, 10) + if len(actualChunkDescs) != 10 { + t.Errorf("Got %d chunkDescs, want %d.", len(actualChunkDescs), 10) + } + for i, cd := range actualChunkDescs { + if cd.firstTime() != clientmodel.Timestamp(i) || cd.lastTime() != clientmodel.Timestamp(i) { + t.Errorf( + "Want ts=%v, got firstTime=%v, lastTime=%v.", + i, cd.firstTime(), cd.lastTime(), + ) + } + + } + // Load chunk descs partially. 
+ actualChunkDescs, err = p.loadChunkDescs(fp, 5) + if len(actualChunkDescs) != 5 { + t.Errorf("Got %d chunkDescs, want %d.", len(actualChunkDescs), 5) + } + for i, cd := range actualChunkDescs { + if cd.firstTime() != clientmodel.Timestamp(i) || cd.lastTime() != clientmodel.Timestamp(i) { + t.Errorf( + "Want ts=%v, got firstTime=%v, lastTime=%v.", + i, cd.firstTime(), cd.lastTime(), + ) + } + + } + } + // Drop half of the chunks. + for fp, expectedChunks := range fpToChunks { + firstTime, numDropped, allDropped, err := p.dropChunks(fp, 5) + if err != nil { + t.Fatal(err) + } + if firstTime != 5 { + t.Errorf("want first time 5, got %d", firstTime) + } + if numDropped != 5 { + t.Errorf("want 5 dropped chunks, got %v", numDropped) + } + if allDropped { + t.Error("all chunks dropped") + } + indexes := make([]int, 5) + for i := range indexes { + indexes[i] = i + } + actualChunks, err := p.loadChunks(fp, indexes, 0) + if err != nil { + t.Fatal(err) + } + for _, i := range indexes { + if !chunksEqual(expectedChunks[i+5], actualChunks[i]) { + t.Errorf("%d. Chunks not equal.", i) + } + } + } + // Drop all the chunks. 
+ for fp := range fpToChunks { + firstTime, numDropped, allDropped, err := p.dropChunks(fp, 100) + if firstTime != 0 { + t.Errorf("want first time 0, got %d", firstTime) + } + if err != nil { + t.Fatal(err) + } + if numDropped != 5 { + t.Errorf("want 5 dropped chunks, got %v", numDropped) + } + if !allDropped { + t.Error("not all chunks dropped") + } + } +} + +func TestCheckpointAndLoadSeriesMapAndHeads(t *testing.T) { + p, closer := newTestPersistence(t) + defer closer.Close() + + fpLocker := newFingerprintLocker(10) + sm := newSeriesMap() + s1 := newMemorySeries(m1, true, 0) + s2 := newMemorySeries(m2, false, 0) + s3 := newMemorySeries(m3, false, 0) + s1.add(m1.Fingerprint(), &metric.SamplePair{Timestamp: 1, Value: 3.14}) + s3.add(m1.Fingerprint(), &metric.SamplePair{Timestamp: 2, Value: 2.7}) + s3.headChunkPersisted = true + sm.put(m1.Fingerprint(), s1) + sm.put(m2.Fingerprint(), s2) + sm.put(m3.Fingerprint(), s3) + + if err := p.checkpointSeriesMapAndHeads(sm, fpLocker); err != nil { + t.Fatal(err) + } + + loadedSM, err := p.loadSeriesMapAndHeads() + if err != nil { + t.Fatal(err) + } + if loadedSM.length() != 2 { + t.Errorf("want 2 series in map, got %d", loadedSM.length()) + } + if loadedS1, ok := loadedSM.get(m1.Fingerprint()); ok { + if !reflect.DeepEqual(loadedS1.metric, m1) { + t.Errorf("want metric %v, got %v", m1, loadedS1.metric) + } + if !reflect.DeepEqual(loadedS1.head().chunk, s1.head().chunk) { + t.Error("head chunks differ") + } + if loadedS1.chunkDescsOffset != 0 { + t.Errorf("want chunkDescsOffset 0, got %d", loadedS1.chunkDescsOffset) + } + if loadedS1.headChunkPersisted { + t.Error("headChunkPersisted is true") + } + } else { + t.Errorf("couldn't find %v in loaded map", m1) + } + if loadedS3, ok := loadedSM.get(m3.Fingerprint()); ok { + if !reflect.DeepEqual(loadedS3.metric, m3) { + t.Errorf("want metric %v, got %v", m3, loadedS3.metric) + } + if loadedS3.head().chunk != nil { + t.Error("head chunk not evicted") + } + if 
loadedS3.chunkDescsOffset != -1 { + t.Errorf("want chunkDescsOffset -1, got %d", loadedS3.chunkDescsOffset) + } + if !loadedS3.headChunkPersisted { + t.Error("headChunkPersisted is false") + } + } else { + t.Errorf("couldn't find %v in loaded map", m1) + } +} + +func TestGetFingerprintsModifiedBefore(t *testing.T) { + p, closer := newTestPersistence(t) + defer closer.Close() + + m1 := clientmodel.Metric{"n1": "v1"} + m2 := clientmodel.Metric{"n2": "v2"} + m3 := clientmodel.Metric{"n1": "v2"} + p.archiveMetric(1, m1, 2, 4) + p.archiveMetric(2, m2, 1, 6) + p.archiveMetric(3, m3, 5, 5) + + expectedFPs := map[clientmodel.Timestamp][]clientmodel.Fingerprint{ + 0: {}, + 1: {}, + 2: {2}, + 3: {1, 2}, + 4: {1, 2}, + 5: {1, 2}, + 6: {1, 2, 3}, + } + + for ts, want := range expectedFPs { + got, err := p.getFingerprintsModifiedBefore(ts) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(want, got) { + t.Errorf("timestamp: %v, want FPs %v, got %v", ts, want, got) + } + } + + unarchived, firstTime, err := p.unarchiveMetric(1) + if err != nil { + t.Fatal(err) + } + if !unarchived { + t.Fatal("expected actual unarchival") + } + if firstTime != 2 { + t.Errorf("expected first time 2, got %v", firstTime) + } + unarchived, firstTime, err = p.unarchiveMetric(1) + if err != nil { + t.Fatal(err) + } + if unarchived { + t.Fatal("expected no unarchival") + } + + expectedFPs = map[clientmodel.Timestamp][]clientmodel.Fingerprint{ + 0: {}, + 1: {}, + 2: {2}, + 3: {2}, + 4: {2}, + 5: {2}, + 6: {2, 3}, + } + + for ts, want := range expectedFPs { + got, err := p.getFingerprintsModifiedBefore(ts) + if err != nil { + t.Fatal(err) + } + if !reflect.DeepEqual(want, got) { + t.Errorf("timestamp: %v, want FPs %v, got %v", ts, want, got) + } + } +} + +func TestDropArchivedMetric(t *testing.T) { + p, closer := newTestPersistence(t) + defer closer.Close() + + m1 := clientmodel.Metric{"n1": "v1"} + m2 := clientmodel.Metric{"n2": "v2"} + p.archiveMetric(1, m1, 2, 4) + p.archiveMetric(2, m2, 1, 
6) + p.indexMetric(1, m1) + p.indexMetric(2, m2) + p.waitForIndexing() + + outFPs, err := p.getFingerprintsForLabelPair(metric.LabelPair{Name: "n1", Value: "v1"}) + if err != nil { + t.Fatal(err) + } + want := clientmodel.Fingerprints{1} + if !reflect.DeepEqual(outFPs, want) { + t.Errorf("want %#v, got %#v", want, outFPs) + } + outFPs, err = p.getFingerprintsForLabelPair(metric.LabelPair{Name: "n2", Value: "v2"}) + if err != nil { + t.Fatal(err) + } + want = clientmodel.Fingerprints{2} + if !reflect.DeepEqual(outFPs, want) { + t.Errorf("want %#v, got %#v", want, outFPs) + } + if archived, _, _, err := p.hasArchivedMetric(1); err != nil || !archived { + t.Error("want FP 1 archived") + } + if archived, _, _, err := p.hasArchivedMetric(2); err != nil || !archived { + t.Error("want FP 2 archived") + } + + if err := p.dropArchivedMetric(1); err != nil { + t.Fatal(err) + } + if err := p.dropArchivedMetric(3); err != nil { + // Dropping something that has not been archived is not an error. + t.Fatal(err) + } + p.waitForIndexing() + + outFPs, err = p.getFingerprintsForLabelPair(metric.LabelPair{Name: "n1", Value: "v1"}) + if err != nil { + t.Fatal(err) + } + want = nil + if !reflect.DeepEqual(outFPs, want) { + t.Errorf("want %#v, got %#v", want, outFPs) + } + outFPs, err = p.getFingerprintsForLabelPair(metric.LabelPair{Name: "n2", Value: "v2"}) + if err != nil { + t.Fatal(err) + } + want = clientmodel.Fingerprints{2} + if !reflect.DeepEqual(outFPs, want) { + t.Errorf("want %#v, got %#v", want, outFPs) + } + if archived, _, _, err := p.hasArchivedMetric(1); err != nil || archived { + t.Error("want FP 1 not archived") + } + if archived, _, _, err := p.hasArchivedMetric(2); err != nil || !archived { + t.Error("want FP 2 archived") + } +} + +type incrementalBatch struct { + fpToMetric index.FingerprintMetricMapping + expectedLnToLvs index.LabelNameLabelValuesMapping + expectedLpToFps index.LabelPairFingerprintsMapping +} + +func TestIndexing(t *testing.T) { + batches := []incrementalBatch{ + { + 
fpToMetric: index.FingerprintMetricMapping{ + 0: { + clientmodel.MetricNameLabel: "metric_0", + "label_1": "value_1", + }, + 1: { + clientmodel.MetricNameLabel: "metric_0", + "label_2": "value_2", + "label_3": "value_3", + }, + 2: { + clientmodel.MetricNameLabel: "metric_1", + "label_1": "value_2", + }, + }, + expectedLnToLvs: index.LabelNameLabelValuesMapping{ + clientmodel.MetricNameLabel: codable.LabelValueSet{ + "metric_0": struct{}{}, + "metric_1": struct{}{}, + }, + "label_1": codable.LabelValueSet{ + "value_1": struct{}{}, + "value_2": struct{}{}, + }, + "label_2": codable.LabelValueSet{ + "value_2": struct{}{}, + }, + "label_3": codable.LabelValueSet{ + "value_3": struct{}{}, + }, + }, + expectedLpToFps: index.LabelPairFingerprintsMapping{ + metric.LabelPair{ + Name: clientmodel.MetricNameLabel, + Value: "metric_0", + }: codable.FingerprintSet{0: struct{}{}, 1: struct{}{}}, + metric.LabelPair{ + Name: clientmodel.MetricNameLabel, + Value: "metric_1", + }: codable.FingerprintSet{2: struct{}{}}, + metric.LabelPair{ + Name: "label_1", + Value: "value_1", + }: codable.FingerprintSet{0: struct{}{}}, + metric.LabelPair{ + Name: "label_1", + Value: "value_2", + }: codable.FingerprintSet{2: struct{}{}}, + metric.LabelPair{ + Name: "label_2", + Value: "value_2", + }: codable.FingerprintSet{1: struct{}{}}, + metric.LabelPair{ + Name: "label_3", + Value: "value_3", + }: codable.FingerprintSet{1: struct{}{}}, + }, + }, { + fpToMetric: index.FingerprintMetricMapping{ + 3: { + clientmodel.MetricNameLabel: "metric_0", + "label_1": "value_3", + }, + 4: { + clientmodel.MetricNameLabel: "metric_2", + "label_2": "value_2", + "label_3": "value_1", + }, + 5: { + clientmodel.MetricNameLabel: "metric_1", + "label_1": "value_3", + }, + }, + expectedLnToLvs: index.LabelNameLabelValuesMapping{ + clientmodel.MetricNameLabel: codable.LabelValueSet{ + "metric_0": struct{}{}, + "metric_1": struct{}{}, + "metric_2": struct{}{}, + }, + "label_1": codable.LabelValueSet{ + "value_1": 
struct{}{}, + "value_2": struct{}{}, + "value_3": struct{}{}, + }, + "label_2": codable.LabelValueSet{ + "value_2": struct{}{}, + }, + "label_3": codable.LabelValueSet{ + "value_1": struct{}{}, + "value_3": struct{}{}, + }, + }, + expectedLpToFps: index.LabelPairFingerprintsMapping{ + metric.LabelPair{ + Name: clientmodel.MetricNameLabel, + Value: "metric_0", + }: codable.FingerprintSet{0: struct{}{}, 1: struct{}{}, 3: struct{}{}}, + metric.LabelPair{ + Name: clientmodel.MetricNameLabel, + Value: "metric_1", + }: codable.FingerprintSet{2: struct{}{}, 5: struct{}{}}, + metric.LabelPair{ + Name: clientmodel.MetricNameLabel, + Value: "metric_2", + }: codable.FingerprintSet{4: struct{}{}}, + metric.LabelPair{ + Name: "label_1", + Value: "value_1", + }: codable.FingerprintSet{0: struct{}{}}, + metric.LabelPair{ + Name: "label_1", + Value: "value_2", + }: codable.FingerprintSet{2: struct{}{}}, + metric.LabelPair{ + Name: "label_1", + Value: "value_3", + }: codable.FingerprintSet{3: struct{}{}, 5: struct{}{}}, + metric.LabelPair{ + Name: "label_2", + Value: "value_2", + }: codable.FingerprintSet{1: struct{}{}, 4: struct{}{}}, + metric.LabelPair{ + Name: "label_3", + Value: "value_1", + }: codable.FingerprintSet{4: struct{}{}}, + metric.LabelPair{ + Name: "label_3", + Value: "value_3", + }: codable.FingerprintSet{1: struct{}{}}, + }, + }, + } + + p, closer := newTestPersistence(t) + defer closer.Close() + + indexedFpsToMetrics := index.FingerprintMetricMapping{} + for i, b := range batches { + for fp, m := range b.fpToMetric { + p.indexMetric(fp, m) + if err := p.archiveMetric(fp, m, 1, 2); err != nil { + t.Fatal(err) + } + indexedFpsToMetrics[fp] = m + } + verifyIndexedState(i, t, b, indexedFpsToMetrics, p) + } + + for i := len(batches) - 1; i >= 0; i-- { + b := batches[i] + verifyIndexedState(i, t, batches[i], indexedFpsToMetrics, p) + for fp, m := range b.fpToMetric { + p.unindexMetric(fp, m) + unarchived, firstTime, err := p.unarchiveMetric(fp) + if err != nil { + 
t.Fatal(err) + } + if !unarchived { + t.Errorf("%d. metric not unarchived", i) + } + if firstTime != 1 { + t.Errorf("%d. expected firstTime=1, got %v", i, firstTime) + } + delete(indexedFpsToMetrics, fp) + } + } +} + +func verifyIndexedState(i int, t *testing.T, b incrementalBatch, indexedFpsToMetrics index.FingerprintMetricMapping, p *persistence) { + p.waitForIndexing() + for fp, m := range indexedFpsToMetrics { + // Compare archived metrics with input metrics. + mOut, err := p.getArchivedMetric(fp) + if err != nil { + t.Fatal(err) + } + if !mOut.Equal(m) { + t.Errorf("%d. %v: Got: %s; want %s", i, fp, mOut, m) + } + + // Check that archived metrics are in membership index. + has, first, last, err := p.hasArchivedMetric(fp) + if err != nil { + t.Fatal(err) + } + if !has { + t.Errorf("%d. fingerprint %v not found", i, fp) + } + if first != 1 || last != 2 { + t.Errorf( + "%d. %v: Got first: %d, last %d; want first: %d, last %d", + i, fp, first, last, 1, 2, + ) + } + } + + // Compare label name -> label values mappings. + for ln, lvs := range b.expectedLnToLvs { + outLvs, err := p.getLabelValuesForLabelName(ln) + if err != nil { + t.Fatal(err) + } + + outSet := codable.LabelValueSet{} + for _, lv := range outLvs { + outSet[lv] = struct{}{} + } + + if !reflect.DeepEqual(lvs, outSet) { + t.Errorf("%d. label values don't match. Got: %v; want %v", i, outSet, lvs) + } + } + + // Compare label pair -> fingerprints mappings. + for lp, fps := range b.expectedLpToFps { + outFPs, err := p.getFingerprintsForLabelPair(lp) + if err != nil { + t.Fatal(err) + } + + outSet := codable.FingerprintSet{} + for _, fp := range outFPs { + outSet[fp] = struct{}{} + } + + if !reflect.DeepEqual(fps, outSet) { + t.Errorf("%d. %v: fingerprints don't match. 
Got: %v; want %v", i, lp, outSet, fps) + } + } +} diff --git a/storage/local/preload.go b/storage/local/preload.go new file mode 100644 index 0000000000..1e1cd6c96b --- /dev/null +++ b/storage/local/preload.go @@ -0,0 +1,111 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "time" + + clientmodel "github.com/prometheus/client_golang/model" +) + +// memorySeriesPreloader is a Preloader for the memorySeriesStorage. +type memorySeriesPreloader struct { + storage *memorySeriesStorage + pinnedChunkDescs []*chunkDesc +} + +// PreloadRange implements Preloader. +func (p *memorySeriesPreloader) PreloadRange( + fp clientmodel.Fingerprint, + from clientmodel.Timestamp, through clientmodel.Timestamp, + stalenessDelta time.Duration, +) error { + cds, err := p.storage.preloadChunksForRange(fp, from, through, stalenessDelta) + if err != nil { + return err + } + p.pinnedChunkDescs = append(p.pinnedChunkDescs, cds...) + return nil +} + +/* +// GetMetricAtTime implements Preloader. +func (p *memorySeriesPreloader) GetMetricAtTime(fp clientmodel.Fingerprint, t clientmodel.Timestamp) error { + cds, err := p.storage.preloadChunks(fp, &timeSelector{ + from: t, + through: t, + }) + if err != nil { + return err + } + p.pinnedChunkDescs = append(p.pinnedChunkDescs, cds...) + return nil +} + +// GetMetricAtInterval implements Preloader. 
+func (p *memorySeriesPreloader) GetMetricAtInterval(fp clientmodel.Fingerprint, from, through clientmodel.Timestamp, interval time.Duration) error { + cds, err := p.storage.preloadChunks(fp, &timeSelector{ + from: from, + through: through, + interval: interval, + }) + if err != nil { + return err + } + p.pinnedChunkDescs = append(p.pinnedChunkDescs, cds...) + return +} + +// GetMetricRange implements Preloader. +func (p *memorySeriesPreloader) GetMetricRange(fp clientmodel.Fingerprint, t clientmodel.Timestamp, rangeDuration time.Duration) error { + cds, err := p.storage.preloadChunks(fp, &timeSelector{ + from: t, + through: t, + rangeDuration: through.Sub(from), + }) + if err != nil { + return err + } + p.pinnedChunkDescs = append(p.pinnedChunkDescs, cds...) + return +} + +// GetMetricRangeAtInterval implements Preloader. +func (p *memorySeriesPreloader) GetMetricRangeAtInterval(fp clientmodel.Fingerprint, from, through clientmodel.Timestamp, interval, rangeDuration time.Duration) error { + cds, err := p.storage.preloadChunks(fp, &timeSelector{ + from: from, + through: through, + interval: interval, + rangeDuration: rangeDuration, + }) + if err != nil { + return err + } + p.pinnedChunkDescs = append(p.pinnedChunkDescs, cds...) + return +} +*/ + +// Close implements Preloader. +func (p *memorySeriesPreloader) Close() { + // TODO: Idea about a primitive but almost free heuristic to not evict + // "recently used" chunks: Do not unpin the chunks right here, but hand + // over the pinnedChunkDescs to a manager that will delay the unpinning + // based on time and memory pressure. 
+ for _, cd := range p.pinnedChunkDescs { + cd.unpin(p.storage.evictRequests) + } + chunkOps.WithLabelValues(unpin).Add(float64(len(p.pinnedChunkDescs))) + +} diff --git a/storage/local/series.go b/storage/local/series.go new file mode 100644 index 0000000000..8910cbd6f5 --- /dev/null +++ b/storage/local/series.go @@ -0,0 +1,570 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "math" + "sort" + "sync" + "sync/atomic" + + clientmodel "github.com/prometheus/client_golang/model" + + "github.com/prometheus/prometheus/storage/metric" +) + +// chunkDescEvictionFactor is a factor used for chunkDesc eviction (as opposed +// to evictions of chunks, see method evictOlderThan. A chunk takes about 20x +// more memory than a chunkDesc. With a chunkDescEvictionFactor of 10, not more +// than a third of the total memory taken by a series will be used for +// chunkDescs. +const chunkDescEvictionFactor = 10 + +// fingerprintSeriesPair pairs a fingerprint with a memorySeries pointer. +type fingerprintSeriesPair struct { + fp clientmodel.Fingerprint + series *memorySeries +} + +// seriesMap maps fingerprints to memory series. All its methods are +// goroutine-safe. A SeriesMap is effectively is a goroutine-safe version of +// map[clientmodel.Fingerprint]*memorySeries. 
type seriesMap struct {
	mtx sync.RWMutex // Protects m.
	m   map[clientmodel.Fingerprint]*memorySeries
}

// newSeriesMap returns a newly allocated empty seriesMap. To create a seriesMap
// based on a prefilled map, use an explicit initializer.
func newSeriesMap() *seriesMap {
	return &seriesMap{m: make(map[clientmodel.Fingerprint]*memorySeries)}
}

// length returns the number of mappings in the seriesMap.
func (sm *seriesMap) length() int {
	sm.mtx.RLock()
	defer sm.mtx.RUnlock()

	return len(sm.m)
}

// get returns a memorySeries for a fingerprint. Return values have the same
// semantics as the native Go map.
func (sm *seriesMap) get(fp clientmodel.Fingerprint) (s *memorySeries, ok bool) {
	sm.mtx.RLock()
	defer sm.mtx.RUnlock()

	s, ok = sm.m[fp]
	return
}

// put adds a mapping to the seriesMap. It panics if s == nil.
func (sm *seriesMap) put(fp clientmodel.Fingerprint, s *memorySeries) {
	sm.mtx.Lock()
	defer sm.mtx.Unlock()

	if s == nil {
		panic("tried to add nil pointer to seriesMap")
	}
	sm.m[fp] = s
}

// del removes a mapping from the seriesMap. Deleting a non-existent
// fingerprint is a no-op, as with the native Go map.
func (sm *seriesMap) del(fp clientmodel.Fingerprint) {
	sm.mtx.Lock()
	defer sm.mtx.Unlock()

	delete(sm.m, fp)
}

// iter returns a channel that produces all mappings in the seriesMap. The
// channel will be closed once all mappings have been received. Not consuming
// all mappings from the channel will leak a goroutine. The semantics of
// concurrent modification of seriesMap are similar to those of iterating over
// a map with a 'range' clause. However, if the next element in iteration order
// is removed after the current element has been received from the channel, it
// will still be produced by the channel.
func (sm *seriesMap) iter() <-chan fingerprintSeriesPair {
	ch := make(chan fingerprintSeriesPair)
	go func() {
		sm.mtx.RLock()
		for fp, s := range sm.m {
			// Release the read lock around the (potentially
			// blocking) channel send so that a slow consumer does
			// not stall writers to the map.
			sm.mtx.RUnlock()
			ch <- fingerprintSeriesPair{fp, s}
			sm.mtx.RLock()
		}
		sm.mtx.RUnlock()
		close(ch)
	}()
	return ch
}

// fpIter returns a channel that produces all fingerprints in the seriesMap.
// The channel will be closed once all fingerprints have been received. Not
// consuming all fingerprints from the channel will leak a goroutine. The
// semantics of concurrent modification of seriesMap are similar to those of
// iterating over a map with a 'range' clause. However, if the next element in
// iteration order is removed after the current element has been received from
// the channel, it will still be produced by the channel.
func (sm *seriesMap) fpIter() <-chan clientmodel.Fingerprint {
	ch := make(chan clientmodel.Fingerprint)
	go func() {
		sm.mtx.RLock()
		for fp := range sm.m {
			// Same lock-release pattern as in iter, see above.
			sm.mtx.RUnlock()
			ch <- fp
			sm.mtx.RLock()
		}
		sm.mtx.RUnlock()
		close(ch)
	}()
	return ch
}

// memorySeries is a single time series as held in memory: its metric plus the
// descriptors of its chunks. All fields are protected by the fingerprint lock
// of the series (held by callers), not by a mutex of their own.
type memorySeries struct {
	metric clientmodel.Metric
	// Sorted by start time, overlapping chunk ranges are forbidden.
	chunkDescs []*chunkDesc
	// The chunkDescs in memory might not have all the chunkDescs for the
	// chunks that are persisted to disk. The missing chunkDescs are all
	// contiguous and at the tail end. chunkDescsOffset is the index of the
	// chunk on disk that corresponds to the first chunkDesc in memory. If
	// it is 0, the chunkDescs are all loaded. A value of -1 denotes a
	// special case: There are chunks on disk, but the offset to the
	// chunkDescs in memory is unknown. Also, there is no overlap between
	// chunks on disk and chunks in memory (implying that upon first
	// persisting of a chunk in memory, the offset has to be set).
	chunkDescsOffset int
	// The savedFirstTime field is used as a fallback when the
	// chunkDescsOffset is not 0. It can be used to save the firstTime of the
	// first chunk before its chunk desc is evicted. In doubt, this field is
	// just set to the oldest possible timestamp.
	savedFirstTime clientmodel.Timestamp
	// Whether the current head chunk has already been scheduled to be
	// persisted. If true, the current head chunk must not be modified
	// anymore.
	headChunkPersisted bool
	// Whether the current head chunk is used by an iterator. In that case,
	// a non-persisted head chunk has to be cloned before more samples are
	// appended.
	headChunkUsedByIterator bool
}

// newMemorySeries returns a pointer to a newly allocated memorySeries for the
// given metric. reallyNew defines if the memorySeries is a genuinely new series
// or (if false) a series for a metric being unarchived, i.e. a series that
// existed before but has been evicted from memory. If reallyNew is false,
// firstTime is ignored (and set to the lowest possible timestamp instead - it
// will be set properly upon the first eviction of chunkDescs).
func newMemorySeries(m clientmodel.Metric, reallyNew bool, firstTime clientmodel.Timestamp) *memorySeries {
	if reallyNew {
		firstTime = math.MinInt64
	}
	s := memorySeries{
		metric: m,
		// An unarchived series behaves as if its head chunk were
		// already persisted: appending must start a fresh chunk.
		headChunkPersisted: !reallyNew,
		savedFirstTime:     firstTime,
	}
	if !reallyNew {
		// Chunks exist on disk, but their offset relative to the
		// in-memory chunkDescs is unknown until the first persist.
		s.chunkDescsOffset = -1
	}
	return &s
}

// add adds a sample pair to the series.
// It returns chunkDescs that must be queued to be persisted.
// The caller must have locked the fingerprint of the series.
+func (s *memorySeries) add(fp clientmodel.Fingerprint, v *metric.SamplePair) []*chunkDesc { + if len(s.chunkDescs) == 0 || s.headChunkPersisted { + newHead := newChunkDesc(newDeltaEncodedChunk(d1, d0, true)) + s.chunkDescs = append(s.chunkDescs, newHead) + s.headChunkPersisted = false + } else if s.headChunkUsedByIterator && s.head().getRefCount() > 1 { + // We only need to clone the head chunk if the current head + // chunk was used in an iterator at all and if the refCount is + // still greater than the 1 we always have because the head + // chunk is not yet persisted. The latter is just an + // approximation. We will still clone unnecessarily if an older + // iterator using a previous version of the head chunk is still + // around and keep the head chunk pinned. We needed to track + // pins by version of the head chunk, which is probably not + // worth the effort. + chunkOps.WithLabelValues(clone).Inc() + // No locking needed here because a non-persisted head chunk can + // not get evicted concurrently. + s.head().chunk = s.head().chunk.clone() + s.headChunkUsedByIterator = false + } + + chunks := s.head().add(v) + s.head().chunk = chunks[0] + + var chunkDescsToPersist []*chunkDesc + if len(chunks) > 1 { + chunkDescsToPersist = append(chunkDescsToPersist, s.head()) + for i, c := range chunks[1:] { + cd := newChunkDesc(c) + s.chunkDescs = append(s.chunkDescs, cd) + // The last chunk is still growing. + if i < len(chunks[1:])-1 { + chunkDescsToPersist = append(chunkDescsToPersist, cd) + } + } + } + return chunkDescsToPersist +} + +// evictChunkDescs evicts chunkDescs if there are chunkDescEvictionFactor times +// more than non-evicted chunks. iOldestNotEvicted is the index within the +// current chunkDescs of the oldest chunk that is not evicted. 
func (s *memorySeries) evictChunkDescs(iOldestNotEvicted int) {
	// Keep chunkDescEvictionFactor times as many chunkDescs as there are
	// non-evicted chunks; drop the excess from the (oldest) front.
	lenToKeep := chunkDescEvictionFactor * (len(s.chunkDescs) - iOldestNotEvicted)
	if lenToKeep < len(s.chunkDescs) {
		// Remember the first time before the descriptors carrying it
		// are dropped, see the savedFirstTime field doc.
		s.savedFirstTime = s.firstTime()
		lenEvicted := len(s.chunkDescs) - lenToKeep
		s.chunkDescsOffset += lenEvicted
		chunkDescOps.WithLabelValues(evict).Add(float64(lenEvicted))
		atomic.AddInt64(&numMemChunkDescs, -int64(lenEvicted))
		// Copy into a fresh slice so the evicted prefix can be GC'd.
		s.chunkDescs = append(
			make([]*chunkDesc, 0, lenToKeep),
			s.chunkDescs[lenEvicted:]...,
		)
	}
}

// purgeOlderThan removes chunkDescs older than t. It returns the number of
// purged chunkDescs and true if all chunkDescs have been purged.
//
// The caller must have locked the fingerprint of the series.
func (s *memorySeries) purgeOlderThan(t clientmodel.Timestamp) (int, bool) {
	keepIdx := len(s.chunkDescs)
	for i, cd := range s.chunkDescs {
		// chunkDescs are sorted by time, so the first chunk ending at
		// or after t marks the start of the part to keep.
		if !cd.lastTime().Before(t) {
			keepIdx = i
			break
		}
	}
	if keepIdx > 0 {
		s.chunkDescs = append(make([]*chunkDesc, 0, len(s.chunkDescs)-keepIdx), s.chunkDescs[keepIdx:]...)
		atomic.AddInt64(&numMemChunkDescs, -int64(keepIdx))
	}
	return keepIdx, len(s.chunkDescs) == 0
}

// preloadChunks is an internal helper method. It pins the chunks at the given
// indexes and loads any of them that are currently evicted from disk. On
// error, nothing remains pinned. The caller must have locked the fingerprint
// of the series.
func (s *memorySeries) preloadChunks(indexes []int, mss *memorySeriesStorage) ([]*chunkDesc, error) {
	loadIndexes := []int{}
	pinnedChunkDescs := make([]*chunkDesc, 0, len(indexes))
	for _, idx := range indexes {
		cd := s.chunkDescs[idx]
		pinnedChunkDescs = append(pinnedChunkDescs, cd)
		cd.pin(mss.evictRequests) // Have to pin everything first to prevent immediate eviction on chunk loading.
		if cd.isEvicted() {
			loadIndexes = append(loadIndexes, idx)
		}
	}
	chunkOps.WithLabelValues(pin).Add(float64(len(pinnedChunkDescs)))

	if len(loadIndexes) > 0 {
		if s.chunkDescsOffset == -1 {
			panic("requested loading chunks from persistence in a situation where we must not have persisted data for chunk descriptors in memory")
		}
		fp := s.metric.Fingerprint()
		// TODO: Remove law-of-Demeter violation?
		chunks, err := mss.persistence.loadChunks(fp, loadIndexes, s.chunkDescsOffset)
		if err != nil {
			// Unpin the chunks since we won't return them as pinned chunks now.
			for _, cd := range pinnedChunkDescs {
				cd.unpin(mss.evictRequests)
			}
			chunkOps.WithLabelValues(unpin).Add(float64(len(pinnedChunkDescs)))
			return nil, err
		}
		for i, c := range chunks {
			s.chunkDescs[loadIndexes[i]].setChunk(c)
		}
		chunkOps.WithLabelValues(load).Add(float64(len(chunks)))
		atomic.AddInt64(&numMemChunks, int64(len(chunks)))
	}

	return pinnedChunkDescs, nil
}

// NOTE(review): the following block is dead code kept for reference; it refers
// to an older *persistence-based signature and a series-level mutex that no
// longer exist in the code above.
/*
func (s *memorySeries) preloadChunksAtTime(t clientmodel.Timestamp, p *persistence) (chunkDescs, error) {
	s.mtx.Lock()
	defer s.mtx.Unlock()

	if len(s.chunkDescs) == 0 {
		return nil, nil
	}

	var pinIndexes []int
	// Find first chunk where lastTime() is after or equal to t.
	i := sort.Search(len(s.chunkDescs), func(i int) bool {
		return !s.chunkDescs[i].lastTime().Before(t)
	})
	switch i {
	case 0:
		pinIndexes = []int{0}
	case len(s.chunkDescs):
		pinIndexes = []int{i - 1}
	default:
		if s.chunkDescs[i].contains(t) {
			pinIndexes = []int{i}
		} else {
			pinIndexes = []int{i - 1, i}
		}
	}

	return s.preloadChunks(pinIndexes, p)
}
*/

// preloadChunksForRange loads chunks for the given range from the persistence.
// The caller must have locked the fingerprint of the series.
+func (s *memorySeries) preloadChunksForRange( + from clientmodel.Timestamp, through clientmodel.Timestamp, + fp clientmodel.Fingerprint, mss *memorySeriesStorage, +) ([]*chunkDesc, error) { + firstChunkDescTime := clientmodel.Timestamp(math.MaxInt64) + if len(s.chunkDescs) > 0 { + firstChunkDescTime = s.chunkDescs[0].firstTime() + } + if s.chunkDescsOffset != 0 && from.Before(firstChunkDescTime) { + // TODO: Remove law-of-demeter violation? + cds, err := mss.persistence.loadChunkDescs(fp, firstChunkDescTime) + if err != nil { + return nil, err + } + s.chunkDescs = append(cds, s.chunkDescs...) + s.chunkDescsOffset = 0 + } + + if len(s.chunkDescs) == 0 { + return nil, nil + } + + // Find first chunk with start time after "from". + fromIdx := sort.Search(len(s.chunkDescs), func(i int) bool { + return s.chunkDescs[i].firstTime().After(from) + }) + // Find first chunk with start time after "through". + throughIdx := sort.Search(len(s.chunkDescs), func(i int) bool { + return s.chunkDescs[i].firstTime().After(through) + }) + if fromIdx > 0 { + fromIdx-- + } + if throughIdx == len(s.chunkDescs) { + throughIdx-- + } + + pinIndexes := make([]int, 0, throughIdx-fromIdx+1) + for i := fromIdx; i <= throughIdx; i++ { + pinIndexes = append(pinIndexes, i) + } + return s.preloadChunks(pinIndexes, mss) +} + +// newIterator returns a new SeriesIterator. The caller must have locked the +// fingerprint of the memorySeries. +func (s *memorySeries) newIterator(lockFunc, unlockFunc func()) SeriesIterator { + chunks := make([]chunk, 0, len(s.chunkDescs)) + for i, cd := range s.chunkDescs { + if !cd.isEvicted() { + if i == len(s.chunkDescs)-1 && !s.headChunkPersisted { + s.headChunkUsedByIterator = true + } + chunks = append(chunks, cd.chunk) + } + } + + return &memorySeriesIterator{ + lock: lockFunc, + unlock: unlockFunc, + chunks: chunks, + } +} + +// head returns a pointer to the head chunk descriptor. The caller must have +// locked the fingerprint of the memorySeries. 
+func (s *memorySeries) head() *chunkDesc { + return s.chunkDescs[len(s.chunkDescs)-1] +} + +// firstTime returns the timestamp of the first sample in the series. The caller +// must have locked the fingerprint of the memorySeries. +func (s *memorySeries) firstTime() clientmodel.Timestamp { + if s.chunkDescsOffset == 0 && len(s.chunkDescs) > 0 { + return s.chunkDescs[0].firstTime() + } + return s.savedFirstTime +} + +// lastTime returns the timestamp of the last sample in the series. The caller +// must have locked the fingerprint of the memorySeries. +func (s *memorySeries) lastTime() clientmodel.Timestamp { + return s.head().lastTime() +} + +// memorySeriesIterator implements SeriesIterator. +type memorySeriesIterator struct { + lock, unlock func() + chunkIt chunkIterator + chunks []chunk +} + +// GetValueAtTime implements SeriesIterator. +func (it *memorySeriesIterator) GetValueAtTime(t clientmodel.Timestamp) metric.Values { + it.lock() + defer it.unlock() + + // The most common case. We are iterating through a chunk. + if it.chunkIt != nil && it.chunkIt.contains(t) { + return it.chunkIt.getValueAtTime(t) + } + + it.chunkIt = nil + + if len(it.chunks) == 0 { + return nil + } + + // Before or exactly on the first sample of the series. + if !t.After(it.chunks[0].firstTime()) { + // return first value of first chunk + return it.chunks[0].newIterator().getValueAtTime(t) + } + // After or exactly on the last sample of the series. + if !t.Before(it.chunks[len(it.chunks)-1].lastTime()) { + // return last value of last chunk + return it.chunks[len(it.chunks)-1].newIterator().getValueAtTime(t) + } + + // Find first chunk where lastTime() is after or equal to t. + i := sort.Search(len(it.chunks), func(i int) bool { + return !it.chunks[i].lastTime().Before(t) + }) + if i == len(it.chunks) { + panic("out of bounds") + } + + if t.Before(it.chunks[i].firstTime()) { + // We ended up between two chunks. 
+ return metric.Values{ + it.chunks[i-1].newIterator().getValueAtTime(t)[0], + it.chunks[i].newIterator().getValueAtTime(t)[0], + } + } + // We ended up in the middle of a chunk. We might stay there for a while, + // so save it as the current chunk iterator. + it.chunkIt = it.chunks[i].newIterator() + return it.chunkIt.getValueAtTime(t) +} + +// GetBoundaryValues implements SeriesIterator. +func (it *memorySeriesIterator) GetBoundaryValues(in metric.Interval) metric.Values { + it.lock() + defer it.unlock() + + // Find the first relevant chunk. + i := sort.Search(len(it.chunks), func(i int) bool { + return !it.chunks[i].lastTime().Before(in.OldestInclusive) + }) + values := make(metric.Values, 0, 2) + for i, c := range it.chunks[i:] { + var chunkIt chunkIterator + if c.firstTime().After(in.NewestInclusive) { + if len(values) == 1 { + // We found the first value before, but are now + // already past the last value. The value we + // want must be the last value of the previous + // chunk. So backtrack... + chunkIt = it.chunks[i-1].newIterator() + values = append(values, chunkIt.getValueAtTime(in.NewestInclusive)[0]) + } + break + } + if len(values) == 0 { + chunkIt = c.newIterator() + firstValues := chunkIt.getValueAtTime(in.OldestInclusive) + switch len(firstValues) { + case 2: + values = append(values, firstValues[1]) + case 1: + values = firstValues + default: + panic("unexpected return from getValueAtTime") + } + } + if c.lastTime().After(in.NewestInclusive) { + if chunkIt == nil { + chunkIt = c.newIterator() + } + values = append(values, chunkIt.getValueAtTime(in.NewestInclusive)[0]) + break + } + } + if len(values) == 1 { + // We found exactly one value. In that case, add the most recent we know. + values = append( + values, + it.chunks[len(it.chunks)-1].newIterator().getValueAtTime(in.NewestInclusive)[0], + ) + } + if len(values) == 2 && values[0].Equal(&values[1]) { + return values[:1] + } + return values +} + +// GetRangeValues implements SeriesIterator. 
+func (it *memorySeriesIterator) GetRangeValues(in metric.Interval) metric.Values { + it.lock() + defer it.unlock() + + // Find the first relevant chunk. + i := sort.Search(len(it.chunks), func(i int) bool { + return !it.chunks[i].lastTime().Before(in.OldestInclusive) + }) + values := metric.Values{} + for _, c := range it.chunks[i:] { + if c.firstTime().After(in.NewestInclusive) { + break + } + // TODO: actually reuse an iterator between calls if we get multiple ranges + // from the same chunk. + values = append(values, c.newIterator().getRangeValues(in)...) + } + return values +} + +// nopSeriesIterator implements Series Iterator. It never returns any values. +type nopSeriesIterator struct{} + +// GetValueAtTime implements SeriesIterator. +func (_ nopSeriesIterator) GetValueAtTime(t clientmodel.Timestamp) metric.Values { + return metric.Values{} +} + +// GetBoundaryValues implements SeriesIterator. +func (_ nopSeriesIterator) GetBoundaryValues(in metric.Interval) metric.Values { + return metric.Values{} +} + +// GetRangeValues implements SeriesIterator. +func (_ nopSeriesIterator) GetRangeValues(in metric.Interval) metric.Values { + return metric.Values{} +} diff --git a/storage/local/storage.go b/storage/local/storage.go new file mode 100644 index 0000000000..20d8070835 --- /dev/null +++ b/storage/local/storage.go @@ -0,0 +1,825 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

// Package local contains the local time series storage used by Prometheus.
package local

import (
	"container/list"
	"sync/atomic"
	"time"

	"github.com/golang/glog"
	"github.com/prometheus/client_golang/prometheus"

	clientmodel "github.com/prometheus/client_golang/model"

	"github.com/prometheus/prometheus/storage/metric"
)

const (
	persistQueueCap  = 1024 // Buffered chunk-persist requests.
	evictRequestsCap = 1024 // Buffered chunk-eviction requests.
	chunkLen         = 1024 // Payload size of a chunk in bytes.

	// See waitForNextFP.
	fpMaxWaitDuration = 10 * time.Second
	fpMinWaitDuration = 5 * time.Millisecond // ~ hard disk seek time.
	fpMaxSweepTime    = 6 * time.Hour

	maxEvictInterval = time.Minute
	headChunkTimeout = time.Hour // Close head chunk if not touched for that long.
)

// storageState models the lifecycle of the storage.
type storageState uint

const (
	storageStarting storageState = iota
	storageServing
	storageStopping
)

// persistRequest asks the persist loop to write one chunk of one series.
type persistRequest struct {
	fingerprint clientmodel.Fingerprint
	chunkDesc   *chunkDesc
}

// evictRequest asks the evict loop to add a chunk to (evict == true) or
// remove it from (evict == false) the eviction candidate list.
type evictRequest struct {
	cd    *chunkDesc
	evict bool
}

// memorySeriesStorage implements Storage backed by in-memory series with
// chunk persistence on disk.
type memorySeriesStorage struct {
	fpLocker   *fingerprintLocker
	fpToSeries *seriesMap

	loopStopping, loopStopped chan struct{}
	maxMemoryChunks           int
	purgeAfter                time.Duration
	checkpointInterval        time.Duration

	persistQueue   chan persistRequest
	persistStopped chan struct{}
	persistence    *persistence

	evictList                   *list.List
	evictRequests               chan evictRequest
	evictStopping, evictStopped chan struct{}

	persistLatency              prometheus.Summary
	persistErrors               *prometheus.CounterVec
	persistQueueLength          prometheus.Gauge
	numSeries                   prometheus.Gauge
	seriesOps                   *prometheus.CounterVec
	ingestedSamplesCount        prometheus.Counter
	invalidPreloadRequestsCount prometheus.Counter
	purgeDuration               prometheus.Gauge
}

// MemorySeriesStorageOptions contains options needed by
// NewMemorySeriesStorage. It is not safe to leave any of those at their zero
// values.
type MemorySeriesStorageOptions struct {
	MemoryChunks               int           // How many chunks to keep in memory.
	PersistenceStoragePath     string        // Location of persistence files.
	PersistenceRetentionPeriod time.Duration // Chunks at least that old are purged.
	CheckpointInterval         time.Duration // How often to checkpoint the series map and head chunks.
	Dirty                      bool          // Force the storage to consider itself dirty on startup.
}

// NewMemorySeriesStorage returns a newly allocated Storage. Storage.Serve still
// has to be called to start the storage.
func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) (Storage, error) {
	p, err := newPersistence(o.PersistenceStoragePath, chunkLen, o.Dirty)
	if err != nil {
		return nil, err
	}
	glog.Info("Loading series map and head chunks...")
	fpToSeries, err := p.loadSeriesMapAndHeads()
	if err != nil {
		return nil, err
	}
	glog.Infof("%d series loaded.", fpToSeries.length())
	// Created early (rather than inline below) so it can be primed with
	// the number of loaded series.
	numSeries := prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "memory_series",
		Help:      "The current number of series in memory.",
	})
	numSeries.Set(float64(fpToSeries.length()))

	return &memorySeriesStorage{
		fpLocker:   newFingerprintLocker(256),
		fpToSeries: fpToSeries,

		loopStopping:       make(chan struct{}),
		loopStopped:        make(chan struct{}),
		maxMemoryChunks:    o.MemoryChunks,
		purgeAfter:         o.PersistenceRetentionPeriod,
		checkpointInterval: o.CheckpointInterval,

		persistQueue:   make(chan persistRequest, persistQueueCap),
		persistStopped: make(chan struct{}),
		persistence:    p,

		evictList:     list.New(),
		evictRequests: make(chan evictRequest, evictRequestsCap),
		evictStopping: make(chan struct{}),
		evictStopped:  make(chan struct{}),

		persistLatency: prometheus.NewSummary(prometheus.SummaryOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "persist_latency_microseconds",
			Help:      "A summary of latencies for persisting each chunk.",
		}),
		persistErrors: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Namespace: namespace,
				Subsystem: subsystem,
				Name:      "persist_errors_total",
				Help:      "A counter of errors persisting chunks.",
			},
			[]string{"error"},
		),
		persistQueueLength: prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "persist_queue_length",
			Help:      "The current number of chunks waiting in the persist queue.",
		}),
		numSeries: numSeries,
		seriesOps: prometheus.NewCounterVec(
			prometheus.CounterOpts{
				Namespace: namespace,
				Subsystem: subsystem,
				Name:      "series_ops_total",
				Help:      "The total number of series operations by their type.",
			},
			[]string{opTypeLabel},
		),
		ingestedSamplesCount: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "ingested_samples_total",
			Help:      "The total number of samples ingested.",
		}),
		invalidPreloadRequestsCount: prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: namespace,
			Subsystem: subsystem,
			Name:      "invalid_preload_requests_total",
			Help:      "The total number of preload requests referring to a non-existent series. This is an indication of outdated label indexes.",
		}),
	}, nil
}

// Start implements Storage.
func (s *memorySeriesStorage) Start() {
	go s.handleEvictList()
	go s.handlePersistQueue()
	go s.loop()
}

// Stop implements Storage. It shuts down the background goroutines in
// dependency order and checkpoints the in-memory state one last time.
func (s *memorySeriesStorage) Stop() error {
	glog.Info("Stopping local storage...")

	glog.Info("Stopping maintenance loop...")
	close(s.loopStopping)
	<-s.loopStopped

	glog.Info("Stopping persist queue...")
	close(s.persistQueue)
	<-s.persistStopped

	glog.Info("Stopping chunk eviction...")
	close(s.evictStopping)
	<-s.evictStopped

	// One final checkpoint of the series map and the head chunks.
	if err := s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker); err != nil {
		return err
	}

	if err := s.persistence.close(); err != nil {
		return err
	}
	glog.Info("Local storage stopped.")
	return nil
}

// WaitForIndexing implements Storage.
func (s *memorySeriesStorage) WaitForIndexing() {
	s.persistence.waitForIndexing()
}

// NewIterator implements Storage.
func (s *memorySeriesStorage) NewIterator(fp clientmodel.Fingerprint) SeriesIterator {
	s.fpLocker.Lock(fp)
	defer s.fpLocker.Unlock(fp)

	series, ok := s.fpToSeries.get(fp)
	if !ok {
		// Oops, no series for fp found. That happens if, after
		// preloading is done, the whole series is identified as old
		// enough for purging and hence purged for good. As there is no
		// data left to iterate over, return an iterator that will never
		// return any values.
		return nopSeriesIterator{}
	}
	return series.newIterator(
		func() { s.fpLocker.Lock(fp) },
		func() { s.fpLocker.Unlock(fp) },
	)
}

// NewPreloader implements Storage.
func (s *memorySeriesStorage) NewPreloader() Preloader {
	return &memorySeriesPreloader{
		storage: s,
	}
}

// GetFingerprintsForLabelMatchers implements Storage. It intersects the
// fingerprint sets matched by each matcher; a nil result map means "no
// constraint applied yet" (first iteration), as opposed to an empty
// intersection, which short-circuits to nil.
func (s *memorySeriesStorage) GetFingerprintsForLabelMatchers(labelMatchers metric.LabelMatchers) clientmodel.Fingerprints {
	var result map[clientmodel.Fingerprint]struct{}
	for _, matcher := range labelMatchers {
		intersection := map[clientmodel.Fingerprint]struct{}{}
		switch matcher.Type {
		case metric.Equal:
			fps, err := s.persistence.getFingerprintsForLabelPair(
				metric.LabelPair{
					Name:  matcher.Name,
					Value: matcher.Value,
				},
			)
			if err != nil {
				glog.Error("Error getting fingerprints for label pair: ", err)
			}
			if len(fps) == 0 {
				return nil
			}
			for _, fp := range fps {
				// Keep fp if it survived all previous matchers
				// (or if this is the first matcher).
				if _, ok := result[fp]; ok || result == nil {
					intersection[fp] = struct{}{}
				}
			}
		default:
			// Non-equality matcher: enumerate all values of the
			// label and filter them through the matcher.
			values, err := s.persistence.getLabelValuesForLabelName(matcher.Name)
			if err != nil {
				glog.Errorf("Error getting label values for label name %q: %v", matcher.Name, err)
			}
			matches := matcher.Filter(values)
			if len(matches) == 0 {
				return nil
			}
			for _, v := range matches {
				fps, err := s.persistence.getFingerprintsForLabelPair(
					metric.LabelPair{
						Name:  matcher.Name,
						Value: v,
					},
				)
				if err != nil {
					glog.Error("Error getting fingerprints for label pair: ", err)
				}
				for _, fp := range fps {
					if _, ok := result[fp]; ok || result == nil {
						intersection[fp] = struct{}{}
					}
				}
			}
		}
		if len(intersection) == 0 {
			return nil
		}
		result = intersection
	}

	fps := make(clientmodel.Fingerprints, 0, len(result))
	for fp := range result {
		fps = append(fps, fp)
	}
	return fps
}

// GetLabelValuesForLabelName implements Storage.
func (s *memorySeriesStorage) GetLabelValuesForLabelName(labelName clientmodel.LabelName) clientmodel.LabelValues {
	lvs, err := s.persistence.getLabelValuesForLabelName(labelName)
	if err != nil {
		glog.Errorf("Error getting label values for label name %q: %v", labelName, err)
	}
	return lvs
}

// GetMetricForFingerprint implements Storage. It falls back to the archive if
// the series is not in memory.
func (s *memorySeriesStorage) GetMetricForFingerprint(fp clientmodel.Fingerprint) clientmodel.Metric {
	s.fpLocker.Lock(fp)
	defer s.fpLocker.Unlock(fp)

	series, ok := s.fpToSeries.get(fp)
	if ok {
		// Copy required here because caller might mutate the returned
		// metric.
		m := make(clientmodel.Metric, len(series.metric))
		for ln, lv := range series.metric {
			m[ln] = lv
		}
		return m
	}
	metric, err := s.persistence.getArchivedMetric(fp)
	if err != nil {
		glog.Errorf("Error retrieving archived metric for fingerprint %v: %v", fp, err)
	}
	return metric
}

// AppendSamples implements Storage.
func (s *memorySeriesStorage) AppendSamples(samples clientmodel.Samples) {
	for _, sample := range samples {
		s.appendSample(sample)
	}

	s.ingestedSamplesCount.Add(float64(len(samples)))
}

// appendSample adds one sample to its series, creating or unarchiving the
// series if necessary, and queues any completed chunks for persistence.
func (s *memorySeriesStorage) appendSample(sample *clientmodel.Sample) {
	fp := sample.Metric.Fingerprint()
	s.fpLocker.Lock(fp)
	series := s.getOrCreateSeries(fp, sample.Metric)
	chunkDescsToPersist := series.add(fp, &metric.SamplePair{
		Value:     sample.Value,
		Timestamp: sample.Timestamp,
	})
	s.fpLocker.Unlock(fp)
	// Queue only outside of the locked area, processing the persistQueue
	// requires the same lock!
	for _, cd := range chunkDescsToPersist {
		s.persistQueue <- persistRequest{fp, cd}
	}
}

// getOrCreateSeries returns the in-memory series for fp, unarchiving or
// creating (and indexing) it first if needed. The caller must have locked the
// fingerprint.
func (s *memorySeriesStorage) getOrCreateSeries(fp clientmodel.Fingerprint, m clientmodel.Metric) *memorySeries {
	series, ok := s.fpToSeries.get(fp)
	if !ok {
		unarchived, firstTime, err := s.persistence.unarchiveMetric(fp)
		if err != nil {
			glog.Errorf("Error unarchiving fingerprint %v: %v", fp, err)
		}
		if unarchived {
			s.seriesOps.WithLabelValues(unarchive).Inc()
		} else {
			// This was a genuinely new series, so index the metric.
			s.persistence.indexMetric(fp, m)
			s.seriesOps.WithLabelValues(create).Inc()
		}
		series = newMemorySeries(m, !unarchived, firstTime)
		s.fpToSeries.put(fp, series)
		s.numSeries.Inc()
	}
	return series
}

// preloadChunksForRange pins the chunks of the series fp that overlap the
// range [from, through], unarchiving the series first if it is only on disk
// and the range (widened by stalenessDelta) touches its data.
func (s *memorySeriesStorage) preloadChunksForRange(
	fp clientmodel.Fingerprint,
	from clientmodel.Timestamp, through clientmodel.Timestamp,
	stalenessDelta time.Duration,
) ([]*chunkDesc, error) {
	s.fpLocker.Lock(fp)
	defer s.fpLocker.Unlock(fp)

	series, ok := s.fpToSeries.get(fp)
	if !ok {
		has, first, last, err := s.persistence.hasArchivedMetric(fp)
		if err != nil {
			return nil, err
		}
		if !has {
			// The series exists neither in memory nor in the
			// archive - most likely a stale label index entry.
			s.invalidPreloadRequestsCount.Inc()
			return nil, nil
		}
		if from.Add(-stalenessDelta).Before(last) && through.Add(stalenessDelta).After(first) {
			metric, err := s.persistence.getArchivedMetric(fp)
			if err != nil {
				return nil, err
			}
			series = s.getOrCreateSeries(fp, metric)
		} else {
			return nil, nil
		}
	}
	return series.preloadChunksForRange(from, through, fp, s)
}

// handleEvictList is the eviction-bookkeeping goroutine: it maintains the LRU
// list of eviction candidates from incoming evictRequests and periodically
// triggers actual evictions.
func (s *memorySeriesStorage) handleEvictList() {
	ticker := time.NewTicker(maxEvictInterval)
	count := 0
loop:
	for {
		// To batch up evictions a bit, this tries evictions at least
		// once per evict interval, but earlier if the number of evict
		// requests with evict==true that have happened since the last
		// evict run is more than maxMemoryChunks/1000.
		select {
		case req := <-s.evictRequests:
			if req.evict {
				req.cd.evictListElement = s.evictList.PushBack(req.cd)
				count++
				if count > s.maxMemoryChunks/1000 {
					s.maybeEvict()
					count = 0
				}
			} else {
				if req.cd.evictListElement != nil {
					s.evictList.Remove(req.cd.evictListElement)
					req.cd.evictListElement = nil
				}
			}
		case <-ticker.C:
			if s.evictList.Len() > 0 {
				s.maybeEvict()
			}
		case <-s.evictStopping:
			break loop
		}
	}
	ticker.Stop()
	glog.Info("Chunk eviction stopped.")
	close(s.evictStopped)
}

// maybeEvict is a local helper method.
Must only be called by handleEvictList. +func (s *memorySeriesStorage) maybeEvict() { + numChunksToEvict := int(atomic.LoadInt64(&numMemChunks)) - s.maxMemoryChunks + if numChunksToEvict <= 0 { + return + } + chunkDescsToEvict := make([]*chunkDesc, numChunksToEvict) + for i := range chunkDescsToEvict { + e := s.evictList.Front() + if e == nil { + break + } + cd := e.Value.(*chunkDesc) + cd.evictListElement = nil + chunkDescsToEvict[i] = cd + s.evictList.Remove(e) + } + // Do the actual eviction in a goroutine as we might otherwise deadlock, + // in the following way: A chunk was unpinned completely and therefore + // scheduled for eviction. At the time we actually try to evict it, + // another goroutine is pinning the chunk. The pinning goroutine has + // currently locked the chunk and tries to send the evict request (to + // remove the chunk from the evict list) to the evictRequests + // channel. The send blocks because evictRequests is full. However, the + // goroutine that is supposed to empty the channel is waiting for the + // chunkDesc lock to try to evict the chunk. + go func() { + for _, cd := range chunkDescsToEvict { + if cd == nil { + break + } + cd.maybeEvict() + // We don't care if the eviction succeeds. If the chunk + // was pinned in the meantime, it will be added to the + // evict list once it gets unpinned again. + } + }() +} + +func (s *memorySeriesStorage) handlePersistQueue() { + for req := range s.persistQueue { + s.persistQueueLength.Set(float64(len(s.persistQueue))) + start := time.Now() + s.fpLocker.Lock(req.fingerprint) + offset, err := s.persistence.persistChunk(req.fingerprint, req.chunkDesc.chunk) + if series, seriesInMemory := s.fpToSeries.get(req.fingerprint); err == nil && seriesInMemory && series.chunkDescsOffset == -1 { + // This is the first chunk persisted for a newly created + // series that had prior chunks on disk. Finally, we can + // set the chunkDescsOffset. 
+ series.chunkDescsOffset = offset + } + s.fpLocker.Unlock(req.fingerprint) + s.persistLatency.Observe(float64(time.Since(start)) / float64(time.Microsecond)) + if err != nil { + s.persistErrors.WithLabelValues(err.Error()).Inc() + glog.Error("Error persisting chunk: ", err) + s.persistence.setDirty(true) + continue + } + req.chunkDesc.unpin(s.evictRequests) + chunkOps.WithLabelValues(persistAndUnpin).Inc() + } + glog.Info("Persist queue drained and stopped.") + close(s.persistStopped) +} + +// waitForNextFP waits an estimated duration, after which we want to process +// another fingerprint so that we will process all fingerprints in a tenth of +// s.purgeAfter assuming that the system is doing nothing else, e.g. if we want +// to purge after 40h, we want to cycle through all fingerprints within +// 4h. However, the maximum sweep time is capped at fpMaxSweepTime. Furthermore, +// this method will always wait for at least fpMinWaitDuration and never longer +// than fpMaxWaitDuration. If s.loopStopped is closed, it will return false +// immediately. The estimation is based on the total number of fingerprints as +// passed in. +func (s *memorySeriesStorage) waitForNextFP(numberOfFPs int) bool { + d := fpMaxWaitDuration + if numberOfFPs != 0 { + sweepTime := s.purgeAfter / 10 + if sweepTime > fpMaxSweepTime { + sweepTime = fpMaxSweepTime + } + d = sweepTime / time.Duration(numberOfFPs) + if d < fpMinWaitDuration { + d = fpMinWaitDuration + } + if d > fpMaxWaitDuration { + d = fpMaxWaitDuration + } + } + t := time.NewTimer(d) + select { + case <-t.C: + return true + case <-s.loopStopping: + return false + } +} + +// cycleThroughMemoryFingerprints returns a channel that emits fingerprints for +// series in memory in a throttled fashion. It continues to cycle through all +// fingerprints in memory until s.loopStopping is closed. 
+func (s *memorySeriesStorage) cycleThroughMemoryFingerprints() chan clientmodel.Fingerprint { + memoryFingerprints := make(chan clientmodel.Fingerprint) + go func() { + var fpIter <-chan clientmodel.Fingerprint + + defer func() { + if fpIter != nil { + for _ = range fpIter { + // Consume the iterator. + } + } + close(memoryFingerprints) + }() + + for { + // Initial wait, also important if there are no FPs yet. + if !s.waitForNextFP(s.fpToSeries.length()) { + return + } + begin := time.Now() + fpIter = s.fpToSeries.fpIter() + for fp := range fpIter { + select { + case memoryFingerprints <- fp: + case <-s.loopStopping: + return + } + s.waitForNextFP(s.fpToSeries.length()) + } + glog.Infof("Completed maintenance sweep through in-memory fingerprints in %v.", time.Since(begin)) + } + }() + + return memoryFingerprints +} + +// cycleThroughArchivedFingerprints returns a channel that emits fingerprints +// for archived series in a throttled fashion. It continues to cycle through all +// archived fingerprints until s.loopStopping is closed. +func (s *memorySeriesStorage) cycleThroughArchivedFingerprints() chan clientmodel.Fingerprint { + archivedFingerprints := make(chan clientmodel.Fingerprint) + go func() { + defer close(archivedFingerprints) + + for { + archivedFPs, err := s.persistence.getFingerprintsModifiedBefore( + clientmodel.TimestampFromTime(time.Now()).Add(-1 * s.purgeAfter), + ) + if err != nil { + glog.Error("Failed to lookup archived fingerprint ranges: ", err) + s.waitForNextFP(0) + continue + } + // Initial wait, also important if there are no FPs yet. 
+ if !s.waitForNextFP(len(archivedFPs)) { + return + } + begin := time.Now() + for _, fp := range archivedFPs { + select { + case archivedFingerprints <- fp: + case <-s.loopStopping: + return + } + s.waitForNextFP(len(archivedFPs)) + } + glog.Infof("Completed maintenance sweep through archived fingerprints in %v.", time.Since(begin)) + } + }() + return archivedFingerprints +} + +func (s *memorySeriesStorage) loop() { + checkpointTicker := time.NewTicker(s.checkpointInterval) + + defer func() { + checkpointTicker.Stop() + glog.Info("Maintenance loop stopped.") + close(s.loopStopped) + }() + + memoryFingerprints := s.cycleThroughMemoryFingerprints() + archivedFingerprints := s.cycleThroughArchivedFingerprints() + +loop: + for { + select { + case <-s.loopStopping: + break loop + case <-checkpointTicker.C: + s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker) + case fp := <-memoryFingerprints: + s.purgeSeries(fp, clientmodel.TimestampFromTime(time.Now()).Add(-1*s.purgeAfter)) + s.maintainSeries(fp) + s.seriesOps.WithLabelValues(memoryMaintenance).Inc() + case fp := <-archivedFingerprints: + s.purgeSeries(fp, clientmodel.TimestampFromTime(time.Now()).Add(-1*s.purgeAfter)) + s.seriesOps.WithLabelValues(archiveMaintenance).Inc() + } + } + // Wait until both channels are closed. + for _ = range memoryFingerprints { + } + for _ = range archivedFingerprints { + } +} + +// maintainSeries closes the head chunk if not touched in a while. It archives a +// series if all chunks are evicted. It evicts chunkDescs if there are too many. +func (s *memorySeriesStorage) maintainSeries(fp clientmodel.Fingerprint) { + var headChunkToPersist *chunkDesc + s.fpLocker.Lock(fp) + defer func() { + s.fpLocker.Unlock(fp) + // Queue outside of lock! 
+ if headChunkToPersist != nil { + s.persistQueue <- persistRequest{fp, headChunkToPersist} + } + }() + + series, ok := s.fpToSeries.get(fp) + if !ok { + return + } + iOldestNotEvicted := -1 + for i, cd := range series.chunkDescs { + if !cd.isEvicted() { + iOldestNotEvicted = i + break + } + } + + // Archive if all chunks are evicted. + if iOldestNotEvicted == -1 { + s.fpToSeries.del(fp) + s.numSeries.Dec() + if err := s.persistence.archiveMetric( + fp, series.metric, series.firstTime(), series.lastTime(), + ); err != nil { + glog.Errorf("Error archiving metric %v: %v", series.metric, err) + } else { + s.seriesOps.WithLabelValues(archive).Inc() + } + return + } + // If we are here, the series is not archived, so check for chunkDesc + // eviction next and then if the head chunk needs to be persisted. + series.evictChunkDescs(iOldestNotEvicted) + if !series.headChunkPersisted && time.Now().Sub(series.head().firstTime().Time()) > headChunkTimeout { + series.headChunkPersisted = true + // Since we cannot modify the head chunk from now on, we + // don't need to bother with cloning anymore. + series.headChunkUsedByIterator = false + headChunkToPersist = series.head() + } +} + +// purgeSeries purges chunks older than beforeTime from a series. If the series +// contains no chunks after the purge, it is dropped entirely. +func (s *memorySeriesStorage) purgeSeries(fp clientmodel.Fingerprint, beforeTime clientmodel.Timestamp) { + s.fpLocker.Lock(fp) + defer s.fpLocker.Unlock(fp) + + if series, ok := s.fpToSeries.get(fp); ok { + // Deal with series in memory. + if !series.firstTime().Before(beforeTime) { + // Oldest sample not old enough. 
+ return + } + newFirstTime, numDropped, allDropped, err := s.persistence.dropChunks(fp, beforeTime) + if err != nil { + glog.Error("Error purging persisted chunks: ", err) + } + numPurged, allPurged := series.purgeOlderThan(beforeTime) + if allPurged && allDropped { + s.fpToSeries.del(fp) + s.numSeries.Dec() + s.seriesOps.WithLabelValues(memoryPurge).Inc() + s.persistence.unindexMetric(fp, series.metric) + } else if series.chunkDescsOffset != -1 { + series.savedFirstTime = newFirstTime + series.chunkDescsOffset += numPurged - numDropped + if series.chunkDescsOffset < 0 { + panic("dropped more chunks from persistence than from memory") + } + } + return + } + // Deal with archived series. + has, firstTime, lastTime, err := s.persistence.hasArchivedMetric(fp) + if err != nil { + glog.Error("Error looking up archived time range: ", err) + return + } + if !has || !firstTime.Before(beforeTime) { + // Oldest sample not old enough, or metric purged or unarchived in the meantime. + return + } + + newFirstTime, _, allDropped, err := s.persistence.dropChunks(fp, beforeTime) + if err != nil { + glog.Error("Error purging persisted chunks: ", err) + } + if allDropped { + if err := s.persistence.dropArchivedMetric(fp); err != nil { + glog.Errorf("Error dropping archived metric for fingerprint %v: %v", fp, err) + return + } + s.seriesOps.WithLabelValues(archivePurge).Inc() + return + } + s.persistence.updateArchivedTimeRange(fp, newFirstTime, lastTime) +} + +// To expose persistQueueCap as metric: +var ( + persistQueueCapDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "persist_queue_capacity"), + "The total capacity of the persist queue.", + nil, nil, + ) + persistQueueCapGauge = prometheus.MustNewConstMetric( + persistQueueCapDesc, prometheus.GaugeValue, persistQueueCap, + ) +) + +// Describe implements prometheus.Collector. 
+func (s *memorySeriesStorage) Describe(ch chan<- *prometheus.Desc) { + s.persistence.Describe(ch) + + ch <- s.persistLatency.Desc() + s.persistErrors.Describe(ch) + ch <- s.persistQueueLength.Desc() + ch <- s.numSeries.Desc() + s.seriesOps.Describe(ch) + ch <- s.ingestedSamplesCount.Desc() + ch <- s.invalidPreloadRequestsCount.Desc() + + ch <- persistQueueCapDesc + + ch <- numMemChunksDesc + ch <- numMemChunkDescsDesc +} + +// Collect implements prometheus.Collector. +func (s *memorySeriesStorage) Collect(ch chan<- prometheus.Metric) { + s.persistence.Collect(ch) + + ch <- s.persistLatency + s.persistErrors.Collect(ch) + ch <- s.persistQueueLength + ch <- s.numSeries + s.seriesOps.Collect(ch) + ch <- s.ingestedSamplesCount + ch <- s.invalidPreloadRequestsCount + + ch <- persistQueueCapGauge + + count := atomic.LoadInt64(&numMemChunks) + ch <- prometheus.MustNewConstMetric(numMemChunksDesc, prometheus.GaugeValue, float64(count)) + count = atomic.LoadInt64(&numMemChunkDescs) + ch <- prometheus.MustNewConstMetric(numMemChunkDescsDesc, prometheus.GaugeValue, float64(count)) +} diff --git a/storage/local/storage_test.go b/storage/local/storage_test.go new file mode 100644 index 0000000000..182a1f5e0d --- /dev/null +++ b/storage/local/storage_test.go @@ -0,0 +1,658 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package local + +import ( + "fmt" + "math/rand" + "testing" + "testing/quick" + "time" + + "github.com/golang/glog" + clientmodel "github.com/prometheus/client_golang/model" + "github.com/prometheus/prometheus/storage/metric" + "github.com/prometheus/prometheus/utility/test" +) + +func TestGetFingerprintsForLabelMatchers(t *testing.T) { + +} + +// TestLoop is just a smoke test for the loop method, if we can switch it on and +// off without disaster. +func TestLoop(t *testing.T) { + samples := make(clientmodel.Samples, 1000) + for i := range samples { + samples[i] = &clientmodel.Sample{ + Timestamp: clientmodel.Timestamp(2 * i), + Value: clientmodel.SampleValue(float64(i) * 0.2), + } + } + directory := test.NewTemporaryDirectory("test_storage", t) + defer directory.Close() + o := &MemorySeriesStorageOptions{ + MemoryChunks: 50, + PersistenceRetentionPeriod: 24 * 7 * time.Hour, + PersistenceStoragePath: directory.Path(), + CheckpointInterval: 250 * time.Millisecond, + } + storage, err := NewMemorySeriesStorage(o) + if err != nil { + t.Fatalf("Error creating storage: %s", err) + } + storage.Start() + storage.AppendSamples(samples) + time.Sleep(time.Second) + storage.Stop() +} + +func TestChunk(t *testing.T) { + samples := make(clientmodel.Samples, 500000) + for i := range samples { + samples[i] = &clientmodel.Sample{ + Timestamp: clientmodel.Timestamp(i), + Value: clientmodel.SampleValue(float64(i) * 0.2), + } + } + s, closer := NewTestStorage(t) + defer closer.Close() + + s.AppendSamples(samples) + + for m := range s.(*memorySeriesStorage).fpToSeries.iter() { + s.(*memorySeriesStorage).fpLocker.Lock(m.fp) + + var values metric.Values + for _, cd := range m.series.chunkDescs { + if cd.isEvicted() { + continue + } + for sample := range cd.chunk.values() { + values = append(values, *sample) + } + } + + for i, v := range values { + if samples[i].Timestamp != v.Timestamp { + t.Errorf("%d. 
Got %v; want %v", i, v.Timestamp, samples[i].Timestamp) + } + if samples[i].Value != v.Value { + t.Errorf("%d. Got %v; want %v", i, v.Value, samples[i].Value) + } + } + s.(*memorySeriesStorage).fpLocker.Unlock(m.fp) + } + glog.Info("test done, closing") +} + +func TestGetValueAtTime(t *testing.T) { + samples := make(clientmodel.Samples, 1000) + for i := range samples { + samples[i] = &clientmodel.Sample{ + Timestamp: clientmodel.Timestamp(2 * i), + Value: clientmodel.SampleValue(float64(i) * 0.2), + } + } + s, closer := NewTestStorage(t) + defer closer.Close() + + s.AppendSamples(samples) + + fp := clientmodel.Metric{}.Fingerprint() + + it := s.NewIterator(fp) + + // #1 Exactly on a sample. + for i, expected := range samples { + actual := it.GetValueAtTime(expected.Timestamp) + + if len(actual) != 1 { + t.Fatalf("1.%d. Expected exactly one result, got %d.", i, len(actual)) + } + if expected.Timestamp != actual[0].Timestamp { + t.Errorf("1.%d. Got %v; want %v", i, actual[0].Timestamp, expected.Timestamp) + } + if expected.Value != actual[0].Value { + t.Errorf("1.%d. Got %v; want %v", i, actual[0].Value, expected.Value) + } + } + + // #2 Between samples. + for i, expected1 := range samples { + if i == len(samples)-1 { + continue + } + expected2 := samples[i+1] + actual := it.GetValueAtTime(expected1.Timestamp + 1) + + if len(actual) != 2 { + t.Fatalf("2.%d. Expected exactly 2 results, got %d.", i, len(actual)) + } + if expected1.Timestamp != actual[0].Timestamp { + t.Errorf("2.%d. Got %v; want %v", i, actual[0].Timestamp, expected1.Timestamp) + } + if expected1.Value != actual[0].Value { + t.Errorf("2.%d. Got %v; want %v", i, actual[0].Value, expected1.Value) + } + if expected2.Timestamp != actual[1].Timestamp { + t.Errorf("2.%d. Got %v; want %v", i, actual[1].Timestamp, expected1.Timestamp) + } + if expected2.Value != actual[1].Value { + t.Errorf("2.%d. 
Got %v; want %v", i, actual[1].Value, expected1.Value) + } + } + + // #3 Corner cases: Just before the first sample, just after the last. + expected := samples[0] + actual := it.GetValueAtTime(expected.Timestamp - 1) + if len(actual) != 1 { + t.Fatalf("3.1. Expected exactly one result, got %d.", len(actual)) + } + if expected.Timestamp != actual[0].Timestamp { + t.Errorf("3.1. Got %v; want %v", actual[0].Timestamp, expected.Timestamp) + } + if expected.Value != actual[0].Value { + t.Errorf("3.1. Got %v; want %v", actual[0].Value, expected.Value) + } + expected = samples[len(samples)-1] + actual = it.GetValueAtTime(expected.Timestamp + 1) + if len(actual) != 1 { + t.Fatalf("3.2. Expected exactly one result, got %d.", len(actual)) + } + if expected.Timestamp != actual[0].Timestamp { + t.Errorf("3.2. Got %v; want %v", actual[0].Timestamp, expected.Timestamp) + } + if expected.Value != actual[0].Value { + t.Errorf("3.2. Got %v; want %v", actual[0].Value, expected.Value) + } +} + +func TestGetRangeValues(t *testing.T) { + samples := make(clientmodel.Samples, 1000) + for i := range samples { + samples[i] = &clientmodel.Sample{ + Timestamp: clientmodel.Timestamp(2 * i), + Value: clientmodel.SampleValue(float64(i) * 0.2), + } + } + s, closer := NewTestStorage(t) + defer closer.Close() + + s.AppendSamples(samples) + + fp := clientmodel.Metric{}.Fingerprint() + + it := s.NewIterator(fp) + + // #1 Zero length interval at sample. + for i, expected := range samples { + actual := it.GetRangeValues(metric.Interval{ + OldestInclusive: expected.Timestamp, + NewestInclusive: expected.Timestamp, + }) + + if len(actual) != 1 { + t.Fatalf("1.%d. Expected exactly one result, got %d.", i, len(actual)) + } + if expected.Timestamp != actual[0].Timestamp { + t.Errorf("1.%d. Got %v; want %v.", i, actual[0].Timestamp, expected.Timestamp) + } + if expected.Value != actual[0].Value { + t.Errorf("1.%d. 
Got %v; want %v.", i, actual[0].Value, expected.Value) + } + } + + // #2 Zero length interval off sample. + for i, expected := range samples { + actual := it.GetRangeValues(metric.Interval{ + OldestInclusive: expected.Timestamp + 1, + NewestInclusive: expected.Timestamp + 1, + }) + + if len(actual) != 0 { + t.Fatalf("2.%d. Expected no result, got %d.", i, len(actual)) + } + } + + // #3 2sec interval around sample. + for i, expected := range samples { + actual := it.GetRangeValues(metric.Interval{ + OldestInclusive: expected.Timestamp - 1, + NewestInclusive: expected.Timestamp + 1, + }) + + if len(actual) != 1 { + t.Fatalf("3.%d. Expected exactly one result, got %d.", i, len(actual)) + } + if expected.Timestamp != actual[0].Timestamp { + t.Errorf("3.%d. Got %v; want %v.", i, actual[0].Timestamp, expected.Timestamp) + } + if expected.Value != actual[0].Value { + t.Errorf("3.%d. Got %v; want %v.", i, actual[0].Value, expected.Value) + } + } + + // #4 2sec interval sample to sample. + for i, expected1 := range samples { + if i == len(samples)-1 { + continue + } + expected2 := samples[i+1] + actual := it.GetRangeValues(metric.Interval{ + OldestInclusive: expected1.Timestamp, + NewestInclusive: expected1.Timestamp + 2, + }) + + if len(actual) != 2 { + t.Fatalf("4.%d. Expected exactly 2 results, got %d.", i, len(actual)) + } + if expected1.Timestamp != actual[0].Timestamp { + t.Errorf("4.%d. Got %v for 1st result; want %v.", i, actual[0].Timestamp, expected1.Timestamp) + } + if expected1.Value != actual[0].Value { + t.Errorf("4.%d. Got %v for 1st result; want %v.", i, actual[0].Value, expected1.Value) + } + if expected2.Timestamp != actual[1].Timestamp { + t.Errorf("4.%d. Got %v for 2nd result; want %v.", i, actual[1].Timestamp, expected2.Timestamp) + } + if expected2.Value != actual[1].Value { + t.Errorf("4.%d. 
Got %v for 2nd result; want %v.", i, actual[1].Value, expected2.Value) + } + } + + // #5 corner cases: Interval ends at first sample, interval starts + // at last sample, interval entirely before/after samples. + expected := samples[0] + actual := it.GetRangeValues(metric.Interval{ + OldestInclusive: expected.Timestamp - 2, + NewestInclusive: expected.Timestamp, + }) + if len(actual) != 1 { + t.Fatalf("5.1. Expected exactly one result, got %d.", len(actual)) + } + if expected.Timestamp != actual[0].Timestamp { + t.Errorf("5.1. Got %v; want %v.", actual[0].Timestamp, expected.Timestamp) + } + if expected.Value != actual[0].Value { + t.Errorf("5.1. Got %v; want %v.", actual[0].Value, expected.Value) + } + expected = samples[len(samples)-1] + actual = it.GetRangeValues(metric.Interval{ + OldestInclusive: expected.Timestamp, + NewestInclusive: expected.Timestamp + 2, + }) + if len(actual) != 1 { + t.Fatalf("5.2. Expected exactly one result, got %d.", len(actual)) + } + if expected.Timestamp != actual[0].Timestamp { + t.Errorf("5.2. Got %v; want %v.", actual[0].Timestamp, expected.Timestamp) + } + if expected.Value != actual[0].Value { + t.Errorf("5.2. Got %v; want %v.", actual[0].Value, expected.Value) + } + firstSample := samples[0] + actual = it.GetRangeValues(metric.Interval{ + OldestInclusive: firstSample.Timestamp - 4, + NewestInclusive: firstSample.Timestamp - 2, + }) + if len(actual) != 0 { + t.Fatalf("5.3. Expected no results, got %d.", len(actual)) + } + lastSample := samples[len(samples)-1] + actual = it.GetRangeValues(metric.Interval{ + OldestInclusive: lastSample.Timestamp + 2, + NewestInclusive: lastSample.Timestamp + 4, + }) + if len(actual) != 0 { + t.Fatalf("5.3. 
Expected no results, got %d.", len(actual)) + } +} + +func TestEvictAndPurgeSeries(t *testing.T) { + samples := make(clientmodel.Samples, 1000) + for i := range samples { + samples[i] = &clientmodel.Sample{ + Timestamp: clientmodel.Timestamp(2 * i), + Value: clientmodel.SampleValue(float64(i) * 0.2), + } + } + s, closer := NewTestStorage(t) + defer closer.Close() + + ms := s.(*memorySeriesStorage) // Going to test the internal purgeSeries method. + + s.AppendSamples(samples) + + fp := clientmodel.Metric{}.Fingerprint() + + // Purge ~half of the chunks. + ms.purgeSeries(fp, 1000) + it := s.NewIterator(fp) + actual := it.GetBoundaryValues(metric.Interval{ + OldestInclusive: 0, + NewestInclusive: 10000, + }) + if len(actual) != 2 { + t.Fatal("expected two results after purging half of series") + } + if actual[0].Timestamp < 800 || actual[0].Timestamp > 1000 { + t.Errorf("1st timestamp out of expected range: %v", actual[0].Timestamp) + } + want := clientmodel.Timestamp(1998) + if actual[1].Timestamp != want { + t.Errorf("2nd timestamp: want %v, got %v", want, actual[1].Timestamp) + } + + // Purge everything. + ms.purgeSeries(fp, 10000) + it = s.NewIterator(fp) + actual = it.GetBoundaryValues(metric.Interval{ + OldestInclusive: 0, + NewestInclusive: 10000, + }) + if len(actual) != 0 { + t.Fatal("expected zero results after purging the whole series") + } + + // Recreate series. + s.AppendSamples(samples) + + series, ok := ms.fpToSeries.get(fp) + if !ok { + t.Fatal("could not find series") + } + + // Persist head chunk so we can safely archive. + series.headChunkPersisted = true + ms.persistQueue <- persistRequest{fp, series.head()} + time.Sleep(time.Second) // Give time for persisting to happen. + + // Archive metrics. 
+ ms.fpToSeries.del(fp) + if err := ms.persistence.archiveMetric( + fp, series.metric, series.firstTime(), series.lastTime(), + ); err != nil { + t.Fatal(err) + } + + archived, _, _, err := ms.persistence.hasArchivedMetric(fp) + if err != nil { + t.Fatal(err) + } + if !archived { + t.Fatal("not archived") + } + + // Purge ~half of the chunks of an archived series. + ms.purgeSeries(fp, 1000) + archived, _, _, err = ms.persistence.hasArchivedMetric(fp) + if err != nil { + t.Fatal(err) + } + if !archived { + t.Fatal("archived series dropped although only half of the chunks purged") + } + + // Purge everything. + ms.purgeSeries(fp, 10000) + archived, _, _, err = ms.persistence.hasArchivedMetric(fp) + if err != nil { + t.Fatal(err) + } + if archived { + t.Fatal("archived series not dropped") + } +} + +func BenchmarkAppend(b *testing.B) { + samples := make(clientmodel.Samples, b.N) + for i := range samples { + samples[i] = &clientmodel.Sample{ + Metric: clientmodel.Metric{ + clientmodel.MetricNameLabel: clientmodel.LabelValue(fmt.Sprintf("test_metric_%d", i%10)), + "label1": clientmodel.LabelValue(fmt.Sprintf("test_metric_%d", i%10)), + "label2": clientmodel.LabelValue(fmt.Sprintf("test_metric_%d", i%10)), + }, + Timestamp: clientmodel.Timestamp(i), + Value: clientmodel.SampleValue(i), + } + } + b.ResetTimer() + s, closer := NewTestStorage(b) + defer closer.Close() + + s.AppendSamples(samples) +} + +// Append a large number of random samples and then check if we can get them out +// of the storage alright. +func TestFuzz(t *testing.T) { + if testing.Short() { + t.Skip("Skipping test in short mode.") + } + + check := func(seed int64) bool { + rand.Seed(seed) + s, c := NewTestStorage(t) + defer c.Close() + + samples := createRandomSamples() + s.AppendSamples(samples) + + return verifyStorage(t, s, samples, 24*7*time.Hour) + } + + if err := quick.Check(check, nil); err != nil { + t.Fatal(err) + } +} + +// BenchmarkFuzz is the benchmark version of TestFuzz. 
However, it will run +// several append and verify operations in parallel, if GOMAXPROC is set +// accordingly. Also, the storage options are set such that evictions, +// checkpoints, and purging will happen concurrently, too. This benchmark will +// have a very long runtime (up to minutes). You can use it as an actual +// benchmark. Run it like this: +// +// go test -cpu 1,2,4,8 -short -bench BenchmarkFuzz -benchmem +// +// You can also use it as a test for races. In that case, run it like this (will +// make things even slower): +// +// go test -race -cpu 8 -short -bench BenchmarkFuzz +func BenchmarkFuzz(b *testing.B) { + b.StopTimer() + rand.Seed(42) + directory := test.NewTemporaryDirectory("test_storage", b) + defer directory.Close() + o := &MemorySeriesStorageOptions{ + MemoryChunks: 100, + PersistenceRetentionPeriod: time.Hour, + PersistenceStoragePath: directory.Path(), + CheckpointInterval: 3 * time.Second, + } + s, err := NewMemorySeriesStorage(o) + if err != nil { + b.Fatalf("Error creating storage: %s", err) + } + s.Start() + defer s.Stop() + b.StartTimer() + + b.RunParallel(func(pb *testing.PB) { + var allSamples clientmodel.Samples + for pb.Next() { + newSamples := createRandomSamples() + allSamples = append(allSamples, newSamples[:len(newSamples)/2]...) + s.AppendSamples(newSamples[:len(newSamples)/2]) + verifyStorage(b, s, allSamples, o.PersistenceRetentionPeriod) + allSamples = append(allSamples, newSamples[len(newSamples)/2:]...) 
+ s.AppendSamples(newSamples[len(newSamples)/2:]) + verifyStorage(b, s, allSamples, o.PersistenceRetentionPeriod) + } + }) +} + +func createRandomSamples() clientmodel.Samples { + type valueCreator func() clientmodel.SampleValue + type deltaApplier func(clientmodel.SampleValue) clientmodel.SampleValue + + var ( + maxMetrics = 5 + maxCycles = 500 + maxStreakLength = 500 + maxTimeDelta = 1000 + maxTimeDeltaFactor = 10 + timestamp = clientmodel.Now() - clientmodel.Timestamp(maxTimeDelta*maxTimeDeltaFactor*maxCycles*maxStreakLength/16) // So that some timestamps are in the future. + generators = []struct { + createValue valueCreator + applyDelta []deltaApplier + }{ + { // "Boolean". + createValue: func() clientmodel.SampleValue { + return clientmodel.SampleValue(rand.Intn(2)) + }, + applyDelta: []deltaApplier{ + func(_ clientmodel.SampleValue) clientmodel.SampleValue { + return clientmodel.SampleValue(rand.Intn(2)) + }, + }, + }, + { // Integer with int deltas of various byte length. + createValue: func() clientmodel.SampleValue { + return clientmodel.SampleValue(rand.Int63() - 1<<62) + }, + applyDelta: []deltaApplier{ + func(v clientmodel.SampleValue) clientmodel.SampleValue { + return clientmodel.SampleValue(rand.Intn(1<<8) - 1<<7 + int(v)) + }, + func(v clientmodel.SampleValue) clientmodel.SampleValue { + return clientmodel.SampleValue(rand.Intn(1<<16) - 1<<15 + int(v)) + }, + func(v clientmodel.SampleValue) clientmodel.SampleValue { + return clientmodel.SampleValue(rand.Intn(1<<32) - 1<<31 + int(v)) + }, + }, + }, + { // Float with float32 and float64 deltas. 
+ createValue: func() clientmodel.SampleValue { + return clientmodel.SampleValue(rand.NormFloat64()) + }, + applyDelta: []deltaApplier{ + func(v clientmodel.SampleValue) clientmodel.SampleValue { + return v + clientmodel.SampleValue(float32(rand.NormFloat64())) + }, + func(v clientmodel.SampleValue) clientmodel.SampleValue { + return v + clientmodel.SampleValue(rand.NormFloat64()) + }, + }, + }, + } + ) + + result := clientmodel.Samples{} + + metrics := []clientmodel.Metric{} + for n := rand.Intn(maxMetrics); n >= 0; n-- { + metrics = append(metrics, clientmodel.Metric{ + clientmodel.LabelName(fmt.Sprintf("labelname_%d", n+1)): clientmodel.LabelValue(fmt.Sprintf("labelvalue_%d", rand.Int())), + }) + } + + for n := rand.Intn(maxCycles); n >= 0; n-- { + // Pick a metric for this cycle. + metric := metrics[rand.Intn(len(metrics))] + timeDelta := rand.Intn(maxTimeDelta) + 1 + generator := generators[rand.Intn(len(generators))] + createValue := generator.createValue + applyDelta := generator.applyDelta[rand.Intn(len(generator.applyDelta))] + incTimestamp := func() { timestamp += clientmodel.Timestamp(timeDelta * (rand.Intn(maxTimeDeltaFactor) + 1)) } + switch rand.Intn(4) { + case 0: // A single sample. + result = append(result, &clientmodel.Sample{ + Metric: metric, + Value: createValue(), + Timestamp: timestamp, + }) + incTimestamp() + case 1: // A streak of random sample values. + for n := rand.Intn(maxStreakLength); n >= 0; n-- { + result = append(result, &clientmodel.Sample{ + Metric: metric, + Value: createValue(), + Timestamp: timestamp, + }) + incTimestamp() + } + case 2: // A streak of sample values with incremental changes. + value := createValue() + for n := rand.Intn(maxStreakLength); n >= 0; n-- { + result = append(result, &clientmodel.Sample{ + Metric: metric, + Value: value, + Timestamp: timestamp, + }) + incTimestamp() + value = applyDelta(value) + } + case 3: // A streak of constant sample values. 
+ value := createValue() + for n := rand.Intn(maxStreakLength); n >= 0; n-- { + result = append(result, &clientmodel.Sample{ + Metric: metric, + Value: value, + Timestamp: timestamp, + }) + incTimestamp() + } + } + } + + return result +} + +func verifyStorage(t testing.TB, s Storage, samples clientmodel.Samples, maxAge time.Duration) bool { + result := true + for _, i := range rand.Perm(len(samples)) { + sample := samples[i] + if sample.Timestamp.Before(clientmodel.TimestampFromTime(time.Now().Add(-maxAge))) { + continue + // TODO: Once we have a guaranteed cutoff at the + // retention period, we can verify here that no results + // are returned. + } + fp := sample.Metric.Fingerprint() + p := s.NewPreloader() + p.PreloadRange(fp, sample.Timestamp, sample.Timestamp, time.Hour) + found := s.NewIterator(fp).GetValueAtTime(sample.Timestamp) + if len(found) != 1 { + t.Errorf("Sample %#v: Expected exactly one value, found %d.", sample, len(found)) + result = false + p.Close() + continue + } + want := float64(sample.Value) + got := float64(found[0].Value) + if want != got || sample.Timestamp != found[0].Timestamp { + t.Errorf( + "Value (or timestamp) mismatch, want %f (at time %v), got %f (at time %v).", + want, sample.Timestamp, got, found[0].Timestamp, + ) + result = false + } + p.Close() + } + return result +} diff --git a/storage/local/test_helpers.go b/storage/local/test_helpers.go new file mode 100644 index 0000000000..7e4538387b --- /dev/null +++ b/storage/local/test_helpers.go @@ -0,0 +1,58 @@ +// Copyright 2014 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "testing" + "time" + + "github.com/prometheus/prometheus/utility/test" +) + +type testStorageCloser struct { + storage Storage + directory test.Closer +} + +func (t *testStorageCloser) Close() { + t.storage.Stop() + t.directory.Close() +} + +// NewTestStorage creates a storage instance backed by files in a temporary +// directory. The returned storage is already in serving state. Upon closing the +// returned test.Closer, the temporary directory is cleaned up. +func NewTestStorage(t testing.TB) (Storage, test.Closer) { + directory := test.NewTemporaryDirectory("test_storage", t) + o := &MemorySeriesStorageOptions{ + MemoryChunks: 1000000, + PersistenceRetentionPeriod: 24 * 7 * time.Hour, + PersistenceStoragePath: directory.Path(), + CheckpointInterval: time.Hour, + } + storage, err := NewMemorySeriesStorage(o) + if err != nil { + directory.Close() + t.Fatalf("Error creating storage: %s", err) + } + + storage.Start() + + closer := &testStorageCloser{ + storage: storage, + directory: directory, + } + + return storage, closer +} diff --git a/storage/metric/interface.go b/storage/metric/interface.go deleted file mode 100644 index 1bdcece9f8..0000000000 --- a/storage/metric/interface.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright 2013 Prometheus Team -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package metric - -import ( - "time" - - clientmodel "github.com/prometheus/client_golang/model" - - "github.com/prometheus/prometheus/stats" -) - -// Persistence is a system for storing metric samples in a persistence -// layer. -type Persistence interface { - // A storage system may rely on external resources and thusly should be - // closed when finished. - Close() - - // Record a group of new samples in the storage layer. Multiple samples for - // the same fingerprint need to be submitted in chronological order, from - // oldest to newest (both in the same call to AppendSamples and across - // multiple calls). - AppendSamples(clientmodel.Samples) error - - // Get all of the metric fingerprints that are associated with the - // provided label matchers. - GetFingerprintsForLabelMatchers(LabelMatchers) (clientmodel.Fingerprints, error) - - // Get all of the label values that are associated with a given label name. - GetLabelValuesForLabelName(clientmodel.LabelName) (clientmodel.LabelValues, error) - - // Get the metric associated with the provided fingerprint. - GetMetricForFingerprint(*clientmodel.Fingerprint) (clientmodel.Metric, error) - - // Get all label values that are associated with a given label name. - GetAllValuesForLabel(clientmodel.LabelName) (clientmodel.LabelValues, error) -} - -// PreloadingPersistence is a Persistence which supports building -// preloaded views. -type PreloadingPersistence interface { - Persistence - // NewViewRequestBuilder furnishes a ViewRequestBuilder for remarking what - // types of queries to perform. 
- NewViewRequestBuilder() ViewRequestBuilder -} - -// View provides a view of the values in the datastore subject to the request -// of a preloading operation. -type View interface { - // Get the two values that are immediately adjacent to a given time. - GetValueAtTime(*clientmodel.Fingerprint, clientmodel.Timestamp) Values - // Get the boundary values of an interval: the first value older than - // the interval start, and the first value younger than the interval - // end. - GetBoundaryValues(*clientmodel.Fingerprint, Interval) Values - // Get all values contained within a provided interval. - GetRangeValues(*clientmodel.Fingerprint, Interval) Values -} - -// ViewablePersistence is a Persistence that is able to present the -// samples it has stored as a View. -type ViewablePersistence interface { - Persistence - View -} - -// ViewRequestBuilder represents the summation of all datastore queries that -// shall be performed to extract values. Call the Get... methods to record the -// queries. Once done, use HasOp and PopOp to retrieve the resulting -// operations. The operations are sorted by their fingerprint (and, for equal -// fingerprints, by the StartsAt timestamp of their operation). -type ViewRequestBuilder interface { - // GetMetricAtTime records a query to get, for the given Fingerprint, - // either the value at that time if there is a match or the one or two - // values adjacent thereto. - GetMetricAtTime(fingerprint *clientmodel.Fingerprint, time clientmodel.Timestamp) - // GetMetricAtInterval records a query to get, for the given - // Fingerprint, either the value at that interval from From through - // Through if there is a match or the one or two values adjacent for - // each point. - GetMetricAtInterval(fingerprint *clientmodel.Fingerprint, from, through clientmodel.Timestamp, interval time.Duration) - // GetMetricRange records a query to get, for the given Fingerprint, the - // values that occur inclusively from From through Through. 
- GetMetricRange(fingerprint *clientmodel.Fingerprint, from, through clientmodel.Timestamp) - // GetMetricRangeAtInterval records a query to get value ranges at - // intervals for the given Fingerprint: - // - // |----| |----| |----| |----| - // ^ ^ ^ ^ ^ ^ - // | \------------/ \----/ | - // from interval rangeDuration through - GetMetricRangeAtInterval(fp *clientmodel.Fingerprint, from, through clientmodel.Timestamp, interval, rangeDuration time.Duration) - // Execute materializes a View, subject to a timeout. - Execute(deadline time.Duration, queryStats *stats.TimerGroup) (View, error) - // PopOp emits the next operation in the queue (sorted by - // fingerprint). If called while HasOps returns false, the - // behavior is undefined. - PopOp() Op - // HasOp returns true if there is at least one more operation in the - // queue. - HasOp() bool -} - -// Op encapsulates a primitive query operation. -type Op interface { - // Fingerprint returns the fingerprint of the metric this operation - // operates on. - Fingerprint() *clientmodel.Fingerprint - // ExtractSamples extracts samples from a stream of values and advances - // the operation time. - ExtractSamples(Values) Values - // Consumed returns whether the operator has consumed all data it needs. - Consumed() bool - // CurrentTime gets the current operation time. In a newly created op, - // this is the starting time of the operation. During ongoing execution - // of the op, the current time is advanced accordingly. Once no - // subsequent work associated with the operation remains, nil is - // returned. - CurrentTime() clientmodel.Timestamp -} - -// CurationState contains high-level curation state information for the -// heads-up-display. 
-type CurationState struct { - Active bool - Name string - Limit time.Duration - Fingerprint *clientmodel.Fingerprint -} diff --git a/storage/metric/sample.go b/storage/metric/sample.go index 4061cb1d0c..2291fd743f 100644 --- a/storage/metric/sample.go +++ b/storage/metric/sample.go @@ -14,16 +14,14 @@ package metric import ( - "bytes" "fmt" - "sort" clientmodel "github.com/prometheus/client_golang/model" ) // MarshalJSON implements json.Marshaler. func (s SamplePair) MarshalJSON() ([]byte, error) { - return []byte(fmt.Sprintf("{\"Value\": \"%f\", \"Timestamp\": %d}", s.Value, s.Timestamp)), nil + return []byte(fmt.Sprintf("{\"Value\": \"%f\", \"Timestamp\": %s}", s.Value, s.Timestamp.String())), nil } // SamplePair pairs a SampleValue with a Timestamp. @@ -46,96 +44,9 @@ func (s *SamplePair) String() string { return fmt.Sprintf("SamplePair at %s of %s", s.Timestamp, s.Value) } -// Values is a sortable slice of SamplePairs (as in: it implements -// sort.Interface). Sorting happens by Timestamp. +// Values is a slice of SamplePairs. type Values []SamplePair -// Len implements sort.Interface. -func (v Values) Len() int { - return len(v) -} - -// Less implements sort.Interface. -func (v Values) Less(i, j int) bool { - return v[i].Timestamp.Before(v[j].Timestamp) -} - -// Swap implements sort.Interface. -func (v Values) Swap(i, j int) { - v[i], v[j] = v[j], v[i] -} - -// Equal returns true if these Values are of the same length as o, and each -// value is equal to the corresponding value in o (i.e. at the same index). -func (v Values) Equal(o Values) bool { - if len(v) != len(o) { - return false - } - - for i, expected := range v { - if !expected.Equal(&o[i]) { - return false - } - } - - return true -} - -// FirstTimeAfter indicates whether the first sample of a set is after a given -// timestamp. 
-func (v Values) FirstTimeAfter(t clientmodel.Timestamp) bool { - return v[0].Timestamp.After(t) -} - -// LastTimeBefore indicates whether the last sample of a set is before a given -// timestamp. -func (v Values) LastTimeBefore(t clientmodel.Timestamp) bool { - return v[len(v)-1].Timestamp.Before(t) -} - -// InsideInterval indicates whether a given range of sorted values could contain -// a value for a given time. -func (v Values) InsideInterval(t clientmodel.Timestamp) bool { - switch { - case v.Len() == 0: - return false - case t.Before(v[0].Timestamp): - return false - case !v[v.Len()-1].Timestamp.Before(t): - return false - default: - return true - } -} - -// TruncateBefore returns a subslice of the original such that extraneous -// samples in the collection that occur before the provided time are -// dropped. The original slice is not mutated. -func (v Values) TruncateBefore(t clientmodel.Timestamp) Values { - index := sort.Search(len(v), func(i int) bool { - timestamp := v[i].Timestamp - - return !timestamp.Before(t) - }) - - return v[index:] -} - -func (v Values) String() string { - buffer := bytes.Buffer{} - - fmt.Fprintf(&buffer, "[") - for i, value := range v { - fmt.Fprintf(&buffer, "%d. %s", i, value) - if i != len(v)-1 { - fmt.Fprintf(&buffer, "\n") - } - } - fmt.Fprintf(&buffer, "]") - - return buffer.String() -} - // SampleSet is Values with a Metric attached. type SampleSet struct { Metric clientmodel.Metric diff --git a/storage/metric/tiered/compaction_regression_test.go b/storage/metric/tiered/compaction_regression_test.go deleted file mode 100644 index 1a78f34ed2..0000000000 --- a/storage/metric/tiered/compaction_regression_test.go +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright 2013 Prometheus Team -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tiered - -import ( - "flag" - "fmt" - "sort" - "testing" - "time" - - "github.com/prometheus/prometheus/storage" - "github.com/prometheus/prometheus/storage/metric" - - clientmodel "github.com/prometheus/client_golang/model" -) - -type nopCurationStateUpdater struct{} - -func (n *nopCurationStateUpdater) UpdateCurationState(*metric.CurationState) {} - -func generateTestSamples(endTime clientmodel.Timestamp, numTs int, samplesPerTs int, interval time.Duration) clientmodel.Samples { - samples := make(clientmodel.Samples, 0, numTs*samplesPerTs) - - startTime := endTime.Add(-interval * time.Duration(samplesPerTs-1)) - for ts := 0; ts < numTs; ts++ { - metric := clientmodel.Metric{} - metric[clientmodel.MetricNameLabel] = clientmodel.LabelValue(fmt.Sprintf("metric_%d", ts)) - for i := 0; i < samplesPerTs; i++ { - sample := &clientmodel.Sample{ - Metric: metric, - Value: clientmodel.SampleValue(ts + 1000*i), - Timestamp: startTime.Add(interval * time.Duration(i)), - } - samples = append(samples, sample) - } - } - sort.Sort(samples) - return samples -} - -type compactionChecker struct { - t *testing.T - sampleIdx int - numChunks int - expectedSamples clientmodel.Samples -} - -func (c *compactionChecker) Operate(key, value interface{}) *storage.OperatorError { - c.numChunks++ - sampleKey := key.(*SampleKey) - if sampleKey.FirstTimestamp.After(sampleKey.LastTimestamp) { - c.t.Fatalf("Chunk FirstTimestamp (%v) is after LastTimestamp (%v): %v", sampleKey.FirstTimestamp.Unix(), sampleKey.LastTimestamp.Unix(), sampleKey) - } - fp := 
&clientmodel.Fingerprint{} - for _, sample := range value.(metric.Values) { - if sample.Timestamp.Before(sampleKey.FirstTimestamp) || sample.Timestamp.After(sampleKey.LastTimestamp) { - c.t.Fatalf("Sample not within chunk boundaries: chunk FirstTimestamp (%v), chunk LastTimestamp (%v) vs. sample Timestamp (%v)", sampleKey.FirstTimestamp.Unix(), sampleKey.LastTimestamp.Unix(), sample.Timestamp) - } - - expected := c.expectedSamples[c.sampleIdx] - - fp.LoadFromMetric(expected.Metric) - if !sampleKey.Fingerprint.Equal(fp) { - c.t.Fatalf("%d. Expected fingerprint %s, got %s", c.sampleIdx, fp, sampleKey.Fingerprint) - } - - sp := &metric.SamplePair{ - Value: expected.Value, - Timestamp: expected.Timestamp, - } - if !sample.Equal(sp) { - c.t.Fatalf("%d. Expected sample %s, got %s", c.sampleIdx, sp, sample) - } - c.sampleIdx++ - } - return nil -} - -func checkStorageSaneAndEquivalent(t *testing.T, name string, ts *TieredStorage, samples clientmodel.Samples, expectedNumChunks int) { - cc := &compactionChecker{ - expectedSamples: samples, - t: t, - } - entire, err := ts.DiskStorage.MetricSamples.ForEach(&MetricSamplesDecoder{}, &AcceptAllFilter{}, cc) - if err != nil { - t.Fatalf("%s: Error checking samples: %s", name, err) - } - if !entire { - t.Fatalf("%s: Didn't scan entire corpus", name) - } - if cc.numChunks != expectedNumChunks { - t.Fatalf("%s: Expected %d chunks, got %d", name, expectedNumChunks, cc.numChunks) - } -} - -type compactionTestScenario struct { - leveldbChunkSize int - numTimeseries int - samplesPerTs int - - ignoreYoungerThan time.Duration - maximumMutationPoolBatch int - minimumGroupSize int - - uncompactedChunks int - compactedChunks int -} - -func (s compactionTestScenario) test(t *testing.T) { - defer flag.Set("leveldbChunkSize", flag.Lookup("leveldbChunkSize").Value.String()) - flag.Set("leveldbChunkSize", fmt.Sprintf("%d", s.leveldbChunkSize)) - - ts, closer := NewTestTieredStorage(t) - defer closer.Close() - - // 1. Store test values. 
- samples := generateTestSamples(testInstant, s.numTimeseries, s.samplesPerTs, time.Minute) - ts.AppendSamples(samples) - ts.Flush() - - // 2. Check sanity of uncompacted values. - checkStorageSaneAndEquivalent(t, "Before compaction", ts, samples, s.uncompactedChunks) - - // 3. Compact test storage. - processor := NewCompactionProcessor(&CompactionProcessorOptions{ - MaximumMutationPoolBatch: s.maximumMutationPoolBatch, - MinimumGroupSize: s.minimumGroupSize, - }) - defer processor.Close() - - curator := NewCurator(&CuratorOptions{ - Stop: make(chan struct{}), - ViewQueue: ts.ViewQueue, - }) - defer curator.Close() - - err := curator.Run(s.ignoreYoungerThan, testInstant, processor, ts.DiskStorage.CurationRemarks, ts.DiskStorage.MetricSamples, ts.DiskStorage.MetricHighWatermarks, &nopCurationStateUpdater{}) - if err != nil { - t.Fatalf("Failed to run curator: %s", err) - } - - // 4. Check sanity of compacted values. - checkStorageSaneAndEquivalent(t, "After compaction", ts, samples, s.compactedChunks) -} - -func TestCompaction(t *testing.T) { - scenarios := []compactionTestScenario{ - // BEFORE COMPACTION: - // - // Chunk size | Fingerprint | Samples - // 5 | A | 1 .. 5 - // 5 | A | 6 .. 10 - // 5 | A | 11 .. 15 - // 5 | B | 1 .. 5 - // 5 | B | 6 .. 10 - // 5 | B | 11 .. 15 - // 5 | C | 1 .. 5 - // 5 | C | 6 .. 10 - // 5 | C | 11 .. 15 - // - // AFTER COMPACTION: - // - // Chunk size | Fingerprint | Samples - // 10 | A | 1 .. 10 - // 5 | A | 11 .. 15 - // 10 | B | 1 .. 10 - // 5 | B | 11 .. 15 - // 10 | C | 1 .. 10 - // 5 | C | 11 .. 15 - { - leveldbChunkSize: 5, - numTimeseries: 3, - samplesPerTs: 15, - - ignoreYoungerThan: time.Minute, - maximumMutationPoolBatch: 30, - minimumGroupSize: 10, - - uncompactedChunks: 9, - compactedChunks: 6, - }, - // BEFORE COMPACTION: - // - // Chunk size | Fingerprint | Samples - // 5 | A | 1 .. 5 - // 5 | A | 6 .. 10 - // 5 | A | 11 .. 15 - // 5 | B | 1 .. 5 - // 5 | B | 6 .. 10 - // 5 | B | 11 .. 15 - // 5 | C | 1 .. 
5 - // 5 | C | 6 .. 10 - // 5 | C | 11 .. 15 - // - // AFTER COMPACTION: - // - // Chunk size | Fingerprint | Samples - // 10 | A | 1 .. 15 - // 10 | B | 1 .. 15 - // 10 | C | 1 .. 15 - { - leveldbChunkSize: 5, - numTimeseries: 3, - samplesPerTs: 15, - - ignoreYoungerThan: time.Minute, - maximumMutationPoolBatch: 30, - minimumGroupSize: 30, - - uncompactedChunks: 9, - compactedChunks: 3, - }, - // BEFORE COMPACTION: - // - // Chunk size | Fingerprint | Samples - // 5 | A | 1 .. 5 - // 5 | A | 6 .. 10 - // 5 | A | 11 .. 15 - // 5 | A | 16 .. 20 - // 5 | B | 1 .. 5 - // 5 | B | 6 .. 10 - // 5 | B | 11 .. 15 - // 5 | B | 16 .. 20 - // 5 | C | 1 .. 5 - // 5 | C | 6 .. 10 - // 5 | C | 11 .. 15 - // 5 | C | 16 .. 20 - // - // AFTER COMPACTION: - // - // Chunk size | Fingerprint | Samples - // 10 | A | 1 .. 15 - // 10 | A | 16 .. 20 - // 10 | B | 1 .. 15 - // 10 | B | 16 .. 20 - // 10 | C | 1 .. 15 - // 10 | C | 16 .. 20 - { - leveldbChunkSize: 5, - numTimeseries: 3, - samplesPerTs: 20, - - ignoreYoungerThan: time.Minute, - maximumMutationPoolBatch: 30, - minimumGroupSize: 10, - - uncompactedChunks: 12, - compactedChunks: 6, - }, - } - - for _, s := range scenarios { - s.test(t) - } -} diff --git a/storage/metric/tiered/curator.go b/storage/metric/tiered/curator.go deleted file mode 100644 index ace1d58eb5..0000000000 --- a/storage/metric/tiered/curator.go +++ /dev/null @@ -1,509 +0,0 @@ -// Copyright 2013 Prometheus Team -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -package tiered - -import ( - "bytes" - "errors" - "fmt" - "strings" - "time" - - "code.google.com/p/goprotobuf/proto" - "github.com/golang/glog" - "github.com/prometheus/client_golang/prometheus" - - clientmodel "github.com/prometheus/client_golang/model" - - "github.com/prometheus/prometheus/storage" - "github.com/prometheus/prometheus/storage/metric" - "github.com/prometheus/prometheus/storage/raw" - "github.com/prometheus/prometheus/storage/raw/leveldb" - - dto "github.com/prometheus/prometheus/model/generated" -) - -const curationYieldPeriod = 250 * time.Millisecond - -var errIllegalIterator = errors.New("iterator invalid") - -// Constants for instrumentation. -const ( - cutOff = "recency_threshold" - processorName = "processor" -) - -var ( - curationDurations = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ - Namespace: namespace, - Name: "curation_durations_milliseconds", - Help: "Histogram of time spent in curation.", - Objectives: []float64{0.01, 0.05, 0.5, 0.90, 0.99}, - }, - []string{cutOff, processorName, result}, - ) - curationFilterOperations = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: namespace, - Name: "curation_filter_operations_total", - Help: "The number of curation filter operations completed.", - }, - []string{cutOff, processorName, result}, - ) -) - -func init() { - prometheus.MustRegister(curationDurations) - prometheus.MustRegister(curationFilterOperations) -} - -// CurationStateUpdater receives updates about the curation state. -type CurationStateUpdater interface { - UpdateCurationState(*metric.CurationState) -} - -// CuratorOptions bundles the parameters needed to create a Curator. -type CuratorOptions struct { - Stop chan struct{} - - ViewQueue chan viewJob -} - -// Curator is responsible for effectuating a given curation policy across the -// stored samples on-disk. 
This is useful to compact sparse sample values into -// single sample entities to reduce keyspace load on the datastore. -type Curator struct { - stop chan struct{} - - viewQueue chan viewJob - - dtoSampleKeys *dtoSampleKeyList - sampleKeys *sampleKeyList -} - -// NewCurator returns an initialized Curator. -func NewCurator(o *CuratorOptions) *Curator { - return &Curator{ - stop: o.Stop, - - viewQueue: o.ViewQueue, - - dtoSampleKeys: newDtoSampleKeyList(10), - sampleKeys: newSampleKeyList(10), - } -} - -// watermarkScanner converts (dto.Fingerprint, dto.MetricHighWatermark) doubles -// into (model.Fingerprint, model.Watermark) doubles. -// -// watermarkScanner determines whether to include or exclude candidate -// values from the curation process by virtue of how old the high watermark is. -// -// watermarkScanner scans over the curator.samples table for metrics whose -// high watermark has been determined to be allowable for curation. This type -// is individually responsible for compaction. -// -// The scanning starts from CurationRemark.LastCompletionTimestamp and goes -// forward until the stop point or end of the series is reached. -type watermarkScanner struct { - // curationState is the data store for curation remarks. - curationState CurationRemarker - // ignoreYoungerThan is passed into the curation remark for the given series. - ignoreYoungerThan time.Duration - // processor is responsible for executing a given stategy on the - // to-be-operated-on series. - processor Processor - // sampleIterator is a snapshotted iterator for the time series. - sampleIterator leveldb.Iterator - // samples - samples raw.Persistence - // stopAt is a cue for when to stop mutating a given series. - stopAt clientmodel.Timestamp - - // stop functions as the global stop channel for all future operations. - stop chan struct{} - // status is the outbound channel for notifying the status page of its state. 
- status CurationStateUpdater - - firstBlock, lastBlock *SampleKey - - ViewQueue chan viewJob - - dtoSampleKeys *dtoSampleKeyList - sampleKeys *sampleKeyList -} - -// Run facilitates the curation lifecycle. -// -// recencyThreshold represents the most recent time up to which values will be -// curated. -// curationState is the on-disk store where the curation remarks are made for -// how much progress has been made. -func (c *Curator) Run(ignoreYoungerThan time.Duration, instant clientmodel.Timestamp, processor Processor, curationState CurationRemarker, samples *leveldb.LevelDBPersistence, watermarks HighWatermarker, status CurationStateUpdater) (err error) { - defer func(t time.Time) { - duration := float64(time.Since(t) / time.Millisecond) - - labels := prometheus.Labels{ - cutOff: fmt.Sprint(ignoreYoungerThan), - processorName: processor.Name(), - result: success, - } - if err != nil { - labels[result] = failure - } - - curationDurations.With(labels).Observe(duration) - }(time.Now()) - - defer status.UpdateCurationState(&metric.CurationState{Active: false}) - - iterator, err := samples.NewIterator(true) - if err != nil { - return err - } - defer iterator.Close() - - if !iterator.SeekToLast() { - glog.Info("Empty database; skipping curation.") - - return - } - - keyDto, _ := c.dtoSampleKeys.Get() - defer c.dtoSampleKeys.Give(keyDto) - - lastBlock, _ := c.sampleKeys.Get() - defer c.sampleKeys.Give(lastBlock) - - if err := iterator.Key(keyDto); err != nil { - panic(err) - } - - lastBlock.Load(keyDto) - - if !iterator.SeekToFirst() { - glog.Info("Empty database; skipping curation.") - - return - } - - firstBlock, _ := c.sampleKeys.Get() - defer c.sampleKeys.Give(firstBlock) - - if err := iterator.Key(keyDto); err != nil { - panic(err) - } - - firstBlock.Load(keyDto) - - scanner := &watermarkScanner{ - curationState: curationState, - ignoreYoungerThan: ignoreYoungerThan, - processor: processor, - status: status, - stop: c.stop, - stopAt: instant.Add(-1 * 
ignoreYoungerThan), - - sampleIterator: iterator, - samples: samples, - - firstBlock: firstBlock, - lastBlock: lastBlock, - - ViewQueue: c.viewQueue, - - dtoSampleKeys: c.dtoSampleKeys, - sampleKeys: c.sampleKeys, - } - - // Right now, the ability to stop a curation is limited to the beginning of - // each fingerprint cycle. It is impractical to cease the work once it has - // begun for a given series. - _, err = watermarks.ForEach(scanner, scanner, scanner) - - return -} - -// Close needs to be called to cleanly dispose of a curator. -func (c *Curator) Close() { - c.dtoSampleKeys.Close() - c.sampleKeys.Close() -} - -func (w *watermarkScanner) DecodeKey(in interface{}) (interface{}, error) { - key := &dto.Fingerprint{} - bytes := in.([]byte) - - if err := proto.Unmarshal(bytes, key); err != nil { - return nil, err - } - - fingerprint := &clientmodel.Fingerprint{} - loadFingerprint(fingerprint, key) - - return fingerprint, nil -} - -func (w *watermarkScanner) DecodeValue(in interface{}) (interface{}, error) { - value := &dto.MetricHighWatermark{} - bytes := in.([]byte) - - if err := proto.Unmarshal(bytes, value); err != nil { - return nil, err - } - - watermark := &watermarks{} - watermark.load(value) - - return watermark, nil -} - -func (w *watermarkScanner) shouldStop() bool { - select { - case _, ok := <-w.stop: - if ok { - panic("channel should be closed only") - } - return true - default: - return false - } -} - -func (w *watermarkScanner) Filter(key, value interface{}) (r storage.FilterResult) { - fingerprint := key.(*clientmodel.Fingerprint) - - defer func() { - labels := prometheus.Labels{ - cutOff: fmt.Sprint(w.ignoreYoungerThan), - result: strings.ToLower(r.String()), - processorName: w.processor.Name(), - } - - curationFilterOperations.With(labels).Inc() - - w.status.UpdateCurationState(&metric.CurationState{ - Active: true, - Name: w.processor.Name(), - Limit: w.ignoreYoungerThan, - Fingerprint: fingerprint, - }) - }() - - if w.shouldStop() { - return 
storage.Stop - } - - k := &curationKey{ - Fingerprint: fingerprint, - ProcessorMessageRaw: w.processor.Signature(), - ProcessorMessageTypeName: w.processor.Name(), - IgnoreYoungerThan: w.ignoreYoungerThan, - } - - curationRemark, present, err := w.curationState.Get(k) - if err != nil { - return - } - if !present { - return storage.Accept - } - if !curationRemark.Before(w.stopAt) { - return storage.Skip - } - watermark := value.(*watermarks) - if !curationRemark.Before(watermark.High) { - return storage.Skip - } - curationConsistent, err := w.curationConsistent(fingerprint, watermark) - if err != nil { - return - } - if curationConsistent { - return storage.Skip - } - - return storage.Accept -} - -// curationConsistent determines whether the given metric is in a dirty state -// and needs curation. -func (w *watermarkScanner) curationConsistent(f *clientmodel.Fingerprint, watermark *watermarks) (bool, error) { - k := &curationKey{ - Fingerprint: f, - ProcessorMessageRaw: w.processor.Signature(), - ProcessorMessageTypeName: w.processor.Name(), - IgnoreYoungerThan: w.ignoreYoungerThan, - } - curationRemark, present, err := w.curationState.Get(k) - if err != nil { - return false, err - } - if !present { - return false, nil - } - if !curationRemark.Before(watermark.High) { - return true, nil - } - - return false, nil -} - -func (w *watermarkScanner) Operate(key, _ interface{}) (oErr *storage.OperatorError) { - fingerprint := key.(*clientmodel.Fingerprint) - - glog.Infof("Curating %s...", fingerprint) - - if len(w.ViewQueue) > 0 { - glog.Warning("Deferred due to view queue.") - time.Sleep(curationYieldPeriod) - } - - if fingerprint.Less(w.firstBlock.Fingerprint) { - glog.Warning("Skipped since before keyspace.") - return nil - } - if w.lastBlock.Fingerprint.Less(fingerprint) { - glog.Warning("Skipped since after keyspace.") - return nil - } - - curationState, _, err := w.curationState.Get(&curationKey{ - Fingerprint: fingerprint, - ProcessorMessageRaw: 
w.processor.Signature(), - ProcessorMessageTypeName: w.processor.Name(), - IgnoreYoungerThan: w.ignoreYoungerThan, - }) - if err != nil { - glog.Warning("Unable to get curation state: %s", err) - // An anomaly with the curation remark is likely not fatal in the sense that - // there was a decoding error with the entity and shouldn't be cause to stop - // work. The process will simply start from a pessimistic work time and - // work forward. With an idempotent processor, this is safe. - return &storage.OperatorError{Error: err, Continuable: true} - } - - keySet, _ := w.sampleKeys.Get() - defer w.sampleKeys.Give(keySet) - - keySet.Fingerprint = fingerprint - keySet.FirstTimestamp = curationState - - // Invariant: The fingerprint tests above ensure that we have the same - // fingerprint. - keySet.Constrain(w.firstBlock, w.lastBlock) - - seeker := &iteratorSeekerState{ - i: w.sampleIterator, - - obj: keySet, - - first: w.firstBlock, - last: w.lastBlock, - - dtoSampleKeys: w.dtoSampleKeys, - sampleKeys: w.sampleKeys, - } - - for state := seeker.initialize; state != nil; state = state() { - } - - if seeker.err != nil { - glog.Warningf("Got error in state machine: %s", seeker.err) - - return &storage.OperatorError{Error: seeker.err, Continuable: !seeker.iteratorInvalid} - } - - if seeker.iteratorInvalid { - glog.Warningf("Got illegal iterator in state machine: %s", err) - - return &storage.OperatorError{Error: errIllegalIterator, Continuable: false} - } - - if !seeker.seriesOperable { - return - } - - lastTime, err := w.processor.Apply(w.sampleIterator, w.samples, w.stopAt, fingerprint) - if err != nil { - // We can't divine the severity of a processor error without refactoring the - // interface. 
- return &storage.OperatorError{Error: err, Continuable: false} - } - - if err = w.curationState.Update(&curationKey{ - Fingerprint: fingerprint, - ProcessorMessageRaw: w.processor.Signature(), - ProcessorMessageTypeName: w.processor.Name(), - IgnoreYoungerThan: w.ignoreYoungerThan, - }, lastTime); err != nil { - // Under the assumption that the processors are idempotent, they can be - // re-run; thusly, the commitment of the curation remark is no cause - // to cease further progress. - return &storage.OperatorError{Error: err, Continuable: true} - } - - return nil -} - -// curationKey provides a representation of dto.CurationKey with associated -// business logic methods attached to it to enhance code readability. -type curationKey struct { - Fingerprint *clientmodel.Fingerprint - ProcessorMessageRaw []byte - ProcessorMessageTypeName string - IgnoreYoungerThan time.Duration -} - -// Equal answers whether the two curationKeys are equivalent. -func (c *curationKey) Equal(o *curationKey) bool { - switch { - case !c.Fingerprint.Equal(o.Fingerprint): - return false - case bytes.Compare(c.ProcessorMessageRaw, o.ProcessorMessageRaw) != 0: - return false - case c.ProcessorMessageTypeName != o.ProcessorMessageTypeName: - return false - case c.IgnoreYoungerThan != o.IgnoreYoungerThan: - return false - } - - return true -} - -func (c *curationKey) dump(d *dto.CurationKey) { - d.Reset() - - // BUG(matt): Avenue for simplification. - fingerprintDTO := &dto.Fingerprint{} - - dumpFingerprint(fingerprintDTO, c.Fingerprint) - - d.Fingerprint = fingerprintDTO - d.ProcessorMessageRaw = c.ProcessorMessageRaw - d.ProcessorMessageTypeName = proto.String(c.ProcessorMessageTypeName) - d.IgnoreYoungerThan = proto.Int64(int64(c.IgnoreYoungerThan)) -} - -func (c *curationKey) load(d *dto.CurationKey) { - // BUG(matt): Avenue for simplification. 
- c.Fingerprint = &clientmodel.Fingerprint{} - - loadFingerprint(c.Fingerprint, d.Fingerprint) - - c.ProcessorMessageRaw = d.ProcessorMessageRaw - c.ProcessorMessageTypeName = d.GetProcessorMessageTypeName() - c.IgnoreYoungerThan = time.Duration(d.GetIgnoreYoungerThan()) -} diff --git a/storage/metric/tiered/dto.go b/storage/metric/tiered/dto.go deleted file mode 100644 index c96b69f0be..0000000000 --- a/storage/metric/tiered/dto.go +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright 2013 Prometheus Team -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package tiered - -import ( - "sort" - - "code.google.com/p/goprotobuf/proto" - - clientmodel "github.com/prometheus/client_golang/model" - - dto "github.com/prometheus/prometheus/model/generated" -) - -func dumpFingerprint(d *dto.Fingerprint, f *clientmodel.Fingerprint) { - d.Reset() - - d.Signature = proto.String(f.String()) -} - -func loadFingerprint(f *clientmodel.Fingerprint, d *dto.Fingerprint) { - f.LoadFromString(d.GetSignature()) -} - -func dumpMetric(d *dto.Metric, m clientmodel.Metric) { - d.Reset() - - metricLength := len(m) - labelNames := make([]string, 0, metricLength) - - for labelName := range m { - labelNames = append(labelNames, string(labelName)) - } - - sort.Strings(labelNames) - - pairs := make([]*dto.LabelPair, 0, metricLength) - - for _, labelName := range labelNames { - l := clientmodel.LabelName(labelName) - labelValue := m[l] - labelPair := &dto.LabelPair{ - Name: proto.String(string(labelName)), - Value: proto.String(string(labelValue)), - } - - pairs = append(pairs, labelPair) - } - - d.LabelPair = pairs -} - -func dumpLabelName(d *dto.LabelName, l clientmodel.LabelName) { - d.Reset() - - d.Name = proto.String(string(l)) -} diff --git a/storage/metric/tiered/end_to_end_test.go b/storage/metric/tiered/end_to_end_test.go deleted file mode 100644 index a321d08834..0000000000 --- a/storage/metric/tiered/end_to_end_test.go +++ /dev/null @@ -1,548 +0,0 @@ -// Copyright 2013 Prometheus Team -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package tiered - -import ( - "sort" - "testing" - "time" - - clientmodel "github.com/prometheus/client_golang/model" - - "github.com/prometheus/prometheus/storage/metric" -) - -func GetFingerprintsForLabelSetTests(p metric.Persistence, t testing.TB) { - metrics := []clientmodel.Metric{ - { - clientmodel.MetricNameLabel: "test_metric", - "method": "get", - "result": "success", - }, - { - clientmodel.MetricNameLabel: "test_metric", - "method": "get", - "result": "failure", - }, - { - clientmodel.MetricNameLabel: "test_metric", - "method": "post", - "result": "success", - }, - { - clientmodel.MetricNameLabel: "test_metric", - "method": "post", - "result": "failure", - }, - } - - newTestLabelMatcher := func(matchType metric.MatchType, name clientmodel.LabelName, value clientmodel.LabelValue) *metric.LabelMatcher { - m, err := metric.NewLabelMatcher(matchType, name, value) - if err != nil { - t.Fatalf("Couldn't create label matcher: %v", err) - } - return m - } - - scenarios := []struct { - in metric.LabelMatchers - outIndexes []int - }{ - { - in: metric.LabelMatchers{ - newTestLabelMatcher(metric.Equal, clientmodel.MetricNameLabel, "test_metric"), - }, - outIndexes: []int{0, 1, 2, 3}, - }, - { - in: metric.LabelMatchers{ - newTestLabelMatcher(metric.Equal, clientmodel.MetricNameLabel, "non_existent_metric"), - }, - outIndexes: []int{}, - }, - { - in: metric.LabelMatchers{ - newTestLabelMatcher(metric.Equal, clientmodel.MetricNameLabel, "non_existent_metric"), - newTestLabelMatcher(metric.Equal, "result", "success"), - }, - outIndexes: []int{}, - }, - { - in: metric.LabelMatchers{ - newTestLabelMatcher(metric.Equal, clientmodel.MetricNameLabel, "test_metric"), - newTestLabelMatcher(metric.Equal, "result", "success"), - }, - outIndexes: []int{0, 2}, - }, - { - in: metric.LabelMatchers{ - newTestLabelMatcher(metric.Equal, clientmodel.MetricNameLabel, "test_metric"), - newTestLabelMatcher(metric.NotEqual, "result", "success"), - }, - outIndexes: []int{1, 3}, - }, - { - 
in: metric.LabelMatchers{ - newTestLabelMatcher(metric.Equal, clientmodel.MetricNameLabel, "test_metric"), - newTestLabelMatcher(metric.RegexMatch, "result", "foo|success|bar"), - }, - outIndexes: []int{0, 2}, - }, - { - in: metric.LabelMatchers{ - newTestLabelMatcher(metric.Equal, clientmodel.MetricNameLabel, "test_metric"), - newTestLabelMatcher(metric.RegexNoMatch, "result", "foo|success|bar"), - }, - outIndexes: []int{1, 3}, - }, - { - in: metric.LabelMatchers{ - newTestLabelMatcher(metric.Equal, clientmodel.MetricNameLabel, "test_metric"), - newTestLabelMatcher(metric.RegexNoMatch, "result", "foo|success|bar"), - newTestLabelMatcher(metric.RegexMatch, "method", "os"), - }, - outIndexes: []int{3}, - }, - } - - for _, m := range metrics { - testAppendSamples(p, &clientmodel.Sample{ - Value: 0, - Timestamp: 0, - Metric: m, - }, t) - } - - for i, s := range scenarios { - actualFps, err := p.GetFingerprintsForLabelMatchers(s.in) - if err != nil { - t.Fatalf("%d. Couldn't get fingerprints for label matchers: %v", i, err) - } - - expectedFps := clientmodel.Fingerprints{} - for _, i := range s.outIndexes { - fp := &clientmodel.Fingerprint{} - fp.LoadFromMetric(metrics[i]) - expectedFps = append(expectedFps, fp) - } - - sort.Sort(actualFps) - sort.Sort(expectedFps) - - if len(actualFps) != len(expectedFps) { - t.Fatalf("%d. Got %d fingerprints; want %d", i, len(actualFps), len(expectedFps)) - } - - for j, actualFp := range actualFps { - if !actualFp.Equal(expectedFps[j]) { - t.Fatalf("%d.%d. 
Got fingerprint %v; want %v", i, j, actualFp, expectedFps[j]) - } - } - } -} - -func GetLabelValuesForLabelNameTests(p metric.Persistence, t testing.TB) { - testAppendSamples(p, &clientmodel.Sample{ - Value: 0, - Timestamp: 0, - Metric: clientmodel.Metric{ - clientmodel.MetricNameLabel: "my_metric", - "request_type": "create", - "result": "success", - }, - }, t) - - testAppendSamples(p, &clientmodel.Sample{ - Value: 0, - Timestamp: 0, - Metric: clientmodel.Metric{ - clientmodel.MetricNameLabel: "my_metric", - "request_type": "delete", - "outcome": "failure", - }, - }, t) - - expectedIndex := map[clientmodel.LabelName]clientmodel.LabelValues{ - clientmodel.MetricNameLabel: {"my_metric"}, - "request_type": {"create", "delete"}, - "result": {"success"}, - "outcome": {"failure"}, - } - - for name, expected := range expectedIndex { - actual, err := p.GetLabelValuesForLabelName(name) - if err != nil { - t.Fatalf("Error getting values for label %s: %v", name, err) - } - if len(actual) != len(expected) { - t.Fatalf("Number of values don't match for label %s: got %d; want %d", name, len(actual), len(expected)) - } - for i := range expected { - inActual := false - for _, a := range actual { - if expected[i] == a { - inActual = true - break - } - } - if !inActual { - t.Fatalf("%d. 
Expected label value %s not in output", i, expected[i]) - } - } - } -} - -func GetMetricForFingerprintTests(p metric.Persistence, t testing.TB) { - testAppendSamples(p, &clientmodel.Sample{ - Value: 0, - Timestamp: 0, - Metric: clientmodel.Metric{ - "request_type": "your_mom", - }, - }, t) - - testAppendSamples(p, &clientmodel.Sample{ - Value: 0, - Timestamp: 0, - Metric: clientmodel.Metric{ - "request_type": "your_dad", - "one-off": "value", - }, - }, t) - - result, err := p.GetFingerprintsForLabelMatchers(metric.LabelMatchers{{ - Type: metric.Equal, - Name: "request_type", - Value: "your_mom", - }}) - - if err != nil { - t.Error(err) - } - - if len(result) != 1 { - t.Errorf("Expected one element.") - } - - m, err := p.GetMetricForFingerprint(result[0]) - if err != nil { - t.Error(err) - } - - if m == nil { - t.Fatal("Did not expect nil.") - } - - if len(m) != 1 { - t.Errorf("Expected one-dimensional metric.") - } - - if m["request_type"] != "your_mom" { - t.Errorf("Expected metric to match.") - } - - result, err = p.GetFingerprintsForLabelMatchers(metric.LabelMatchers{{ - Type: metric.Equal, - Name: "request_type", - Value: "your_dad", - }}) - - if err != nil { - t.Error(err) - } - - if len(result) != 1 { - t.Errorf("Expected one element.") - } - - m, err = p.GetMetricForFingerprint(result[0]) - - if m == nil { - t.Fatal("Did not expect nil.") - } - - if err != nil { - t.Error(err) - } - - if len(m) != 2 { - t.Errorf("Expected two-dimensional metric.") - } - - if m["request_type"] != "your_dad" { - t.Errorf("Expected metric to match.") - } - - if m["one-off"] != "value" { - t.Errorf("Expected metric to match.") - } - - // Verify that mutating a returned metric does not result in the mutated - // metric to be returned at the next GetMetricForFingerprint() call. 
- m["one-off"] = "new value" - m, err = p.GetMetricForFingerprint(result[0]) - - if m == nil { - t.Fatal("Did not expect nil.") - } - - if err != nil { - t.Error(err) - } - - if len(m) != 2 { - t.Errorf("Expected two-dimensional metric.") - } - - if m["request_type"] != "your_dad" { - t.Errorf("Expected metric to match.") - } - - if m["one-off"] != "value" { - t.Errorf("Expected metric to match.") - } -} - -func AppendRepeatingValuesTests(p metric.Persistence, t testing.TB) { - m := clientmodel.Metric{ - clientmodel.MetricNameLabel: "errors_total", - "controller": "foo", - "operation": "bar", - } - - increments := 10 - repetitions := 500 - - for i := 0; i < increments; i++ { - for j := 0; j < repetitions; j++ { - time := clientmodel.Timestamp(0).Add(time.Duration(i) * time.Hour).Add(time.Duration(j) * time.Second) - testAppendSamples(p, &clientmodel.Sample{ - Value: clientmodel.SampleValue(i), - Timestamp: time, - Metric: m, - }, t) - } - } - - v, ok := p.(metric.View) - if !ok { - // It's purely a benchmark for a Persistence that is not viewable. 
- return - } - - matchers := labelMatchersFromLabelSet(clientmodel.LabelSet{ - clientmodel.MetricNameLabel: "errors_total", - "controller": "foo", - "operation": "bar", - }) - - for i := 0; i < increments; i++ { - for j := 0; j < repetitions; j++ { - fingerprints, err := p.GetFingerprintsForLabelMatchers(matchers) - if err != nil { - t.Fatal(err) - } - if len(fingerprints) != 1 { - t.Fatalf("expected %d fingerprints, got %d", 1, len(fingerprints)) - } - - time := clientmodel.Timestamp(0).Add(time.Duration(i) * time.Hour).Add(time.Duration(j) * time.Second) - samples := v.GetValueAtTime(fingerprints[0], time) - if len(samples) == 0 { - t.Fatal("expected at least one sample.") - } - - expected := clientmodel.SampleValue(i) - - for _, sample := range samples { - if sample.Value != expected { - t.Fatalf("expected %v value, got %v", expected, sample.Value) - } - } - } - } -} - -func AppendsRepeatingValuesTests(p metric.Persistence, t testing.TB) { - m := clientmodel.Metric{ - clientmodel.MetricNameLabel: "errors_total", - "controller": "foo", - "operation": "bar", - } - - increments := 10 - repetitions := 500 - - s := clientmodel.Samples{} - for i := 0; i < increments; i++ { - for j := 0; j < repetitions; j++ { - time := clientmodel.Timestamp(0).Add(time.Duration(i) * time.Hour).Add(time.Duration(j) * time.Second) - s = append(s, &clientmodel.Sample{ - Value: clientmodel.SampleValue(i), - Timestamp: time, - Metric: m, - }) - } - } - - p.AppendSamples(s) - - v, ok := p.(metric.View) - if !ok { - // It's purely a benchmark for a MetricPersistance that is not viewable. 
- return - } - - matchers := labelMatchersFromLabelSet(clientmodel.LabelSet{ - clientmodel.MetricNameLabel: "errors_total", - "controller": "foo", - "operation": "bar", - }) - - for i := 0; i < increments; i++ { - for j := 0; j < repetitions; j++ { - fingerprints, err := p.GetFingerprintsForLabelMatchers(matchers) - if err != nil { - t.Fatal(err) - } - if len(fingerprints) != 1 { - t.Fatalf("expected %d fingerprints, got %d", 1, len(fingerprints)) - } - - time := clientmodel.Timestamp(0).Add(time.Duration(i) * time.Hour).Add(time.Duration(j) * time.Second) - samples := v.GetValueAtTime(fingerprints[0], time) - if len(samples) == 0 { - t.Fatal("expected at least one sample.") - } - - expected := clientmodel.SampleValue(i) - - for _, sample := range samples { - if sample.Value != expected { - t.Fatalf("expected %v value, got %v", expected, sample.Value) - } - } - } - } -} - -// Test Definitions Below - -var testLevelDBGetFingerprintsForLabelSet = buildLevelDBTestPersistence("get_fingerprints_for_labelset", GetFingerprintsForLabelSetTests) - -func TestLevelDBGetFingerprintsForLabelSet(t *testing.T) { - testLevelDBGetFingerprintsForLabelSet(t) -} - -func BenchmarkLevelDBGetFingerprintsForLabelSet(b *testing.B) { - for i := 0; i < b.N; i++ { - testLevelDBGetFingerprintsForLabelSet(b) - } -} - -var testLevelDBGetLabelValuesForLabelName = buildLevelDBTestPersistence("get_label_values_for_labelname", GetLabelValuesForLabelNameTests) - -func TestLevelDBGetFingerprintsForLabelName(t *testing.T) { - testLevelDBGetLabelValuesForLabelName(t) -} - -func BenchmarkLevelDBGetLabelValuesForLabelName(b *testing.B) { - for i := 0; i < b.N; i++ { - testLevelDBGetLabelValuesForLabelName(b) - } -} - -var testLevelDBGetMetricForFingerprint = buildLevelDBTestPersistence("get_metric_for_fingerprint", GetMetricForFingerprintTests) - -func TestLevelDBGetMetricForFingerprint(t *testing.T) { - testLevelDBGetMetricForFingerprint(t) -} - -func BenchmarkLevelDBGetMetricForFingerprint(b *testing.B) 
{ - for i := 0; i < b.N; i++ { - testLevelDBGetMetricForFingerprint(b) - } -} - -var testLevelDBAppendRepeatingValues = buildLevelDBTestPersistence("append_repeating_values", AppendRepeatingValuesTests) - -func TestLevelDBAppendRepeatingValues(t *testing.T) { - testLevelDBAppendRepeatingValues(t) -} - -func BenchmarkLevelDBAppendRepeatingValues(b *testing.B) { - for i := 0; i < b.N; i++ { - testLevelDBAppendRepeatingValues(b) - } -} - -var testLevelDBAppendsRepeatingValues = buildLevelDBTestPersistence("appends_repeating_values", AppendsRepeatingValuesTests) - -func TestLevelDBAppendsRepeatingValues(t *testing.T) { - testLevelDBAppendsRepeatingValues(t) -} - -func BenchmarkLevelDBAppendsRepeatingValues(b *testing.B) { - for i := 0; i < b.N; i++ { - testLevelDBAppendsRepeatingValues(b) - } -} - -var testMemoryGetFingerprintsForLabelSet = buildMemoryTestPersistence(GetFingerprintsForLabelSetTests) - -func TestMemoryGetFingerprintsForLabelSet(t *testing.T) { - testMemoryGetFingerprintsForLabelSet(t) -} - -func BenchmarkMemoryGetFingerprintsForLabelSet(b *testing.B) { - for i := 0; i < b.N; i++ { - testMemoryGetFingerprintsForLabelSet(b) - } -} - -var testMemoryGetLabelValuesForLabelName = buildMemoryTestPersistence(GetLabelValuesForLabelNameTests) - -func TestMemoryGetLabelValuesForLabelName(t *testing.T) { - testMemoryGetLabelValuesForLabelName(t) -} - -func BenchmarkMemoryGetLabelValuesForLabelName(b *testing.B) { - for i := 0; i < b.N; i++ { - testMemoryGetLabelValuesForLabelName(b) - } -} - -var testMemoryGetMetricForFingerprint = buildMemoryTestPersistence(GetMetricForFingerprintTests) - -func TestMemoryGetMetricForFingerprint(t *testing.T) { - testMemoryGetMetricForFingerprint(t) -} - -func BenchmarkMemoryGetMetricForFingerprint(b *testing.B) { - for i := 0; i < b.N; i++ { - testMemoryGetMetricForFingerprint(b) - } -} - -var testMemoryAppendRepeatingValues = buildMemoryTestPersistence(AppendRepeatingValuesTests) - -func TestMemoryAppendRepeatingValues(t 
*testing.T) { - testMemoryAppendRepeatingValues(t) -} - -func BenchmarkMemoryAppendRepeatingValues(b *testing.B) { - for i := 0; i < b.N; i++ { - testMemoryAppendRepeatingValues(b) - } -} diff --git a/storage/metric/tiered/freelist.go b/storage/metric/tiered/freelist.go deleted file mode 100644 index 18e2c02127..0000000000 --- a/storage/metric/tiered/freelist.go +++ /dev/null @@ -1,212 +0,0 @@ -// Copyright 2013 Prometheus Team -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tiered - -import ( - "time" - - "github.com/prometheus/prometheus/utility" - - clientmodel "github.com/prometheus/client_golang/model" - dto "github.com/prometheus/prometheus/model/generated" -) - -type dtoSampleKeyList struct { - l utility.FreeList -} - -func newDtoSampleKeyList(cap int) *dtoSampleKeyList { - return &dtoSampleKeyList{ - l: utility.NewFreeList(cap), - } -} - -func (l *dtoSampleKeyList) Get() (*dto.SampleKey, bool) { - if v, ok := l.l.Get(); ok { - return v.(*dto.SampleKey), ok - } - - return &dto.SampleKey{}, false -} - -func (l *dtoSampleKeyList) Give(v *dto.SampleKey) bool { - v.Reset() - - return l.l.Give(v) -} - -func (l *dtoSampleKeyList) Close() { - l.l.Close() -} - -type sampleKeyList struct { - l utility.FreeList -} - -var defaultSampleKey = &SampleKey{} - -func newSampleKeyList(cap int) *sampleKeyList { - return &sampleKeyList{ - l: utility.NewFreeList(cap), - } -} - -func (l *sampleKeyList) Get() (*SampleKey, bool) { - if v, ok := l.l.Get(); ok { - 
return v.(*SampleKey), ok - } - - return &SampleKey{}, false -} - -func (l *sampleKeyList) Give(v *SampleKey) bool { - *v = *defaultSampleKey - - return l.l.Give(v) -} - -func (l *sampleKeyList) Close() { - l.l.Close() -} - -type valueAtTimeList struct { - l utility.FreeList -} - -func (l *valueAtTimeList) Get(fp *clientmodel.Fingerprint, time clientmodel.Timestamp) *getValuesAtTimeOp { - var op *getValuesAtTimeOp - v, ok := l.l.Get() - if ok { - op = v.(*getValuesAtTimeOp) - } else { - op = &getValuesAtTimeOp{} - } - op.fp = *fp - op.current = time - return op -} - -var pGetValuesAtTimeOp = &getValuesAtTimeOp{} - -func (l *valueAtTimeList) Give(v *getValuesAtTimeOp) bool { - *v = *pGetValuesAtTimeOp - - return l.l.Give(v) -} - -func newValueAtTimeList(cap int) *valueAtTimeList { - return &valueAtTimeList{ - l: utility.NewFreeList(cap), - } -} - -type valueAtIntervalList struct { - l utility.FreeList -} - -func (l *valueAtIntervalList) Get(fp *clientmodel.Fingerprint, from, through clientmodel.Timestamp, interval time.Duration) *getValuesAtIntervalOp { - var op *getValuesAtIntervalOp - v, ok := l.l.Get() - if ok { - op = v.(*getValuesAtIntervalOp) - } else { - op = &getValuesAtIntervalOp{} - } - op.fp = *fp - op.current = from - op.through = through - op.interval = interval - return op -} - -var pGetValuesAtIntervalOp = &getValuesAtIntervalOp{} - -func (l *valueAtIntervalList) Give(v *getValuesAtIntervalOp) bool { - *v = *pGetValuesAtIntervalOp - - return l.l.Give(v) -} - -func newValueAtIntervalList(cap int) *valueAtIntervalList { - return &valueAtIntervalList{ - l: utility.NewFreeList(cap), - } -} - -type valueAlongRangeList struct { - l utility.FreeList -} - -func (l *valueAlongRangeList) Get(fp *clientmodel.Fingerprint, from, through clientmodel.Timestamp) *getValuesAlongRangeOp { - var op *getValuesAlongRangeOp - v, ok := l.l.Get() - if ok { - op = v.(*getValuesAlongRangeOp) - } else { - op = &getValuesAlongRangeOp{} - } - op.fp = *fp - op.current = from - 
op.through = through - return op -} - -var pGetValuesAlongRangeOp = &getValuesAlongRangeOp{} - -func (l *valueAlongRangeList) Give(v *getValuesAlongRangeOp) bool { - *v = *pGetValuesAlongRangeOp - - return l.l.Give(v) -} - -func newValueAlongRangeList(cap int) *valueAlongRangeList { - return &valueAlongRangeList{ - l: utility.NewFreeList(cap), - } -} - -type valueAtIntervalAlongRangeList struct { - l utility.FreeList -} - -func (l *valueAtIntervalAlongRangeList) Get(fp *clientmodel.Fingerprint, from, through clientmodel.Timestamp, interval, rangeDuration time.Duration) *getValueRangeAtIntervalOp { - var op *getValueRangeAtIntervalOp - v, ok := l.l.Get() - if ok { - op = v.(*getValueRangeAtIntervalOp) - } else { - op = &getValueRangeAtIntervalOp{} - } - op.fp = *fp - op.current = from - op.rangeThrough = from.Add(rangeDuration) - op.rangeDuration = rangeDuration - op.interval = interval - op.through = through - return op -} - -var pGetValueRangeAtIntervalOp = &getValueRangeAtIntervalOp{} - -func (l *valueAtIntervalAlongRangeList) Give(v *getValueRangeAtIntervalOp) bool { - *v = *pGetValueRangeAtIntervalOp - - return l.l.Give(v) -} - -func newValueAtIntervalAlongRangeList(cap int) *valueAtIntervalAlongRangeList { - return &valueAtIntervalAlongRangeList{ - l: utility.NewFreeList(cap), - } -} diff --git a/storage/metric/tiered/freelist_test.go b/storage/metric/tiered/freelist_test.go deleted file mode 100644 index c575583cc2..0000000000 --- a/storage/metric/tiered/freelist_test.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2014 Prometheus Team -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tiered - -import ( - "testing" - - "github.com/prometheus/client_golang/model" -) - -// TestValueAtTimeListGet tests if the timestamp is set properly in the op -// retrieved from the free list and if the 'consumed' member is zeroed properly. -func TestValueAtTimeListGet(t *testing.T) { - l := newValueAtTimeList(1) - op := l.Get(&model.Fingerprint{}, 42) - op.consumed = true - l.Give(op) - - op2 := l.Get(&model.Fingerprint{}, 4711) - if op2.Consumed() { - t.Error("Op retrieved from freelist is already consumed.") - } - if got, expected := op2.CurrentTime(), model.Timestamp(4711); got != expected { - t.Errorf("op2.CurrentTime() = %d; want %d.", got, expected) - } -} diff --git a/storage/metric/tiered/index.go b/storage/metric/tiered/index.go deleted file mode 100644 index cc3eb45bf7..0000000000 --- a/storage/metric/tiered/index.go +++ /dev/null @@ -1,689 +0,0 @@ -// Copyright 2013 Prometheus Team -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package tiered - -import ( - "io" - "sort" - "sync" - - "code.google.com/p/goprotobuf/proto" - - clientmodel "github.com/prometheus/client_golang/model" - "github.com/prometheus/prometheus/storage/metric" - "github.com/prometheus/prometheus/storage/raw" - "github.com/prometheus/prometheus/storage/raw/leveldb" - "github.com/prometheus/prometheus/utility" - - dto "github.com/prometheus/prometheus/model/generated" -) - -// FingerprintMetricMapping is an in-memory map of Fingerprints to Metrics. -type FingerprintMetricMapping map[clientmodel.Fingerprint]clientmodel.Metric - -// FingerprintMetricIndex models a database mapping Fingerprints to Metrics. -type FingerprintMetricIndex interface { - raw.Database - raw.Pruner - - IndexBatch(FingerprintMetricMapping) error - Lookup(*clientmodel.Fingerprint) (m clientmodel.Metric, ok bool, err error) -} - -// LevelDBFingerprintMetricIndex implements FingerprintMetricIndex using -// leveldb. -type LevelDBFingerprintMetricIndex struct { - *leveldb.LevelDBPersistence -} - -// IndexBatch implements FingerprintMetricIndex. -func (i *LevelDBFingerprintMetricIndex) IndexBatch(mapping FingerprintMetricMapping) error { - b := leveldb.NewBatch() - defer b.Close() - - for f, m := range mapping { - k := &dto.Fingerprint{} - dumpFingerprint(k, &f) - v := &dto.Metric{} - dumpMetric(v, m) - - b.Put(k, v) - } - - return i.LevelDBPersistence.Commit(b) -} - -// Lookup implements FingerprintMetricIndex. 
-func (i *LevelDBFingerprintMetricIndex) Lookup(f *clientmodel.Fingerprint) (m clientmodel.Metric, ok bool, err error) { - k := &dto.Fingerprint{} - dumpFingerprint(k, f) - v := &dto.Metric{} - if ok, err := i.LevelDBPersistence.Get(k, v); !ok { - return nil, false, nil - } else if err != nil { - return nil, false, err - } - - m = clientmodel.Metric{} - - for _, pair := range v.LabelPair { - m[clientmodel.LabelName(pair.GetName())] = clientmodel.LabelValue(pair.GetValue()) - } - - return m, true, nil -} - -// NewLevelDBFingerprintMetricIndex returns a LevelDBFingerprintMetricIndex -// object ready to use. -func NewLevelDBFingerprintMetricIndex(o leveldb.LevelDBOptions) (*LevelDBFingerprintMetricIndex, error) { - s, err := leveldb.NewLevelDBPersistence(o) - if err != nil { - return nil, err - } - - return &LevelDBFingerprintMetricIndex{ - LevelDBPersistence: s, - }, nil -} - -// LabelNameLabelValuesMapping is an in-memory map of LabelNames to -// LabelValues. -type LabelNameLabelValuesMapping map[clientmodel.LabelName]clientmodel.LabelValues - -// LabelNameLabelValuesIndex models a database mapping LabelNames to -// LabelValues. -type LabelNameLabelValuesIndex interface { - raw.Database - raw.Pruner - - IndexBatch(LabelNameLabelValuesMapping) error - Lookup(clientmodel.LabelName) (values clientmodel.LabelValues, ok bool, err error) - Has(clientmodel.LabelName) (ok bool, err error) -} - -// LevelDBLabelNameLabelValuesIndex implements LabelNameLabelValuesIndex using -// leveldb. -type LevelDBLabelNameLabelValuesIndex struct { - *leveldb.LevelDBPersistence -} - -// IndexBatch implements LabelNameLabelValuesIndex. 
-func (i *LevelDBLabelNameLabelValuesIndex) IndexBatch(b LabelNameLabelValuesMapping) error { - batch := leveldb.NewBatch() - defer batch.Close() - - for labelName, labelValues := range b { - sort.Sort(labelValues) - - key := &dto.LabelName{ - Name: proto.String(string(labelName)), - } - value := &dto.LabelValueCollection{} - value.Member = make([]string, 0, len(labelValues)) - for _, labelValue := range labelValues { - value.Member = append(value.Member, string(labelValue)) - } - - batch.Put(key, value) - } - - return i.LevelDBPersistence.Commit(batch) -} - -// Lookup implements LabelNameLabelValuesIndex. -func (i *LevelDBLabelNameLabelValuesIndex) Lookup(l clientmodel.LabelName) (values clientmodel.LabelValues, ok bool, err error) { - k := &dto.LabelName{} - dumpLabelName(k, l) - v := &dto.LabelValueCollection{} - ok, err = i.LevelDBPersistence.Get(k, v) - if err != nil { - return nil, false, err - } - if !ok { - return nil, false, nil - } - - for _, m := range v.Member { - values = append(values, clientmodel.LabelValue(m)) - } - - return values, true, nil -} - -// Has implements LabelNameLabelValuesIndex. -func (i *LevelDBLabelNameLabelValuesIndex) Has(l clientmodel.LabelName) (ok bool, err error) { - return i.LevelDBPersistence.Has(&dto.LabelName{ - Name: proto.String(string(l)), - }) -} - -// NewLevelDBLabelNameLabelValuesIndex returns a LevelDBLabelNameLabelValuesIndex -// ready to use. -func NewLevelDBLabelNameLabelValuesIndex(o leveldb.LevelDBOptions) (*LevelDBLabelNameLabelValuesIndex, error) { - s, err := leveldb.NewLevelDBPersistence(o) - if err != nil { - return nil, err - } - - return &LevelDBLabelNameLabelValuesIndex{ - LevelDBPersistence: s, - }, nil -} - -// LabelPairFingerprintMapping is an in-memory map of LabelPairs to -// Fingerprints. -type LabelPairFingerprintMapping map[metric.LabelPair]clientmodel.Fingerprints - -// LabelPairFingerprintIndex models a database mapping LabelPairs to -// Fingerprints. 
-type LabelPairFingerprintIndex interface { - raw.Database - raw.ForEacher - raw.Pruner - - IndexBatch(LabelPairFingerprintMapping) error - Lookup(*metric.LabelPair) (m clientmodel.Fingerprints, ok bool, err error) - Has(*metric.LabelPair) (ok bool, err error) -} - -// LevelDBLabelPairFingerprintIndex implements LabelPairFingerprintIndex using -// leveldb. -type LevelDBLabelPairFingerprintIndex struct { - *leveldb.LevelDBPersistence -} - -// IndexBatch implements LabelPairFingerprintMapping. -func (i *LevelDBLabelPairFingerprintIndex) IndexBatch(m LabelPairFingerprintMapping) error { - batch := leveldb.NewBatch() - defer batch.Close() - - for pair, fps := range m { - sort.Sort(fps) - - key := &dto.LabelPair{ - Name: proto.String(string(pair.Name)), - Value: proto.String(string(pair.Value)), - } - value := &dto.FingerprintCollection{} - for _, fp := range fps { - f := &dto.Fingerprint{} - dumpFingerprint(f, fp) - value.Member = append(value.Member, f) - } - - batch.Put(key, value) - } - - return i.LevelDBPersistence.Commit(batch) -} - -// Lookup implements LabelPairFingerprintMapping. -func (i *LevelDBLabelPairFingerprintIndex) Lookup(p *metric.LabelPair) (m clientmodel.Fingerprints, ok bool, err error) { - k := &dto.LabelPair{ - Name: proto.String(string(p.Name)), - Value: proto.String(string(p.Value)), - } - v := &dto.FingerprintCollection{} - - ok, err = i.LevelDBPersistence.Get(k, v) - - if !ok { - return nil, false, nil - } - if err != nil { - return nil, false, err - } - - for _, pair := range v.Member { - fp := &clientmodel.Fingerprint{} - loadFingerprint(fp, pair) - m = append(m, fp) - } - - return m, true, nil -} - -// Has implements LabelPairFingerprintMapping. 
-func (i *LevelDBLabelPairFingerprintIndex) Has(p *metric.LabelPair) (ok bool, err error) { - k := &dto.LabelPair{ - Name: proto.String(string(p.Name)), - Value: proto.String(string(p.Value)), - } - - return i.LevelDBPersistence.Has(k) -} - -// NewLevelDBLabelSetFingerprintIndex returns a LevelDBLabelPairFingerprintIndex -// object ready to use. -func NewLevelDBLabelSetFingerprintIndex(o leveldb.LevelDBOptions) (*LevelDBLabelPairFingerprintIndex, error) { - s, err := leveldb.NewLevelDBPersistence(o) - if err != nil { - return nil, err - } - - return &LevelDBLabelPairFingerprintIndex{ - LevelDBPersistence: s, - }, nil -} - -// MetricMembershipIndex models a database tracking the existence of Metrics. -type MetricMembershipIndex interface { - raw.Database - raw.Pruner - - IndexBatch(FingerprintMetricMapping) error - Has(clientmodel.Metric) (ok bool, err error) -} - -// LevelDBMetricMembershipIndex implements MetricMembershipIndex using leveldb. -type LevelDBMetricMembershipIndex struct { - *leveldb.LevelDBPersistence -} - -var existenceIdentity = &dto.MembershipIndexValue{} - -// IndexBatch implements MetricMembershipIndex. -func (i *LevelDBMetricMembershipIndex) IndexBatch(b FingerprintMetricMapping) error { - batch := leveldb.NewBatch() - defer batch.Close() - - for _, m := range b { - k := &dto.Metric{} - dumpMetric(k, m) - batch.Put(k, existenceIdentity) - } - - return i.LevelDBPersistence.Commit(batch) -} - -// Has implements MetricMembershipIndex. -func (i *LevelDBMetricMembershipIndex) Has(m clientmodel.Metric) (ok bool, err error) { - k := &dto.Metric{} - dumpMetric(k, m) - - return i.LevelDBPersistence.Has(k) -} - -// NewLevelDBMetricMembershipIndex returns a LevelDBMetricMembershipIndex object -// ready to use. 
-func NewLevelDBMetricMembershipIndex(o leveldb.LevelDBOptions) (*LevelDBMetricMembershipIndex, error) { - s, err := leveldb.NewLevelDBPersistence(o) - if err != nil { - return nil, err - } - - return &LevelDBMetricMembershipIndex{ - LevelDBPersistence: s, - }, nil -} - -// MetricIndexer indexes facets of a clientmodel.Metric. -type MetricIndexer interface { - // IndexMetric makes no assumptions about the concurrency safety of the - // underlying implementer. - IndexMetrics(FingerprintMetricMapping) error -} - -// IndexerObserver listens and receives changes to a given -// FingerprintMetricMapping. -type IndexerObserver interface { - Observe(FingerprintMetricMapping) error -} - -// IndexerProxy receives IndexMetric requests and proxies them to the underlying -// MetricIndexer. Upon success of the underlying receiver, the registered -// IndexObservers are called serially. -// -// If an error occurs in the underlying MetricIndexer or any of the observers, -// this proxy will not work any further and return the offending error in this -// call or any subsequent ones. -type IndexerProxy struct { - err error - - i MetricIndexer - observers []IndexerObserver -} - -// IndexMetrics proxies the given FingerprintMetricMapping to the underlying -// MetricIndexer and calls all registered observers with it. -func (p *IndexerProxy) IndexMetrics(b FingerprintMetricMapping) error { - if p.err != nil { - return p.err - } - if p.err = p.i.IndexMetrics(b); p.err != nil { - return p.err - } - - for _, o := range p.observers { - if p.err = o.Observe(b); p.err != nil { - return p.err - } - } - - return nil -} - -// Close closes the underlying indexer. -func (p *IndexerProxy) Close() error { - if p.err != nil { - return p.err - } - if closer, ok := p.i.(io.Closer); ok { - p.err = closer.Close() - return p.err - } - return nil -} - -// Flush flushes the underlying index requests before closing. 
-func (p *IndexerProxy) Flush() error { - if p.err != nil { - return p.err - } - if flusher, ok := p.i.(flusher); ok { - p.err = flusher.Flush() - return p.err - } - return nil -} - -// NewIndexerProxy builds an IndexerProxy for the given configuration. -func NewIndexerProxy(i MetricIndexer, o ...IndexerObserver) *IndexerProxy { - return &IndexerProxy{ - i: i, - observers: o, - } -} - -// SynchronizedIndexer provides naive locking for any MetricIndexer. -type SynchronizedIndexer struct { - mu sync.Mutex - i MetricIndexer -} - -// IndexMetrics calls IndexMetrics of the wrapped MetricIndexer after acquiring -// a lock. -func (i *SynchronizedIndexer) IndexMetrics(b FingerprintMetricMapping) error { - i.mu.Lock() - defer i.mu.Unlock() - - return i.i.IndexMetrics(b) -} - -type flusher interface { - Flush() error -} - -// Flush calls Flush of the wrapped MetricIndexer after acquiring a lock. If the -// wrapped MetricIndexer has no Flush method, this is a no-op. -func (i *SynchronizedIndexer) Flush() error { - if flusher, ok := i.i.(flusher); ok { - i.mu.Lock() - defer i.mu.Unlock() - - return flusher.Flush() - } - - return nil -} - -// Close calls Close of the wrapped MetricIndexer after acquiring a lock. If the -// wrapped MetricIndexer has no Close method, this is a no-op. -func (i *SynchronizedIndexer) Close() error { - if closer, ok := i.i.(io.Closer); ok { - i.mu.Lock() - defer i.mu.Unlock() - - return closer.Close() - } - - return nil -} - -// NewSynchronizedIndexer returns a SynchronizedIndexer wrapping the given -// MetricIndexer. -func NewSynchronizedIndexer(i MetricIndexer) *SynchronizedIndexer { - return &SynchronizedIndexer{ - i: i, - } -} - -// BufferedIndexer provides unsynchronized index buffering. -// -// If an error occurs in the underlying MetricIndexer or any of the observers, -// this proxy will not work any further and return the offending error. 
-type BufferedIndexer struct { - i MetricIndexer - - limit int - - buf []FingerprintMetricMapping - - err error -} - -// IndexMetrics writes the entries in the given FingerprintMetricMapping to the -// index. -func (i *BufferedIndexer) IndexMetrics(b FingerprintMetricMapping) error { - if i.err != nil { - return i.err - } - - if len(i.buf) < i.limit { - i.buf = append(i.buf, b) - - return nil - } - - i.err = i.Flush() - - return i.err -} - -// Flush writes all pending entries to the index. -func (i *BufferedIndexer) Flush() error { - if i.err != nil { - return i.err - } - - if len(i.buf) == 0 { - return nil - } - - superset := FingerprintMetricMapping{} - for _, b := range i.buf { - for fp, m := range b { - if _, ok := superset[fp]; ok { - continue - } - - superset[fp] = m - } - } - - i.buf = make([]FingerprintMetricMapping, 0, i.limit) - - i.err = i.i.IndexMetrics(superset) - - return i.err -} - -// Close flushes and closes the underlying buffer. -func (i *BufferedIndexer) Close() error { - if err := i.Flush(); err != nil { - return err - } - - if closer, ok := i.i.(io.Closer); ok { - return closer.Close() - } - - return nil -} - -// NewBufferedIndexer returns a BufferedIndexer ready to use. -func NewBufferedIndexer(i MetricIndexer, limit int) *BufferedIndexer { - return &BufferedIndexer{ - i: i, - limit: limit, - buf: make([]FingerprintMetricMapping, 0, limit), - } -} - -// TotalIndexer is a MetricIndexer that indexes all standard facets of a metric -// that a user or the Prometheus subsystem would want to query against: -// -// "