Mirror of https://github.com/prometheus/prometheus.git (synced 2025-03-05 20:59:13 -08:00)
Merge branch 'master' into bootstrap4
This commit is contained in: commit cd569b51d9
@ -1,5 +1,3 @@
sudo: false
language: go
# Whenever the Go version is updated here, .circleci/config.yml and .promu.yml
@ -36,7 +36,8 @@ GO_VERSION ?= $(shell $(GO) version)
GO_VERSION_NUMBER ?= $(word 3, $(GO_VERSION))
PRE_GO_111 ?= $(shell echo $(GO_VERSION_NUMBER) | grep -E 'go1\.(10|[0-9])\.')

unexport GOVENDOR
GOVENDOR :=
GO111MODULE :=
ifeq (, $(PRE_GO_111))
ifneq (,$(wildcard go.mod))
# Enforce Go modules support just in case the directory is inside GOPATH (and for Travis CI).

@ -57,8 +58,6 @@ $(warning Some recipes may not work as expected as the current Go runtime is '$(
# This repository isn't using Go modules (yet).
GOVENDOR := $(FIRST_GOPATH)/bin/govendor
endif

unexport GO111MODULE
endif
PROMU := $(FIRST_GOPATH)/bin/promu
STATICCHECK := $(FIRST_GOPATH)/bin/staticcheck
@ -12,8 +12,9 @@ Release cadence of first pre-releases being cut is 6 weeks.
| v2.5 | 2018-10-24 | Frederic Branczyk (GitHub: @brancz) |
| v2.6 | 2018-12-05 | Simon Pasquier (GitHub: @simonpasquier) |
| v2.7 | 2019-01-16 | Goutham Veeramachaneni (GitHub: @gouthamve) |
| v2.8 | 2019-02-27 | **searching for volunteer** |
| v2.8 | 2019-02-27 | Ganesh Vernekar (GitHub: @codesome) |
| v2.9 | 2019-04-10 | **searching for volunteer** |
| v2.10 | 2019-05-22 | **searching for volunteer** |

If you are interested in volunteering please create a pull request against the [prometheus/prometheus](https://github.com/prometheus/prometheus) repository and propose yourself for the release series of your choice.
@ -307,7 +307,7 @@ func main() {

	var (
		localStorage  = &tsdb.ReadyStorage{}
		remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), localStorage.StartTime, time.Duration(cfg.RemoteFlushDeadline))
		remoteStorage = remote.NewStorage(log.With(logger, "component", "remote"), prometheus.DefaultRegisterer, localStorage.StartTime, cfg.localStoragePath, time.Duration(cfg.RemoteFlushDeadline))
		fanoutStorage = storage.NewFanout(logger, localStorage, remoteStorage)
	)
@ -107,9 +107,10 @@ var (
		MinShards:         1,
		MaxSamplesPerSend: 100,

		// By default, buffer 100 batches, which at 100ms per batch is 10s. At
		// 1000 shards, this will buffer 10M samples total.
		Capacity:          100 * 100,
		// Each shard will have a max of 10 samples pending in it's channel, plus the pending
		// samples that have been enqueued. Theoretically we should only ever have about 110 samples
		// per shard pending. At 1000 shards that's 110k.
		Capacity:          10,
		BatchSendDeadline: model.Duration(5 * time.Second),

		// Max number of times to retry a batch on recoverable errors.
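Note: the arithmetic in the new Capacity comment can be sanity-checked directly. A minimal sketch; the 1000-shard figure and the 100-sample batch size are taken from the comments and defaults above, not from new code in this commit:

	package main

	import "fmt"

	func main() {
		const (
			capacity          = 10   // new per-shard channel buffer (Capacity: 10)
			maxSamplesPerSend = 100  // samples held in a pending batch per shard
			shards            = 1000 // worst-case shard count assumed by the comment
		)
		perShard := capacity + maxSamplesPerSend
		fmt.Println(perShard, perShard*shards) // 110 samples per shard, 110000 across 1000 shards
	}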
@ -215,7 +215,7 @@ func (d *Discovery) refresh() (tg *targetgroup.Group, err error) {
	dr := DiscoveryResponse{}
	err = json.Unmarshal(data, &dr)
	if err != nil {
		return tg, fmt.Errorf("an error occurred unmarshaling the disovery response json. %s", err)
		return tg, fmt.Errorf("an error occurred unmarshaling the discovery response json. %s", err)
	}

	for _, container := range dr.Containers {
@ -81,6 +81,25 @@ func (ls *Labels) UnmarshalJSON(b []byte) error {
	return nil
}

// MatchLabels returns a subset of Labels that matches/does not match with the provided label names based on the 'on' boolean.
// If on is set to true, it returns the subset of labels that match with the provided label names and its inverse when 'on' is set to false.
func (ls Labels) MatchLabels(on bool, names ...string) Labels {
	matchedLabels := Labels{}

	nameSet := map[string]struct{}{}
	for _, n := range names {
		nameSet[n] = struct{}{}
	}

	for _, v := range ls {
		if _, ok := nameSet[v.Name]; on == ok {
			matchedLabels = append(matchedLabels, v)
		}
	}

	return matchedLabels
}

// Hash returns a hash value for the label set.
func (ls Labels) Hash() uint64 {
	b := make([]byte, 0, 1024)
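Note: a quick sketch of how the new MatchLabels helper behaves, with made-up label values; the new pkg/labels/labels_test.go below exercises the same on/ignoring semantics:

	ls := Labels{
		{Name: "instance", Value: "0"},
		{Name: "job", Value: "app-server"},
		{Name: "severity", Value: "critical"},
	}
	ls.MatchLabels(true, "job", "severity")  // keeps only job and severity
	ls.MatchLabels(false, "job", "severity") // keeps everything else, here just instance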
104  pkg/labels/labels_test.go  Normal file
@ -0,0 +1,104 @@
// Copyright 2019 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package labels

import (
	"testing"
)

func TestLabels_MatchLabels(t *testing.T) {
	labels := Labels{
		{Name: "__name__", Value: "ALERTS"},
		{Name: "alertname", Value: "HTTPRequestRateLow"},
		{Name: "alertstate", Value: "pending"},
		{Name: "instance", Value: "0"},
		{Name: "job", Value: "app-server"},
		{Name: "severity", Value: "critical"},
	}

	providedNames := []string{
		"__name__",
		"alertname",
		"alertstate",
		"instance",
	}

	got := labels.MatchLabels(true, providedNames...)
	expected := Labels{
		{Name: "__name__", Value: "ALERTS"},
		{Name: "alertname", Value: "HTTPRequestRateLow"},
		{Name: "alertstate", Value: "pending"},
		{Name: "instance", Value: "0"},
	}

	assertSlice(t, got, expected)

	// Now try with 'on' set to false.
	got = labels.MatchLabels(false, providedNames...)

	expected = Labels{
		{Name: "job", Value: "app-server"},
		{Name: "severity", Value: "critical"},
	}

	assertSlice(t, got, expected)
}

func assertSlice(t *testing.T, got, expected Labels) {
	if len(expected) != len(got) {
		t.Errorf("expected the length of matched label names to be %d, but got %d", len(expected), len(got))
	}

	for i, expectedLabel := range expected {
		if expectedLabel.Name != got[i].Name {
			t.Errorf("expected to get Label with name %s, but got %s instead", expectedLabel.Name, got[i].Name)
		}
	}
}
@ -1434,9 +1434,16 @@ func (ev *evaluator) VectorBinop(op ItemType, lhs, rhs Vector, matching *VectorM
		sig := sigf(rs.Metric)
		// The rhs is guaranteed to be the 'one' side. Having multiple samples
		// with the same signature means that the matching is many-to-many.
		if _, found := rightSigs[sig]; found {
		if duplSample, found := rightSigs[sig]; found {
			// oneSide represents which side of the vector represents the 'one' in the many-to-one relationship.
			oneSide := "right"
			if matching.Card == CardOneToMany {
				oneSide = "left"
			}
			matchedLabels := rs.Metric.MatchLabels(matching.On, matching.MatchingLabels...)
			// Many-to-many matching not allowed.
			ev.errorf("many-to-many matching not allowed: matching labels must be unique on one side")
			ev.errorf("found duplicate series for the match group %s on the %s hand-side of the operation: [%s, %s]"+
				";many-to-many matching not allowed: matching labels must be unique on one side", matchedLabels.String(), oneSide, rs.Metric.String(), duplSample.Metric.String())
		}
		rightSigs[sig] = rs
	}
@ -14,6 +14,7 @@
package scrape

import (
	"fmt"
	"reflect"
	"sync"
	"time"
@ -104,18 +105,18 @@ func (m *Manager) reload() {
	m.mtxScrape.Lock()
	var wg sync.WaitGroup
	for setName, groups := range m.targetSets {
		var sp *scrapePool
		existing, ok := m.scrapePools[setName]
		if !ok {
		if _, ok := m.scrapePools[setName]; !ok {
			scrapeConfig, ok := m.scrapeConfigs[setName]
			if !ok {
				level.Error(m.logger).Log("msg", "error reloading target set", "err", "invalid config id:"+setName)
				continue
			}
			sp = newScrapePool(scrapeConfig, m.append, log.With(m.logger, "scrape_pool", setName))
			sp, err := newScrapePool(scrapeConfig, m.append, log.With(m.logger, "scrape_pool", setName))
			if err != nil {
				level.Error(m.logger).Log("msg", "error creating new scrape pool", "err", err, "scrape_pool", setName)
				continue
			}
			m.scrapePools[setName] = sp
		} else {
			sp = existing
		}

		wg.Add(1)
@ -123,7 +124,7 @@ func (m *Manager) reload() {
		go func(sp *scrapePool, groups []*targetgroup.Group) {
			sp.Sync(groups)
			wg.Done()
		}(sp, groups)
		}(m.scrapePools[setName], groups)

	}
	m.mtxScrape.Unlock()
@ -158,16 +159,24 @@ func (m *Manager) ApplyConfig(cfg *config.Config) error {
	}
	m.scrapeConfigs = c

	// Cleanup and reload pool if config has changed.
	// Cleanup and reload pool if the configuration has changed.
	var failed bool
	for name, sp := range m.scrapePools {
		if cfg, ok := m.scrapeConfigs[name]; !ok {
			sp.stop()
			delete(m.scrapePools, name)
		} else if !reflect.DeepEqual(sp.config, cfg) {
			sp.reload(cfg)
			err := sp.reload(cfg)
			if err != nil {
				level.Error(m.logger).Log("msg", "error reloading scrape pool", "err", err, "scrape_pool", name)
				failed = true
			}
		}
	}

	if failed {
		return fmt.Errorf("failed to apply the new configuration")
	}
	return nil
}
@ -222,47 +222,115 @@ func TestPopulateLabels(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// TestScrapeManagerReloadNoChange tests that no scrape reload happens when there is no config change.
|
||||
func TestManagerReloadNoChange(t *testing.T) {
|
||||
tsetName := "test"
|
||||
func loadConfiguration(t *testing.T, c string) *config.Config {
|
||||
t.Helper()
|
||||
|
||||
cfgText := `
|
||||
cfg := &config.Config{}
|
||||
if err := yaml.UnmarshalStrict([]byte(c), cfg); err != nil {
|
||||
t.Fatalf("Unable to load YAML config: %s", err)
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
|
||||
func noopLoop() loop {
|
||||
return &testLoop{
|
||||
startFunc: func(interval, timeout time.Duration, errc chan<- error) {},
|
||||
stopFunc: func() {},
|
||||
}
|
||||
}
|
||||
|
||||
func TestManagerApplyConfig(t *testing.T) {
|
||||
// Valid initial configuration.
|
||||
cfgText1 := `
|
||||
scrape_configs:
|
||||
- job_name: '` + tsetName + `'
|
||||
- job_name: job1
|
||||
static_configs:
|
||||
- targets: ["foo:9090"]
|
||||
- targets: ["bar:9090"]
|
||||
`
|
||||
cfg := &config.Config{}
|
||||
if err := yaml.UnmarshalStrict([]byte(cfgText), cfg); err != nil {
|
||||
t.Fatalf("Unable to load YAML config cfgYaml: %s", err)
|
||||
}
|
||||
// Invalid configuration.
|
||||
cfgText2 := `
|
||||
scrape_configs:
|
||||
- job_name: job1
|
||||
scheme: https
|
||||
static_configs:
|
||||
- targets: ["foo:9090"]
|
||||
tls_config:
|
||||
ca_file: /not/existing/ca/file
|
||||
`
|
||||
// Valid configuration.
|
||||
cfgText3 := `
|
||||
scrape_configs:
|
||||
- job_name: job1
|
||||
scheme: https
|
||||
static_configs:
|
||||
- targets: ["foo:9090"]
|
||||
`
|
||||
var (
|
||||
cfg1 = loadConfiguration(t, cfgText1)
|
||||
cfg2 = loadConfiguration(t, cfgText2)
|
||||
cfg3 = loadConfiguration(t, cfgText3)
|
||||
|
||||
ch = make(chan struct{}, 1)
|
||||
)
|
||||
|
||||
scrapeManager := NewManager(nil, nil)
|
||||
// Load the current config.
|
||||
scrapeManager.ApplyConfig(cfg)
|
||||
|
||||
// As reload never happens, new loop should never be called.
|
||||
newLoop := func(_ *Target, s scraper, _ int, _ bool, _ []*relabel.Config) loop {
|
||||
t.Fatal("reload happened")
|
||||
return nil
|
||||
ch <- struct{}{}
|
||||
return noopLoop()
|
||||
}
|
||||
|
||||
sp := &scrapePool{
|
||||
appendable: &nopAppendable{},
|
||||
activeTargets: map[uint64]*Target{},
|
||||
loops: map[uint64]loop{
|
||||
1: &testLoop{},
|
||||
1: noopLoop(),
|
||||
},
|
||||
newLoop: newLoop,
|
||||
logger: nil,
|
||||
config: cfg.ScrapeConfigs[0],
|
||||
config: cfg1.ScrapeConfigs[0],
|
||||
}
|
||||
scrapeManager.scrapePools = map[string]*scrapePool{
|
||||
tsetName: sp,
|
||||
"job1": sp,
|
||||
}
|
||||
|
||||
scrapeManager.ApplyConfig(cfg)
|
||||
// Apply the initial configuration.
|
||||
if err := scrapeManager.ApplyConfig(cfg1); err != nil {
|
||||
t.Fatalf("unable to apply configuration: %s", err)
|
||||
}
|
||||
select {
|
||||
case <-ch:
|
||||
t.Fatal("reload happened")
|
||||
default:
|
||||
}
|
||||
|
||||
// Apply a configuration for which the reload fails.
|
||||
if err := scrapeManager.ApplyConfig(cfg2); err == nil {
|
||||
t.Fatalf("expecting error but got none")
|
||||
}
|
||||
select {
|
||||
case <-ch:
|
||||
t.Fatal("reload happened")
|
||||
default:
|
||||
}
|
||||
|
||||
// Apply a configuration for which the reload succeeds.
|
||||
if err := scrapeManager.ApplyConfig(cfg3); err != nil {
|
||||
t.Fatalf("unable to apply configuration: %s", err)
|
||||
}
|
||||
select {
|
||||
case <-ch:
|
||||
default:
|
||||
t.Fatal("reload didn't happen")
|
||||
}
|
||||
|
||||
// Re-applying the same configuration shouldn't trigger a reload.
|
||||
if err := scrapeManager.ApplyConfig(cfg3); err != nil {
|
||||
t.Fatalf("unable to apply configuration: %s", err)
|
||||
}
|
||||
select {
|
||||
case <-ch:
|
||||
t.Fatal("reload happened")
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
func TestManagerTargetsUpdates(t *testing.T) {
|
||||
|
|
|
@ -28,6 +28,7 @@ import (
|
|||
|
||||
"github.com/go-kit/kit/log"
|
||||
"github.com/go-kit/kit/log/level"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
config_util "github.com/prometheus/common/config"
|
||||
"github.com/prometheus/common/model"
|
||||
|
@ -61,6 +62,30 @@ var (
|
|||
},
|
||||
[]string{"interval"},
|
||||
)
|
||||
targetScrapePools = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pools_total",
|
||||
Help: "Total number of scrape pool creation atttempts.",
|
||||
},
|
||||
)
|
||||
targetScrapePoolsFailed = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pools_failed_total",
|
||||
Help: "Total number of scrape pool creations that failed.",
|
||||
},
|
||||
)
|
||||
targetScrapePoolReloads = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pool_reloads_total",
|
||||
Help: "Total number of scrape loop reloads.",
|
||||
},
|
||||
)
|
||||
targetScrapePoolReloadsFailed = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Name: "prometheus_target_scrape_pool_reloads_failed_total",
|
||||
Help: "Total number of failed scrape loop reloads.",
|
||||
},
|
||||
)
|
||||
targetSyncIntervalLength = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Name: "prometheus_target_sync_length_seconds",
|
||||
|
@ -105,6 +130,10 @@ var (
|
|||
func init() {
|
||||
prometheus.MustRegister(targetIntervalLength)
|
||||
prometheus.MustRegister(targetReloadIntervalLength)
|
||||
prometheus.MustRegister(targetScrapePools)
|
||||
prometheus.MustRegister(targetScrapePoolsFailed)
|
||||
prometheus.MustRegister(targetScrapePoolReloads)
|
||||
prometheus.MustRegister(targetScrapePoolReloadsFailed)
|
||||
prometheus.MustRegister(targetSyncIntervalLength)
|
||||
prometheus.MustRegister(targetScrapePoolSyncsCounter)
|
||||
prometheus.MustRegister(targetScrapeSampleLimit)
|
||||
|
@ -136,15 +165,16 @@ const maxAheadTime = 10 * time.Minute
|
|||
|
||||
type labelsMutator func(labels.Labels) labels.Labels
|
||||
|
||||
func newScrapePool(cfg *config.ScrapeConfig, app Appendable, logger log.Logger) *scrapePool {
|
||||
func newScrapePool(cfg *config.ScrapeConfig, app Appendable, logger log.Logger) (*scrapePool, error) {
|
||||
targetScrapePools.Inc()
|
||||
if logger == nil {
|
||||
logger = log.NewNopLogger()
|
||||
}
|
||||
|
||||
client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName)
|
||||
if err != nil {
|
||||
// Any errors that could occur here should be caught during config validation.
|
||||
level.Error(logger).Log("msg", "Error creating HTTP client", "err", err)
|
||||
targetScrapePoolsFailed.Inc()
|
||||
return nil, errors.Wrap(err, "error creating HTTP client")
|
||||
}
|
||||
|
||||
buffers := pool.New(1e3, 100e6, 3, func(sz int) interface{} { return make([]byte, 0, sz) })
|
||||
|
@ -182,7 +212,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app Appendable, logger log.Logger)
|
|||
)
|
||||
}
|
||||
|
||||
return sp
|
||||
return sp, nil
|
||||
}
|
||||
|
||||
func (sp *scrapePool) ActiveTargets() []*Target {
|
||||
|
@ -227,7 +257,8 @@ func (sp *scrapePool) stop() {
|
|||
// reload the scrape pool with the given scrape configuration. The target state is preserved
|
||||
// but all scrape loops are restarted with the new scrape configuration.
|
||||
// This method returns after all scrape loops that were stopped have stopped scraping.
|
||||
func (sp *scrapePool) reload(cfg *config.ScrapeConfig) {
|
||||
func (sp *scrapePool) reload(cfg *config.ScrapeConfig) error {
|
||||
targetScrapePoolReloads.Inc()
|
||||
start := time.Now()
|
||||
|
||||
sp.mtx.Lock()
|
||||
|
@ -235,8 +266,8 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) {
|
|||
|
||||
client, err := config_util.NewClientFromConfig(cfg.HTTPClientConfig, cfg.JobName)
|
||||
if err != nil {
|
||||
// Any errors that could occur here should be caught during config validation.
|
||||
level.Error(sp.logger).Log("msg", "Error creating HTTP client", "err", err)
|
||||
targetScrapePoolReloadsFailed.Inc()
|
||||
return errors.Wrap(err, "error creating HTTP client")
|
||||
}
|
||||
sp.config = cfg
|
||||
sp.client = client
|
||||
|
@ -272,6 +303,7 @@ func (sp *scrapePool) reload(cfg *config.ScrapeConfig) {
|
|||
targetReloadIntervalLength.WithLabelValues(interval.String()).Observe(
|
||||
time.Since(start).Seconds(),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sync converts target groups into actual scrape targets and synchronizes
|
||||
|
|
|
@ -29,16 +29,14 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/prometheus/pkg/relabel"
|
||||
|
||||
dto "github.com/prometheus/client_model/go"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
dto "github.com/prometheus/client_model/go"
|
||||
|
||||
"github.com/prometheus/prometheus/config"
|
||||
"github.com/prometheus/prometheus/discovery/targetgroup"
|
||||
"github.com/prometheus/prometheus/pkg/labels"
|
||||
"github.com/prometheus/prometheus/pkg/relabel"
|
||||
"github.com/prometheus/prometheus/pkg/textparse"
|
||||
"github.com/prometheus/prometheus/pkg/timestamp"
|
||||
"github.com/prometheus/prometheus/pkg/value"
|
||||
|
@ -48,9 +46,9 @@ import (
|
|||
|
||||
func TestNewScrapePool(t *testing.T) {
|
||||
var (
|
||||
app = &nopAppendable{}
|
||||
cfg = &config.ScrapeConfig{}
|
||||
sp = newScrapePool(cfg, app, nil)
|
||||
app = &nopAppendable{}
|
||||
cfg = &config.ScrapeConfig{}
|
||||
sp, _ = newScrapePool(cfg, app, nil)
|
||||
)
|
||||
|
||||
if a, ok := sp.appendable.(*nopAppendable); !ok || a != app {
|
||||
|
@ -85,7 +83,7 @@ func TestDroppedTargetsList(t *testing.T) {
|
|||
},
|
||||
},
|
||||
}
|
||||
sp = newScrapePool(cfg, app, nil)
|
||||
sp, _ = newScrapePool(cfg, app, nil)
|
||||
expectedLabelSetString = "{__address__=\"127.0.0.1:9090\", __metrics_path__=\"\", __scheme__=\"\", job=\"dropMe\"}"
|
||||
expectedLength = 1
|
||||
)
|
||||
|
@ -307,7 +305,7 @@ func TestScrapePoolReload(t *testing.T) {
|
|||
func TestScrapePoolAppender(t *testing.T) {
|
||||
cfg := &config.ScrapeConfig{}
|
||||
app := &nopAppendable{}
|
||||
sp := newScrapePool(cfg, app, nil)
|
||||
sp, _ := newScrapePool(cfg, app, nil)
|
||||
|
||||
loop := sp.newLoop(&Target{}, nil, 0, false, nil)
|
||||
appl, ok := loop.(*scrapeLoop)
|
||||
|
@ -350,7 +348,7 @@ func TestScrapePoolRaces(t *testing.T) {
|
|||
newConfig := func() *config.ScrapeConfig {
|
||||
return &config.ScrapeConfig{ScrapeInterval: interval, ScrapeTimeout: timeout}
|
||||
}
|
||||
sp := newScrapePool(newConfig(), &nopAppendable{}, nil)
|
||||
sp, _ := newScrapePool(newConfig(), &nopAppendable{}, nil)
|
||||
tgts := []*targetgroup.Group{
|
||||
{
|
||||
Targets: []model.LabelSet{
|
||||
|
@ -880,7 +878,7 @@ func TestScrapeLoopAppendSampleLimit(t *testing.T) {
|
|||
t.Fatalf("Did not see expected sample limit error: %s", err)
|
||||
}
|
||||
|
||||
// Check that the Counter has been incremented a simgle time for the scrape,
|
||||
// Check that the Counter has been incremented a single time for the scrape,
|
||||
// not multiple times for each sample.
|
||||
metric := dto.Metric{}
|
||||
err = targetScrapeSampleLimit.Write(&metric)
|
||||
|
|
|
@ -70,15 +70,10 @@ type recoverableError struct {
	error
}

// Store sends a batch of samples to the HTTP endpoint.
func (c *Client) Store(ctx context.Context, req *prompb.WriteRequest) error {
	data, err := proto.Marshal(req)
	if err != nil {
		return err
	}

	compressed := snappy.Encode(nil, data)
	httpReq, err := http.NewRequest("POST", c.url.String(), bytes.NewReader(compressed))
// Store sends a batch of samples to the HTTP endpoint, the request is the proto marshalled
// and encoded bytes from codec.go.
func (c *Client) Store(ctx context.Context, req []byte) error {
	httpReq, err := http.NewRequest("POST", c.url.String(), bytes.NewReader(req))
	if err != nil {
		// Errors from NewRequest are from unparseable URLs, so are not
		// recoverable.
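Note: with this change Client.Store no longer marshals the write request itself; callers pass snappy-compressed protobuf bytes. A rough sketch of the new calling convention, mirroring buildWriteRequest later in this commit; the client c, the context ctx and the samples slice are assumed to already exist:

	req := &prompb.WriteRequest{Timeseries: samples} // samples is a []prompb.TimeSeries
	data, err := proto.Marshal(req)
	if err != nil {
		return err // marshalling failures are non-recoverable
	}
	compressed := snappy.Encode(nil, data)
	return c.Store(ctx, compressed) // Store now just POSTs the prepared bytes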
@ -26,7 +26,6 @@ import (

	config_util "github.com/prometheus/common/config"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/prompb"
)

var longErrMessage = strings.Repeat("error message", maxErrMsgLen)

@ -74,7 +73,7 @@ func TestStoreHTTPErrorHandling(t *testing.T) {
				t.Fatal(err)
			}

			err = c.Store(context.Background(), &prompb.WriteRequest{})
			err = c.Store(context.Background(), []byte{})
			if !reflect.DeepEqual(err, test.err) {
				t.Errorf("%d. Unexpected error; want %v, got %v", i, test.err, err)
			}
@ -80,28 +80,6 @@ func EncodeReadResponse(resp *prompb.ReadResponse, w http.ResponseWriter) error
|
|||
return err
|
||||
}
|
||||
|
||||
// ToWriteRequest converts an array of samples into a WriteRequest proto.
|
||||
func ToWriteRequest(samples []*model.Sample) *prompb.WriteRequest {
|
||||
req := &prompb.WriteRequest{
|
||||
Timeseries: make([]prompb.TimeSeries, 0, len(samples)),
|
||||
}
|
||||
|
||||
for _, s := range samples {
|
||||
ts := prompb.TimeSeries{
|
||||
Labels: MetricToLabelProtos(s.Metric),
|
||||
Samples: []prompb.Sample{
|
||||
{
|
||||
Value: float64(s.Value),
|
||||
Timestamp: int64(s.Timestamp),
|
||||
},
|
||||
},
|
||||
}
|
||||
req.Timeseries = append(req.Timeseries, ts)
|
||||
}
|
||||
|
||||
return req
|
||||
}
|
||||
|
||||
// ToQuery builds a Query proto.
|
||||
func ToQuery(from, to int64, matchers []*labels.Matcher, p *storage.SelectParams) (*prompb.Query, error) {
|
||||
ms, err := toLabelMatchers(matchers)
|
||||
|
@ -364,21 +342,6 @@ func fromLabelMatchers(matchers []*prompb.LabelMatcher) ([]*labels.Matcher, erro
|
|||
return result, nil
|
||||
}
|
||||
|
||||
// MetricToLabelProtos builds a []*prompb.Label from a model.Metric
|
||||
func MetricToLabelProtos(metric model.Metric) []prompb.Label {
|
||||
labels := make([]prompb.Label, 0, len(metric))
|
||||
for k, v := range metric {
|
||||
labels = append(labels, prompb.Label{
|
||||
Name: string(k),
|
||||
Value: string(v),
|
||||
})
|
||||
}
|
||||
sort.Slice(labels, func(i int, j int) bool {
|
||||
return labels[i].Name < labels[j].Name
|
||||
})
|
||||
return labels
|
||||
}
|
||||
|
||||
// LabelProtosToMetric unpack a []*prompb.Label to a model.Metric
|
||||
func LabelProtosToMetric(labelPairs []*prompb.Label) model.Metric {
|
||||
metric := make(model.Metric, len(labelPairs))
|
||||
|
@ -400,6 +363,26 @@ func labelProtosToLabels(labelPairs []prompb.Label) labels.Labels {
|
|||
return result
|
||||
}
|
||||
|
||||
func labelsetToLabelsProto(ls model.LabelSet) []prompb.Label {
|
||||
result := make([]prompb.Label, 0, len(ls))
|
||||
keys := make([]string, 0, len(ls))
|
||||
|
||||
for k := range ls {
|
||||
keys = append(keys, string(k))
|
||||
}
|
||||
sort.Strings(keys)
|
||||
|
||||
for _, k := range keys {
|
||||
ln := model.LabelName(k)
|
||||
result = append(result, prompb.Label{
|
||||
Name: k,
|
||||
Value: string(ls[ln]),
|
||||
})
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func labelsToLabelsProto(labels labels.Labels) []prompb.Label {
|
||||
result := make([]prompb.Label, 0, len(labels))
|
||||
for _, l := range labels {
|
||||
|
@ -410,11 +393,3 @@ func labelsToLabelsProto(labels labels.Labels) []prompb.Label {
|
|||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func labelsToMetric(ls labels.Labels) model.Metric {
|
||||
metric := make(model.Metric, len(ls))
|
||||
for _, l := range ls {
|
||||
metric[model.LabelName(l.Name)] = model.LabelValue(l.Value)
|
||||
}
|
||||
return metric
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@ package remote
|
|||
import (
|
||||
"context"
|
||||
"math"
|
||||
"strconv"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
@ -24,12 +25,16 @@ import (
|
|||
|
||||
"github.com/go-kit/kit/log"
|
||||
"github.com/go-kit/kit/log/level"
|
||||
"github.com/gogo/protobuf/proto"
|
||||
"github.com/golang/snappy"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/prometheus/prometheus/config"
|
||||
pkgrelabel "github.com/prometheus/prometheus/pkg/relabel"
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
"github.com/prometheus/prometheus/relabel"
|
||||
"github.com/prometheus/tsdb"
|
||||
)
|
||||
|
||||
// String constants for instrumentation.
|
||||
|
@ -66,7 +71,16 @@ var (
|
|||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "failed_samples_total",
|
||||
Help: "Total number of samples which failed on send to remote storage.",
|
||||
Help: "Total number of samples which failed on send to remote storage, non-recoverable errors.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
retriedSamplesTotal = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "retried_samples_total",
|
||||
Help: "Total number of samples which failed on send to remote storage but were retried because the send error was recoverable.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
|
@ -75,7 +89,16 @@ var (
|
|||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "dropped_samples_total",
|
||||
Help: "Total number of samples which were dropped due to the queue being full.",
|
||||
Help: "Total number of samples which were dropped after being read from the WAL before being sent via remote write.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
enqueueRetriesTotal = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "enqueue_retries_total",
|
||||
Help: "Total number of times enqueue has failed because a shards queue was full.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
|
@ -89,12 +112,30 @@ var (
|
|||
},
|
||||
[]string{queue},
|
||||
)
|
||||
queueLength = prometheus.NewGaugeVec(
|
||||
queueLastSendTimestamp = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "queue_length",
|
||||
Help: "The number of processed samples queued to be sent to the remote storage.",
|
||||
Name: "queue_last_send_timestamp",
|
||||
Help: "Timestamp of the last successful send by this queue.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
queueHighestSentTimestamp = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "queue_highest_sent_timestamp",
|
||||
Help: "Timestamp from a WAL sample, the highest timestamp successfully sent by this queue.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
queuePendingSamples = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Subsystem: subsystem,
|
||||
Name: "pending_samples",
|
||||
Help: "The number of samples pending in the queues shards to be sent to the remote storage.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
|
@ -121,9 +162,13 @@ var (
|
|||
func init() {
|
||||
prometheus.MustRegister(succeededSamplesTotal)
|
||||
prometheus.MustRegister(failedSamplesTotal)
|
||||
prometheus.MustRegister(retriedSamplesTotal)
|
||||
prometheus.MustRegister(droppedSamplesTotal)
|
||||
prometheus.MustRegister(enqueueRetriesTotal)
|
||||
prometheus.MustRegister(sentBatchDuration)
|
||||
prometheus.MustRegister(queueLength)
|
||||
prometheus.MustRegister(queueLastSendTimestamp)
|
||||
prometheus.MustRegister(queueHighestSentTimestamp)
|
||||
prometheus.MustRegister(queuePendingSamples)
|
||||
prometheus.MustRegister(shardCapacity)
|
||||
prometheus.MustRegister(numShards)
|
||||
}
|
||||
|
@ -132,25 +177,41 @@ func init() {
|
|||
// external timeseries database.
|
||||
type StorageClient interface {
|
||||
// Store stores the given samples in the remote storage.
|
||||
Store(context.Context, *prompb.WriteRequest) error
|
||||
Store(context.Context, []byte) error
|
||||
// Name identifies the remote storage implementation.
|
||||
Name() string
|
||||
}
|
||||
|
||||
// QueueManager manages a queue of samples to be sent to the Storage
|
||||
// indicated by the provided StorageClient.
|
||||
// indicated by the provided StorageClient. Implements writeTo interface
|
||||
// used by WAL Watcher.
|
||||
type QueueManager struct {
|
||||
logger log.Logger
|
||||
|
||||
flushDeadline time.Duration
|
||||
cfg config.QueueConfig
|
||||
externalLabels model.LabelSet
|
||||
relabelConfigs []*pkgrelabel.Config
|
||||
client StorageClient
|
||||
queueName string
|
||||
logLimiter *rate.Limiter
|
||||
flushDeadline time.Duration
|
||||
cfg config.QueueConfig
|
||||
externalLabels model.LabelSet
|
||||
relabelConfigs []*pkgrelabel.Config
|
||||
client StorageClient
|
||||
queueName string
|
||||
logLimiter *rate.Limiter
|
||||
watcher *WALWatcher
|
||||
lastSendTimestampMetric prometheus.Gauge
|
||||
highestSentTimestampMetric prometheus.Gauge
|
||||
pendingSamplesMetric prometheus.Gauge
|
||||
enqueueRetriesMetric prometheus.Counter
|
||||
|
||||
lastSendTimestamp int64
|
||||
highestSentTimestamp int64
|
||||
timestampLock sync.Mutex
|
||||
|
||||
highestTimestampIn *int64 // highest timestamp of any sample ingested by remote storage via scrape (Appender)
|
||||
|
||||
seriesMtx sync.Mutex
|
||||
seriesLabels map[uint64][]prompb.Label
|
||||
seriesSegmentIndexes map[uint64]int
|
||||
droppedSeries map[uint64]struct{}
|
||||
|
||||
shardsMtx sync.RWMutex
|
||||
shards *shards
|
||||
numShards int
|
||||
reshardChan chan int
|
||||
|
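Note: the comment above says QueueManager now implements the writeTo interface used by the WAL watcher. That interface is not shown in this diff; judging from the methods added below (Append, StoreSeries, SeriesReset) it presumably looks roughly like the following sketch, which is an inference rather than the actual definition:

	// Hypothetical shape of the writeTo interface consumed by the WAL watcher,
	// inferred from the QueueManager methods introduced in this commit.
	type writeTo interface {
		Append([]tsdb.RefSample) bool
		StoreSeries([]tsdb.RefSeries, int)
		SeriesReset(int)
	}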
@ -162,7 +223,7 @@ type QueueManager struct {
|
|||
}
|
||||
|
||||
// NewQueueManager builds a new QueueManager.
|
||||
func NewQueueManager(logger log.Logger, cfg config.QueueConfig, externalLabels model.LabelSet, relabelConfigs []*pkgrelabel.Config, client StorageClient, flushDeadline time.Duration) *QueueManager {
|
||||
func NewQueueManager(logger log.Logger, walDir string, samplesIn *ewmaRate, highestTimestampIn *int64, cfg config.QueueConfig, externalLabels model.LabelSet, relabelConfigs []*pkgrelabel.Config, client StorageClient, flushDeadline time.Duration, startTime int64) *QueueManager {
|
||||
if logger == nil {
|
||||
logger = log.NewNopLogger()
|
||||
} else {
|
||||
|
@ -177,16 +238,29 @@ func NewQueueManager(logger log.Logger, cfg config.QueueConfig, externalLabels m
|
|||
client: client,
|
||||
queueName: client.Name(),
|
||||
|
||||
highestTimestampIn: highestTimestampIn,
|
||||
|
||||
seriesLabels: make(map[uint64][]prompb.Label),
|
||||
seriesSegmentIndexes: make(map[uint64]int),
|
||||
droppedSeries: make(map[uint64]struct{}),
|
||||
|
||||
logLimiter: rate.NewLimiter(logRateLimit, logBurst),
|
||||
numShards: cfg.MinShards,
|
||||
reshardChan: make(chan int),
|
||||
quit: make(chan struct{}),
|
||||
|
||||
samplesIn: newEWMARate(ewmaWeight, shardUpdateDuration),
|
||||
samplesIn: samplesIn,
|
||||
samplesOut: newEWMARate(ewmaWeight, shardUpdateDuration),
|
||||
samplesOutDuration: newEWMARate(ewmaWeight, shardUpdateDuration),
|
||||
}
|
||||
t.shards = t.newShards(t.numShards)
|
||||
|
||||
t.lastSendTimestampMetric = queueLastSendTimestamp.WithLabelValues(t.queueName)
|
||||
t.highestSentTimestampMetric = queueHighestSentTimestamp.WithLabelValues(t.queueName)
|
||||
t.pendingSamplesMetric = queuePendingSamples.WithLabelValues(t.queueName)
|
||||
t.enqueueRetriesMetric = enqueueRetriesTotal.WithLabelValues(t.queueName)
|
||||
t.watcher = NewWALWatcher(logger, client.Name(), t, walDir, startTime)
|
||||
t.shards = t.newShards()
|
||||
|
||||
numShards.WithLabelValues(t.queueName).Set(float64(t.numShards))
|
||||
shardCapacity.WithLabelValues(t.queueName).Set(float64(t.cfg.Capacity))
|
||||
|
||||
|
@ -195,76 +269,144 @@ func NewQueueManager(logger log.Logger, cfg config.QueueConfig, externalLabels m
|
|||
succeededSamplesTotal.WithLabelValues(t.queueName)
|
||||
failedSamplesTotal.WithLabelValues(t.queueName)
|
||||
droppedSamplesTotal.WithLabelValues(t.queueName)
|
||||
retriedSamplesTotal.WithLabelValues(t.queueName)
|
||||
// Reset pending samples metric to 0.
|
||||
t.pendingSamplesMetric.Set(0)
|
||||
|
||||
return t
|
||||
}
|
||||
|
||||
// Append queues a sample to be sent to the remote storage. It drops the
|
||||
// sample on the floor if the queue is full.
|
||||
// Always returns nil.
|
||||
func (t *QueueManager) Append(s *model.Sample) error {
|
||||
snew := *s
|
||||
snew.Metric = s.Metric.Clone()
|
||||
// Append queues a sample to be sent to the remote storage. Blocks until all samples are
|
||||
// enqueued on their shards or a shutdown signal is received.
|
||||
func (t *QueueManager) Append(s []tsdb.RefSample) bool {
|
||||
type enqueuable struct {
|
||||
ts prompb.TimeSeries
|
||||
ref uint64
|
||||
}
|
||||
|
||||
for ln, lv := range t.externalLabels {
|
||||
if _, ok := s.Metric[ln]; !ok {
|
||||
snew.Metric[ln] = lv
|
||||
tempSamples := make([]enqueuable, 0, len(s))
|
||||
t.seriesMtx.Lock()
|
||||
for _, sample := range s {
|
||||
// If we have no labels for the series, due to relabelling or otherwise, don't send the sample.
|
||||
if _, ok := t.seriesLabels[sample.Ref]; !ok {
|
||||
droppedSamplesTotal.WithLabelValues(t.queueName).Inc()
|
||||
if _, ok := t.droppedSeries[sample.Ref]; !ok && t.logLimiter.Allow() {
|
||||
level.Info(t.logger).Log("msg", "dropped sample for series that was not explicitly dropped via relabelling", "ref", sample.Ref)
|
||||
}
|
||||
continue
|
||||
}
|
||||
tempSamples = append(tempSamples, enqueuable{
|
||||
ts: prompb.TimeSeries{
|
||||
Labels: t.seriesLabels[sample.Ref],
|
||||
Samples: []prompb.Sample{
|
||||
prompb.Sample{
|
||||
Value: float64(sample.V),
|
||||
Timestamp: sample.T,
|
||||
},
|
||||
},
|
||||
},
|
||||
ref: sample.Ref,
|
||||
})
|
||||
}
|
||||
t.seriesMtx.Unlock()
|
||||
|
||||
outer:
|
||||
for _, sample := range tempSamples {
|
||||
// This will only loop if the queues are being resharded.
|
||||
backoff := t.cfg.MinBackoff
|
||||
for {
|
||||
select {
|
||||
case <-t.quit:
|
||||
return false
|
||||
default:
|
||||
}
|
||||
|
||||
if t.shards.enqueue(sample.ref, sample.ts) {
|
||||
continue outer
|
||||
}
|
||||
|
||||
t.enqueueRetriesMetric.Inc()
|
||||
time.Sleep(time.Duration(backoff))
|
||||
backoff = backoff * 2
|
||||
if backoff > t.cfg.MaxBackoff {
|
||||
backoff = t.cfg.MaxBackoff
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
snew.Metric = model.Metric(
|
||||
relabel.Process(model.LabelSet(snew.Metric), t.relabelConfigs...))
|
||||
|
||||
if snew.Metric == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
t.shardsMtx.RLock()
|
||||
enqueued := t.shards.enqueue(&snew)
|
||||
t.shardsMtx.RUnlock()
|
||||
|
||||
if enqueued {
|
||||
queueLength.WithLabelValues(t.queueName).Inc()
|
||||
} else {
|
||||
droppedSamplesTotal.WithLabelValues(t.queueName).Inc()
|
||||
if t.logLimiter.Allow() {
|
||||
level.Warn(t.logger).Log("msg", "Remote storage queue full, discarding sample. Multiple subsequent messages of this kind may be suppressed.")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// NeedsThrottling implements storage.SampleAppender. It will always return
|
||||
// false as a remote storage drops samples on the floor if backlogging instead
|
||||
// of asking for throttling.
|
||||
func (*QueueManager) NeedsThrottling() bool {
|
||||
return false
|
||||
return true
|
||||
}
|
||||
|
||||
// Start the queue manager sending samples to the remote storage.
|
||||
// Does not block.
|
||||
func (t *QueueManager) Start() {
|
||||
t.shards.start(t.numShards)
|
||||
t.watcher.Start()
|
||||
|
||||
t.wg.Add(2)
|
||||
go t.updateShardsLoop()
|
||||
go t.reshardLoop()
|
||||
|
||||
t.shardsMtx.Lock()
|
||||
defer t.shardsMtx.Unlock()
|
||||
t.shards.start()
|
||||
}
|
||||
|
||||
// Stop stops sending samples to the remote storage and waits for pending
|
||||
// sends to complete.
|
||||
func (t *QueueManager) Stop() {
|
||||
level.Info(t.logger).Log("msg", "Stopping remote storage...")
|
||||
defer level.Info(t.logger).Log("msg", "Remote storage stopped.")
|
||||
|
||||
close(t.quit)
|
||||
t.shards.stop()
|
||||
t.watcher.Stop()
|
||||
t.wg.Wait()
|
||||
}
|
||||
|
||||
t.shardsMtx.Lock()
|
||||
defer t.shardsMtx.Unlock()
|
||||
t.shards.stop(t.flushDeadline)
|
||||
// StoreSeries keeps track of which series we know about for lookups when sending samples to remote.
|
||||
func (t *QueueManager) StoreSeries(series []tsdb.RefSeries, index int) {
|
||||
temp := make(map[uint64][]prompb.Label, len(series))
|
||||
for _, s := range series {
|
||||
ls := make(model.LabelSet, len(s.Labels))
|
||||
for _, label := range s.Labels {
|
||||
ls[model.LabelName(label.Name)] = model.LabelValue(label.Value)
|
||||
}
|
||||
t.processExternalLabels(ls)
|
||||
rl := relabel.Process(ls, t.relabelConfigs...)
|
||||
if len(rl) == 0 {
|
||||
t.droppedSeries[s.Ref] = struct{}{}
|
||||
continue
|
||||
}
|
||||
temp[s.Ref] = labelsetToLabelsProto(rl)
|
||||
}
|
||||
|
||||
level.Info(t.logger).Log("msg", "Remote storage stopped.")
|
||||
t.seriesMtx.Lock()
|
||||
defer t.seriesMtx.Unlock()
|
||||
for ref, labels := range temp {
|
||||
t.seriesLabels[ref] = labels
|
||||
t.seriesSegmentIndexes[ref] = index
|
||||
}
|
||||
}
|
||||
|
||||
// SeriesReset is used when reading a checkpoint. WAL Watcher should have
|
||||
// stored series records with the checkpoints index number, so we can now
|
||||
// delete any ref ID's lower than that # from the two maps.
|
||||
func (t *QueueManager) SeriesReset(index int) {
|
||||
t.seriesMtx.Lock()
|
||||
defer t.seriesMtx.Unlock()
|
||||
|
||||
// Check for series that are in segments older than the checkpoint
|
||||
// that were not also present in the checkpoint.
|
||||
for k, v := range t.seriesSegmentIndexes {
|
||||
if v < index {
|
||||
delete(t.seriesLabels, k)
|
||||
delete(t.seriesSegmentIndexes, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (t *QueueManager) processExternalLabels(ls model.LabelSet) {
|
||||
for ln, lv := range t.externalLabels {
|
||||
if _, ok := ls[ln]; !ok {
|
||||
ls[ln] = lv
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (t *QueueManager) updateShardsLoop() {
|
||||
|
@ -275,6 +417,12 @@ func (t *QueueManager) updateShardsLoop() {
|
|||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
now := time.Now().Unix()
|
||||
threshold := int64(time.Duration(2 * t.cfg.BatchSendDeadline).Seconds())
|
||||
if now-t.lastSendTimestamp > threshold {
|
||||
level.Debug(t.logger).Log("msg", "Skipping resharding, last successful send was beyond threshold")
|
||||
continue
|
||||
}
|
||||
t.calculateDesiredShards()
|
||||
case <-t.quit:
|
||||
return
|
||||
|
@ -351,107 +499,150 @@ func (t *QueueManager) reshardLoop() {
|
|||
for {
|
||||
select {
|
||||
case numShards := <-t.reshardChan:
|
||||
t.reshard(numShards)
|
||||
// We start the newShards after we have stopped (the therefore completely
|
||||
// flushed) the oldShards, to guarantee we only every deliver samples in
|
||||
// order.
|
||||
t.shards.stop()
|
||||
t.shards.start(numShards)
|
||||
case <-t.quit:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (t *QueueManager) reshard(n int) {
|
||||
numShards.WithLabelValues(t.queueName).Set(float64(n))
|
||||
|
||||
t.shardsMtx.Lock()
|
||||
newShards := t.newShards(n)
|
||||
oldShards := t.shards
|
||||
t.shards = newShards
|
||||
t.shardsMtx.Unlock()
|
||||
|
||||
oldShards.stop(t.flushDeadline)
|
||||
|
||||
// We start the newShards after we have stopped (the therefore completely
|
||||
// flushed) the oldShards, to guarantee we only every deliver samples in
|
||||
// order.
|
||||
newShards.start()
|
||||
}
|
||||
|
||||
type shards struct {
|
||||
qm *QueueManager
|
||||
queues []chan *model.Sample
|
||||
done chan struct{}
|
||||
running int32
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
func (t *QueueManager) newShards(numShards int) *shards {
|
||||
queues := make([]chan *model.Sample, numShards)
|
||||
for i := 0; i < numShards; i++ {
|
||||
queues[i] = make(chan *model.Sample, t.cfg.Capacity)
|
||||
}
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
func (t *QueueManager) newShards() *shards {
|
||||
s := &shards{
|
||||
qm: t,
|
||||
queues: queues,
|
||||
done: make(chan struct{}),
|
||||
running: int32(numShards),
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
qm: t,
|
||||
done: make(chan struct{}),
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func (s *shards) start() {
|
||||
for i := 0; i < len(s.queues); i++ {
|
||||
go s.runShard(i)
|
||||
// Check and set highestSentTimestamp
|
||||
func (t *QueueManager) setHighestSentTimestamp(highest int64) {
|
||||
t.timestampLock.Lock()
|
||||
defer t.timestampLock.Unlock()
|
||||
if highest > t.highestSentTimestamp {
|
||||
t.highestSentTimestamp = highest
|
||||
t.highestSentTimestampMetric.Set(float64(t.highestSentTimestamp))
|
||||
}
|
||||
}
|
||||
|
||||
func (s *shards) stop(deadline time.Duration) {
|
||||
// Attempt a clean shutdown.
|
||||
for _, shard := range s.queues {
|
||||
close(shard)
|
||||
func (t *QueueManager) setLastSendTimestamp(now time.Time) {
|
||||
t.timestampLock.Lock()
|
||||
defer t.timestampLock.Unlock()
|
||||
t.lastSendTimestampMetric.Set(float64(now.UnixNano()) / 1e9)
|
||||
t.lastSendTimestamp = now.Unix()
|
||||
}
|
||||
|
||||
type shards struct {
|
||||
mtx sync.RWMutex // With the WAL, this is never actually contended.
|
||||
|
||||
qm *QueueManager
|
||||
queues []chan prompb.TimeSeries
|
||||
|
||||
// Emulate a wait group with a channel and an atomic int, as you
|
||||
// cannot select on a wait group.
|
||||
done chan struct{}
|
||||
running int32
|
||||
|
||||
// Soft shutdown context will prevent new enqueues and deadlocks.
|
||||
softShutdown chan struct{}
|
||||
|
||||
// Hard shutdown context is used to terminate outgoing HTTP connections
|
||||
// after giving them a chance to terminate.
|
||||
hardShutdown context.CancelFunc
|
||||
}
|
||||
|
||||
// start the shards; must be called before any call to enqueue.
|
||||
func (s *shards) start(n int) {
|
||||
s.mtx.Lock()
|
||||
defer s.mtx.Unlock()
|
||||
|
||||
newQueues := make([]chan prompb.TimeSeries, n)
|
||||
for i := 0; i < n; i++ {
|
||||
newQueues[i] = make(chan prompb.TimeSeries, s.qm.cfg.Capacity)
|
||||
}
|
||||
|
||||
s.queues = newQueues
|
||||
|
||||
var hardShutdownCtx context.Context
|
||||
hardShutdownCtx, s.hardShutdown = context.WithCancel(context.Background())
|
||||
s.softShutdown = make(chan struct{})
|
||||
s.running = int32(n)
|
||||
s.done = make(chan struct{})
|
||||
for i := 0; i < n; i++ {
|
||||
go s.runShard(hardShutdownCtx, i, newQueues[i])
|
||||
}
|
||||
numShards.WithLabelValues(s.qm.queueName).Set(float64(n))
|
||||
}
|
||||
|
||||
// stop the shards; subsequent call to enqueue will return false.
|
||||
func (s *shards) stop() {
|
||||
// Attempt a clean shutdown, but only wait flushDeadline for all the shards
|
||||
// to cleanly exit. As we're doing RPCs, enqueue can block indefinately.
|
||||
// We must be able so call stop concurrently, hence we can only take the
|
||||
// RLock here.
|
||||
s.mtx.RLock()
|
||||
close(s.softShutdown)
|
||||
s.mtx.RUnlock()
|
||||
|
||||
// Enqueue should now be unblocked, so we can take the write lock. This
|
||||
// also ensures we don't race with writes to the queues, and get a panic:
|
||||
// send on closed channel.
|
||||
s.mtx.Lock()
|
||||
defer s.mtx.Unlock()
|
||||
for _, queue := range s.queues {
|
||||
close(queue)
|
||||
}
|
||||
select {
|
||||
case <-s.done:
|
||||
return
|
||||
case <-time.After(deadline):
|
||||
case <-time.After(s.qm.flushDeadline):
|
||||
level.Error(s.qm.logger).Log("msg", "Failed to flush all samples on shutdown")
|
||||
}
|
||||
|
||||
// Force an unclean shutdown.
|
||||
s.cancel()
|
||||
s.hardShutdown()
|
||||
<-s.done
|
||||
}
|
||||
|
||||
func (s *shards) enqueue(sample *model.Sample) bool {
|
||||
s.qm.samplesIn.incr(1)
|
||||
|
||||
fp := sample.Metric.FastFingerprint()
|
||||
shard := uint64(fp) % uint64(len(s.queues))
|
||||
// enqueue a sample. If we are currently in the process of shutting down or resharding,
|
||||
// will return false; in this case, you should back off and retry.
|
||||
func (s *shards) enqueue(ref uint64, sample prompb.TimeSeries) bool {
|
||||
s.mtx.RLock()
|
||||
defer s.mtx.RUnlock()
|
||||
|
||||
select {
|
||||
case <-s.softShutdown:
|
||||
return false
|
||||
default:
|
||||
}
|
||||
|
||||
shard := uint64(ref) % uint64(len(s.queues))
|
||||
select {
|
||||
case <-s.softShutdown:
|
||||
return false
|
||||
case s.queues[shard] <- sample:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (s *shards) runShard(i int) {
|
||||
func (s *shards) runShard(ctx context.Context, i int, queue chan prompb.TimeSeries) {
|
||||
defer func() {
|
||||
if atomic.AddInt32(&s.running, -1) == 0 {
|
||||
close(s.done)
|
||||
}
|
||||
}()
|
||||
|
||||
queue := s.queues[i]
|
||||
shardNum := strconv.Itoa(i)
|
||||
|
||||
// Send batches of at most MaxSamplesPerSend samples to the remote storage.
|
||||
// If we have fewer samples than that, flush them out after a deadline
|
||||
// anyways.
|
||||
pendingSamples := model.Samples{}
|
||||
pendingSamples := []prompb.TimeSeries{}
|
||||
|
||||
max := s.qm.cfg.MaxSamplesPerSend
|
||||
timer := time.NewTimer(time.Duration(s.qm.cfg.BatchSendDeadline))
|
||||
stop := func() {
|
||||
if !timer.Stop() {
|
||||
|
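Note: the shards struct above emulates a wait group with a channel and an atomic counter because a sync.WaitGroup cannot be used inside a select. A minimal standalone sketch of that pattern, illustrative only and not the Prometheus code:

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	type group struct {
		running int32         // number of goroutines still running
		done    chan struct{} // closed when running hits zero, so callers can select on it
	}

	func newGroup(n int32) *group {
		return &group{running: n, done: make(chan struct{})}
	}

	func (g *group) finish() {
		if atomic.AddInt32(&g.running, -1) == 0 {
			close(g.done)
		}
	}

	func main() {
		g := newGroup(2)
		go g.finish()
		go g.finish()
		<-g.done // unlike a sync.WaitGroup, this channel can sit in a select
		fmt.Println("all shards exited")
	}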
@ -465,25 +656,29 @@ func (s *shards) runShard(i int) {
|
|||
|
||||
for {
|
||||
select {
|
||||
case <-s.ctx.Done():
|
||||
case <-ctx.Done():
|
||||
return
|
||||
|
||||
case sample, ok := <-queue:
|
||||
if !ok {
|
||||
if len(pendingSamples) > 0 {
|
||||
level.Debug(s.qm.logger).Log("msg", "Flushing samples to remote storage...", "count", len(pendingSamples))
|
||||
s.sendSamples(pendingSamples)
|
||||
s.sendSamples(ctx, pendingSamples)
|
||||
level.Debug(s.qm.logger).Log("msg", "Done flushing.")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
queueLength.WithLabelValues(s.qm.queueName).Dec()
|
||||
// Number of pending samples is limited by the fact that sendSamples (via sendSamplesWithBackoff)
|
||||
// retries endlessly, so once we reach > 100 samples, if we can never send to the endpoint we'll
|
||||
// stop reading from the queue (which has a size of 10).
|
||||
pendingSamples = append(pendingSamples, sample)
|
||||
s.qm.pendingSamplesMetric.Inc()
|
||||
|
||||
if len(pendingSamples) >= s.qm.cfg.MaxSamplesPerSend {
|
||||
s.sendSamples(pendingSamples[:s.qm.cfg.MaxSamplesPerSend])
|
||||
pendingSamples = pendingSamples[s.qm.cfg.MaxSamplesPerSend:]
|
||||
if len(pendingSamples) >= max {
|
||||
s.sendSamples(ctx, pendingSamples[:max])
|
||||
pendingSamples = pendingSamples[max:]
|
||||
s.qm.pendingSamplesMetric.Sub(float64(max))
|
||||
|
||||
stop()
|
||||
timer.Reset(time.Duration(s.qm.cfg.BatchSendDeadline))
|
||||
|
@ -491,17 +686,24 @@ func (s *shards) runShard(i int) {
|
|||
|
||||
case <-timer.C:
|
||||
if len(pendingSamples) > 0 {
|
||||
s.sendSamples(pendingSamples)
|
||||
level.Debug(s.qm.logger).Log("msg", "runShard timer ticked, sending samples", "samples", len(pendingSamples), "shard", shardNum)
|
||||
n := len(pendingSamples)
|
||||
s.sendSamples(ctx, pendingSamples)
|
||||
pendingSamples = pendingSamples[:0]
|
||||
s.qm.pendingSamplesMetric.Sub(float64(n))
|
||||
}
|
||||
timer.Reset(time.Duration(s.qm.cfg.BatchSendDeadline))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *shards) sendSamples(samples model.Samples) {
|
||||
func (s *shards) sendSamples(ctx context.Context, samples []prompb.TimeSeries) {
|
||||
begin := time.Now()
|
||||
s.sendSamplesWithBackoff(samples)
|
||||
err := s.sendSamplesWithBackoff(ctx, samples)
|
||||
if err != nil && s.qm.logLimiter.Allow() {
|
||||
level.Error(s.qm.logger).Log("msg", "non-recoverable error", "count", len(samples), "err", err)
|
||||
failedSamplesTotal.WithLabelValues(s.qm.queueName).Add(float64(len(samples)))
|
||||
}
|
||||
|
||||
// These counters are used to calculate the dynamic sharding, and as such
|
||||
// should be maintained irrespective of success or failure.
|
||||
|
@ -510,30 +712,67 @@ func (s *shards) sendSamples(samples model.Samples) {
|
|||
}
|
||||
|
||||
// sendSamples to the remote storage with backoff for recoverable errors.
|
||||
func (s *shards) sendSamplesWithBackoff(samples model.Samples) {
|
||||
func (s *shards) sendSamplesWithBackoff(ctx context.Context, samples []prompb.TimeSeries) error {
|
||||
backoff := s.qm.cfg.MinBackoff
|
||||
req := ToWriteRequest(samples)
|
||||
|
||||
for retries := s.qm.cfg.MaxRetries; retries > 0; retries-- {
|
||||
req, highest, err := buildWriteRequest(samples)
|
||||
// Failing to build the write request is non-recoverable, since it will
|
||||
// only error if marshaling the proto to bytes fails.
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
begin := time.Now()
|
||||
err := s.qm.client.Store(s.ctx, req)
|
||||
err := s.qm.client.Store(ctx, req)
|
||||
|
||||
sentBatchDuration.WithLabelValues(s.qm.queueName).Observe(time.Since(begin).Seconds())
|
||||
|
||||
if err == nil {
|
||||
succeededSamplesTotal.WithLabelValues(s.qm.queueName).Add(float64(len(samples)))
|
||||
return
|
||||
now := time.Now()
|
||||
s.qm.setLastSendTimestamp(now)
|
||||
s.qm.setHighestSentTimestamp(highest)
|
||||
return nil
|
||||
}
|
||||
|
||||
level.Warn(s.qm.logger).Log("msg", "Error sending samples to remote storage", "count", len(samples), "err", err)
|
||||
if _, ok := err.(recoverableError); !ok {
|
||||
break
|
||||
return err
|
||||
}
|
||||
retriedSamplesTotal.WithLabelValues(s.qm.queueName).Add(float64(len(samples)))
|
||||
|
||||
if s.qm.logLimiter.Allow() {
|
||||
level.Error(s.qm.logger).Log("err", err)
|
||||
}
|
||||
|
||||
time.Sleep(time.Duration(backoff))
|
||||
backoff = backoff * 2
|
||||
if backoff > s.qm.cfg.MaxBackoff {
|
||||
backoff = s.qm.cfg.MaxBackoff
|
||||
}
|
||||
}
|
||||
|
||||
failedSamplesTotal.WithLabelValues(s.qm.queueName).Add(float64(len(samples)))
|
||||
}
|
||||
|
||||
func buildWriteRequest(samples []prompb.TimeSeries) ([]byte, int64, error) {
|
||||
var highest int64
|
||||
for _, ts := range samples {
|
||||
// At the moment we only ever append a TimeSeries with a single sample in it.
|
||||
if ts.Samples[0].Timestamp > highest {
|
||||
highest = ts.Samples[0].Timestamp
|
||||
}
|
||||
}
|
||||
req := &prompb.WriteRequest{
|
||||
Timeseries: samples,
|
||||
}
|
||||
|
||||
data, err := proto.Marshal(req)
|
||||
if err != nil {
|
||||
return nil, highest, err
|
||||
}
|
||||
|
||||
compressed := snappy.Encode(nil, data)
|
||||
return compressed, highest, nil
|
||||
}
|
||||
|
|
|
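Note: sendSamplesWithBackoff above retries only errors wrapped in recoverableError, doubling the sleep between attempts and capping it at MaxBackoff. The retry policy in isolation looks like this sketch; the helper and parameter names are stand-ins, and the real code also checks ctx.Done, records metrics and rate-limits logging:

	package main

	import (
		"errors"
		"fmt"
		"time"
	)

	// recoverableError mirrors the marker type above: only errors wrapped in it are retried.
	type recoverableError struct{ error }

	// sendWithBackoff retries send with exponential backoff capped at maxBackoff.
	func sendWithBackoff(send func() error, minBackoff, maxBackoff time.Duration) error {
		backoff := minBackoff
		for {
			err := send()
			if err == nil {
				return nil
			}
			if _, ok := err.(recoverableError); !ok {
				return err // non-recoverable: give up immediately
			}
			time.Sleep(backoff)
			if backoff *= 2; backoff > maxBackoff {
				backoff = maxBackoff
			}
		}
	}

	func main() {
		attempts := 0
		err := sendWithBackoff(func() error {
			if attempts++; attempts < 3 {
				return recoverableError{errors.New("server busy")}
			}
			return nil
		}, 10*time.Millisecond, 100*time.Millisecond)
		fmt.Println(attempts, err) // 3 <nil>
	}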
@ -22,13 +22,229 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/gogo/protobuf/proto"
|
||||
"github.com/golang/snappy"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/prometheus/prometheus/config"
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
"github.com/prometheus/prometheus/util/testutil"
|
||||
"github.com/prometheus/tsdb"
|
||||
"github.com/prometheus/tsdb/labels"
|
||||
)
|
||||
|
||||
const defaultFlushDeadline = 1 * time.Minute
|
||||
|
||||
func TestSampleDelivery(t *testing.T) {
|
||||
// Let's create an even number of send batches so we don't run into the
|
||||
// batch timeout case.
|
||||
n := config.DefaultQueueConfig.Capacity * 2
|
||||
samples, series := createTimeseries(n)
|
||||
|
||||
c := NewTestStorageClient()
|
||||
c.expectSamples(samples[:len(samples)/2], series)
|
||||
|
||||
cfg := config.DefaultQueueConfig
|
||||
cfg.BatchSendDeadline = model.Duration(100 * time.Millisecond)
|
||||
cfg.MaxShards = 1
|
||||
var temp int64
|
||||
m := NewQueueManager(nil, "", newEWMARate(ewmaWeight, shardUpdateDuration), &temp, cfg, nil, nil, c, defaultFlushDeadline, 0)
|
||||
m.seriesLabels = refSeriesToLabelsProto(series)
|
||||
|
||||
// These should be received by the client.
|
||||
m.Start()
|
||||
m.Append(samples[:len(samples)/2])
|
||||
defer m.Stop()
|
||||
|
||||
c.waitForExpectedSamples(t)
|
||||
m.Append(samples[len(samples)/2:])
|
||||
c.expectSamples(samples[len(samples)/2:], series)
|
||||
c.waitForExpectedSamples(t)
|
||||
}
|
||||
|
||||
func TestSampleDeliveryTimeout(t *testing.T) {
|
||||
// Let's send one less sample than batch size, and wait the timeout duration
|
||||
n := 9
|
||||
samples, series := createTimeseries(n)
|
||||
c := NewTestStorageClient()
|
||||
|
||||
cfg := config.DefaultQueueConfig
|
||||
cfg.MaxShards = 1
|
||||
cfg.BatchSendDeadline = model.Duration(100 * time.Millisecond)
|
||||
var temp int64
|
||||
m := NewQueueManager(nil, "", newEWMARate(ewmaWeight, shardUpdateDuration), &temp, cfg, nil, nil, c, defaultFlushDeadline, 0)
|
||||
m.seriesLabels = refSeriesToLabelsProto(series)
|
||||
m.Start()
|
||||
defer m.Stop()
|
||||
|
||||
// Send the samples twice, waiting for the samples in the meantime.
|
||||
c.expectSamples(samples, series)
|
||||
m.Append(samples)
|
||||
c.waitForExpectedSamples(t)
|
||||
|
||||
c.expectSamples(samples, series)
|
||||
m.Append(samples)
|
||||
c.waitForExpectedSamples(t)
|
||||
}
|
||||
|
||||
func TestSampleDeliveryOrder(t *testing.T) {
|
||||
ts := 10
|
||||
n := config.DefaultQueueConfig.MaxSamplesPerSend * ts
|
||||
samples := make([]tsdb.RefSample, 0, n)
|
||||
series := make([]tsdb.RefSeries, 0, n)
|
||||
for i := 0; i < n; i++ {
|
||||
name := fmt.Sprintf("test_metric_%d", i%ts)
|
||||
samples = append(samples, tsdb.RefSample{
|
||||
Ref: uint64(i),
|
||||
T: int64(i),
|
||||
V: float64(i),
|
||||
})
|
||||
series = append(series, tsdb.RefSeries{
|
||||
Ref: uint64(i),
|
||||
Labels: labels.Labels{labels.Label{Name: "__name__", Value: name}},
|
||||
})
|
||||
}
|
||||
|
||||
c := NewTestStorageClient()
|
||||
c.expectSamples(samples, series)
|
||||
var temp int64
|
||||
m := NewQueueManager(nil, "", newEWMARate(ewmaWeight, shardUpdateDuration), &temp, config.DefaultQueueConfig, nil, nil, c, defaultFlushDeadline, 0)
|
||||
m.seriesLabels = refSeriesToLabelsProto(series)
|
||||
|
||||
m.Start()
|
||||
defer m.Stop()
|
||||
// These should be received by the client.
|
||||
m.Append(samples)
|
||||
c.waitForExpectedSamples(t)
|
||||
}
|
||||
|
||||
func TestShutdown(t *testing.T) {
|
||||
deadline := 5 * time.Second
|
||||
c := NewTestBlockedStorageClient()
|
||||
|
||||
var temp int64
|
||||
m := NewQueueManager(nil, "", newEWMARate(ewmaWeight, shardUpdateDuration), &temp, config.DefaultQueueConfig, nil, nil, c, deadline, 0)
|
||||
samples, series := createTimeseries(2 * config.DefaultQueueConfig.MaxSamplesPerSend)
|
||||
m.seriesLabels = refSeriesToLabelsProto(series)
|
||||
m.Start()
|
||||
|
||||
// Append blocks to guarantee delivery, so we do it in the background.
|
||||
go func() {
|
||||
m.Append(samples)
|
||||
}()
|
||||
time.Sleep(1 * time.Second)
|
||||
|
||||
// Test to ensure that Stop doesn't block.
|
||||
start := time.Now()
|
||||
m.Stop()
|
||||
// The samples will never be delivered, so duration should
|
||||
// be at least equal to deadline, otherwise the flush deadline
|
||||
// was not respected.
|
||||
duration := time.Since(start)
|
||||
if duration > time.Duration(deadline+(deadline/10)) {
|
||||
t.Errorf("Took too long to shutdown: %s > %s", duration, deadline)
|
||||
}
|
||||
if duration < time.Duration(deadline) {
|
||||
t.Errorf("Shutdown occurred before flush deadline: %s < %s", duration, deadline)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSeriesReset(t *testing.T) {
|
||||
c := NewTestBlockedStorageClient()
|
||||
deadline := 5 * time.Second
|
||||
var temp int64
|
||||
numSegments := 4
|
||||
numSeries := 25
|
||||
|
||||
m := NewQueueManager(nil, "", newEWMARate(ewmaWeight, shardUpdateDuration), &temp, config.DefaultQueueConfig, nil, nil, c, deadline, 0)
|
||||
for i := 0; i < numSegments; i++ {
|
||||
series := []tsdb.RefSeries{}
|
||||
for j := 0; j < numSeries; j++ {
|
||||
series = append(series, tsdb.RefSeries{Ref: uint64((i * 100) + j), Labels: labels.Labels{labels.Label{Name: "a", Value: "a"}}})
|
||||
}
|
||||
m.StoreSeries(series, i)
|
||||
}
|
||||
testutil.Equals(t, numSegments*numSeries, len(m.seriesLabels))
|
||||
m.SeriesReset(2)
|
||||
testutil.Equals(t, numSegments*numSeries/2, len(m.seriesLabels))
|
||||
}
|
||||
|
||||
func TestReshard(t *testing.T) {
|
||||
size := 10 // Make bigger to find more races.
|
||||
n := config.DefaultQueueConfig.Capacity * size
|
||||
samples, series := createTimeseries(n)
|
||||
|
||||
c := NewTestStorageClient()
|
||||
c.expectSamples(samples, series)
|
||||
|
||||
cfg := config.DefaultQueueConfig
|
||||
cfg.MaxShards = 1
|
||||
|
||||
var temp int64
|
||||
m := NewQueueManager(nil, "", newEWMARate(ewmaWeight, shardUpdateDuration), &temp, cfg, nil, nil, c, defaultFlushDeadline, 0)
|
||||
m.seriesLabels = refSeriesToLabelsProto(series)
|
||||
|
||||
m.Start()
|
||||
defer m.Stop()
|
||||
|
||||
go func() {
|
||||
for i := 0; i < len(samples); i += config.DefaultQueueConfig.Capacity {
|
||||
sent := m.Append(samples[i : i+config.DefaultQueueConfig.Capacity])
|
||||
require.True(t, sent)
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
}()
|
||||
|
||||
for i := 1; i < len(samples)/config.DefaultQueueConfig.Capacity; i++ {
|
||||
m.shards.stop()
|
||||
m.shards.start(i)
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
|
||||
c.waitForExpectedSamples(t)
|
||||
}
|
||||
|
||||
func createTimeseries(n int) ([]tsdb.RefSample, []tsdb.RefSeries) {
|
||||
samples := make([]tsdb.RefSample, 0, n)
|
||||
series := make([]tsdb.RefSeries, 0, n)
|
||||
for i := 0; i < n; i++ {
|
||||
name := fmt.Sprintf("test_metric_%d", i)
|
||||
samples = append(samples, tsdb.RefSample{
|
||||
Ref: uint64(i),
|
||||
T: int64(i),
|
||||
V: float64(i),
|
||||
})
|
||||
series = append(series, tsdb.RefSeries{
|
||||
Ref: uint64(i),
|
||||
Labels: labels.Labels{labels.Label{Name: "__name__", Value: name}},
|
||||
})
|
||||
}
|
||||
return samples, series
|
||||
}
|
||||
|
||||
func getSeriesNameFromRef(r tsdb.RefSeries) string {
|
||||
for _, l := range r.Labels {
|
||||
if l.Name == "__name__" {
|
||||
return l.Value
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func refSeriesToLabelsProto(series []tsdb.RefSeries) map[uint64][]prompb.Label {
|
||||
result := make(map[uint64][]prompb.Label)
|
||||
for _, s := range series {
|
||||
for _, l := range s.Labels {
|
||||
result[s.Ref] = append(result[s.Ref], prompb.Label{
|
||||
Name: l.Name,
|
||||
Value: l.Value,
|
||||
})
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
type TestStorageClient struct {
|
||||
receivedSamples map[string][]prompb.Sample
|
||||
expectedSamples map[string][]prompb.Sample
|
||||
|
@ -43,7 +259,7 @@ func NewTestStorageClient() *TestStorageClient {
|
|||
}
|
||||
}
|
||||
|
||||
func (c *TestStorageClient) expectSamples(ss model.Samples) {
|
||||
func (c *TestStorageClient) expectSamples(ss []tsdb.RefSample, series []tsdb.RefSeries) {
|
||||
c.mtx.Lock()
|
||||
defer c.mtx.Unlock()
|
||||
|
||||
|
@ -51,10 +267,10 @@ func (c *TestStorageClient) expectSamples(ss model.Samples) {
|
|||
c.receivedSamples = map[string][]prompb.Sample{}
|
||||
|
||||
for _, s := range ss {
|
||||
ts := labelProtosToLabels(MetricToLabelProtos(s.Metric)).String()
|
||||
c.expectedSamples[ts] = append(c.expectedSamples[ts], prompb.Sample{
|
||||
Timestamp: int64(s.Timestamp),
|
||||
Value: float64(s.Value),
|
||||
seriesName := getSeriesNameFromRef(series[s.Ref])
|
||||
c.expectedSamples[seriesName] = append(c.expectedSamples[seriesName], prompb.Sample{
|
||||
Timestamp: s.T,
|
||||
Value: s.V,
|
||||
})
|
||||
}
|
||||
c.wg.Add(len(ss))
|
||||
|
@ -62,7 +278,6 @@ func (c *TestStorageClient) expectSamples(ss model.Samples) {
|
|||
|
||||
func (c *TestStorageClient) waitForExpectedSamples(t *testing.T) {
|
||||
c.wg.Wait()
|
||||
|
||||
c.mtx.Lock()
|
||||
defer c.mtx.Unlock()
|
||||
for ts, expectedSamples := range c.expectedSamples {
|
||||
|
@ -72,15 +287,31 @@ func (c *TestStorageClient) waitForExpectedSamples(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func (c *TestStorageClient) Store(_ context.Context, req *prompb.WriteRequest) error {
|
||||
func (c *TestStorageClient) Store(_ context.Context, req []byte) error {
|
||||
c.mtx.Lock()
|
||||
defer c.mtx.Unlock()
|
||||
reqBuf, err := snappy.Decode(nil, req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var reqProto prompb.WriteRequest
|
||||
if err := proto.Unmarshal(reqBuf, &reqProto); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
count := 0
|
||||
for _, ts := range req.Timeseries {
|
||||
labels := labelProtosToLabels(ts.Labels).String()
|
||||
for _, ts := range reqProto.Timeseries {
|
||||
var seriesName string
|
||||
labels := labelProtosToLabels(ts.Labels)
|
||||
for _, label := range labels {
|
||||
if label.Name == "__name__" {
|
||||
seriesName = label.Value
|
||||
}
|
||||
}
|
||||
for _, sample := range ts.Samples {
|
||||
count++
|
||||
c.receivedSamples[labels] = append(c.receivedSamples[labels], sample)
|
||||
c.receivedSamples[seriesName] = append(c.receivedSamples[seriesName], sample)
|
||||
}
|
||||
}
|
||||
c.wg.Add(-count)
|
||||
|
@ -91,133 +322,21 @@ func (c *TestStorageClient) Name() string {
|
|||
return "teststorageclient"
|
||||
}
|
||||
|
||||
func TestSampleDelivery(t *testing.T) {
|
||||
// Let's create an even number of send batches so we don't run into the
|
||||
// batch timeout case.
|
||||
n := config.DefaultQueueConfig.Capacity * 2
|
||||
|
||||
samples := make(model.Samples, 0, n)
|
||||
for i := 0; i < n; i++ {
|
||||
name := model.LabelValue(fmt.Sprintf("test_metric_%d", i))
|
||||
samples = append(samples, &model.Sample{
|
||||
Metric: model.Metric{
|
||||
model.MetricNameLabel: name,
|
||||
},
|
||||
Value: model.SampleValue(i),
|
||||
})
|
||||
}
|
||||
|
||||
c := NewTestStorageClient()
|
||||
c.expectSamples(samples[:len(samples)/2])
|
||||
|
||||
cfg := config.DefaultQueueConfig
|
||||
cfg.MaxShards = 1
|
||||
m := NewQueueManager(nil, cfg, nil, nil, c, defaultFlushDeadline)
|
||||
|
||||
// These should be received by the client.
|
||||
for _, s := range samples[:len(samples)/2] {
|
||||
m.Append(s)
|
||||
}
|
||||
// These will be dropped because the queue is full.
|
||||
for _, s := range samples[len(samples)/2:] {
|
||||
m.Append(s)
|
||||
}
|
||||
m.Start()
|
||||
defer m.Stop()
|
||||
|
||||
c.waitForExpectedSamples(t)
|
||||
}
|
||||
|
||||
func TestSampleDeliveryTimeout(t *testing.T) {
|
||||
// Let's send one fewer sample than the batch size, and wait for the timeout duration
|
||||
n := config.DefaultQueueConfig.Capacity - 1
|
||||
|
||||
samples := make(model.Samples, 0, n)
|
||||
for i := 0; i < n; i++ {
|
||||
name := model.LabelValue(fmt.Sprintf("test_metric_%d", i))
|
||||
samples = append(samples, &model.Sample{
|
||||
Metric: model.Metric{
|
||||
model.MetricNameLabel: name,
|
||||
},
|
||||
Value: model.SampleValue(i),
|
||||
})
|
||||
}
|
||||
|
||||
c := NewTestStorageClient()
|
||||
|
||||
cfg := config.DefaultQueueConfig
|
||||
cfg.MaxShards = 1
|
||||
cfg.BatchSendDeadline = model.Duration(100 * time.Millisecond)
|
||||
m := NewQueueManager(nil, cfg, nil, nil, c, defaultFlushDeadline)
|
||||
m.Start()
|
||||
defer m.Stop()
|
||||
|
||||
// Send the samples twice, waiting for the samples in the meantime.
|
||||
c.expectSamples(samples)
|
||||
for _, s := range samples {
|
||||
m.Append(s)
|
||||
}
|
||||
c.waitForExpectedSamples(t)
|
||||
|
||||
c.expectSamples(samples)
|
||||
for _, s := range samples {
|
||||
m.Append(s)
|
||||
}
|
||||
c.waitForExpectedSamples(t)
|
||||
}
|
||||
|
||||
func TestSampleDeliveryOrder(t *testing.T) {
|
||||
ts := 10
|
||||
n := config.DefaultQueueConfig.MaxSamplesPerSend * ts
|
||||
|
||||
samples := make(model.Samples, 0, n)
|
||||
for i := 0; i < n; i++ {
|
||||
name := model.LabelValue(fmt.Sprintf("test_metric_%d", i%ts))
|
||||
samples = append(samples, &model.Sample{
|
||||
Metric: model.Metric{
|
||||
model.MetricNameLabel: name,
|
||||
},
|
||||
Value: model.SampleValue(i),
|
||||
Timestamp: model.Time(i),
|
||||
})
|
||||
}
|
||||
|
||||
c := NewTestStorageClient()
|
||||
c.expectSamples(samples)
|
||||
m := NewQueueManager(nil, config.DefaultQueueConfig, nil, nil, c, defaultFlushDeadline)
|
||||
|
||||
// These should be received by the client.
|
||||
for _, s := range samples {
|
||||
m.Append(s)
|
||||
}
|
||||
m.Start()
|
||||
defer m.Stop()
|
||||
|
||||
c.waitForExpectedSamples(t)
|
||||
}
|
||||
|
||||
// TestBlockingStorageClient is a queue_manager StorageClient which will block
|
||||
// on any calls to Store(), until the `block` channel is closed, at which point
|
||||
// the `numCalls` property will contain a count of how many times Store() was
|
||||
// called.
|
||||
// on any calls to Store(), until the request's Context is cancelled, at which
|
||||
// point the `numCalls` property will contain a count of how many times Store()
|
||||
// was called.
|
||||
type TestBlockingStorageClient struct {
|
||||
numCalls uint64
|
||||
block chan bool
|
||||
}
|
||||
|
||||
func NewTestBlockedStorageClient() *TestBlockingStorageClient {
|
||||
return &TestBlockingStorageClient{
|
||||
block: make(chan bool),
|
||||
numCalls: 0,
|
||||
}
|
||||
return &TestBlockingStorageClient{}
|
||||
}
|
||||
|
||||
func (c *TestBlockingStorageClient) Store(ctx context.Context, _ *prompb.WriteRequest) error {
|
||||
func (c *TestBlockingStorageClient) Store(ctx context.Context, _ []byte) error {
|
||||
atomic.AddUint64(&c.numCalls, 1)
|
||||
select {
|
||||
case <-c.block:
|
||||
case <-ctx.Done():
|
||||
}
|
||||
<-ctx.Done()
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -225,106 +344,6 @@ func (c *TestBlockingStorageClient) NumCalls() uint64 {
|
|||
return atomic.LoadUint64(&c.numCalls)
|
||||
}
|
||||
|
||||
func (c *TestBlockingStorageClient) unlock() {
|
||||
close(c.block)
|
||||
}
|
||||
|
||||
func (c *TestBlockingStorageClient) Name() string {
|
||||
return "testblockingstorageclient"
|
||||
}
|
||||
|
||||
func (t *QueueManager) queueLen() int {
|
||||
t.shardsMtx.Lock()
|
||||
defer t.shardsMtx.Unlock()
|
||||
queueLength := 0
|
||||
for _, shard := range t.shards.queues {
|
||||
queueLength += len(shard)
|
||||
}
|
||||
return queueLength
|
||||
}
|
||||
|
||||
func TestSpawnNotMoreThanMaxConcurrentSendsGoroutines(t *testing.T) {
|
||||
// Our goal is to fully empty the queue:
|
||||
// `MaxSamplesPerSend*Shards` samples should be consumed by the
|
||||
// per-shard goroutines, and then another `MaxSamplesPerSend`
|
||||
// should be left on the queue.
|
||||
n := config.DefaultQueueConfig.MaxSamplesPerSend * 2
|
||||
|
||||
samples := make(model.Samples, 0, n)
|
||||
for i := 0; i < n; i++ {
|
||||
name := model.LabelValue(fmt.Sprintf("test_metric_%d", i))
|
||||
samples = append(samples, &model.Sample{
|
||||
Metric: model.Metric{
|
||||
model.MetricNameLabel: name,
|
||||
},
|
||||
Value: model.SampleValue(i),
|
||||
})
|
||||
}
|
||||
|
||||
c := NewTestBlockedStorageClient()
|
||||
cfg := config.DefaultQueueConfig
|
||||
cfg.MaxShards = 1
|
||||
cfg.Capacity = n
|
||||
m := NewQueueManager(nil, cfg, nil, nil, c, defaultFlushDeadline)
|
||||
|
||||
m.Start()
|
||||
|
||||
defer func() {
|
||||
c.unlock()
|
||||
m.Stop()
|
||||
}()
|
||||
|
||||
for _, s := range samples {
|
||||
m.Append(s)
|
||||
}
|
||||
|
||||
// Wait until the runShard() loops drain the queue. If things went right, it
|
||||
// should then immediately block in sendSamples(), but, in case of error,
|
||||
// it would spawn too many goroutines, and thus we'd see more calls to
|
||||
// client.Store()
|
||||
//
|
||||
// The timed wait is maybe non-ideal, but, in order to verify that we're
|
||||
// not spawning too many concurrent goroutines, we have to wait on the
|
||||
// Run() loop to consume a specific number of elements from the
|
||||
// queue... and it doesn't signal that in any obvious way, except by
|
||||
// draining the queue. We cap the waiting at 1 second -- that should give
|
||||
// plenty of time, and keeps the failure fairly quick if we're not draining
|
||||
// the queue properly.
|
||||
for i := 0; i < 100 && m.queueLen() > 0; i++ {
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
|
||||
if m.queueLen() != config.DefaultQueueConfig.MaxSamplesPerSend {
|
||||
t.Fatalf("Failed to drain QueueManager queue, %d elements left",
|
||||
m.queueLen(),
|
||||
)
|
||||
}
|
||||
|
||||
numCalls := c.NumCalls()
|
||||
if numCalls != uint64(1) {
|
||||
t.Errorf("Saw %d concurrent sends, expected 1", numCalls)
|
||||
}
|
||||
}
|
||||
|
||||
func TestShutdown(t *testing.T) {
|
||||
deadline := 10 * time.Second
|
||||
c := NewTestBlockedStorageClient()
|
||||
m := NewQueueManager(nil, config.DefaultQueueConfig, nil, nil, c, deadline)
|
||||
for i := 0; i < config.DefaultQueueConfig.MaxSamplesPerSend; i++ {
|
||||
m.Append(&model.Sample{
|
||||
Metric: model.Metric{
|
||||
model.MetricNameLabel: model.LabelValue(fmt.Sprintf("test_metric_%d", i)),
|
||||
},
|
||||
Value: model.SampleValue(i),
|
||||
Timestamp: model.Time(i),
|
||||
})
|
||||
}
|
||||
m.Start()
|
||||
|
||||
start := time.Now()
|
||||
m.Stop()
|
||||
duration := time.Since(start)
|
||||
if duration > deadline+(deadline/10) {
|
||||
t.Errorf("Took too long to shutdown: %s > %s", duration, deadline)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,9 +19,12 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/prometheus/prometheus/config"
|
||||
"github.com/prometheus/prometheus/pkg/labels"
|
||||
"github.com/prometheus/prometheus/pkg/timestamp"
|
||||
"github.com/prometheus/prometheus/storage"
|
||||
)
|
||||
|
||||
|
@ -35,7 +38,13 @@ type Storage struct {
|
|||
mtx sync.RWMutex
|
||||
|
||||
// For writes
|
||||
queues []*QueueManager
|
||||
walDir string
|
||||
queues []*QueueManager
|
||||
samplesIn *ewmaRate
|
||||
samplesInMetric prometheus.Counter
|
||||
highestTimestampMtx sync.Mutex
|
||||
highestTimestamp int64
|
||||
highestTimestampMetric prometheus.Gauge
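// walDir is handed to each QueueManager so its WAL watcher can tail the TSDB WAL.
// samplesIn is an EWMA of appended samples, shared with the queue managers (see the
// NewQueueManager calls in ApplyConfig) so samples in can be compared to samples out.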
|
||||
|
||||
// For reads
|
||||
queryables []storage.Queryable
|
||||
|
@ -44,15 +53,30 @@ type Storage struct {
|
|||
}
|
||||
|
||||
// NewStorage returns a remote.Storage.
|
||||
func NewStorage(l log.Logger, stCallback startTimeCallback, flushDeadline time.Duration) *Storage {
|
||||
func NewStorage(l log.Logger, reg prometheus.Registerer, stCallback startTimeCallback, walDir string, flushDeadline time.Duration) *Storage {
|
||||
if l == nil {
|
||||
l = log.NewNopLogger()
|
||||
}
|
||||
return &Storage{
|
||||
shardUpdateDuration := 10 * time.Second
|
||||
s := &Storage{
|
||||
logger: l,
|
||||
localStartTimeCallback: stCallback,
|
||||
flushDeadline: flushDeadline,
|
||||
walDir: walDir,
|
||||
// queues: make(map[*QueueManager]struct{}),
|
||||
samplesIn: newEWMARate(ewmaWeight, shardUpdateDuration),
|
||||
samplesInMetric: prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "prometheus_remote_storage_samples_in_total",
|
||||
Help: "Samples in to remote storage, compare to samples out for queue managers.",
|
||||
}),
|
||||
highestTimestampMetric: prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "prometheus_remote_storage_highest_timestamp_in",
|
||||
Help: "Highest timestamp that has come into the remote storage via the Appender interface.",
|
||||
}),
|
||||
}
|
||||
reg.MustRegister(s.samplesInMetric)
|
||||
reg.MustRegister(s.highestTimestampMetric)
|
||||
return s
|
||||
}
|
||||
|
||||
// ApplyConfig updates the state as the new config requires.
|
||||
|
@ -61,7 +85,6 @@ func (s *Storage) ApplyConfig(conf *config.Config) error {
|
|||
defer s.mtx.Unlock()
|
||||
|
||||
// Update write queues
|
||||
|
||||
newQueues := []*QueueManager{}
|
||||
// TODO: we should only stop & recreate queues which have changes,
|
||||
// as this can be quite disruptive.
|
||||
|
@ -74,13 +97,20 @@ func (s *Storage) ApplyConfig(conf *config.Config) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Convert to int64 for comparison with timestamps from samples
|
||||
// we will eventually read from the WAL on startup.
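// Samples read back from the WAL with timestamps at or before this startTime are
// skipped by the WAL watcher (see decodeRecord in wal_watcher.go).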
|
||||
startTime := timestamp.FromTime(time.Now())
|
||||
newQueues = append(newQueues, NewQueueManager(
|
||||
s.logger,
|
||||
s.walDir,
|
||||
s.samplesIn,
|
||||
&s.highestTimestamp,
|
||||
rwConf.QueueConfig,
|
||||
conf.GlobalConfig.ExternalLabels,
|
||||
rwConf.WriteRelabelConfigs,
|
||||
c,
|
||||
s.flushDeadline,
|
||||
startTime,
|
||||
))
|
||||
}
|
||||
|
||||
|
|
521
storage/remote/wal_watcher.go
Normal file
|
@ -0,0 +1,521 @@
|
|||
// Copyright 2018 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package remote
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
"github.com/go-kit/kit/log/level"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/tsdb"
|
||||
"github.com/prometheus/tsdb/fileutil"
|
||||
"github.com/prometheus/tsdb/wal"
|
||||
)
|
||||
|
||||
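// readPeriod is how often the watcher polls the segment it is currently reading,
// segmentCheckPeriod is how often it checks whether a newer segment exists, and
// checkpointPeriod is how often it looks for a newly created checkpoint; see the
// watch loop below for how each ticker is used.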
const (
|
||||
readPeriod = 10 * time.Millisecond
|
||||
checkpointPeriod = 5 * time.Second
|
||||
segmentCheckPeriod = 100 * time.Millisecond
|
||||
)
|
||||
|
||||
var (
|
||||
watcherSamplesRecordsRead = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "prometheus",
|
||||
Subsystem: "wal_watcher",
|
||||
Name: "samples_records_read_total",
|
||||
Help: "Number of samples records read by the WAL watcher from the WAL.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
watcherSeriesRecordsRead = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "prometheus",
|
||||
Subsystem: "wal_watcher",
|
||||
Name: "series_records_read_total",
|
||||
Help: "Number of series records read by the WAL watcher from the WAL.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
watcherTombstoneRecordsRead = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "prometheus",
|
||||
Subsystem: "wal_watcher",
|
||||
Name: "tombstone_records_read_total",
|
||||
Help: "Number of tombstone records read by the WAL watcher from the WAL.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
watcherInvalidRecordsRead = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "prometheus",
|
||||
Subsystem: "wal_watcher",
|
||||
Name: "invalid_records_read_total",
|
||||
Help: "Number of invalid records read by the WAL watcher from the WAL.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
watcherUnknownTypeRecordsRead = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "prometheus",
|
||||
Subsystem: "wal_watcher",
|
||||
Name: "unknown_records_read_total",
|
||||
Help: "Number of records read by the WAL watcher from the WAL of an unknown record type.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
watcherRecordDecodeFails = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "prometheus",
|
||||
Subsystem: "wal_watcher",
|
||||
Name: "record_decode_failures_total",
|
||||
Help: "Number of records read by the WAL watcher that resulted in an error when decoding.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
watcherSamplesSentPreTailing = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "prometheus",
|
||||
Subsystem: "wal_watcher",
|
||||
Name: "samples_sent_pre_tailing_total",
|
||||
Help: "Number of sample records read by the WAL watcher and sent to remote write during replay of existing WAL.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
watcherCurrentSegment = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: "prometheus",
|
||||
Subsystem: "wal_watcher",
|
||||
Name: "current_segment",
|
||||
Help: "Current segment the WAL watcher is reading records from.",
|
||||
},
|
||||
[]string{queue},
|
||||
)
|
||||
)
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(watcherSamplesRecordsRead)
|
||||
prometheus.MustRegister(watcherSeriesRecordsRead)
|
||||
prometheus.MustRegister(watcherTombstoneRecordsRead)
|
||||
prometheus.MustRegister(watcherInvalidRecordsRead)
|
||||
prometheus.MustRegister(watcherUnknownTypeRecordsRead)
|
||||
prometheus.MustRegister(watcherRecordDecodeFails)
|
||||
prometheus.MustRegister(watcherSamplesSentPreTailing)
|
||||
prometheus.MustRegister(watcherCurrentSegment)
|
||||
}
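// Note that, unlike the Storage metrics, which go through the Registerer passed to
// NewStorage, the watcher metrics above are registered with the default registry.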
|
||||
|
||||
type writeTo interface {
|
||||
Append([]tsdb.RefSample) bool
|
||||
StoreSeries([]tsdb.RefSeries, int)
|
||||
SeriesReset(int)
|
||||
}
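// writeTo is satisfied by the QueueManager (whose Append, StoreSeries and
// SeriesReset are exercised in queue_manager_test.go) and by writeToMock in
// wal_watcher_test.go.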
|
||||
|
||||
// WALWatcher watches the TSDB WAL for a given WriteTo.
|
||||
type WALWatcher struct {
|
||||
name string
|
||||
writer writeTo
|
||||
logger log.Logger
|
||||
walDir string
|
||||
|
||||
currentSegment int
|
||||
lastCheckpoint string
|
||||
startTime int64
|
||||
|
||||
samplesReadMetric prometheus.Counter
|
||||
seriesReadMetric prometheus.Counter
|
||||
tombstonesReadMetric prometheus.Counter
|
||||
invalidReadMetric prometheus.Counter
|
||||
unknownReadMetric prometheus.Counter
|
||||
recordDecodeFailsMetric prometheus.Counter
|
||||
samplesSentPreTailing prometheus.Counter
|
||||
currentSegmentMetric prometheus.Gauge
|
||||
|
||||
quit chan struct{}
|
||||
}
|
||||
|
||||
// NewWALWatcher creates a new WAL watcher for a given WriteTo.
|
||||
func NewWALWatcher(logger log.Logger, name string, writer writeTo, walDir string, startTime int64) *WALWatcher {
|
||||
if logger == nil {
|
||||
logger = log.NewNopLogger()
|
||||
}
|
||||
w := &WALWatcher{
|
||||
logger: logger,
|
||||
writer: writer,
|
||||
walDir: path.Join(walDir, "wal"),
|
||||
startTime: startTime,
|
||||
name: name,
|
||||
quit: make(chan struct{}),
|
||||
}
|
||||
|
||||
w.samplesReadMetric = watcherSamplesRecordsRead.WithLabelValues(w.name)
|
||||
w.seriesReadMetric = watcherSeriesRecordsRead.WithLabelValues(w.name)
|
||||
w.tombstonesReadMetric = watcherTombstoneRecordsRead.WithLabelValues(w.name)
|
||||
w.unknownReadMetric = watcherUnknownTypeRecordsRead.WithLabelValues(w.name)
|
||||
w.invalidReadMetric = watcherInvalidRecordsRead.WithLabelValues(w.name)
|
||||
w.recordDecodeFailsMetric = watcherRecordDecodeFails.WithLabelValues(w.name)
|
||||
w.samplesSentPreTailing = watcherSamplesSentPreTailing.WithLabelValues(w.name)
|
||||
w.currentSegmentMetric = watcherCurrentSegment.WithLabelValues(w.name)
|
||||
|
||||
return w
|
||||
}
|
||||
|
||||
func (w *WALWatcher) Start() {
|
||||
level.Info(w.logger).Log("msg", "starting WAL watcher", "queue", w.name)
|
||||
go w.runWatcher()
|
||||
}
|
||||
|
||||
func (w *WALWatcher) Stop() {
|
||||
level.Info(w.logger).Log("msg", "stopping WAL watcher", "queue", w.name)
|
||||
close(w.quit)
|
||||
}
|
||||
|
||||
func (w *WALWatcher) runWatcher() {
|
||||
// The WAL dir may not exist when Prometheus first starts up.
|
||||
for {
|
||||
if _, err := os.Stat(w.walDir); os.IsNotExist(err) {
|
||||
time.Sleep(time.Second)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
nw, err := wal.New(nil, nil, w.walDir)
|
||||
if err != nil {
|
||||
level.Error(w.logger).Log("err", err)
|
||||
return
|
||||
}
|
||||
|
||||
first, last, err := nw.Segments()
|
||||
if err != nil {
|
||||
level.Error(w.logger).Log("err", err)
|
||||
return
|
||||
}
|
||||
|
||||
if last == -1 {
|
||||
level.Error(w.logger).Log("err", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Backfill from the checkpoint first if it exists.
|
||||
dir, _, err := tsdb.LastCheckpoint(w.walDir)
|
||||
if err != nil && err != tsdb.ErrNotFound {
|
||||
level.Error(w.logger).Log("msg", "error looking for existing checkpoint, some samples may be dropped", "err", errors.Wrap(err, "find last checkpoint"))
|
||||
}
|
||||
|
||||
level.Debug(w.logger).Log("msg", "reading checkpoint", "dir", dir)
|
||||
if err == nil {
|
||||
w.lastCheckpoint = dir
|
||||
err = w.readCheckpoint(dir)
|
||||
if err != nil {
|
||||
level.Error(w.logger).Log("msg", "error reading existing checkpoint, some samples may be dropped", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
w.currentSegment = first
|
||||
w.currentSegmentMetric.Set(float64(w.currentSegment))
|
||||
segment, err := wal.OpenReadSegment(wal.SegmentName(w.walDir, w.currentSegment))
|
||||
// TODO: callum, is this error really fatal?
|
||||
if err != nil {
|
||||
level.Error(w.logger).Log("err", err)
|
||||
return
|
||||
}
|
||||
reader := wal.NewLiveReader(segment)
|
||||
tail := false
|
||||
|
||||
for {
|
||||
// If we've replayed the existing WAL, start tailing.
|
||||
if w.currentSegment == last {
|
||||
tail = true
|
||||
}
|
||||
if tail {
|
||||
level.Info(w.logger).Log("msg", "watching segment", "segment", w.currentSegment)
|
||||
} else {
|
||||
level.Info(w.logger).Log("msg", "replaying segment", "segment", w.currentSegment)
|
||||
}
|
||||
|
||||
// On start, after reading the existing WAL for series records, we have a pointer to the latest segment.
|
||||
// On subsequent calls to this function, currentSegment will have been incremented and we should open that segment.
|
||||
err := w.watch(nw, reader, tail)
|
||||
segment.Close()
|
||||
if err != nil {
|
||||
level.Error(w.logger).Log("msg", "runWatcher is ending", "err", err)
|
||||
return
|
||||
}
|
||||
|
||||
w.currentSegment++
|
||||
w.currentSegmentMetric.Set(float64(w.currentSegment))
|
||||
|
||||
segment, err = wal.OpenReadSegment(wal.SegmentName(w.walDir, w.currentSegment))
|
||||
// TODO: callum, is this error really fatal?
|
||||
if err != nil {
|
||||
level.Error(w.logger).Log("err", err)
|
||||
return
|
||||
}
|
||||
reader = wal.NewLiveReader(segment)
|
||||
}
|
||||
}
|
||||
|
||||
// Use tail true to indicate that the reader is currently on a segment that is
|
||||
// actively being written to. If false, assume it's a full segment and we're
|
||||
// replaying it on start to cache the series records.
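// The loop below reads the current segment on readPeriod, checks for newer segments
// on segmentCheckPeriod (returning nil so the caller can advance to the next one)
// and checks for new checkpoints on checkpointPeriod; the segment and checkpoint
// tickers are stopped while replaying a full segment.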
|
||||
func (w *WALWatcher) watch(wl *wal.WAL, reader *wal.LiveReader, tail bool) error {
|
||||
|
||||
readTicker := time.NewTicker(readPeriod)
|
||||
defer readTicker.Stop()
|
||||
|
||||
checkpointTicker := time.NewTicker(checkpointPeriod)
|
||||
defer checkpointTicker.Stop()
|
||||
|
||||
segmentTicker := time.NewTicker(segmentCheckPeriod)
|
||||
defer segmentTicker.Stop()
|
||||
// If we're replaying the segment, we need to know the size of the file to know
|
||||
// when to return from watch and move on to the next segment.
|
||||
size := int64(math.MaxInt64)
|
||||
if !tail {
|
||||
segmentTicker.Stop()
|
||||
checkpointTicker.Stop()
|
||||
var err error
|
||||
size, err = getSegmentSize(w.walDir, w.currentSegment)
|
||||
if err != nil {
|
||||
level.Error(w.logger).Log("msg", "error getting segment size", "segment", w.currentSegment)
|
||||
return errors.Wrap(err, "get segment size")
|
||||
}
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-w.quit:
|
||||
level.Info(w.logger).Log("msg", "quitting WAL watcher watch loop")
|
||||
return errors.New("quit channel")
|
||||
|
||||
case <-checkpointTicker.C:
|
||||
// Periodically check if there is a new checkpoint.
|
||||
// As this is considered an optimisation, we ignore errors during
|
||||
// checkpoint processing.
|
||||
|
||||
dir, _, err := tsdb.LastCheckpoint(w.walDir)
|
||||
if err != nil && err != tsdb.ErrNotFound {
|
||||
level.Error(w.logger).Log("msg", "error getting last checkpoint", "err", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if dir == w.lastCheckpoint {
|
||||
continue
|
||||
}
|
||||
|
||||
level.Info(w.logger).Log("msg", "new checkpoint detected", "last", w.lastCheckpoint, "new", dir)
|
||||
|
||||
d, err := checkpointNum(dir)
|
||||
if err != nil {
|
||||
level.Error(w.logger).Log("msg", "error parsing checkpoint", "err", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if d >= w.currentSegment {
|
||||
level.Info(w.logger).Log("msg", "current segment is behind the checkpoint, skipping reading of checkpoint", "current", fmt.Sprintf("%08d", w.currentSegment), "checkpoint", dir)
|
||||
continue
|
||||
}
|
||||
|
||||
w.lastCheckpoint = dir
|
||||
// This potentially takes a long time; should we run it in another goroutine?
|
||||
err = w.readCheckpoint(w.lastCheckpoint)
|
||||
if err != nil {
|
||||
level.Error(w.logger).Log("err", err)
|
||||
}
|
||||
// Clear series with a checkpoint or segment index lower than that of the checkpoint we just read.
|
||||
w.writer.SeriesReset(d)
|
||||
|
||||
case <-segmentTicker.C:
|
||||
_, last, err := wl.Segments()
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "segments")
|
||||
}
|
||||
|
||||
// Check if new segments exist.
|
||||
if last <= w.currentSegment {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := w.readSegment(reader); err != nil {
|
||||
// Ignore errors reading to end of segment, as we're going to move to
|
||||
// the next segment now.
|
||||
level.Error(w.logger).Log("msg", "error reading to end of segment", "err", err)
|
||||
}
|
||||
|
||||
level.Info(w.logger).Log("msg", "a new segment exists, we should start reading it", "current", fmt.Sprintf("%08d", w.currentSegment), "new", fmt.Sprintf("%08d", last))
|
||||
return nil
|
||||
|
||||
case <-readTicker.C:
|
||||
if err := w.readSegment(reader); err != nil && err != io.EOF {
|
||||
level.Error(w.logger).Log("err", err)
|
||||
return err
|
||||
}
|
||||
if reader.TotalRead() >= size && !tail {
|
||||
level.Info(w.logger).Log("msg", "done replaying segment", "segment", w.currentSegment, "size", size, "read", reader.TotalRead())
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (w *WALWatcher) readSegment(r *wal.LiveReader) error {
|
||||
for r.Next() && !isClosed(w.quit) {
|
||||
err := w.decodeRecord(r.Record())
|
||||
|
||||
// Intentionally skip over record decode errors.
|
||||
if err != nil {
|
||||
level.Error(w.logger).Log("err", err)
|
||||
}
|
||||
}
|
||||
return r.Err()
|
||||
}
|
||||
|
||||
func (w *WALWatcher) decodeRecord(rec []byte) error {
|
||||
var (
|
||||
dec tsdb.RecordDecoder
|
||||
series []tsdb.RefSeries
|
||||
samples []tsdb.RefSample
|
||||
)
|
||||
switch dec.Type(rec) {
|
||||
case tsdb.RecordSeries:
|
||||
series, err := dec.Series(rec, series[:0])
|
||||
if err != nil {
|
||||
w.recordDecodeFailsMetric.Inc()
|
||||
return err
|
||||
}
|
||||
w.seriesReadMetric.Add(float64(len(series)))
|
||||
w.writer.StoreSeries(series, w.currentSegment)
|
||||
|
||||
case tsdb.RecordSamples:
|
||||
samples, err := dec.Samples(rec, samples[:0])
|
||||
if err != nil {
|
||||
w.recordDecodeFailsMetric.Inc()
|
||||
return err
|
||||
}
|
||||
var send []tsdb.RefSample
|
||||
for _, s := range samples {
|
||||
if s.T > w.startTime {
|
||||
send = append(send, s)
|
||||
}
|
||||
}
|
||||
if len(send) > 0 {
|
||||
// We don't want to count samples read prior to the starting timestamp
|
||||
// so that we can compare samples in vs. samples read and samples successfully sent.
|
||||
w.samplesReadMetric.Add(float64(len(samples)))
|
||||
// Blocks until the samples are sent to all remote write endpoints or the queues are shut down (because enqueue blocks).
|
||||
w.writer.Append(send)
|
||||
}
|
||||
|
||||
case tsdb.RecordTombstones:
|
||||
w.tombstonesReadMetric.Add(float64(len(samples)))
|
||||
|
||||
case tsdb.RecordInvalid:
|
||||
w.invalidReadMetric.Add(float64(len(samples)))
|
||||
return errors.New("invalid record")
|
||||
|
||||
default:
|
||||
w.recordDecodeFailsMetric.Inc()
|
||||
return errors.New("unknown TSDB record type")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Read all the series records from a Checkpoint directory.
|
||||
func (w *WALWatcher) readCheckpoint(checkpointDir string) error {
|
||||
level.Info(w.logger).Log("msg", "reading checkpoint", "dir", checkpointDir)
|
||||
sr, err := wal.NewSegmentsReader(checkpointDir)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "open checkpoint")
|
||||
}
|
||||
defer sr.Close()
|
||||
|
||||
size, err := getCheckpointSize(checkpointDir)
|
||||
if err != nil {
|
||||
level.Error(w.logger).Log("msg", "error getting checkpoint size", "checkpoint", checkpointDir)
|
||||
return errors.Wrap(err, "get checkpoint size")
|
||||
}
|
||||
|
||||
// w.readSeriesRecords(wal.NewLiveReader(sr), i, size)
|
||||
r := wal.NewLiveReader(sr)
|
||||
w.readSegment(r)
|
||||
if r.TotalRead() != size {
|
||||
level.Warn(w.logger).Log("msg", "may not have read all data from checkpoint")
|
||||
}
|
||||
level.Debug(w.logger).Log("msg", "read series references from checkpoint", "checkpoint", checkpointDir)
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkpointNum(dir string) (int, error) {
|
||||
// Checkpoint dir names are in the format checkpoint.000001
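// For example, a (hypothetical) dir of "data/wal/checkpoint.000123" yields 123.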
|
||||
chunks := strings.Split(dir, ".")
|
||||
if len(chunks) != 2 {
|
||||
return 0, errors.Errorf("invalid checkpoint dir string: %s", dir)
|
||||
}
|
||||
|
||||
result, err := strconv.Atoi(chunks[1])
|
||||
if err != nil {
|
||||
return 0, errors.Errorf("invalid checkpoint dir string: %s", dir)
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func getCheckpointSize(dir string) (int64, error) {
|
||||
i := int64(0)
|
||||
segs, err := fileutil.ReadDir(dir)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
for _, fn := range segs {
|
||||
num, err := strconv.Atoi(fn)
|
||||
if err != nil {
|
||||
return i, err
|
||||
}
|
||||
sz, err := getSegmentSize(dir, num)
|
||||
if err != nil {
|
||||
return i, err
|
||||
}
|
||||
i += sz
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
// Get size of segment.
|
||||
func getSegmentSize(dir string, index int) (int64, error) {
|
||||
i := int64(-1)
|
||||
fi, err := os.Stat(wal.SegmentName(dir, index))
|
||||
if err == nil {
|
||||
i = fi.Size()
|
||||
}
|
||||
return i, err
|
||||
}
|
||||
|
||||
func isClosed(c chan struct{}) bool {
|
||||
select {
|
||||
case <-c:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
417
storage/remote/wal_watcher_test.go
Normal file
|
@ -0,0 +1,417 @@
|
|||
// Copyright 2018 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
package remote
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
"os"
|
||||
"path"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/prometheus/pkg/timestamp"
|
||||
"github.com/prometheus/prometheus/util/testutil"
|
||||
"github.com/prometheus/tsdb"
|
||||
"github.com/prometheus/tsdb/labels"
|
||||
"github.com/prometheus/tsdb/wal"
|
||||
)
|
||||
|
||||
var defaultRetryInterval = 100 * time.Millisecond
|
||||
var defaultRetries = 100
|
||||
|
||||
// retry executes f() n times at each interval until it returns true.
|
||||
func retry(t *testing.T, interval time.Duration, n int, f func() bool) {
|
||||
t.Helper()
|
||||
ticker := time.NewTicker(interval)
|
||||
for i := 0; i <= n; i++ {
|
||||
if f() {
|
||||
return
|
||||
}
|
||||
t.Logf("retry %d/%d", i, n)
|
||||
<-ticker.C
|
||||
}
|
||||
ticker.Stop()
|
||||
t.Logf("function returned false")
|
||||
}
|
||||
|
||||
type writeToMock struct {
|
||||
samplesAppended int
|
||||
seriesLock sync.Mutex
|
||||
seriesSegmentIndexes map[uint64]int
|
||||
}
|
||||
|
||||
func (wtm *writeToMock) Append(s []tsdb.RefSample) bool {
|
||||
wtm.samplesAppended += len(s)
|
||||
return true
|
||||
}
|
||||
|
||||
func (wtm *writeToMock) StoreSeries(series []tsdb.RefSeries, index int) {
|
||||
wtm.seriesLock.Lock()
|
||||
defer wtm.seriesLock.Unlock()
|
||||
for _, s := range series {
|
||||
wtm.seriesSegmentIndexes[s.Ref] = index
|
||||
}
|
||||
}
|
||||
|
||||
func (wtm *writeToMock) SeriesReset(index int) {
|
||||
// Remove series that are in segments older than the checkpoint
|
||||
// and that were not also present in the checkpoint.
|
||||
wtm.seriesLock.Lock()
|
||||
defer wtm.seriesLock.Unlock()
|
||||
for k, v := range wtm.seriesSegmentIndexes {
|
||||
if v < index {
|
||||
delete(wtm.seriesSegmentIndexes, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (wtm *writeToMock) checkNumLabels() int {
|
||||
wtm.seriesLock.Lock()
|
||||
defer wtm.seriesLock.Unlock()
|
||||
return len(wtm.seriesSegmentIndexes)
|
||||
}
|
||||
|
||||
func newWriteToMock() *writeToMock {
|
||||
return &writeToMock{
|
||||
seriesSegmentIndexes: make(map[uint64]int),
|
||||
}
|
||||
}
|
||||
|
||||
func Test_readToEnd_noCheckpoint(t *testing.T) {
|
||||
pageSize := 32 * 1024
|
||||
const seriesCount = 10
|
||||
const samplesCount = 250
|
||||
|
||||
dir, err := ioutil.TempDir("", "readToEnd_noCheckpoint")
|
||||
testutil.Ok(t, err)
|
||||
defer os.RemoveAll(dir)
|
||||
wdir := path.Join(dir, "wal")
|
||||
err = os.Mkdir(wdir, 0777)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
w, err := wal.NewSize(nil, nil, wdir, 128*pageSize)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
var recs [][]byte
|
||||
|
||||
enc := tsdb.RecordEncoder{}
|
||||
|
||||
for i := 0; i < seriesCount; i++ {
|
||||
series := enc.Series([]tsdb.RefSeries{
|
||||
tsdb.RefSeries{
|
||||
Ref: uint64(i),
|
||||
Labels: labels.Labels{labels.Label{"__name__", fmt.Sprintf("metric_%d", i)}},
|
||||
},
|
||||
}, nil)
|
||||
recs = append(recs, series)
|
||||
for j := 0; j < samplesCount; j++ {
|
||||
sample := enc.Samples([]tsdb.RefSample{
|
||||
tsdb.RefSample{
|
||||
Ref: uint64(j),
|
||||
T: int64(i),
|
||||
V: float64(i),
|
||||
},
|
||||
}, nil)
|
||||
|
||||
recs = append(recs, sample)
|
||||
|
||||
// Randomly batch up records.
|
||||
if rand.Intn(4) < 3 {
|
||||
testutil.Ok(t, w.Log(recs...))
|
||||
recs = recs[:0]
|
||||
}
|
||||
}
|
||||
}
|
||||
testutil.Ok(t, w.Log(recs...))
|
||||
|
||||
_, _, err = w.Segments()
|
||||
testutil.Ok(t, err)
|
||||
|
||||
wt := newWriteToMock()
|
||||
st := timestamp.FromTime(time.Now())
|
||||
watcher := NewWALWatcher(nil, "", wt, dir, st)
|
||||
go watcher.Start()
|
||||
|
||||
expected := seriesCount
|
||||
retry(t, defaultRetryInterval, defaultRetries, func() bool {
|
||||
return wt.checkNumLabels() >= expected
|
||||
})
|
||||
watcher.Stop()
|
||||
testutil.Equals(t, expected, wt.checkNumLabels())
|
||||
}
|
||||
|
||||
func Test_readToEnd_withCheckpoint(t *testing.T) {
|
||||
pageSize := 32 * 1024
|
||||
const seriesCount = 10
|
||||
const samplesCount = 250
|
||||
|
||||
dir, err := ioutil.TempDir("", "readToEnd_withCheckpoint")
|
||||
testutil.Ok(t, err)
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
wdir := path.Join(dir, "wal")
|
||||
err = os.Mkdir(wdir, 0777)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
os.Create(wal.SegmentName(wdir, 30))
|
||||
|
||||
enc := tsdb.RecordEncoder{}
|
||||
w, err := wal.NewSize(nil, nil, wdir, 128*pageSize)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
// Write to the initial segment then checkpoint.
|
||||
for i := 0; i < seriesCount*10; i++ {
|
||||
ref := i + 100
|
||||
series := enc.Series([]tsdb.RefSeries{
|
||||
tsdb.RefSeries{
|
||||
Ref: uint64(ref),
|
||||
Labels: labels.Labels{labels.Label{"__name__", fmt.Sprintf("metric_%d", i)}},
|
||||
},
|
||||
}, nil)
|
||||
testutil.Ok(t, w.Log(series))
|
||||
|
||||
for j := 0; j < samplesCount*10; j++ {
|
||||
inner := rand.Intn(ref + 1)
|
||||
sample := enc.Samples([]tsdb.RefSample{
|
||||
tsdb.RefSample{
|
||||
Ref: uint64(inner),
|
||||
T: int64(i),
|
||||
V: float64(i),
|
||||
},
|
||||
}, nil)
|
||||
testutil.Ok(t, w.Log(sample))
|
||||
}
|
||||
}
|
||||
tsdb.Checkpoint(w, 30, 31, func(x uint64) bool { return true }, 0)
|
||||
w.Truncate(32)
|
||||
|
||||
// Write more records after checkpointing.
|
||||
for i := 0; i < seriesCount*10; i++ {
|
||||
series := enc.Series([]tsdb.RefSeries{
|
||||
tsdb.RefSeries{
|
||||
Ref: uint64(i),
|
||||
Labels: labels.Labels{labels.Label{"__name__", fmt.Sprintf("metric_%d", i)}},
|
||||
},
|
||||
}, nil)
|
||||
testutil.Ok(t, w.Log(series))
|
||||
|
||||
for j := 0; j < samplesCount*10; j++ {
|
||||
sample := enc.Samples([]tsdb.RefSample{
|
||||
tsdb.RefSample{
|
||||
Ref: uint64(j),
|
||||
T: int64(i),
|
||||
V: float64(i),
|
||||
},
|
||||
}, nil)
|
||||
testutil.Ok(t, w.Log(sample))
|
||||
}
|
||||
}
|
||||
|
||||
_, _, err = w.Segments()
|
||||
testutil.Ok(t, err)
|
||||
wt := newWriteToMock()
|
||||
st := timestamp.FromTime(time.Now())
|
||||
watcher := NewWALWatcher(nil, "", wt, dir, st)
|
||||
go watcher.Start()
|
||||
|
||||
expected := seriesCount * 10 * 2
|
||||
retry(t, defaultRetryInterval, defaultRetries, func() bool {
|
||||
return wt.checkNumLabels() >= expected
|
||||
})
|
||||
watcher.Stop()
|
||||
testutil.Equals(t, expected, wt.checkNumLabels())
|
||||
}
|
||||
|
||||
func Test_readCheckpoint(t *testing.T) {
|
||||
pageSize := 32 * 1024
|
||||
const seriesCount = 10
|
||||
const samplesCount = 250
|
||||
|
||||
dir, err := ioutil.TempDir("", "readCheckpoint")
|
||||
testutil.Ok(t, err)
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
wdir := path.Join(dir, "wal")
|
||||
err = os.Mkdir(wdir, 0777)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
os.Create(wal.SegmentName(wdir, 30))
|
||||
|
||||
enc := tsdb.RecordEncoder{}
|
||||
w, err := wal.NewSize(nil, nil, wdir, 128*pageSize)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
// Write to the initial segment then checkpoint.
|
||||
for i := 0; i < seriesCount*10; i++ {
|
||||
ref := i + 100
|
||||
series := enc.Series([]tsdb.RefSeries{
|
||||
tsdb.RefSeries{
|
||||
Ref: uint64(ref),
|
||||
Labels: labels.Labels{labels.Label{"__name__", fmt.Sprintf("metric_%d", i)}},
|
||||
},
|
||||
}, nil)
|
||||
testutil.Ok(t, w.Log(series))
|
||||
|
||||
for j := 0; j < samplesCount*10; j++ {
|
||||
inner := rand.Intn(ref + 1)
|
||||
sample := enc.Samples([]tsdb.RefSample{
|
||||
tsdb.RefSample{
|
||||
Ref: uint64(inner),
|
||||
T: int64(i),
|
||||
V: float64(i),
|
||||
},
|
||||
}, nil)
|
||||
testutil.Ok(t, w.Log(sample))
|
||||
}
|
||||
}
|
||||
tsdb.Checkpoint(w, 30, 31, func(x uint64) bool { return true }, 0)
|
||||
w.Truncate(32)
|
||||
|
||||
// Start read after checkpoint, no more data written.
|
||||
_, _, err = w.Segments()
|
||||
testutil.Ok(t, err)
|
||||
|
||||
wt := newWriteToMock()
|
||||
st := timestamp.FromTime(time.Now())
|
||||
watcher := NewWALWatcher(nil, "", wt, dir, st)
|
||||
go watcher.Start()
|
||||
|
||||
expected := seriesCount * 10
|
||||
retry(t, defaultRetryInterval, defaultRetries, func() bool {
|
||||
return wt.checkNumLabels() >= expected
|
||||
})
|
||||
watcher.Stop()
|
||||
testutil.Equals(t, expected, wt.checkNumLabels())
|
||||
}
|
||||
|
||||
func Test_checkpoint_seriesReset(t *testing.T) {
|
||||
pageSize := 32 * 1024
|
||||
const seriesCount = 10
|
||||
const samplesCount = 250
|
||||
|
||||
dir, err := ioutil.TempDir("", "seriesReset")
|
||||
testutil.Ok(t, err)
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
wdir := path.Join(dir, "wal")
|
||||
err = os.Mkdir(wdir, 0777)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
enc := tsdb.RecordEncoder{}
|
||||
w, err := wal.NewSize(nil, nil, wdir, pageSize)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
// Write to the initial segment, then checkpoint later.
|
||||
for i := 0; i < seriesCount*10; i++ {
|
||||
ref := i + 100
|
||||
series := enc.Series([]tsdb.RefSeries{
|
||||
tsdb.RefSeries{
|
||||
Ref: uint64(ref),
|
||||
Labels: labels.Labels{labels.Label{"__name__", fmt.Sprintf("metric_%d", i)}},
|
||||
},
|
||||
}, nil)
|
||||
testutil.Ok(t, w.Log(series))
|
||||
|
||||
for j := 0; j < samplesCount*10; j++ {
|
||||
inner := rand.Intn(ref + 1)
|
||||
sample := enc.Samples([]tsdb.RefSample{
|
||||
tsdb.RefSample{
|
||||
Ref: uint64(inner),
|
||||
T: int64(i),
|
||||
V: float64(i),
|
||||
},
|
||||
}, nil)
|
||||
testutil.Ok(t, w.Log(sample))
|
||||
}
|
||||
}
|
||||
|
||||
_, _, err = w.Segments()
|
||||
testutil.Ok(t, err)
|
||||
|
||||
wt := newWriteToMock()
|
||||
st := timestamp.FromTime(time.Now())
|
||||
watcher := NewWALWatcher(nil, "", wt, dir, st)
|
||||
go watcher.Start()
|
||||
|
||||
expected := seriesCount * 10
|
||||
retry(t, defaultRetryInterval, defaultRetries, func() bool {
|
||||
return wt.checkNumLabels() >= expected
|
||||
})
|
||||
watcher.Stop()
|
||||
testutil.Equals(t, seriesCount*10, wt.checkNumLabels())
|
||||
|
||||
// If you modify the checkpoint and truncation segment numbers, run the test to see how
|
||||
// many series records you end up with, and change the last Equals check accordingly,
|
||||
// or change the Equals to assert that len(wt.seriesLabels) < seriesCount*10.
|
||||
_, err = tsdb.Checkpoint(w, 50, 200, func(x uint64) bool { return true }, 0)
|
||||
testutil.Ok(t, err)
|
||||
w.Truncate(200)
|
||||
|
||||
cp, _, err := tsdb.LastCheckpoint(path.Join(dir, "wal"))
|
||||
testutil.Ok(t, err)
|
||||
err = watcher.readCheckpoint(cp)
|
||||
testutil.Ok(t, err)
|
||||
}
|
||||
|
||||
func Test_decodeRecord(t *testing.T) {
|
||||
dir, err := ioutil.TempDir("", "decodeRecord")
|
||||
testutil.Ok(t, err)
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
wt := newWriteToMock()
|
||||
// st := timestamp.FromTime(time.Now().Add(-10 * time.Second))
|
||||
watcher := NewWALWatcher(nil, "", wt, dir, 0)
|
||||
|
||||
// decode a series record
|
||||
enc := tsdb.RecordEncoder{}
|
||||
buf := enc.Series([]tsdb.RefSeries{tsdb.RefSeries{Ref: 1234, Labels: labels.Labels{}}}, nil)
|
||||
watcher.decodeRecord(buf)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
testutil.Equals(t, 1, wt.checkNumLabels())
|
||||
|
||||
// decode a samples record
|
||||
buf = enc.Samples([]tsdb.RefSample{tsdb.RefSample{Ref: 100, T: 1, V: 1.0}, tsdb.RefSample{Ref: 100, T: 2, V: 2.0}}, nil)
|
||||
watcher.decodeRecord(buf)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
testutil.Equals(t, 2, wt.samplesAppended)
|
||||
}
|
||||
|
||||
func Test_decodeRecord_afterStart(t *testing.T) {
|
||||
dir, err := ioutil.TempDir("", "decodeRecord")
|
||||
testutil.Ok(t, err)
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
wt := newWriteToMock()
|
||||
// st := timestamp.FromTime(time.Now().Add(-10 * time.Second))
|
||||
watcher := NewWALWatcher(nil, "", wt, dir, 1)
|
||||
|
||||
// decode a series record
|
||||
enc := tsdb.RecordEncoder{}
|
||||
buf := enc.Series([]tsdb.RefSeries{tsdb.RefSeries{Ref: 1234, Labels: labels.Labels{}}}, nil)
|
||||
watcher.decodeRecord(buf)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
testutil.Equals(t, 1, wt.checkNumLabels())
|
||||
|
||||
// decode a samples record
|
||||
buf = enc.Samples([]tsdb.RefSample{tsdb.RefSample{Ref: 100, T: 1, V: 1.0}, tsdb.RefSample{Ref: 100, T: 2, V: 2.0}}, nil)
|
||||
watcher.decodeRecord(buf)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
testutil.Equals(t, 1, wt.samplesAppended)
|
||||
}
|
|
@ -14,44 +14,53 @@
|
|||
package remote
|
||||
|
||||
import (
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/prometheus/prometheus/pkg/labels"
|
||||
"github.com/prometheus/prometheus/storage"
|
||||
)
|
||||
|
||||
// Appender implements scrape.Appendable.
|
||||
func (s *Storage) Appender() (storage.Appender, error) {
|
||||
return s, nil
|
||||
return ×tampTracker{
|
||||
storage: s,
|
||||
}, nil
|
||||
}
|
||||
|
||||
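// timestampTracker counts samples and records the highest timestamp seen during a
// single append transaction; Commit folds both into the Storage's samplesIn rate
// and highest-timestamp gauge.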
type timestampTracker struct {
|
||||
storage *Storage
|
||||
samples int64
|
||||
highestTimestamp int64
|
||||
}
|
||||
|
||||
// Add implements storage.Appender.
|
||||
func (s *Storage) Add(l labels.Labels, t int64, v float64) (uint64, error) {
|
||||
s.mtx.RLock()
|
||||
defer s.mtx.RUnlock()
|
||||
for _, q := range s.queues {
|
||||
if err := q.Append(&model.Sample{
|
||||
Metric: labelsToMetric(l),
|
||||
Timestamp: model.Time(t),
|
||||
Value: model.SampleValue(v),
|
||||
}); err != nil {
|
||||
panic(err) // QueueManager.Append() should always return nil as per doc string.
|
||||
}
|
||||
func (t *timestampTracker) Add(_ labels.Labels, ts int64, v float64) (uint64, error) {
|
||||
t.samples++
|
||||
if ts > t.highestTimestamp {
|
||||
t.highestTimestamp = ts
|
||||
}
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// AddFast implements storage.Appender.
|
||||
func (s *Storage) AddFast(l labels.Labels, _ uint64, t int64, v float64) error {
|
||||
_, err := s.Add(l, t, v)
|
||||
func (t *timestampTracker) AddFast(l labels.Labels, _ uint64, ts int64, v float64) error {
|
||||
_, err := t.Add(l, ts, v)
|
||||
return err
|
||||
}
|
||||
|
||||
// Commit implements storage.Appender.
|
||||
func (*Storage) Commit() error {
|
||||
func (t *timestampTracker) Commit() error {
|
||||
t.storage.samplesIn.incr(t.samples)
|
||||
t.storage.samplesInMetric.Add(float64(t.samples))
|
||||
|
||||
t.storage.highestTimestampMtx.Lock()
|
||||
defer t.storage.highestTimestampMtx.Unlock()
|
||||
if t.highestTimestamp > t.storage.highestTimestamp {
|
||||
t.storage.highestTimestamp = t.highestTimestamp
|
||||
t.storage.highestTimestampMetric.Set(float64(t.highestTimestamp))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Rollback implements storage.Appender.
|
||||
func (*Storage) Rollback() error {
|
||||
func (*timestampTracker) Rollback() error {
|
||||
return nil
|
||||
}
|
||||
|
|
4
vendor/modules.txt
vendored
|
@ -243,12 +243,12 @@ github.com/prometheus/procfs/xfs
|
|||
github.com/prometheus/procfs/internal/util
|
||||
# github.com/prometheus/tsdb v0.4.0
|
||||
github.com/prometheus/tsdb
|
||||
github.com/prometheus/tsdb/fileutil
|
||||
github.com/prometheus/tsdb/wal
|
||||
github.com/prometheus/tsdb/labels
|
||||
github.com/prometheus/tsdb/chunkenc
|
||||
github.com/prometheus/tsdb/chunks
|
||||
github.com/prometheus/tsdb/fileutil
|
||||
github.com/prometheus/tsdb/index
|
||||
github.com/prometheus/tsdb/wal
|
||||
# github.com/samuel/go-zookeeper v0.0.0-20161028232340-1d7be4effb13
|
||||
github.com/samuel/go-zookeeper/zk
|
||||
# github.com/sasha-s/go-deadlock v0.0.0-20161201235124-341000892f3d
|
||||
|
|
|
@ -33,6 +33,7 @@ import (
|
|||
"github.com/go-kit/kit/log"
|
||||
"github.com/gogo/protobuf/proto"
|
||||
"github.com/golang/snappy"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
config_util "github.com/prometheus/common/config"
|
||||
"github.com/prometheus/common/model"
|
||||
"github.com/prometheus/common/promlog"
|
||||
|
@ -279,9 +280,14 @@ func TestEndpoints(t *testing.T) {
|
|||
Format: &af,
|
||||
}
|
||||
|
||||
remote := remote.NewStorage(promlog.New(&promlogConfig), func() (int64, error) {
|
||||
dbDir, err := ioutil.TempDir("", "tsdb-api-ready")
|
||||
testutil.Ok(t, err)
|
||||
defer os.RemoveAll(dbDir)
|
||||
|
||||
testutil.Ok(t, err)
|
||||
remote := remote.NewStorage(promlog.New(&promlogConfig), prometheus.DefaultRegisterer, func() (int64, error) {
|
||||
return 0, nil
|
||||
}, 1*time.Second)
|
||||
}, dbDir, 1*time.Second)
|
||||
|
||||
err = remote.ApplyConfig(&config.Config{
|
||||
RemoteReadConfigs: []*config.RemoteReadConfig{
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -62,6 +62,12 @@ Prometheus.Graph.prototype.initialize = function() {
|
|||
var graphWrapper = self.el.find("#graph_wrapper" + self.id);
|
||||
self.queryForm = graphWrapper.find(".query_form");
|
||||
|
||||
// Auto-resize the text area on input or mouse click.
|
||||
var resizeTextarea = function(el) {
|
||||
var offset = el.offsetHeight - el.clientHeight;
|
||||
$(el).css('height', 'auto').css('height', el.scrollHeight + offset);
|
||||
};
|
||||
|
||||
self.expr = graphWrapper.find("textarea[name=expr]");
|
||||
self.expr.keypress(function(e) {
|
||||
const enter = 13;
|
||||
|
@ -69,14 +75,13 @@ Prometheus.Graph.prototype.initialize = function() {
|
|||
self.queryForm.submit();
|
||||
e.preventDefault();
|
||||
}
|
||||
|
||||
// Auto-resize the text area on input.
|
||||
var offset = this.offsetHeight - this.clientHeight;
|
||||
var resizeTextarea = function(el) {
|
||||
$(el).css('height', 'auto').css('height', el.scrollHeight + offset);
|
||||
};
|
||||
$(this).on('keyup input', function() { resizeTextarea(this); });
|
||||
});
|
||||
|
||||
self.expr.click(function(e) {
|
||||
resizeTextarea(this);
|
||||
});
|
||||
|
||||
self.expr.change(self.handleChange);
|
||||
|
||||
self.rangeInput = self.queryForm.find("input[name=range_input]");
|
||||
|
|
|
@ -21,6 +21,14 @@ function showUnhealthy(_, container) {
|
|||
}
|
||||
|
||||
function init() {
|
||||
if (!localStorage.selectedTab || localStorage.selectedTab == "all-targets"){
|
||||
$("#all-targets").parent().addClass("active");
|
||||
$(".table-container").each(showAll);
|
||||
} else if (localStorage.selectedTab == "unhealthy-targets") {
|
||||
$("#unhealthy-targets").parent().addClass("active");
|
||||
$(".table-container").each(showUnhealthy);
|
||||
}
|
||||
|
||||
$("button.targets").click(function () {
|
||||
const tableTitle = $(this).closest("h2").find("a").attr("id");
|
||||
|
||||
|
@ -45,8 +53,10 @@ function init() {
|
|||
|
||||
if (target === "all-targets") {
|
||||
$(".table-container").each(showAll);
|
||||
localStorage.setItem("selectedTab", "all-targets");
|
||||
} else if (target === "unhealthy-targets") {
|
||||
$(".table-container").each(showUnhealthy);
|
||||
localStorage.setItem("selectedTab", "unhealthy-targets");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
<div class="container-fluid">
|
||||
<h1>Targets</h1>
|
||||
<div id="showTargets" class="btn-group btn-group-toggle" data-toggle="buttons">
|
||||
<label class="btn btn-primary active">
|
||||
<label class="btn btn-primary">
|
||||
<input type="radio" name="targets" id="all-targets" autocomplete="off" checked> All
|
||||
</label>
|
||||
<label class="btn btn-primary ml-1">
|
||||
|
|
|
@@ -25,6 +25,10 @@ import (
"testing"
"time"

"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/notifier"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/scrape"
"github.com/prometheus/prometheus/storage/tsdb"
"github.com/prometheus/prometheus/util/testutil"
libtsdb "github.com/prometheus/tsdb"

@@ -101,8 +105,8 @@ func TestReadyAndHealthy(t *testing.T) {
Context: nil,
Storage: &tsdb.ReadyStorage{},
QueryEngine: nil,
ScrapeManager: nil,
RuleManager: nil,
ScrapeManager: &scrape.Manager{},
RuleManager: &rules.Manager{},
Notifier: nil,
RoutePrefix: "/",
EnableAdminAPI: true,

@@ -118,6 +122,10 @@ func TestReadyAndHealthy(t *testing.T) {
opts.Flags = map[string]string{}

webHandler := New(nil, opts)

webHandler.config = &config.Config{}
webHandler.notifier = &notifier.Manager{}

go func() {
err := webHandler.Run(context.Background())
if err != nil {

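Taken together with the previous hunk, the test now hands the web handler stub but non-nil dependencies before starting it. A condensed sketch of that setup, not the verbatim test code; which stub unblocks which page is an inference from the endpoints exercised in the hunks below:

    // Stub dependencies so the template-rendered pages can load without nil dereferences.
    opts.ScrapeManager = &scrape.Manager{} // presumably for /targets and /service-discovery
    opts.RuleManager = &rules.Manager{}    // presumably for /rules and /alerts
    opts.Flags = map[string]string{}       // presumably for /flags

    webHandler := New(nil, opts)
    webHandler.config = &config.Config{}      // presumably for /config and /status
    webHandler.notifier = &notifier.Manager{} // presumably for /status

    go func() {
        // Until Ready() is called, the handler answers 503 on UI pages (see the hunks below).
        if err := webHandler.Run(context.Background()); err != nil {
            panic(err) // the real test reports this differently; this is a sketch only
        }
    }()
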
@@ -159,6 +167,46 @@ func TestReadyAndHealthy(t *testing.T) {
testutil.Ok(t, err)
testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/graph")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/alerts")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/flags")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/rules")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/service-discovery")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/targets")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/config")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/status")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusServiceUnavailable, resp.StatusCode)

// Set to ready.
webHandler.Ready()

@@ -191,6 +239,41 @@ func TestReadyAndHealthy(t *testing.T) {

testutil.Ok(t, err)
testutil.Equals(t, http.StatusOK, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/alerts")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusOK, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/flags")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusOK, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/rules")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusOK, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/service-discovery")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusOK, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/targets")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusOK, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/config")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusOK, resp.StatusCode)

resp, err = http.Get("http://localhost:9090/status")

testutil.Ok(t, err)
testutil.Equals(t, http.StatusOK, resp.StatusCode)
}

func TestRoutePrefix(t *testing.T) {

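The two hunks above repeat the same Get / Ok / Equals triple for every UI path, first expecting 503 and then 200 once Ready() has been called. A table-driven sketch of the same checks, shown only to summarize the pattern rather than what the commit ships; it assumes the testutil helpers and the localhost:9090 listener used throughout this test:

    // One loop per expected status code instead of three assertions per path.
    paths := []string{
        "/graph", "/alerts", "/flags", "/rules",
        "/service-discovery", "/targets", "/config", "/status",
    }
    checkStatus := func(expected int) {
        for _, p := range paths {
            resp, err := http.Get("http://localhost:9090" + p)
            testutil.Ok(t, err)
            testutil.Equals(t, expected, resp.StatusCode)
        }
    }

    checkStatus(http.StatusServiceUnavailable) // before webHandler.Ready()
    webHandler.Ready()
    checkStatus(http.StatusOK) // once the handler reports ready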