Merge pull request #2991 from tomwilkie/2990-remote-config

Make queue manager configurable.
This commit is contained in:
Tom Wilkie 2017-08-03 10:26:29 +01:00 committed by GitHub
commit e1c77cdfd4
5 changed files with 75 additions and 49 deletions

View file

@ -173,6 +173,25 @@ var (
// DefaultRemoteWriteConfig is the default remote write configuration. // DefaultRemoteWriteConfig is the default remote write configuration.
DefaultRemoteWriteConfig = RemoteWriteConfig{ DefaultRemoteWriteConfig = RemoteWriteConfig{
RemoteTimeout: model.Duration(30 * time.Second), RemoteTimeout: model.Duration(30 * time.Second),
QueueConfig: DefaultQueueConfig,
}
// DefaultQueueConfig is the default remote queue configuration. It is the
// value DefaultRemoteWriteConfig uses for its QueueConfig field.
DefaultQueueConfig = QueueConfig{
// With a maximum of 1000 shards, assuming an average of 100ms remote write
// time (i.e. 10 batches/s per shard) and 100 samples per batch, we will be
// able to push 1000 * 10 * 100 = 1M samples/s.
MaxShards: 1000,
MaxSamplesPerSend: 100,
// By default, buffer 100,000 samples per shard, i.e. 1000 batches of 100
// samples, which at 100ms per batch is 100s (1m40s) worth of sends. At
// 1000 shards, this will buffer 100M samples total.
Capacity: 100 * 1000,
// Maximum time a sample will wait in the buffer.
BatchSendDeadline: 5 * time.Second,
// Max number of times to retry a batch on recoverable errors.
MaxRetries: 10,
// Backoff durations used on recoverable errors.
// NOTE(review): presumably the lower and upper bounds of the exponential
// backoff; confirm against the queue manager's retry loop.
MinBackoff: 30 * time.Millisecond,
MaxBackoff: 100 * time.Millisecond,
} }
// DefaultRemoteReadConfig is the default remote read configuration. // DefaultRemoteReadConfig is the default remote read configuration.
@ -1392,6 +1411,7 @@ type RemoteWriteConfig struct {
// We cannot do proper Go type embedding below as the parser will then parse // We cannot do proper Go type embedding below as the parser will then parse
// values arbitrarily into the overflow maps of further-down types. // values arbitrarily into the overflow maps of further-down types.
HTTPClientConfig HTTPClientConfig `yaml:",inline"` HTTPClientConfig HTTPClientConfig `yaml:",inline"`
QueueConfig QueueConfig `yaml:"queue_config,omitempty"`
// Catches all undefined fields and must be empty after parsing. // Catches all undefined fields and must be empty after parsing.
XXX map[string]interface{} `yaml:",inline"` XXX map[string]interface{} `yaml:",inline"`
@ -1404,12 +1424,43 @@ func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
if err := unmarshal((*plain)(c)); err != nil { if err := unmarshal((*plain)(c)); err != nil {
return err return err
} }
// The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer.
// We cannot make it a pointer as the parser panics for inlined pointer structs.
// Thus we just do its validation here.
if err := c.HTTPClientConfig.validate(); err != nil {
return err
}
if err := checkOverflow(c.XXX, "remote_write"); err != nil { if err := checkOverflow(c.XXX, "remote_write"); err != nil {
return err return err
} }
return nil return nil
} }
// QueueConfig is the configuration for the queue used to write to remote
// storage. It is exposed to users under the "queue_config" key of a
// remote write configuration; every field carries omitempty in its yaml tag.
type QueueConfig struct {
// Number of samples to buffer per shard before we start dropping them.
Capacity int `yaml:"capacity,omitempty"`

// Max number of shards, i.e. amount of concurrency.
MaxShards int `yaml:"max_shards,omitempty"`

// Maximum number of samples per send.
MaxSamplesPerSend int `yaml:"max_samples_per_send,omitempty"`

// Maximum time a sample will wait in the buffer.
BatchSendDeadline time.Duration `yaml:"batch_send_deadline,omitempty"`

// Max number of times to retry a batch on recoverable errors.
MaxRetries int `yaml:"max_retries,omitempty"`

// On recoverable errors, backoff exponentially.
// NOTE(review): MinBackoff and MaxBackoff are presumably the lower and upper
// bounds of that backoff; confirm against the queue manager's retry loop.
MinBackoff time.Duration `yaml:"min_backoff,omitempty"`
MaxBackoff time.Duration `yaml:"max_backoff,omitempty"`
}
// RemoteReadConfig is the configuration for reading from remote storage. // RemoteReadConfig is the configuration for reading from remote storage.
type RemoteReadConfig struct { type RemoteReadConfig struct {
URL *URL `yaml:"url,omitempty"` URL *URL `yaml:"url,omitempty"`
@ -1430,6 +1481,14 @@ func (c *RemoteReadConfig) UnmarshalYAML(unmarshal func(interface{}) error) erro
if err := unmarshal((*plain)(c)); err != nil { if err := unmarshal((*plain)(c)); err != nil {
return err return err
} }
// The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer.
// We cannot make it a pointer as the parser panics for inlined pointer structs.
// Thus we just do its validation here.
if err := c.HTTPClientConfig.validate(); err != nil {
return err
}
if err := checkOverflow(c.XXX, "remote_read"); err != nil { if err := checkOverflow(c.XXX, "remote_read"); err != nil {
return err return err
} }

View file

@ -66,10 +66,12 @@ var expectedConf = &Config{
Action: RelabelDrop, Action: RelabelDrop,
}, },
}, },
QueueConfig: DefaultQueueConfig,
}, },
{ {
URL: mustParseURL("http://remote2/push"), URL: mustParseURL("http://remote2/push"),
RemoteTimeout: model.Duration(30 * time.Second), RemoteTimeout: model.Duration(30 * time.Second),
QueueConfig: DefaultQueueConfig,
}, },
}, },

View file

@ -123,42 +123,6 @@ func init() {
prometheus.MustRegister(numShards) prometheus.MustRegister(numShards)
} }
// QueueManagerConfig is the configuration for the queue used to write to remote
// storage.
type QueueManagerConfig struct {
// Number of samples to buffer per shard before we start dropping them.
QueueCapacity int
// Max number of shards, i.e. amount of concurrency.
MaxShards int
// Maximum number of samples per send.
MaxSamplesPerSend int
// Maximum time a sample will wait in the buffer.
BatchSendDeadline time.Duration
// Max number of times to retry a batch on recoverable errors.
MaxRetries int
// On recoverable errors, backoff exponentially.
// NOTE(review): MinBackoff and MaxBackoff are presumably the lower and upper
// bounds of that backoff; confirm against the retry loop.
MinBackoff time.Duration
MaxBackoff time.Duration
}
// defaultQueueManagerConfig is the default remote queue configuration.
var defaultQueueManagerConfig = QueueManagerConfig{
// With a maximum of 1000 shards, assuming an average of 100ms remote write
// time (i.e. 10 batches/s per shard) and 100 samples per batch, we will be
// able to push 1000 * 10 * 100 = 1M samples/s.
MaxShards: 1000,
MaxSamplesPerSend: 100,
// By default, buffer 100,000 samples per shard, i.e. 1000 batches of 100
// samples, which at 100ms per batch is 100s (1m40s) worth of sends. At
// 1000 shards, this will buffer 100M samples total.
QueueCapacity: 100 * 1000,
// Maximum time a sample will wait in the buffer.
BatchSendDeadline: 5 * time.Second,
// Max number of times to retry a batch on recoverable errors.
MaxRetries: 10,
// Backoff durations used on recoverable errors.
// NOTE(review): presumably the lower and upper bounds of the exponential
// backoff; confirm against the retry loop.
MinBackoff: 30 * time.Millisecond,
MaxBackoff: 100 * time.Millisecond,
}
// StorageClient defines an interface for sending a batch of samples to an // StorageClient defines an interface for sending a batch of samples to an
// external timeseries database. // external timeseries database.
type StorageClient interface { type StorageClient interface {
@ -171,7 +135,7 @@ type StorageClient interface {
// QueueManager manages a queue of samples to be sent to the Storage // QueueManager manages a queue of samples to be sent to the Storage
// indicated by the provided StorageClient. // indicated by the provided StorageClient.
type QueueManager struct { type QueueManager struct {
cfg QueueManagerConfig cfg config.QueueConfig
externalLabels model.LabelSet externalLabels model.LabelSet
relabelConfigs []*config.RelabelConfig relabelConfigs []*config.RelabelConfig
client StorageClient client StorageClient
@ -190,7 +154,7 @@ type QueueManager struct {
} }
// NewQueueManager builds a new QueueManager. // NewQueueManager builds a new QueueManager.
func NewQueueManager(cfg QueueManagerConfig, externalLabels model.LabelSet, relabelConfigs []*config.RelabelConfig, client StorageClient) *QueueManager { func NewQueueManager(cfg config.QueueConfig, externalLabels model.LabelSet, relabelConfigs []*config.RelabelConfig, client StorageClient) *QueueManager {
t := &QueueManager{ t := &QueueManager{
cfg: cfg, cfg: cfg,
externalLabels: externalLabels, externalLabels: externalLabels,
@ -209,7 +173,7 @@ func NewQueueManager(cfg QueueManagerConfig, externalLabels model.LabelSet, rela
} }
t.shards = t.newShards(t.numShards) t.shards = t.newShards(t.numShards)
numShards.WithLabelValues(t.queueName).Set(float64(t.numShards)) numShards.WithLabelValues(t.queueName).Set(float64(t.numShards))
queueCapacity.WithLabelValues(t.queueName).Set(float64(t.cfg.QueueCapacity)) queueCapacity.WithLabelValues(t.queueName).Set(float64(t.cfg.Capacity))
return t return t
} }
@ -397,7 +361,7 @@ type shards struct {
func (t *QueueManager) newShards(numShards int) *shards { func (t *QueueManager) newShards(numShards int) *shards {
queues := make([]chan *model.Sample, numShards) queues := make([]chan *model.Sample, numShards)
for i := 0; i < numShards; i++ { for i := 0; i < numShards; i++ {
queues[i] = make(chan *model.Sample, t.cfg.QueueCapacity) queues[i] = make(chan *model.Sample, t.cfg.Capacity)
} }
s := &shards{ s := &shards{
qm: t, qm: t,

View file

@ -21,6 +21,7 @@ import (
"time" "time"
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/config"
) )
type TestStorageClient struct { type TestStorageClient struct {
@ -81,7 +82,7 @@ func (c *TestStorageClient) Name() string {
func TestSampleDelivery(t *testing.T) { func TestSampleDelivery(t *testing.T) {
// Let's create an even number of send batches so we don't run into the // Let's create an even number of send batches so we don't run into the
// batch timeout case. // batch timeout case.
n := defaultQueueManagerConfig.QueueCapacity * 2 n := config.DefaultQueueConfig.Capacity * 2
samples := make(model.Samples, 0, n) samples := make(model.Samples, 0, n)
for i := 0; i < n; i++ { for i := 0; i < n; i++ {
@ -97,7 +98,7 @@ func TestSampleDelivery(t *testing.T) {
c := NewTestStorageClient() c := NewTestStorageClient()
c.expectSamples(samples[:len(samples)/2]) c.expectSamples(samples[:len(samples)/2])
cfg := defaultQueueManagerConfig cfg := config.DefaultQueueConfig
cfg.MaxShards = 1 cfg.MaxShards = 1
m := NewQueueManager(cfg, nil, nil, c) m := NewQueueManager(cfg, nil, nil, c)
@ -117,7 +118,7 @@ func TestSampleDelivery(t *testing.T) {
func TestSampleDeliveryOrder(t *testing.T) { func TestSampleDeliveryOrder(t *testing.T) {
ts := 10 ts := 10
n := defaultQueueManagerConfig.MaxSamplesPerSend * ts n := config.DefaultQueueConfig.MaxSamplesPerSend * ts
samples := make(model.Samples, 0, n) samples := make(model.Samples, 0, n)
for i := 0; i < n; i++ { for i := 0; i < n; i++ {
@ -133,7 +134,7 @@ func TestSampleDeliveryOrder(t *testing.T) {
c := NewTestStorageClient() c := NewTestStorageClient()
c.expectSamples(samples) c.expectSamples(samples)
m := NewQueueManager(defaultQueueManagerConfig, nil, nil, c) m := NewQueueManager(config.DefaultQueueConfig, nil, nil, c)
// These should be received by the client. // These should be received by the client.
for _, s := range samples { for _, s := range samples {
@ -194,7 +195,7 @@ func TestSpawnNotMoreThanMaxConcurrentSendsGoroutines(t *testing.T) {
// `MaxSamplesPerSend*Shards` samples should be consumed by the // `MaxSamplesPerSend*Shards` samples should be consumed by the
// per-shard goroutines, and then another `MaxSamplesPerSend` // per-shard goroutines, and then another `MaxSamplesPerSend`
// should be left on the queue. // should be left on the queue.
n := defaultQueueManagerConfig.MaxSamplesPerSend * 2 n := config.DefaultQueueConfig.MaxSamplesPerSend * 2
samples := make(model.Samples, 0, n) samples := make(model.Samples, 0, n)
for i := 0; i < n; i++ { for i := 0; i < n; i++ {
@ -208,9 +209,9 @@ func TestSpawnNotMoreThanMaxConcurrentSendsGoroutines(t *testing.T) {
} }
c := NewTestBlockedStorageClient() c := NewTestBlockedStorageClient()
cfg := defaultQueueManagerConfig cfg := config.DefaultQueueConfig
cfg.MaxShards = 1 cfg.MaxShards = 1
cfg.QueueCapacity = n cfg.Capacity = n
m := NewQueueManager(cfg, nil, nil, c) m := NewQueueManager(cfg, nil, nil, c)
m.Start() m.Start()
@ -240,7 +241,7 @@ func TestSpawnNotMoreThanMaxConcurrentSendsGoroutines(t *testing.T) {
time.Sleep(10 * time.Millisecond) time.Sleep(10 * time.Millisecond)
} }
if m.queueLen() != defaultQueueManagerConfig.MaxSamplesPerSend { if m.queueLen() != config.DefaultQueueConfig.MaxSamplesPerSend {
t.Fatalf("Failed to drain QueueManager queue, %d elements left", t.Fatalf("Failed to drain QueueManager queue, %d elements left",
m.queueLen(), m.queueLen(),
) )

View file

@ -45,7 +45,7 @@ func (w *Writer) ApplyConfig(conf *config.Config) error {
return err return err
} }
newQueues = append(newQueues, NewQueueManager( newQueues = append(newQueues, NewQueueManager(
defaultQueueManagerConfig, rwConf.QueueConfig,
conf.GlobalConfig.ExternalLabels, conf.GlobalConfig.ExternalLabels,
rwConf.WriteRelabelConfigs, rwConf.WriteRelabelConfigs,
c, c,