mirror of
https://github.com/prometheus/prometheus.git
synced 2025-02-21 03:16:00 -08:00
Merge pull request #2991 from tomwilkie/2990-remote-config
Make queue manager configurable.
This commit is contained in:
commit
e1c77cdfd4
|
@ -173,6 +173,25 @@ var (
|
||||||
// DefaultRemoteWriteConfig is the default remote write configuration.
|
// DefaultRemoteWriteConfig is the default remote write configuration.
|
||||||
DefaultRemoteWriteConfig = RemoteWriteConfig{
|
DefaultRemoteWriteConfig = RemoteWriteConfig{
|
||||||
RemoteTimeout: model.Duration(30 * time.Second),
|
RemoteTimeout: model.Duration(30 * time.Second),
|
||||||
|
QueueConfig: DefaultQueueConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
// DefaultQueueConfig is the default remote queue configuration.
|
||||||
|
DefaultQueueConfig = QueueConfig{
|
||||||
|
// With a maximum of 1000 shards, assuming an average of 100ms remote write
|
||||||
|
// time and 100 samples per batch, we will be able to push 1M samples/s.
|
||||||
|
MaxShards: 1000,
|
||||||
|
MaxSamplesPerSend: 100,
|
||||||
|
|
||||||
|
// By default, buffer 1000 batches, which at 100ms per batch is 100s (1m40s). At
|
||||||
|
// 1000 shards, this will buffer 100M samples total.
|
||||||
|
Capacity: 100 * 1000,
|
||||||
|
BatchSendDeadline: 5 * time.Second,
|
||||||
|
|
||||||
|
// Max number of times to retry a batch on recoverable errors.
|
||||||
|
MaxRetries: 10,
|
||||||
|
MinBackoff: 30 * time.Millisecond,
|
||||||
|
MaxBackoff: 100 * time.Millisecond,
|
||||||
}
|
}
|
||||||
|
|
||||||
// DefaultRemoteReadConfig is the default remote read configuration.
|
// DefaultRemoteReadConfig is the default remote read configuration.
|
||||||
|
@ -1392,6 +1411,7 @@ type RemoteWriteConfig struct {
|
||||||
// We cannot do proper Go type embedding below as the parser will then parse
|
// We cannot do proper Go type embedding below as the parser will then parse
|
||||||
// values arbitrarily into the overflow maps of further-down types.
|
// values arbitrarily into the overflow maps of further-down types.
|
||||||
HTTPClientConfig HTTPClientConfig `yaml:",inline"`
|
HTTPClientConfig HTTPClientConfig `yaml:",inline"`
|
||||||
|
QueueConfig QueueConfig `yaml:"queue_config,omitempty"`
|
||||||
|
|
||||||
// Catches all undefined fields and must be empty after parsing.
|
// Catches all undefined fields and must be empty after parsing.
|
||||||
XXX map[string]interface{} `yaml:",inline"`
|
XXX map[string]interface{} `yaml:",inline"`
|
||||||
|
@ -1404,12 +1424,43 @@ func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) err
|
||||||
if err := unmarshal((*plain)(c)); err != nil {
|
if err := unmarshal((*plain)(c)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer.
|
||||||
|
// We cannot make it a pointer as the parser panics for inlined pointer structs.
|
||||||
|
// Thus we just do its validation here.
|
||||||
|
if err := c.HTTPClientConfig.validate(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
if err := checkOverflow(c.XXX, "remote_write"); err != nil {
|
if err := checkOverflow(c.XXX, "remote_write"); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// QueueConfig is the configuration for the queue used to write to remote
|
||||||
|
// storage.
|
||||||
|
type QueueConfig struct {
|
||||||
|
// Number of samples to buffer per shard before we start dropping them.
|
||||||
|
Capacity int `yaml:"capacity,omitempty"`
|
||||||
|
|
||||||
|
// Max number of shards, i.e. amount of concurrency.
|
||||||
|
MaxShards int `yaml:"max_shards,omitempty"`
|
||||||
|
|
||||||
|
// Maximum number of samples per send.
|
||||||
|
MaxSamplesPerSend int `yaml:"max_samples_per_send,omitempty"`
|
||||||
|
|
||||||
|
// Maximum time a sample will wait in buffer.
|
||||||
|
BatchSendDeadline time.Duration `yaml:"batch_send_deadline,omitempty"`
|
||||||
|
|
||||||
|
// Max number of times to retry a batch on recoverable errors.
|
||||||
|
MaxRetries int `yaml:"max_retries,omitempty"`
|
||||||
|
|
||||||
|
// On recoverable errors, backoff exponentially.
|
||||||
|
MinBackoff time.Duration `yaml:"min_backoff,omitempty"`
|
||||||
|
MaxBackoff time.Duration `yaml:"max_backoff,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
// RemoteReadConfig is the configuration for reading from remote storage.
|
// RemoteReadConfig is the configuration for reading from remote storage.
|
||||||
type RemoteReadConfig struct {
|
type RemoteReadConfig struct {
|
||||||
URL *URL `yaml:"url,omitempty"`
|
URL *URL `yaml:"url,omitempty"`
|
||||||
|
@ -1430,6 +1481,14 @@ func (c *RemoteReadConfig) UnmarshalYAML(unmarshal func(interface{}) error) erro
|
||||||
if err := unmarshal((*plain)(c)); err != nil {
|
if err := unmarshal((*plain)(c)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The UnmarshalYAML method of HTTPClientConfig is not being called because it's not a pointer.
|
||||||
|
// We cannot make it a pointer as the parser panics for inlined pointer structs.
|
||||||
|
// Thus we just do its validation here.
|
||||||
|
if err := c.HTTPClientConfig.validate(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
if err := checkOverflow(c.XXX, "remote_read"); err != nil {
|
if err := checkOverflow(c.XXX, "remote_read"); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,10 +66,12 @@ var expectedConf = &Config{
|
||||||
Action: RelabelDrop,
|
Action: RelabelDrop,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
QueueConfig: DefaultQueueConfig,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
URL: mustParseURL("http://remote2/push"),
|
URL: mustParseURL("http://remote2/push"),
|
||||||
RemoteTimeout: model.Duration(30 * time.Second),
|
RemoteTimeout: model.Duration(30 * time.Second),
|
||||||
|
QueueConfig: DefaultQueueConfig,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
|
@ -123,42 +123,6 @@ func init() {
|
||||||
prometheus.MustRegister(numShards)
|
prometheus.MustRegister(numShards)
|
||||||
}
|
}
|
||||||
|
|
||||||
// QueueManagerConfig is the configuration for the queue used to write to remote
|
|
||||||
// storage.
|
|
||||||
type QueueManagerConfig struct {
|
|
||||||
// Number of samples to buffer per shard before we start dropping them.
|
|
||||||
QueueCapacity int
|
|
||||||
// Max number of shards, i.e. amount of concurrency.
|
|
||||||
MaxShards int
|
|
||||||
// Maximum number of samples per send.
|
|
||||||
MaxSamplesPerSend int
|
|
||||||
// Maximum time a sample will wait in buffer.
|
|
||||||
BatchSendDeadline time.Duration
|
|
||||||
// Max number of times to retry a batch on recoverable errors.
|
|
||||||
MaxRetries int
|
|
||||||
// On recoverable errors, backoff exponentially.
|
|
||||||
MinBackoff time.Duration
|
|
||||||
MaxBackoff time.Duration
|
|
||||||
}
|
|
||||||
|
|
||||||
// defaultQueueManagerConfig is the default remote queue configuration.
|
|
||||||
var defaultQueueManagerConfig = QueueManagerConfig{
|
|
||||||
// With a maximum of 1000 shards, assuming an average of 100ms remote write
|
|
||||||
// time and 100 samples per batch, we will be able to push 1M samples/s.
|
|
||||||
MaxShards: 1000,
|
|
||||||
MaxSamplesPerSend: 100,
|
|
||||||
|
|
||||||
// By default, buffer 1000 batches, which at 100ms per batch is 100s (1m40s). At
|
|
||||||
// 1000 shards, this will buffer 100M samples total.
|
|
||||||
QueueCapacity: 100 * 1000,
|
|
||||||
BatchSendDeadline: 5 * time.Second,
|
|
||||||
|
|
||||||
// Max number of times to retry a batch on recoverable errors.
|
|
||||||
MaxRetries: 10,
|
|
||||||
MinBackoff: 30 * time.Millisecond,
|
|
||||||
MaxBackoff: 100 * time.Millisecond,
|
|
||||||
}
|
|
||||||
|
|
||||||
// StorageClient defines an interface for sending a batch of samples to an
|
// StorageClient defines an interface for sending a batch of samples to an
|
||||||
// external timeseries database.
|
// external timeseries database.
|
||||||
type StorageClient interface {
|
type StorageClient interface {
|
||||||
|
@ -171,7 +135,7 @@ type StorageClient interface {
|
||||||
// QueueManager manages a queue of samples to be sent to the Storage
|
// QueueManager manages a queue of samples to be sent to the Storage
|
||||||
// indicated by the provided StorageClient.
|
// indicated by the provided StorageClient.
|
||||||
type QueueManager struct {
|
type QueueManager struct {
|
||||||
cfg QueueManagerConfig
|
cfg config.QueueConfig
|
||||||
externalLabels model.LabelSet
|
externalLabels model.LabelSet
|
||||||
relabelConfigs []*config.RelabelConfig
|
relabelConfigs []*config.RelabelConfig
|
||||||
client StorageClient
|
client StorageClient
|
||||||
|
@ -190,7 +154,7 @@ type QueueManager struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewQueueManager builds a new QueueManager.
|
// NewQueueManager builds a new QueueManager.
|
||||||
func NewQueueManager(cfg QueueManagerConfig, externalLabels model.LabelSet, relabelConfigs []*config.RelabelConfig, client StorageClient) *QueueManager {
|
func NewQueueManager(cfg config.QueueConfig, externalLabels model.LabelSet, relabelConfigs []*config.RelabelConfig, client StorageClient) *QueueManager {
|
||||||
t := &QueueManager{
|
t := &QueueManager{
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
externalLabels: externalLabels,
|
externalLabels: externalLabels,
|
||||||
|
@ -209,7 +173,7 @@ func NewQueueManager(cfg QueueManagerConfig, externalLabels model.LabelSet, rela
|
||||||
}
|
}
|
||||||
t.shards = t.newShards(t.numShards)
|
t.shards = t.newShards(t.numShards)
|
||||||
numShards.WithLabelValues(t.queueName).Set(float64(t.numShards))
|
numShards.WithLabelValues(t.queueName).Set(float64(t.numShards))
|
||||||
queueCapacity.WithLabelValues(t.queueName).Set(float64(t.cfg.QueueCapacity))
|
queueCapacity.WithLabelValues(t.queueName).Set(float64(t.cfg.Capacity))
|
||||||
|
|
||||||
return t
|
return t
|
||||||
}
|
}
|
||||||
|
@ -397,7 +361,7 @@ type shards struct {
|
||||||
func (t *QueueManager) newShards(numShards int) *shards {
|
func (t *QueueManager) newShards(numShards int) *shards {
|
||||||
queues := make([]chan *model.Sample, numShards)
|
queues := make([]chan *model.Sample, numShards)
|
||||||
for i := 0; i < numShards; i++ {
|
for i := 0; i < numShards; i++ {
|
||||||
queues[i] = make(chan *model.Sample, t.cfg.QueueCapacity)
|
queues[i] = make(chan *model.Sample, t.cfg.Capacity)
|
||||||
}
|
}
|
||||||
s := &shards{
|
s := &shards{
|
||||||
qm: t,
|
qm: t,
|
||||||
|
|
|
@ -21,6 +21,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/prometheus/common/model"
|
"github.com/prometheus/common/model"
|
||||||
|
"github.com/prometheus/prometheus/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
type TestStorageClient struct {
|
type TestStorageClient struct {
|
||||||
|
@ -81,7 +82,7 @@ func (c *TestStorageClient) Name() string {
|
||||||
func TestSampleDelivery(t *testing.T) {
|
func TestSampleDelivery(t *testing.T) {
|
||||||
// Let's create an even number of send batches so we don't run into the
|
// Let's create an even number of send batches so we don't run into the
|
||||||
// batch timeout case.
|
// batch timeout case.
|
||||||
n := defaultQueueManagerConfig.QueueCapacity * 2
|
n := config.DefaultQueueConfig.Capacity * 2
|
||||||
|
|
||||||
samples := make(model.Samples, 0, n)
|
samples := make(model.Samples, 0, n)
|
||||||
for i := 0; i < n; i++ {
|
for i := 0; i < n; i++ {
|
||||||
|
@ -97,7 +98,7 @@ func TestSampleDelivery(t *testing.T) {
|
||||||
c := NewTestStorageClient()
|
c := NewTestStorageClient()
|
||||||
c.expectSamples(samples[:len(samples)/2])
|
c.expectSamples(samples[:len(samples)/2])
|
||||||
|
|
||||||
cfg := defaultQueueManagerConfig
|
cfg := config.DefaultQueueConfig
|
||||||
cfg.MaxShards = 1
|
cfg.MaxShards = 1
|
||||||
m := NewQueueManager(cfg, nil, nil, c)
|
m := NewQueueManager(cfg, nil, nil, c)
|
||||||
|
|
||||||
|
@ -117,7 +118,7 @@ func TestSampleDelivery(t *testing.T) {
|
||||||
|
|
||||||
func TestSampleDeliveryOrder(t *testing.T) {
|
func TestSampleDeliveryOrder(t *testing.T) {
|
||||||
ts := 10
|
ts := 10
|
||||||
n := defaultQueueManagerConfig.MaxSamplesPerSend * ts
|
n := config.DefaultQueueConfig.MaxSamplesPerSend * ts
|
||||||
|
|
||||||
samples := make(model.Samples, 0, n)
|
samples := make(model.Samples, 0, n)
|
||||||
for i := 0; i < n; i++ {
|
for i := 0; i < n; i++ {
|
||||||
|
@ -133,7 +134,7 @@ func TestSampleDeliveryOrder(t *testing.T) {
|
||||||
|
|
||||||
c := NewTestStorageClient()
|
c := NewTestStorageClient()
|
||||||
c.expectSamples(samples)
|
c.expectSamples(samples)
|
||||||
m := NewQueueManager(defaultQueueManagerConfig, nil, nil, c)
|
m := NewQueueManager(config.DefaultQueueConfig, nil, nil, c)
|
||||||
|
|
||||||
// These should be received by the client.
|
// These should be received by the client.
|
||||||
for _, s := range samples {
|
for _, s := range samples {
|
||||||
|
@ -194,7 +195,7 @@ func TestSpawnNotMoreThanMaxConcurrentSendsGoroutines(t *testing.T) {
|
||||||
// `MaxSamplesPerSend*Shards` samples should be consumed by the
|
// `MaxSamplesPerSend*Shards` samples should be consumed by the
|
||||||
// per-shard goroutines, and then another `MaxSamplesPerSend`
|
// per-shard goroutines, and then another `MaxSamplesPerSend`
|
||||||
// should be left on the queue.
|
// should be left on the queue.
|
||||||
n := defaultQueueManagerConfig.MaxSamplesPerSend * 2
|
n := config.DefaultQueueConfig.MaxSamplesPerSend * 2
|
||||||
|
|
||||||
samples := make(model.Samples, 0, n)
|
samples := make(model.Samples, 0, n)
|
||||||
for i := 0; i < n; i++ {
|
for i := 0; i < n; i++ {
|
||||||
|
@ -208,9 +209,9 @@ func TestSpawnNotMoreThanMaxConcurrentSendsGoroutines(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
c := NewTestBlockedStorageClient()
|
c := NewTestBlockedStorageClient()
|
||||||
cfg := defaultQueueManagerConfig
|
cfg := config.DefaultQueueConfig
|
||||||
cfg.MaxShards = 1
|
cfg.MaxShards = 1
|
||||||
cfg.QueueCapacity = n
|
cfg.Capacity = n
|
||||||
m := NewQueueManager(cfg, nil, nil, c)
|
m := NewQueueManager(cfg, nil, nil, c)
|
||||||
|
|
||||||
m.Start()
|
m.Start()
|
||||||
|
@ -240,7 +241,7 @@ func TestSpawnNotMoreThanMaxConcurrentSendsGoroutines(t *testing.T) {
|
||||||
time.Sleep(10 * time.Millisecond)
|
time.Sleep(10 * time.Millisecond)
|
||||||
}
|
}
|
||||||
|
|
||||||
if m.queueLen() != defaultQueueManagerConfig.MaxSamplesPerSend {
|
if m.queueLen() != config.DefaultQueueConfig.MaxSamplesPerSend {
|
||||||
t.Fatalf("Failed to drain QueueManager queue, %d elements left",
|
t.Fatalf("Failed to drain QueueManager queue, %d elements left",
|
||||||
m.queueLen(),
|
m.queueLen(),
|
||||||
)
|
)
|
||||||
|
|
|
@ -45,7 +45,7 @@ func (w *Writer) ApplyConfig(conf *config.Config) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
newQueues = append(newQueues, NewQueueManager(
|
newQueues = append(newQueues, NewQueueManager(
|
||||||
defaultQueueManagerConfig,
|
rwConf.QueueConfig,
|
||||||
conf.GlobalConfig.ExternalLabels,
|
conf.GlobalConfig.ExternalLabels,
|
||||||
rwConf.WriteRelabelConfigs,
|
rwConf.WriteRelabelConfigs,
|
||||||
c,
|
c,
|
||||||
|
|
Loading…
Reference in a new issue