From b299aba6cf32f940596863fc8e260b96bb18c60b Mon Sep 17 00:00:00 2001 From: Chris Marchbanks Date: Fri, 26 Jun 2020 00:33:52 -0600 Subject: [PATCH 1/2] Fix panic when updating a remote write queue (#7452) Right now Queue Manager metrics are registered when the metrics struct is created, which happens before a changed queue is shutdown and the old metrics are unregistered. In the case of named queues or updates to external labels the apply config will panic due to duplicate metrics. Instead, register the metrics as part of starting the queue as we always guarantee that Stop will be called before a new Start. Signed-off-by: Chris Marchbanks --- storage/remote/queue_manager.go | 12 ++++++--- storage/remote/write_test.go | 45 ++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/storage/remote/queue_manager.go b/storage/remote/queue_manager.go index 6a8b4c4325..7872727885 100644 --- a/storage/remote/queue_manager.go +++ b/storage/remote/queue_manager.go @@ -177,8 +177,12 @@ func newQueueManagerMetrics(r prometheus.Registerer, rn, e string) *queueManager ConstLabels: constLabels, }) - if r != nil { - r.MustRegister( + return m +} + +func (m *queueManagerMetrics) register() { + if m.reg != nil { + m.reg.MustRegister( m.succeededSamplesTotal, m.failedSamplesTotal, m.retriedSamplesTotal, @@ -195,7 +199,6 @@ func newQueueManagerMetrics(r prometheus.Registerer, rn, e string) *queueManager m.bytesSent, ) } - return m } func (m *queueManagerMetrics) unregister() { @@ -358,7 +361,8 @@ outer: // Start the queue manager sending samples to the remote storage. // Does not block. func (t *QueueManager) Start() { - // Initialise some metrics. + // Register and initialise some metrics. + t.metrics.register() t.metrics.shardCapacity.Set(float64(t.cfg.Capacity)) t.metrics.pendingSamples.Set(0) t.metrics.maxNumShards.Set(float64(t.cfg.MaxShards)) diff --git a/storage/remote/write_test.go b/storage/remote/write_test.go index bb8551cc63..69b710aa5a 100644 --- a/storage/remote/write_test.go +++ b/storage/remote/write_test.go @@ -20,6 +20,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" common_config "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" @@ -154,6 +155,48 @@ func TestRestartOnNameChange(t *testing.T) { testutil.Ok(t, err) } +func TestUpdateWithRegisterer(t *testing.T) { + dir, err := ioutil.TempDir("", "TestRestartWithRegisterer") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + s := NewWriteStorage(nil, prometheus.NewRegistry(), dir, time.Millisecond) + c1 := &config.RemoteWriteConfig{ + Name: "named", + URL: &common_config.URL{ + URL: &url.URL{ + Scheme: "http", + Host: "localhost", + }, + }, + QueueConfig: config.DefaultQueueConfig, + } + c2 := &config.RemoteWriteConfig{ + URL: &common_config.URL{ + URL: &url.URL{ + Scheme: "http", + Host: "localhost", + }, + }, + QueueConfig: config.DefaultQueueConfig, + } + conf := &config.Config{ + GlobalConfig: config.DefaultGlobalConfig, + RemoteWriteConfigs: []*config.RemoteWriteConfig{c1, c2}, + } + testutil.Ok(t, s.ApplyConfig(conf)) + + c1.QueueConfig.MaxShards = 10 + c2.QueueConfig.MaxShards = 10 + testutil.Ok(t, s.ApplyConfig(conf)) + for _, queue := range s.queues { + testutil.Equals(t, 10, queue.cfg.MaxShards) + } + + err = s.Close() + testutil.Ok(t, err) +} + func TestWriteStorageLifecycle(t *testing.T) { dir, err := ioutil.TempDir("", "TestWriteStorageLifecycle") testutil.Ok(t, err) @@ -178,7 +221,7 @@ func TestUpdateExternalLabels(t *testing.T) { testutil.Ok(t, err) defer os.RemoveAll(dir) - s := NewWriteStorage(nil, nil, dir, time.Second) + s := NewWriteStorage(nil, prometheus.NewRegistry(), dir, time.Second) externalLabels := labels.FromStrings("external", "true") conf := &config.Config{ From c448ada63d83002e9c1d2c9f84e09f55a61f0ff7 Mon Sep 17 00:00:00 2001 From: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Date: Fri, 26 Jun 2020 14:27:23 +0530 Subject: [PATCH 2/2] Cut v2.19.2 (#7464) Signed-off-by: Ganesh Vernekar --- CHANGELOG.md | 4 ++++ VERSION | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf855180a4..ce2e187346 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 2.19.2 / 2020-06-26 + +* [BUGFIX] Remote Write: Fix panic when reloading config with modified queue parameters. #7452 + ## 2.19.1 / 2020-06-18 * [BUGFIX] TSDB: Fix m-map file truncation leading to unsequential files. #7414 diff --git a/VERSION b/VERSION index b8e248f40b..17bdb70fa8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.19.1 +2.19.2