mirror of https://github.com/prometheus/prometheus.git
remote_write: shard up more when backlogged
Change the coefficient from 1% to 5%, so that instead of aiming to clear the backlog in 100s we aim to clear it in 20s. Update the unit test to reflect the new behaviour.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
parent d588b14d9c
commit 6d01ce8c4d
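For scale: the old gain was integralGain = 0.1 per shard-update tick, which (assuming shardUpdateDuration keeps its 10s default) is 1% of the pending backlog per second, so a static backlog drains in roughly 100s; the new coefficient of 0.05 targets 5% per second, roughly 20s. A minimal sketch of that arithmetic, with the 10s tick as an assumption:

package main

import (
	"fmt"
	"time"
)

// Assumed default from queue_manager.go; only the diff below is authoritative.
const shardUpdateDuration = 10 * time.Second

func main() {
	oldGain := 0.1 / float64(shardUpdateDuration/time.Second) // 0.01, i.e. 1% of backlog per second
	newGain := 0.05                                           // 5% of backlog per second

	fmt.Printf("old: %.0f%%/s, backlog cleared in ~%.0fs\n", oldGain*100, 1/oldGain) // ~100s
	fmt.Printf("new: %.0f%%/s, backlog cleared in ~%.0fs\n", newGain*100, 1/newGain) // ~20s
}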
@@ -831,14 +831,12 @@ func (t *QueueManager) calculateDesiredShards() int {
 		return t.numShards
 	}
 
-	// When behind we will try to catch up on a proporation of samples per tick.
-	// This works similarly to an integral accumulator in that pending samples
-	// is the result of the error integral.
-	const integralGain = 0.1 / float64(shardUpdateDuration/time.Second)
-
 	var (
+		// When behind we will try to catch up on 5% of samples per second.
+		backlogCatchup = 0.05 * dataPending
+		// Calculate Time to send one sample, averaged across all sends done this tick.
 		timePerSample = dataOutDuration / dataOutRate
-		desiredShards = timePerSample * (dataInRate*dataKeptRatio + integralGain*dataPending)
+		desiredShards = timePerSample * (dataInRate*dataKeptRatio + backlogCatchup)
 	)
 	t.metrics.desiredNumShards.Set(desiredShards)
 	level.Debug(t.logger).Log("msg", "QueueManager.calculateDesiredShards",
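The changed lines reduce to a single expression: desiredShards = timePerSample * (dataInRate*dataKeptRatio + backlogCatchup), i.e. seconds-per-sample times the samples-per-second the queue must handle (steady ingestion plus 5% of the pending backlog), which comes out as a shard count. A sketch of that formula as a free function, with the rates passed in directly instead of being read from the QueueManager's EWMAs (names follow the diff; the wrapper itself is hypothetical):

package main

import "fmt"

// desiredShards restates the new calculation outside the QueueManager.
// dataOutDuration is send time in seconds accumulated per second across all shards.
func desiredShards(dataInRate, dataKeptRatio, dataOutRate, dataOutDuration, dataPending float64) float64 {
	// When behind, try to catch up on 5% of pending samples per second.
	backlogCatchup := 0.05 * dataPending
	// Average time to send one sample during the last tick.
	timePerSample := dataOutDuration / dataOutRate
	// (s/sample) * (samples/s): shards needed to keep up and work off the backlog.
	return timePerSample * (dataInRate*dataKeptRatio + backlogCatchup)
}

func main() {
	// With no backlog this is just "shards needed to keep up with ingestion".
	fmt.Println(desiredShards(10, 1, 10, 2, 0)) // (2/10) * 10 = 2
}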
@@ -990,25 +990,25 @@ func TestCalculateDesiredShardsDetail(t *testing.T) {
 			dataOut:         10,
 			dataOutDuration: 2,
 			backlog:         20,
-			expectedShards:  2, // ? should be trying to catch up
+			expectedShards:  4,
 		},
 		{
 			name:            "backlogged 90s",
-			prevShards:      2,
+			prevShards:      4,
 			dataIn:          10,
 			dataOut:         10,
-			dataOutDuration: 2,
+			dataOutDuration: 4,
 			backlog:         90,
-			expectedShards:  4, // ?! should be trying to go much faster
+			expectedShards:  22,
 		},
 		{
 			name:            "backlog reduced",
-			prevShards:      4,
+			prevShards:      22,
 			dataIn:          10,
 			dataOut:         20,
 			dataOutDuration: 4,
 			backlog:         10,
-			expectedShards:  3, // ?! shouldn't downshard from 4 to 3: less than 30% change
+			expectedShards:  3,
 		},
 		{
 			name:            "backlog eliminated",
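The new expectedShards values follow directly from that formula. A quick check of the three updated rows, assuming (as the test setup implies but this hunk does not show) that backlog is a delay in seconds, that no samples are dropped, and that the final ceil is not overridden by the ±30% shard-change tolerance, since each result is far from prevShards:

package main

import (
	"fmt"
	"math"
)

// want reproduces the new desiredShards arithmetic for one test row.
// Assumption: pending samples = backlog (seconds of delay) * dataIn * keptRatio.
func want(dataIn, dataOut, dataOutDuration, backlog float64) int {
	keptRatio := 1.0 // no dropped samples in these rows
	timePerSample := dataOutDuration / dataOut
	backlogCatchup := 0.05 * backlog * dataIn * keptRatio
	return int(math.Ceil(timePerSample * (dataIn*keptRatio + backlogCatchup)))
}

func main() {
	fmt.Println(want(10, 10, 2, 20)) // "backlogged 20s":  0.2 * (10 + 10) ->  4
	fmt.Println(want(10, 10, 4, 90)) // "backlogged 90s":  0.4 * (10 + 45) -> 22
	fmt.Println(want(10, 20, 4, 10)) // "backlog reduced": 0.2 * (10 + 5)  ->  3
}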