remote_write: shard up more when backlogged

Change the coefficient from 1% to 5%, so instead of targeting to clear
the backlog in 100s we target 20s.

Update unit test to reflect the new behaviour.

Signed-off-by: Bryan Boreham <bjboreham@gmail.com>
This commit is contained in:
Bryan Boreham 2021-08-29 18:11:13 +01:00
parent d588b14d9c
commit 6d01ce8c4d
2 changed files with 10 additions and 12 deletions

View file

@ -831,14 +831,12 @@ func (t *QueueManager) calculateDesiredShards() int {
return t.numShards
}
// When behind we will try to catch up on a proportion of samples per tick.
// This works similarly to an integral accumulator in that pending samples
// is the result of the error integral.
const integralGain = 0.1 / float64(shardUpdateDuration/time.Second)
var (
// When behind we will try to catch up on 5% of samples per second.
backlogCatchup = 0.05 * dataPending
// Calculate Time to send one sample, averaged across all sends done this tick.
timePerSample = dataOutDuration / dataOutRate
desiredShards = timePerSample * (dataInRate*dataKeptRatio + integralGain*dataPending)
desiredShards = timePerSample * (dataInRate*dataKeptRatio + backlogCatchup)
)
t.metrics.desiredNumShards.Set(desiredShards)
level.Debug(t.logger).Log("msg", "QueueManager.calculateDesiredShards",

View file

@ -990,25 +990,25 @@ func TestCalculateDesiredShardsDetail(t *testing.T) {
dataOut: 10,
dataOutDuration: 2,
backlog: 20,
expectedShards: 2, // ? should be trying to catch up
expectedShards: 4,
},
{
name: "backlogged 90s",
prevShards: 2,
prevShards: 4,
dataIn: 10,
dataOut: 10,
dataOutDuration: 2,
dataOutDuration: 4,
backlog: 90,
expectedShards: 4, // ?! should be trying to go much faster
expectedShards: 22,
},
{
name: "backlog reduced",
prevShards: 4,
prevShards: 22,
dataIn: 10,
dataOut: 20,
dataOutDuration: 4,
backlog: 10,
expectedShards: 3, // ?! shouldn't downshard from 4 to 3: less than 30% change
expectedShards: 3,
},
{
name: "backlog eliminated",