mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-12 22:37:27 -08:00
Fix deadlock when stopping a shard (#10279)
If a queue is stopped and one of its shards happens to hit the batch_send_deadline at the same time a deadlock can occur where stop holds the mutex and will not release it until the send is finished, but the send needs the mutex to retrieve the most recent batch. This is fixed by using a second mutex just for writing. In addition, the test I wrote exposed a case where during shutdown a batch could be sent twice due to concurrent calls to queue.Batch() and queue.FlushAndShutdown(). Protect these with a mutex as well. Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
This commit is contained in:
parent
1b3443ede1
commit
bfb1500a38
|
@ -906,7 +906,8 @@ func (t *QueueManager) newShards() *shards {
|
||||||
}
|
}
|
||||||
|
|
||||||
type shards struct {
|
type shards struct {
|
||||||
mtx sync.RWMutex // With the WAL, this is never actually contended.
|
mtx sync.RWMutex // With the WAL, this is never actually contended.
|
||||||
|
writeMtx sync.Mutex
|
||||||
|
|
||||||
qm *QueueManager
|
qm *QueueManager
|
||||||
queues []*queue
|
queues []*queue
|
||||||
|
@ -949,6 +950,8 @@ func (s *shards) start(n int) {
|
||||||
s.softShutdown = make(chan struct{})
|
s.softShutdown = make(chan struct{})
|
||||||
s.running.Store(int32(n))
|
s.running.Store(int32(n))
|
||||||
s.done = make(chan struct{})
|
s.done = make(chan struct{})
|
||||||
|
s.enqueuedSamples.Store(0)
|
||||||
|
s.enqueuedExemplars.Store(0)
|
||||||
s.samplesDroppedOnHardShutdown.Store(0)
|
s.samplesDroppedOnHardShutdown.Store(0)
|
||||||
s.exemplarsDroppedOnHardShutdown.Store(0)
|
s.exemplarsDroppedOnHardShutdown.Store(0)
|
||||||
for i := 0; i < n; i++ {
|
for i := 0; i < n; i++ {
|
||||||
|
@ -996,6 +999,8 @@ func (s *shards) stop() {
|
||||||
func (s *shards) enqueue(ref chunks.HeadSeriesRef, data sampleOrExemplar) bool {
|
func (s *shards) enqueue(ref chunks.HeadSeriesRef, data sampleOrExemplar) bool {
|
||||||
s.mtx.RLock()
|
s.mtx.RLock()
|
||||||
defer s.mtx.RUnlock()
|
defer s.mtx.RUnlock()
|
||||||
|
s.writeMtx.Lock()
|
||||||
|
defer s.writeMtx.Unlock()
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case <-s.softShutdown:
|
case <-s.softShutdown:
|
||||||
|
@ -1024,14 +1029,18 @@ func (s *shards) enqueue(ref chunks.HeadSeriesRef, data sampleOrExemplar) bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
type queue struct {
|
type queue struct {
|
||||||
|
// batchMtx covers sending to the batchQueue and batch operations other
|
||||||
|
// than appending a sample. It is mainly to make sure (*queue).Batch() and
|
||||||
|
// (*queue).FlushAndShutdown() are not called concurrently.
|
||||||
|
batchMtx sync.Mutex
|
||||||
batch []sampleOrExemplar
|
batch []sampleOrExemplar
|
||||||
batchQueue chan []sampleOrExemplar
|
batchQueue chan []sampleOrExemplar
|
||||||
|
|
||||||
// Since we know there are a limited number of batches out, using a stack
|
// Since we know there are a limited number of batches out, using a stack
|
||||||
// is easy and safe so a sync.Pool is not necessary.
|
// is easy and safe so a sync.Pool is not necessary.
|
||||||
|
// poolMtx covers adding and removing batches from the batchPool.
|
||||||
|
poolMtx sync.Mutex
|
||||||
batchPool [][]sampleOrExemplar
|
batchPool [][]sampleOrExemplar
|
||||||
// This mutex covers adding and removing batches from the batchPool.
|
|
||||||
poolMux sync.Mutex
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type sampleOrExemplar struct {
|
type sampleOrExemplar struct {
|
||||||
|
@ -1076,6 +1085,9 @@ func (q *queue) Chan() <-chan []sampleOrExemplar {
|
||||||
// Batch returns the current batch and allocates a new batch. Must not be
|
// Batch returns the current batch and allocates a new batch. Must not be
|
||||||
// called concurrently with Append.
|
// called concurrently with Append.
|
||||||
func (q *queue) Batch() []sampleOrExemplar {
|
func (q *queue) Batch() []sampleOrExemplar {
|
||||||
|
q.batchMtx.Lock()
|
||||||
|
defer q.batchMtx.Unlock()
|
||||||
|
|
||||||
batch := q.batch
|
batch := q.batch
|
||||||
q.batch = q.newBatch(cap(batch))
|
q.batch = q.newBatch(cap(batch))
|
||||||
return batch
|
return batch
|
||||||
|
@ -1083,8 +1095,8 @@ func (q *queue) Batch() []sampleOrExemplar {
|
||||||
|
|
||||||
// ReturnForReuse adds the batch buffer back to the internal pool.
|
// ReturnForReuse adds the batch buffer back to the internal pool.
|
||||||
func (q *queue) ReturnForReuse(batch []sampleOrExemplar) {
|
func (q *queue) ReturnForReuse(batch []sampleOrExemplar) {
|
||||||
q.poolMux.Lock()
|
q.poolMtx.Lock()
|
||||||
defer q.poolMux.Unlock()
|
defer q.poolMtx.Unlock()
|
||||||
if len(q.batchPool) < cap(q.batchPool) {
|
if len(q.batchPool) < cap(q.batchPool) {
|
||||||
q.batchPool = append(q.batchPool, batch[:0])
|
q.batchPool = append(q.batchPool, batch[:0])
|
||||||
}
|
}
|
||||||
|
@ -1093,6 +1105,9 @@ func (q *queue) ReturnForReuse(batch []sampleOrExemplar) {
|
||||||
// FlushAndShutdown stops the queue and flushes any samples. No appends can be
|
// FlushAndShutdown stops the queue and flushes any samples. No appends can be
|
||||||
// made after this is called.
|
// made after this is called.
|
||||||
func (q *queue) FlushAndShutdown(done <-chan struct{}) {
|
func (q *queue) FlushAndShutdown(done <-chan struct{}) {
|
||||||
|
q.batchMtx.Lock()
|
||||||
|
defer q.batchMtx.Unlock()
|
||||||
|
|
||||||
if len(q.batch) > 0 {
|
if len(q.batch) > 0 {
|
||||||
select {
|
select {
|
||||||
case q.batchQueue <- q.batch:
|
case q.batchQueue <- q.batch:
|
||||||
|
@ -1106,8 +1121,8 @@ func (q *queue) FlushAndShutdown(done <-chan struct{}) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (q *queue) newBatch(capacity int) []sampleOrExemplar {
|
func (q *queue) newBatch(capacity int) []sampleOrExemplar {
|
||||||
q.poolMux.Lock()
|
q.poolMtx.Lock()
|
||||||
defer q.poolMux.Unlock()
|
defer q.poolMtx.Unlock()
|
||||||
batches := len(q.batchPool)
|
batches := len(q.batchPool)
|
||||||
if batches > 0 {
|
if batches > 0 {
|
||||||
batch := q.batchPool[batches-1]
|
batch := q.batchPool[batches-1]
|
||||||
|
@ -1186,18 +1201,22 @@ func (s *shards) runShard(ctx context.Context, shardID int, queue *queue) {
|
||||||
timer.Reset(time.Duration(s.qm.cfg.BatchSendDeadline))
|
timer.Reset(time.Duration(s.qm.cfg.BatchSendDeadline))
|
||||||
|
|
||||||
case <-timer.C:
|
case <-timer.C:
|
||||||
// We need to take the lock when getting a batch to avoid
|
// We need to take the write lock when getting a batch to avoid
|
||||||
// concurrent Appends. Generally this will only happen on low
|
// concurrent Appends. Generally this will only happen on low
|
||||||
// traffic instances.
|
// traffic instances or during resharding. We have to use writeMtx
|
||||||
s.mtx.Lock()
|
// and not the batchMtx on a queue because we do not want to have
|
||||||
// First, we need to see if we can happen to get a batch from the queue if it filled while acquiring the lock.
|
// to lock each queue for each sample, and cannot call
|
||||||
|
// queue.Batch() while an Append happens.
|
||||||
|
s.writeMtx.Lock()
|
||||||
|
// First, we need to see if we can happen to get a batch from the
|
||||||
|
// queue if it filled while acquiring the lock.
|
||||||
var batch []sampleOrExemplar
|
var batch []sampleOrExemplar
|
||||||
select {
|
select {
|
||||||
case batch = <-batchQueue:
|
case batch = <-batchQueue:
|
||||||
default:
|
default:
|
||||||
batch = queue.Batch()
|
batch = queue.Batch()
|
||||||
}
|
}
|
||||||
s.mtx.Unlock()
|
s.writeMtx.Unlock()
|
||||||
if len(batch) > 0 {
|
if len(batch) > 0 {
|
||||||
nPendingSamples, nPendingExemplars := s.populateTimeSeries(batch, pendingData)
|
nPendingSamples, nPendingExemplars := s.populateTimeSeries(batch, pendingData)
|
||||||
n := nPendingSamples + nPendingExemplars
|
n := nPendingSamples + nPendingExemplars
|
||||||
|
|
|
@ -382,6 +382,44 @@ func TestReshardRaceWithStop(t *testing.T) {
|
||||||
<-exitCh
|
<-exitCh
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestReshardPartialBatch(t *testing.T) {
|
||||||
|
samples, series := createTimeseries(1, 10)
|
||||||
|
|
||||||
|
c := NewTestBlockedWriteClient()
|
||||||
|
|
||||||
|
cfg := config.DefaultQueueConfig
|
||||||
|
mcfg := config.DefaultMetadataConfig
|
||||||
|
cfg.MaxShards = 1
|
||||||
|
batchSendDeadline := time.Millisecond
|
||||||
|
flushDeadline := 10 * time.Millisecond
|
||||||
|
cfg.BatchSendDeadline = model.Duration(batchSendDeadline)
|
||||||
|
|
||||||
|
metrics := newQueueManagerMetrics(nil, "", "")
|
||||||
|
m := NewQueueManager(metrics, nil, nil, nil, t.TempDir(), newEWMARate(ewmaWeight, shardUpdateDuration), cfg, mcfg, nil, nil, c, flushDeadline, newPool(), newHighestTimestampMetric(), nil, false)
|
||||||
|
m.StoreSeries(series, 0)
|
||||||
|
|
||||||
|
m.Start()
|
||||||
|
|
||||||
|
for i := 0; i < 100; i++ {
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
m.Append(samples)
|
||||||
|
time.Sleep(batchSendDeadline)
|
||||||
|
m.shards.stop()
|
||||||
|
m.shards.start(1)
|
||||||
|
done <- struct{}{}
|
||||||
|
}()
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
t.Error("Deadlock between sending and stopping detected")
|
||||||
|
t.FailNow()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// We can only call stop if there was not a deadlock.
|
||||||
|
m.Stop()
|
||||||
|
}
|
||||||
|
|
||||||
func TestReleaseNoninternedString(t *testing.T) {
|
func TestReleaseNoninternedString(t *testing.T) {
|
||||||
cfg := config.DefaultQueueConfig
|
cfg := config.DefaultQueueConfig
|
||||||
mcfg := config.DefaultMetadataConfig
|
mcfg := config.DefaultMetadataConfig
|
||||||
|
|
Loading…
Reference in a new issue