diff --git a/tsdb/db.go b/tsdb/db.go index 2d35e3fb00..ea8cbfff0b 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -992,9 +992,14 @@ func open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, rn db.metrics.maxBytes.Set(float64(maxBytes)) db.metrics.retentionDuration.Set((time.Duration(opts.RetentionDuration) * time.Millisecond).Seconds()) + // Calling db.reload() calls db.reloadBlocks() which requires cmtx to be locked. + db.cmtx.Lock() if err := db.reload(); err != nil { + db.cmtx.Unlock() return nil, err } + db.cmtx.Unlock() + // Set the min valid time for the ingested samples // to be no lower than the maxt of the last block. minValidTime := int64(math.MinInt64) @@ -1568,13 +1573,9 @@ func (db *DB) reloadBlocks() (err error) { db.metrics.reloads.Inc() }() - // Now that we reload TSDB every minute, there is a high chance for a race condition with a reload - // triggered by CleanTombstones(). We need to lock the reload to avoid the situation where - // a normal reload and CleanTombstones try to delete the same block. - db.mtx.Lock() - defer db.mtx.Unlock() - + db.mtx.RLock() loadable, corrupted, err := openBlocks(db.logger, db.dir, db.blocks, db.chunkPool, db.opts.PostingsDecoderFactory) + db.mtx.RUnlock() if err != nil { return err } @@ -1643,8 +1644,10 @@ func (db *DB) reloadBlocks() (err error) { }) // Swap new blocks first for subsequently created readers to be seen. + db.mtx.Lock() oldBlocks := db.blocks db.blocks = toLoad + db.mtx.Unlock() // Only check overlapping blocks when overlapping compaction is enabled. if db.opts.EnableOverlappingCompaction {