tsdb: Replay m-map chunk only when required

M-map chunks replayed on startup are discarded if no WAL
was found and no snapshot was loaded, because no series
exist in the Head that they can map to. So only load
m-map chunks from disk if either a snapshot was loaded
or there is a WAL on disk.

Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com>
This commit is contained in:
Ganesh Vernekar 2023-03-13 11:15:45 +05:30
parent 6c008ec56a
commit c9d06f2826
No known key found for this signature in database
GPG key ID: F056451B52F1DC34

View file

@ -590,6 +590,7 @@ func (h *Head) Init(minValidTime int64) error {
snapIdx, snapOffset := -1, 0 snapIdx, snapOffset := -1, 0
refSeries := make(map[chunks.HeadSeriesRef]*memSeries) refSeries := make(map[chunks.HeadSeriesRef]*memSeries)
snapshotLoaded := false
if h.opts.EnableMemorySnapshotOnShutdown { if h.opts.EnableMemorySnapshotOnShutdown {
level.Info(h.logger).Log("msg", "Chunk snapshot is enabled, replaying from the snapshot") level.Info(h.logger).Log("msg", "Chunk snapshot is enabled, replaying from the snapshot")
// If there are any WAL files, there should be at least one WAL file with an index that is current or newer // If there are any WAL files, there should be at least one WAL file with an index that is current or newer
@ -619,6 +620,7 @@ func (h *Head) Init(minValidTime int64) error {
var err error var err error
snapIdx, snapOffset, refSeries, err = h.loadChunkSnapshot() snapIdx, snapOffset, refSeries, err = h.loadChunkSnapshot()
if err == nil { if err == nil {
snapshotLoaded = true
level.Info(h.logger).Log("msg", "Chunk snapshot loading time", "duration", time.Since(start).String()) level.Info(h.logger).Log("msg", "Chunk snapshot loading time", "duration", time.Since(start).String())
} }
if err != nil { if err != nil {
@ -636,26 +638,36 @@ func (h *Head) Init(minValidTime int64) error {
} }
mmapChunkReplayStart := time.Now() mmapChunkReplayStart := time.Now()
mmappedChunks, oooMmappedChunks, lastMmapRef, err := h.loadMmappedChunks(refSeries) var (
if err != nil { mmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk
// TODO(codesome): clear out all m-map chunks here for refSeries. oooMmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk
level.Error(h.logger).Log("msg", "Loading on-disk chunks failed", "err", err) lastMmapRef chunks.ChunkDiskMapperRef
if _, ok := errors.Cause(err).(*chunks.CorruptionErr); ok { err error
h.metrics.mmapChunkCorruptionTotal.Inc() )
} if snapshotLoaded || h.wal != nil {
// If snapshot was not loaded and if there is no WAL, then m-map chunks will be discarded
// Discard snapshot data since we need to replay the WAL for the missed m-map chunks data. // anyway. So we only load m-map chunks when it won't be discarded.
snapIdx, snapOffset = -1, 0 mmappedChunks, oooMmappedChunks, lastMmapRef, err = h.loadMmappedChunks(refSeries)
// If this fails, data will be recovered from WAL.
// Hence we wont lose any data (given WAL is not corrupt).
mmappedChunks, oooMmappedChunks, lastMmapRef, err = h.removeCorruptedMmappedChunks(err)
if err != nil { if err != nil {
return err // TODO(codesome): clear out all m-map chunks here for refSeries.
level.Error(h.logger).Log("msg", "Loading on-disk chunks failed", "err", err)
if _, ok := errors.Cause(err).(*chunks.CorruptionErr); ok {
h.metrics.mmapChunkCorruptionTotal.Inc()
}
// Discard snapshot data since we need to replay the WAL for the missed m-map chunks data.
snapIdx, snapOffset = -1, 0
// If this fails, data will be recovered from WAL.
// Hence we wont lose any data (given WAL is not corrupt).
mmappedChunks, oooMmappedChunks, lastMmapRef, err = h.removeCorruptedMmappedChunks(err)
if err != nil {
return err
}
} }
level.Info(h.logger).Log("msg", "On-disk memory mappable chunks replay completed", "duration", time.Since(mmapChunkReplayStart).String())
} }
level.Info(h.logger).Log("msg", "On-disk memory mappable chunks replay completed", "duration", time.Since(mmapChunkReplayStart).String())
if h.wal == nil { if h.wal == nil {
level.Info(h.logger).Log("msg", "WAL not found") level.Info(h.logger).Log("msg", "WAL not found")
return nil return nil