Enhanced WAL replay for duplicate series record (#7438)

Signed-off-by: Ganesh Vernekar <ganeshvern@gmail.com>
Authored by Ganesh Vernekar on 2021-08-03 20:03:54 +05:30, committed by GitHub.
parent 8002a3ab80
commit 848cb5a6d6
3 changed files with 65 additions and 22 deletions


@@ -1466,7 +1466,11 @@ type memChunk struct {
 // OverlapsClosedInterval returns true if the chunk overlaps [mint, maxt].
 func (mc *memChunk) OverlapsClosedInterval(mint, maxt int64) bool {
-	return mc.minTime <= maxt && mint <= mc.maxTime
+	return overlapsClosedInterval(mc.minTime, mc.maxTime, mint, maxt)
+}
+
+func overlapsClosedInterval(mint1, maxt1, mint2, maxt2 int64) bool {
+	return mint1 <= maxt2 && mint2 <= maxt1
 }
 
 type mmappedChunk struct {

@@ -1477,7 +1481,7 @@ type mmappedChunk struct {
 // Returns true if the chunk overlaps [mint, maxt].
 func (mc *mmappedChunk) OverlapsClosedInterval(mint, maxt int64) bool {
-	return mc.minTime <= maxt && mint <= mc.maxTime
+	return overlapsClosedInterval(mc.minTime, mc.maxTime, mint, maxt)
 }
 
 type noopSeriesLifecycleCallback struct{}
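
For context, the factored-out helper is the standard closed-interval intersection test: two ranges [mint1, maxt1] and [mint2, maxt2] overlap exactly when each one starts no later than the other one ends. A minimal standalone sketch of the predicate and its edge cases (the main function is illustrative only, not part of the commit):

package main

import "fmt"

// overlapsClosedInterval mirrors the helper extracted in this commit:
// closed intervals [mint1, maxt1] and [mint2, maxt2] intersect iff each
// starts no later than the other ends.
func overlapsClosedInterval(mint1, maxt1, mint2, maxt2 int64) bool {
	return mint1 <= maxt2 && mint2 <= maxt1
}

func main() {
	fmt.Println(overlapsClosedInterval(0, 10, 10, 20)) // true: a shared endpoint counts, the intervals are closed
	fmt.Println(overlapsClosedInterval(0, 10, 11, 20)) // false: disjoint
	fmt.Println(overlapsClosedInterval(5, 6, 0, 20))   // true: full containment
}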


@@ -306,7 +306,9 @@ func TestHead_ReadWAL(t *testing.T) {
 	}
 	require.Equal(t, []sample{{100, 2}, {101, 5}}, expandChunk(s10.iterator(0, nil, head.chunkDiskMapper, nil)))
 	require.Equal(t, []sample{{101, 6}}, expandChunk(s50.iterator(0, nil, head.chunkDiskMapper, nil)))
-	require.Equal(t, []sample{{100, 3}, {101, 7}}, expandChunk(s100.iterator(0, nil, head.chunkDiskMapper, nil)))
+	// The samples before the new series record should be discarded since a duplicate record
+	// is only possible when old samples were compacted.
+	require.Equal(t, []sample{{101, 7}}, expandChunk(s100.iterator(0, nil, head.chunkDiskMapper, nil)))
 
 	q, err := head.ExemplarQuerier(context.Background())
 	require.NoError(t, err)
@@ -369,9 +371,9 @@ func TestHead_WALMultiRef(t *testing.T) {
 	q, err := NewBlockQuerier(head, 0, 2100)
 	require.NoError(t, err)
 	series := query(t, q, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar"))
+	// The samples before the new ref should be discarded since Head truncation
+	// happens only after compacting the Head.
 	require.Equal(t, map[string][]tsdbutil.Sample{`{foo="bar"}`: {
-		sample{100, 1},
-		sample{1500, 2},
 		sample{1700, 3},
 		sample{2000, 4},
 	}}, series)
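
Both tests encode the same invariant: once a duplicate series record is replayed, every later sample written under the stale ref is redirected to the surviving series, and everything replayed before the duplicate record is dropped as already compacted. A rough sketch of the ref-remapping step, assuming a multiRef map like the one loadWAL threads through replay (the types here are simplified stand-ins, not the TSDB's actual definitions):

package main

import "fmt"

// refSample is a simplified stand-in for record.RefSample.
type refSample struct {
	Ref uint64
	T   int64
	V   float64
}

// remapSamples mirrors the replay step that follows a duplicate series
// record: any sample still carrying the stale ref is redirected to the
// surviving series ref via multiRef.
func remapSamples(samples []refSample, multiRef map[uint64]uint64) []refSample {
	for i, s := range samples {
		if newRef, ok := multiRef[s.Ref]; ok {
			samples[i].Ref = newRef
		}
	}
	return samples
}

func main() {
	multiRef := map[uint64]uint64{100: 50} // ref 100 was a duplicate record for the series at ref 50
	got := remapSamples([]refSample{{Ref: 100, T: 101, V: 7}}, multiRef)
	fmt.Println(got) // [{50 101 7}]
}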


@@ -18,6 +18,7 @@ import (
 	"math"
 	"runtime"
 	"sync"
+	"time"
 
 	"github.com/go-kit/log/level"
 	"github.com/pkg/errors"
@@ -181,39 +182,75 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64, mmappedChunks
 		}
 	}()
 
+	// The records are always replayed from the oldest to the newest.
 Outer:
 	for d := range decoded {
 		switch v := d.(type) {
 		case []record.RefSeries:
-			for _, s := range v {
-				series, created, err := h.getOrCreateWithID(s.Ref, s.Labels.Hash(), s.Labels)
+			for _, walSeries := range v {
+				mSeries, created, err := h.getOrCreateWithID(walSeries.Ref, walSeries.Labels.Hash(), walSeries.Labels)
 				if err != nil {
 					seriesCreationErr = err
 					break Outer
 				}
 
-				if created {
-					// If this series gets a duplicate record, we don't restore its mmapped chunks,
-					// and instead restore everything from WAL records.
-					series.mmappedChunks = mmappedChunks[series.ref]
-
-					h.metrics.chunks.Add(float64(len(series.mmappedChunks)))
-					h.metrics.chunksCreated.Add(float64(len(series.mmappedChunks)))
-
-					if len(series.mmappedChunks) > 0 {
-						h.updateMinMaxTime(series.minTime(), series.maxTime())
-					}
-				} else {
-					// TODO(codesome) Discard old samples and mmapped chunks and use mmap chunks for the new series ID.
-
-					// There's already a different ref for this series.
-					multiRef[s.Ref] = series.ref
+				if h.lastSeriesID.Load() < walSeries.Ref {
+					h.lastSeriesID.Store(walSeries.Ref)
 				}
 
-				if h.lastSeriesID.Load() < s.Ref {
-					h.lastSeriesID.Store(s.Ref)
+				mmc := mmappedChunks[walSeries.Ref]
+
+				if created {
+					// This is the first WAL series record for this series.
+					h.metrics.chunksCreated.Add(float64(len(mmc)))
+					h.metrics.chunks.Add(float64(len(mmc)))
+					mSeries.mmappedChunks = mmc
+					continue
 				}
+
+				// There's already a different ref for this series.
+				// A duplicate series record is only possible when the old samples were already compacted into a block.
+				// Hence we can discard all the samples and m-mapped chunks replayed till now for this series.
+				multiRef[walSeries.Ref] = mSeries.ref
+
+				idx := mSeries.ref % uint64(n)
+				// It is possible that some old sample is being processed in processWALSamples, which
+				// could cause a race below. So we wait for the goroutine to empty the input buffer and
+				// finish processing all old samples after emptying the buffer.
+				inputs[idx] <- []record.RefSample{}
+				for len(inputs[idx]) != 0 {
+					time.Sleep(1 * time.Millisecond)
+				}
+
+				// Checking if the new m-mapped chunks overlap with the already existing ones.
+				// This should never happen, but we have a check anyway to detect any
+				// edge cases that we might have missed.
+				if len(mSeries.mmappedChunks) > 0 && len(mmc) > 0 {
+					if overlapsClosedInterval(
+						mSeries.mmappedChunks[0].minTime,
+						mSeries.mmappedChunks[len(mSeries.mmappedChunks)-1].maxTime,
+						mmc[0].minTime,
+						mmc[len(mmc)-1].maxTime,
+					) {
+						// The m-map chunks for the new series ref overlap with the old m-map chunks.
+						seriesCreationErr = errors.Errorf("overlapping m-mapped chunks for series %s", mSeries.lset.String())
+						break Outer
+					}
+				}
+
+				// Replacing m-mapped chunks with the new ones (could be empty).
+				h.metrics.chunksCreated.Add(float64(len(mmc)))
+				h.metrics.chunksRemoved.Add(float64(len(mSeries.mmappedChunks)))
+				h.metrics.chunks.Add(float64(len(mmc) - len(mSeries.mmappedChunks)))
+				mSeries.mmappedChunks = mmc
+
+				// Any samples replayed till now would already be compacted. Resetting the head chunk.
+				mSeries.nextAt = 0
+				mSeries.headChunk = nil
+				mSeries.app = nil
+				h.updateMinMaxTime(mSeries.minTime(), mSeries.maxTime())
 			}
 			//nolint:staticcheck // Ignore SA6002 relax staticcheck verification.
 			seriesPool.Put(v)
 		case []record.RefSample:
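
The send-empty-then-poll sequence above is how the commit flushes a shard's sample pipeline before touching the series: the channel is FIFO and has a single consumer, so once the buffered channel is empty the worker has fully processed every batch queued before that point, and the old m-mapped chunks and head chunk can be swapped out without racing processWALSamples. A self-contained sketch of that drain pattern under those assumptions (all names here are illustrative, not the TSDB's):

package main

import (
	"fmt"
	"sync"
	"time"
)

func main() {
	input := make(chan []int, 64) // buffered per-shard channel, as in loadWAL
	var wg sync.WaitGroup
	processed := 0

	wg.Add(1)
	go func() {
		defer wg.Done()
		for batch := range input {
			processed += len(batch) // stand-in for processWALSamples
		}
	}()

	input <- []int{1, 2, 3}

	// Flush: send an empty batch, then poll until the buffer is drained.
	// Once len(input) == 0, the worker has dequeued the empty batch, which
	// means every earlier batch has been fully processed (the loop body
	// completes before the next receive). Shared per-series state can now
	// be mutated safely.
	input <- []int{}
	for len(input) != 0 {
		time.Sleep(time.Millisecond)
	}

	// ... mutate state the worker also touches ...

	close(input)
	wg.Wait()
	fmt.Println("processed:", processed)
}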