[tsdb] re-implement WAL watcher to read via a "notification" channel (#11949)

* WIP: implement WAL watcher reading via notifications over a channel from
the TSDB code

Signed-off-by: Callum Styan <callumstyan@gmail.com>
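
A minimal sketch of the pattern this series of commits introduces (simplified and hypothetical: the real Watcher in the diff below also tracks segments, checkpoints, and metrics). Instead of polling the WAL on a short fixed period, the watcher blocks until it is either notified of a write or a longer fallback timeout fires:

```go
package main

import (
	"log"
	"time"
)

type watcher struct {
	readNotify chan struct{} // signalled after WAL writes are committed
	quit       chan struct{}
}

// readToEnd is a hypothetical stand-in for reading the current WAL
// segment until EOF and forwarding the records somewhere else.
func (w *watcher) readToEnd() { log.Println("read segment to EOF") }

func (w *watcher) run(readTimeout time.Duration) {
	ticker := time.NewTicker(readTimeout)
	defer ticker.Stop()
	for {
		select {
		case <-w.quit:
			return
		case <-w.readNotify: // a write happened, read immediately
			w.readToEnd()
			ticker.Reset(readTimeout) // push the fallback read further out
		case <-ticker.C: // no notifications recently, read anyway
			w.readToEnd()
		}
	}
}

func main() {
	w := &watcher{readNotify: make(chan struct{}), quit: make(chan struct{})}
	go w.run(15 * time.Second)
	w.readNotify <- struct{}{} // simulate a write notification
	time.Sleep(50 * time.Millisecond)
	close(w.quit)
}
```

Because any pending read catches up to EOF, a notification that arrives while a read is already in flight can safely be dropped rather than queued.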

* Notify via the head appender's Commit (after all WAL logging has finished)
rather than on each WAL Log call

Signed-off-by: Callum Styan <callumstyan@gmail.com>
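
Roughly the shape of that change, as a hedged sketch rather than the real headAppender: the appender logs everything it has buffered for the commit and only then notifies the watcher once, instead of signalling from every individual Log call.

```go
package sketch

// WriteNotified matches the interface the diff adds in tsdb/wlog.
type WriteNotified interface {
	Notify()
}

// appender is a hypothetical, stripped-down stand-in for headAppender.
type appender struct {
	pendingRecords [][]byte
	logToWAL       func(recs ...[]byte) error // stand-in for wlog.WL.Log
	writeNotified  WriteNotified              // nil if nothing is listening
}

func (a *appender) Commit() error {
	// Write everything buffered by this commit to the WAL first.
	if err := a.logToWAL(a.pendingRecords...); err != nil {
		return err
	}
	// All WAL logging for this commit has finished: notify exactly once.
	if a.writeNotified != nil {
		a.writeNotified.Notify()
	}
	a.pendingRecords = a.pendingRecords[:0]
	return nil
}
```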

* Fix misspelled Notify and add a metric for dropped write notifications

Signed-off-by: Callum Styan <callumstyan@gmail.com>
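
For reference, a sketch of how the skipped-notification counter and the non-blocking send fit together (names mirror the diff; the help text and wiring here are paraphrased, not the exact code):

```go
package sketch

import "github.com/prometheus/client_golang/prometheus"

// Registered once alongside the other watcher metrics in the real code.
var notificationsSkipped = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: "prometheus",
		Subsystem: "wal_watcher",
		Name:      "notifications_skipped_total",
		Help:      "WAL write notifications skipped because a read was already pending.",
	},
	[]string{"consumer"},
)

type watcher struct {
	readNotify           chan struct{}
	notificationsSkipped prometheus.Counter // notificationsSkipped.WithLabelValues(name)
}

// Notify never blocks the caller: if the watcher is already mid-read,
// drop the notification and count it; the in-flight read hits EOF anyway.
func (w *watcher) Notify() {
	select {
	case w.readNotify <- struct{}{}:
	default:
		w.notificationsSkipped.Inc()
	}
}
```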

* Update tests to handle new notification pattern

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* This test may need more time on Windows?

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Does this test need more time on Windows as well?

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* read timeout is already a time.Duration

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Remove mistakenly committed benchmark data files

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* address some review feedback

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* fix missed changes from previous commit

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Fix issues from wrapper function

Signed-off-by: Callum Styan <callumstyan@gmail.com>

* Try fixing a race condition in the tests by allowing them to override the
read ticker timeout instead of calling the Notify function

Signed-off-by: Callum Styan <callumstyan@gmail.com>
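
A self-contained sketch of that testing idea (the watcher stand-in here is hypothetical; the real tests in the diff shorten the package-level readTimeout and poll with require.Eventually): lower the fallback ticker timeout so reads happen soon, with no explicit and potentially racy Notify call.

```go
package sketch

import (
	"sync/atomic"
	"testing"
	"time"

	"github.com/stretchr/testify/require"
)

// Stand-in for the package-level timeout the real tests shorten
// (it defaults to 15s in tsdb/wlog).
var readTimeout = 15 * time.Second

func TestTickerDrivenRead(t *testing.T) {
	readTimeout = time.Second // override for the test: no Notify call needed

	var reads atomic.Int64
	quit := make(chan struct{})
	defer close(quit)

	// Stand-in watcher loop: "reads" whenever the fallback ticker fires.
	go func() {
		ticker := time.NewTicker(readTimeout)
		defer ticker.Stop()
		for {
			select {
			case <-quit:
				return
			case <-ticker.C:
				reads.Add(1)
			}
		}
	}()

	// Poll for progress instead of racing an explicit notification.
	require.Eventually(t, func() bool {
		return reads.Load() > 0
	}, 10*time.Second, 100*time.Millisecond)
}
```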

* fix linting

Signed-off-by: Callum Styan <callumstyan@gmail.com>

---------

Signed-off-by: Callum Styan <callumstyan@gmail.com>
Callum Styan 2023-05-15 12:31:49 -07:00 committed by GitHub
parent b727e69b76
commit 0d2108ad79
8 changed files with 128 additions and 40 deletions


@@ -1053,6 +1053,7 @@ func main() {
startTimeMargin := int64(2 * time.Duration(cfg.tsdb.MinBlockDuration).Seconds() * 1000)
localStorage.Set(db, startTimeMargin)
db.SetWriteNotified(remoteStorage)
close(dbOpen)
<-cancel
return nil


@@ -76,6 +76,13 @@ func NewStorage(l log.Logger, reg prometheus.Registerer, stCallback startTimeCal
return s
}
func (s *Storage) Notify() {
for _, q := range s.rws.queues {
// These should all be non-blocking.
q.watcher.Notify()
}
}
// ApplyConfig updates the state as the new config requires.
func (s *Storage) ApplyConfig(conf *config.Config) error {
s.mtx.Lock()


@@ -229,6 +229,8 @@ type DB struct {
// out-of-order compaction and vertical queries.
oooWasEnabled atomic.Bool
writeNotified wlog.WriteNotified
registerer prometheus.Registerer
}
@@ -802,6 +804,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs
if err != nil {
return nil, err
}
db.head.writeNotified = db.writeNotified
// Register metrics after assigning the head block.
db.metrics = newDBMetrics(db, r)
@@ -2016,6 +2019,12 @@ func (db *DB) CleanTombstones() (err error) {
return nil
}
func (db *DB) SetWriteNotified(wn wlog.WriteNotified) {
db.writeNotified = wn
// It's possible we have already created the head struct, so we should also set the WriteNotified on it.
db.head.writeNotified = wn
}
func isBlockDir(fi fs.DirEntry) bool {
if !fi.IsDir() {
return false


@@ -121,6 +121,8 @@ type Head struct {
stats *HeadStats
reg prometheus.Registerer
writeNotified wlog.WriteNotified
memTruncationInProcess atomic.Bool
}


@@ -842,6 +842,10 @@ func (a *headAppender) Commit() (err error) {
return errors.Wrap(err, "write to WAL")
}
if a.head.writeNotified != nil {
a.head.writeNotified.Notify()
}
// No errors logging to WAL, so pass the exemplars along to the in memory storage.
for _, e := range a.exemplars {
s := a.head.series.getByID(chunks.HeadSeriesRef(e.ref))


@@ -34,12 +34,16 @@ import (
)
const (
readPeriod = 10 * time.Millisecond
checkpointPeriod = 5 * time.Second
segmentCheckPeriod = 100 * time.Millisecond
consumer = "consumer"
)
var (
ErrIgnorable = errors.New("ignore me")
readTimeout = 15 * time.Second
)
// WriteTo is an interface used by the Watcher to send the samples it's read
// from the WAL on to somewhere else. Functions will be called concurrently
// and it is left to the implementer to make sure they are safe.
@@ -61,11 +65,17 @@ type WriteTo interface {
SeriesReset(int)
}
// Used to notify the watcher that data has been written so that it can read.
type WriteNotified interface {
Notify()
}
type WatcherMetrics struct {
recordsRead *prometheus.CounterVec
recordDecodeFails *prometheus.CounterVec
samplesSentPreTailing *prometheus.CounterVec
currentSegment *prometheus.GaugeVec
notificationsSkipped *prometheus.CounterVec
}
// Watcher watches the TSDB WAL for a given WriteTo.
@@ -88,9 +98,11 @@ type Watcher struct {
recordDecodeFailsMetric prometheus.Counter
samplesSentPreTailing prometheus.Counter
currentSegmentMetric prometheus.Gauge
notificationsSkipped prometheus.Counter
quit chan struct{}
done chan struct{}
readNotify chan struct{}
quit chan struct{}
done chan struct{}
// For testing, stop when we hit this segment.
MaxSegment int
@@ -134,6 +146,15 @@ func NewWatcherMetrics(reg prometheus.Registerer) *WatcherMetrics {
},
[]string{consumer},
),
notificationsSkipped: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "prometheus",
Subsystem: "wal_watcher",
Name: "notifications_skipped_total",
Help: "The number of WAL write notifications that the Watcher has skipped due to already being in a WAL read routine.",
},
[]string{consumer},
),
}
if reg != nil {
@@ -141,6 +162,7 @@ func NewWatcherMetrics(reg prometheus.Registerer) *WatcherMetrics {
reg.MustRegister(m.recordDecodeFails)
reg.MustRegister(m.samplesSentPreTailing)
reg.MustRegister(m.currentSegment)
reg.MustRegister(m.notificationsSkipped)
}
return m
@@ -161,13 +183,25 @@ func NewWatcher(metrics *WatcherMetrics, readerMetrics *LiveReaderMetrics, logge
sendExemplars: sendExemplars,
sendHistograms: sendHistograms,
quit: make(chan struct{}),
done: make(chan struct{}),
readNotify: make(chan struct{}),
quit: make(chan struct{}),
done: make(chan struct{}),
MaxSegment: -1,
}
}
func (w *Watcher) Notify() {
select {
case w.readNotify <- struct{}{}:
return
default: // default case so the send never blocks
// We don't need a buffered channel or any buffering since,
// for each notification it receives, the watcher will read until EOF.
w.notificationsSkipped.Inc()
}
}
func (w *Watcher) setMetrics() {
// Setup the WAL Watchers metrics. We do this here rather than in the
// constructor because of the ordering of creating Queue Managers's,
@@ -177,6 +211,8 @@ func (w *Watcher) setMetrics() {
w.recordDecodeFailsMetric = w.metrics.recordDecodeFails.WithLabelValues(w.name)
w.samplesSentPreTailing = w.metrics.samplesSentPreTailing.WithLabelValues(w.name)
w.currentSegmentMetric = w.metrics.currentSegment.WithLabelValues(w.name)
w.notificationsSkipped = w.metrics.notificationsSkipped.WithLabelValues(w.name)
}
}
@@ -262,7 +298,7 @@ func (w *Watcher) Run() error {
// On start, after reading the existing WAL for series records, we have a pointer to what is the latest segment.
// On subsequent calls to this function, currentSegment will have been incremented and we should open that segment.
if err := w.watch(currentSegment, currentSegment >= lastSegment); err != nil {
if err := w.watch(currentSegment, currentSegment >= lastSegment); err != nil && !errors.Is(err, ErrIgnorable) {
return err
}
@@ -330,6 +366,26 @@ func (w *Watcher) segments(dir string) ([]int, error) {
return refs, nil
}
func (w *Watcher) readAndHandleError(r *LiveReader, segmentNum int, tail bool, size int64) error {
err := w.readSegment(r, segmentNum, tail)
// Ignore all errors reading to end of segment whilst replaying the WAL.
if !tail {
if err != nil && errors.Cause(err) != io.EOF {
level.Warn(w.logger).Log("msg", "Ignoring error reading to end of segment, may have dropped data", "segment", segmentNum, "err", err)
} else if r.Offset() != size {
level.Warn(w.logger).Log("msg", "Expected to have read whole segment, may have dropped data", "segment", segmentNum, "read", r.Offset(), "size", size)
}
return ErrIgnorable
}
// Otherwise, when we are tailing, non-EOFs are fatal.
if errors.Cause(err) != io.EOF {
return err
}
return nil
}
// Use tail true to indicate that the reader is currently on a segment that is
// actively being written to. If false, assume it's a full segment and we're
// replaying it on start to cache the series records.
@@ -342,7 +398,7 @@ func (w *Watcher) watch(segmentNum int, tail bool) error {
reader := NewLiveReader(w.logger, w.readerMetrics, segment)
readTicker := time.NewTicker(readPeriod)
readTicker := time.NewTicker(readTimeout)
defer readTicker.Stop()
checkpointTicker := time.NewTicker(checkpointPeriod)
@@ -400,7 +456,6 @@ func (w *Watcher) watch(segmentNum int, tail bool) error {
if last <= segmentNum {
continue
}
err = w.readSegment(reader, segmentNum, tail)
// Ignore errors reading to end of segment whilst replaying the WAL.
@@ -421,24 +476,23 @@ func (w *Watcher) watch(segmentNum int, tail bool) error {
return nil
// We haven't read due to a notification in quite some time, try reading anyway.
case <-readTicker.C:
err = w.readSegment(reader, segmentNum, tail)
// Ignore all errors reading to end of segment whilst replaying the WAL.
if !tail {
switch {
case err != nil && errors.Cause(err) != io.EOF:
level.Warn(w.logger).Log("msg", "Ignoring error reading to end of segment, may have dropped data", "segment", segmentNum, "err", err)
case reader.Offset() != size:
level.Warn(w.logger).Log("msg", "Expected to have read whole segment, may have dropped data", "segment", segmentNum, "read", reader.Offset(), "size", size)
}
return nil
}
// Otherwise, when we are tailing, non-EOFs are fatal.
if errors.Cause(err) != io.EOF {
level.Debug(w.logger).Log("msg", "Watcher is reading the WAL due to timeout, haven't received any write notifications recently", "timeout", readTimeout)
err := w.readAndHandleError(reader, segmentNum, tail, size)
if err != nil {
return err
}
// still want to reset the ticker so we don't read too often
readTicker.Reset(readTimeout)
case <-w.readNotify:
err := w.readAndHandleError(reader, segmentNum, tail, size)
if err != nil {
return err
}
// still want to reset the ticker so we don't read too often
readTicker.Reset(readTimeout)
}
}
}


@@ -104,7 +104,7 @@ func (wtm *writeToMock) SeriesReset(index int) {
}
}
func (wtm *writeToMock) checkNumLabels() int {
func (wtm *writeToMock) checkNumSeries() int {
wtm.seriesLock.Lock()
defer wtm.seriesLock.Unlock()
return len(wtm.seriesSegmentIndexes)
@@ -230,9 +230,9 @@ func TestTailSamples(t *testing.T) {
expectedExemplars := seriesCount * exemplarsCount
expectedHistograms := seriesCount * histogramsCount
retry(t, defaultRetryInterval, defaultRetries, func() bool {
return wt.checkNumLabels() >= expectedSeries
return wt.checkNumSeries() >= expectedSeries
})
require.Equal(t, expectedSeries, wt.checkNumLabels(), "did not receive the expected number of series")
require.Equal(t, expectedSeries, wt.checkNumSeries(), "did not receive the expected number of series")
require.Equal(t, expectedSamples, wt.samplesAppended, "did not receive the expected number of samples")
require.Equal(t, expectedExemplars, wt.exemplarsAppended, "did not receive the expected number of exemplars")
require.Equal(t, expectedHistograms, wt.histogramsAppended, "did not receive the expected number of histograms")
@@ -290,7 +290,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) {
}
}
require.NoError(t, w.Log(recs...))
readTimeout = time.Second
_, _, err = Segments(w.Dir())
require.NoError(t, err)
@@ -299,11 +299,10 @@ func TestReadToEndNoCheckpoint(t *testing.T) {
go watcher.Start()
expected := seriesCount
retry(t, defaultRetryInterval, defaultRetries, func() bool {
return wt.checkNumLabels() >= expected
})
require.Eventually(t, func() bool {
return wt.checkNumSeries() == expected
}, 20*time.Second, 1*time.Second)
watcher.Stop()
require.Equal(t, expected, wt.checkNumLabels())
})
}
}
@@ -383,16 +382,17 @@ func TestReadToEndWithCheckpoint(t *testing.T) {
_, _, err = Segments(w.Dir())
require.NoError(t, err)
readTimeout = time.Second
wt := newWriteToMock()
watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false)
go watcher.Start()
expected := seriesCount * 2
retry(t, defaultRetryInterval, defaultRetries, func() bool {
return wt.checkNumLabels() >= expected
})
require.Eventually(t, func() bool {
return wt.checkNumSeries() == expected
}, 10*time.Second, 1*time.Second)
watcher.Stop()
require.Equal(t, expected, wt.checkNumLabels())
})
}
}
@@ -460,10 +460,10 @@ func TestReadCheckpoint(t *testing.T) {
expectedSeries := seriesCount
retry(t, defaultRetryInterval, defaultRetries, func() bool {
return wt.checkNumLabels() >= expectedSeries
return wt.checkNumSeries() >= expectedSeries
})
watcher.Stop()
require.Equal(t, expectedSeries, wt.checkNumLabels())
require.Equal(t, expectedSeries, wt.checkNumSeries())
})
}
}
@@ -595,6 +595,7 @@ func TestCheckpointSeriesReset(t *testing.T) {
_, _, err = Segments(w.Dir())
require.NoError(t, err)
readTimeout = time.Second
wt := newWriteToMock()
watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false)
watcher.MaxSegment = -1
@@ -602,9 +603,11 @@ func TestCheckpointSeriesReset(t *testing.T) {
expected := seriesCount
retry(t, defaultRetryInterval, defaultRetries, func() bool {
return wt.checkNumLabels() >= expected
return wt.checkNumSeries() >= expected
})
require.Equal(t, seriesCount, wt.checkNumLabels())
require.Eventually(t, func() bool {
return wt.checkNumSeries() == seriesCount
}, 10*time.Second, 1*time.Second)
_, err = Checkpoint(log.NewNopLogger(), w, 2, 4, func(x chunks.HeadSeriesRef) bool { return true }, 0)
require.NoError(t, err)
@@ -621,7 +624,9 @@ func TestCheckpointSeriesReset(t *testing.T) {
// If you modify the checkpoint and truncate segment #'s run the test to see how
// many series records you end up with and change the last Equals check accordingly
// or modify the Equals to Assert(len(wt.seriesLabels) < seriesCount*10)
require.Equal(t, tc.segments, wt.checkNumLabels())
require.Eventually(t, func() bool {
return wt.checkNumSeries() == tc.segments
}, 20*time.Second, 1*time.Second)
})
}
}


@@ -188,6 +188,8 @@ type WL struct {
compress bool
snappyBuf []byte
WriteNotified WriteNotified
metrics *wlMetrics
}
@@ -343,6 +345,10 @@ func (w *WL) Dir() string {
return w.dir
}
func (w *WL) SetWriteNotified(wn WriteNotified) {
w.WriteNotified = wn
}
func (w *WL) run() {
Loop:
for {