Remove some 'global' state, moving segment numbers to parameters.

Signed-off-by: Tom Wilkie <tom.wilkie@gmail.com>
This commit is contained in:
Tom Wilkie 2019-02-13 17:06:03 +00:00 committed by Tom Wilkie
parent bdc6b764b0
commit 859cda27ff

View file

@ -139,9 +139,7 @@ type WALWatcher struct {
logger log.Logger logger log.Logger
walDir string walDir string
currentSegment int startTime int64
lastCheckpoint string
startTime int64
samplesReadMetric prometheus.Counter samplesReadMetric prometheus.Counter
seriesReadMetric prometheus.Counter seriesReadMetric prometheus.Counter
@ -153,6 +151,7 @@ type WALWatcher struct {
currentSegmentMetric prometheus.Gauge currentSegmentMetric prometheus.Gauge
quit chan struct{} quit chan struct{}
done chan struct{}
} }
// NewWALWatcher creates a new WAL watcher for a given WriteTo. // NewWALWatcher creates a new WAL watcher for a given WriteTo.
@ -167,6 +166,7 @@ func NewWALWatcher(logger log.Logger, name string, writer writeTo, walDir string
startTime: startTime, startTime: startTime,
name: name, name: name,
quit: make(chan struct{}), quit: make(chan struct{}),
done: make(chan struct{}),
} }
w.samplesReadMetric = watcherSamplesRecordsRead.WithLabelValues(w.name) w.samplesReadMetric = watcherSamplesRecordsRead.WithLabelValues(w.name)
@ -189,11 +189,14 @@ func (w *WALWatcher) Start() {
func (w *WALWatcher) Stop() { func (w *WALWatcher) Stop() {
level.Info(w.logger).Log("msg", "stopping WAL watcher", "queue", w.name) level.Info(w.logger).Log("msg", "stopping WAL watcher", "queue", w.name)
close(w.quit) close(w.quit)
<-w.done
level.Info(w.logger).Log("msg", "WAL watcher stopped", "queue", w.name)
} }
func (w *WALWatcher) loop() { func (w *WALWatcher) loop() {
// We may encourter failures processing the WAL; we should wait and retry. defer close(w.done)
// We may encourter failures processing the WAL; we should wait and retry.
for { for {
if err := w.run(); err != nil { if err := w.run(); err != nil {
level.Error(w.logger).Log("msg", "error tailing WAL", "err", err) level.Error(w.logger).Log("msg", "error tailing WAL", "err", err)
@ -213,39 +216,42 @@ func (w *WALWatcher) run() error {
return errors.Wrap(err, "wal.New") return errors.Wrap(err, "wal.New")
} }
// Backfill from the checkpoint first if it exists. _, last, err := nw.Segments()
var nextIndex int
w.lastCheckpoint, nextIndex, err = tsdb.LastCheckpoint(w.walDir)
if err != nil && err != tsdb.ErrNotFound {
return err
}
level.Debug(w.logger).Log("msg", "reading checkpoint", "dir", w.lastCheckpoint, "startFrom", nextIndex)
if err == nil {
if err = w.readCheckpoint(w.lastCheckpoint); err != nil {
return err
}
}
w.currentSegment, err = w.findSegmentForIndex(nextIndex)
if err != nil { if err != nil {
return err return err
} }
level.Debug(w.logger).Log("msg", "starting from", "currentSegment", w.currentSegment) // Backfill from the checkpoint first if it exists.
lastCheckpoint, nextIndex, err := tsdb.LastCheckpoint(w.walDir)
if err != nil && err != tsdb.ErrNotFound {
return err
}
level.Info(w.logger).Log("msg", "reading checkpoint", "dir", lastCheckpoint, "startFrom", nextIndex)
if err == nil {
if err = w.readCheckpoint(lastCheckpoint); err != nil {
return err
}
}
currentSegment, err := w.findSegmentForIndex(nextIndex)
if err != nil {
return err
}
level.Info(w.logger).Log("msg", "starting from", "currentSegment", currentSegment, "last", last)
for { for {
w.currentSegmentMetric.Set(float64(w.currentSegment)) w.currentSegmentMetric.Set(float64(currentSegment))
level.Info(w.logger).Log("msg", "process segment", "segment", w.currentSegment) level.Info(w.logger).Log("msg", "process segment", "segment", currentSegment)
// On start, after reading the existing WAL for series records, we have a pointer to what is the latest segment. // On start, after reading the existing WAL for series records, we have a pointer to what is the latest segment.
// On subsequent calls to this function, currentSegment will have been incremented and we should open that segment. // On subsequent calls to this function, currentSegment will have been incremented and we should open that segment.
if err := w.watch(nw, w.currentSegment, true); err != nil { if err := w.watch(nw, currentSegment, currentSegment >= last); err != nil {
level.Error(w.logger).Log("msg", "runWatcher is ending", "err", err) level.Error(w.logger).Log("msg", "runWatcher is ending", "err", err)
return err return err
} }
w.currentSegment++ currentSegment++
} }
} }
@ -307,9 +313,9 @@ func (w *WALWatcher) watch(wl *wal.WAL, segmentNum int, tail bool) error {
segmentTicker.Stop() segmentTicker.Stop()
checkpointTicker.Stop() checkpointTicker.Stop()
var err error var err error
size, err = getSegmentSize(w.walDir, w.currentSegment) size, err = getSegmentSize(w.walDir, segmentNum)
if err != nil { if err != nil {
level.Error(w.logger).Log("msg", "error getting segment size", "segment", w.currentSegment) level.Error(w.logger).Log("msg", "error getting segment size", "segment", segmentNum)
return errors.Wrap(err, "get segment size") return errors.Wrap(err, "get segment size")
} }
} }
@ -331,31 +337,25 @@ func (w *WALWatcher) watch(wl *wal.WAL, segmentNum int, tail bool) error {
continue continue
} }
if dir == w.lastCheckpoint { index, err := checkpointNum(dir)
continue
}
level.Info(w.logger).Log("msg", "new checkpoint detected", "last", w.lastCheckpoint, "new", dir)
d, err := checkpointNum(dir)
if err != nil { if err != nil {
level.Error(w.logger).Log("msg", "error parsing checkpoint", "err", err) level.Error(w.logger).Log("msg", "error parsing checkpoint", "err", err)
continue continue
} }
if d >= w.currentSegment { if index >= segmentNum {
level.Info(w.logger).Log("msg", "current segment is behind the checkpoint, skipping reading of checkpoint", "current", fmt.Sprintf("%08d", w.currentSegment), "checkpoint", dir) level.Info(w.logger).Log("msg", "current segment is behind the checkpoint, skipping reading of checkpoint", "current", fmt.Sprintf("%08d", segmentNum), "checkpoint", dir)
continue continue
} }
w.lastCheckpoint = dir level.Info(w.logger).Log("msg", "new checkpoint detected", "new", dir, "currentSegment", segmentNum)
// This potentially takes a long time, should we run it in another go routine? // This potentially takes a long time, should we run it in another go routine?
err = w.readCheckpoint(w.lastCheckpoint) err = w.readCheckpoint(dir)
if err != nil { if err != nil {
level.Error(w.logger).Log("err", err) level.Error(w.logger).Log("err", err)
} }
// Clear series with a checkpoint or segment index # lower than the checkpoint we just read. // Clear series with a checkpoint or segment index # lower than the checkpoint we just read.
w.writer.SeriesReset(d) w.writer.SeriesReset(index)
case <-segmentTicker.C: case <-segmentTicker.C:
_, last, err := wl.Segments() _, last, err := wl.Segments()
@ -364,35 +364,35 @@ func (w *WALWatcher) watch(wl *wal.WAL, segmentNum int, tail bool) error {
} }
// Check if new segments exists. // Check if new segments exists.
if last <= w.currentSegment { if last <= segmentNum {
continue continue
} }
if err := w.readSegment(reader); err != nil { if err := w.readSegment(reader, segmentNum); err != nil {
// Ignore errors reading to end of segment, as we're going to move to // Ignore errors reading to end of segment, as we're going to move to
// next segment now. // next segment now.
level.Error(w.logger).Log("msg", "error reading to end of segment", "err", err) level.Error(w.logger).Log("msg", "error reading to end of segment", "err", err)
} }
level.Info(w.logger).Log("msg", "a new segment exists, we should start reading it", "current", fmt.Sprintf("%08d", w.currentSegment), "new", fmt.Sprintf("%08d", last)) level.Info(w.logger).Log("msg", "a new segment exists, we should start reading it", "current", fmt.Sprintf("%08d", segmentNum), "new", fmt.Sprintf("%08d", last))
return nil return nil
case <-readTicker.C: case <-readTicker.C:
if err := w.readSegment(reader); err != nil && err != io.EOF { if err := w.readSegment(reader, segmentNum); err != nil && err != io.EOF {
level.Error(w.logger).Log("err", err) level.Error(w.logger).Log("err", err)
return err return err
} }
if reader.TotalRead() >= size && !tail { if reader.TotalRead() >= size && !tail {
level.Info(w.logger).Log("msg", "done replaying segment", "segment", w.currentSegment, "size", size, "read", reader.TotalRead()) level.Info(w.logger).Log("msg", "done replaying segment", "segment", segmentNum, "size", size, "read", reader.TotalRead())
return nil return nil
} }
} }
} }
} }
func (w *WALWatcher) readSegment(r *wal.LiveReader) error { func (w *WALWatcher) readSegment(r *wal.LiveReader, segmentNum int) error {
for r.Next() && !isClosed(w.quit) { for r.Next() && !isClosed(w.quit) {
err := w.decodeRecord(r.Record()) err := w.decodeRecord(r.Record(), segmentNum)
// Intentionally skip over record decode errors. // Intentionally skip over record decode errors.
if err != nil { if err != nil {
@ -402,7 +402,7 @@ func (w *WALWatcher) readSegment(r *wal.LiveReader) error {
return r.Err() return r.Err()
} }
func (w *WALWatcher) decodeRecord(rec []byte) error { func (w *WALWatcher) decodeRecord(rec []byte, segmentNum int) error {
var ( var (
dec tsdb.RecordDecoder dec tsdb.RecordDecoder
series []tsdb.RefSeries series []tsdb.RefSeries
@ -416,7 +416,7 @@ func (w *WALWatcher) decodeRecord(rec []byte) error {
return err return err
} }
w.seriesReadMetric.Add(float64(len(series))) w.seriesReadMetric.Add(float64(len(series)))
w.writer.StoreSeries(series, w.currentSegment) w.writer.StoreSeries(series, segmentNum)
case tsdb.RecordSamples: case tsdb.RecordSamples:
samples, err := dec.Samples(rec, samples[:0]) samples, err := dec.Samples(rec, samples[:0])
@ -455,6 +455,11 @@ func (w *WALWatcher) decodeRecord(rec []byte) error {
// Read all the series records from a Checkpoint directory. // Read all the series records from a Checkpoint directory.
func (w *WALWatcher) readCheckpoint(checkpointDir string) error { func (w *WALWatcher) readCheckpoint(checkpointDir string) error {
level.Info(w.logger).Log("msg", "reading checkpoint", "dir", checkpointDir) level.Info(w.logger).Log("msg", "reading checkpoint", "dir", checkpointDir)
index, err := checkpointNum(checkpointDir)
if err != nil {
return err
}
sr, err := wal.NewSegmentsReader(checkpointDir) sr, err := wal.NewSegmentsReader(checkpointDir)
if err != nil { if err != nil {
return errors.Wrap(err, "open checkpoint") return errors.Wrap(err, "open checkpoint")
@ -469,7 +474,7 @@ func (w *WALWatcher) readCheckpoint(checkpointDir string) error {
// w.readSeriesRecords(wal.NewLiveReader(sr), i, size) // w.readSeriesRecords(wal.NewLiveReader(sr), i, size)
r := wal.NewLiveReader(sr) r := wal.NewLiveReader(sr)
if err := w.readSegment(r); err != nil { if err := w.readSegment(r, index); err != nil {
return errors.Wrap(err, "readSegment") return errors.Wrap(err, "readSegment")
} }