mirror of
https://github.com/prometheus/prometheus.git
synced 2024-12-25 13:44:05 -08:00
Remove some 'global' state, moving segment numbers to parameters.
Signed-off-by: Tom Wilkie <tom.wilkie@gmail.com>
This commit is contained in:
parent
bdc6b764b0
commit
859cda27ff
|
@ -139,9 +139,7 @@ type WALWatcher struct {
|
||||||
logger log.Logger
|
logger log.Logger
|
||||||
walDir string
|
walDir string
|
||||||
|
|
||||||
currentSegment int
|
startTime int64
|
||||||
lastCheckpoint string
|
|
||||||
startTime int64
|
|
||||||
|
|
||||||
samplesReadMetric prometheus.Counter
|
samplesReadMetric prometheus.Counter
|
||||||
seriesReadMetric prometheus.Counter
|
seriesReadMetric prometheus.Counter
|
||||||
|
@ -153,6 +151,7 @@ type WALWatcher struct {
|
||||||
currentSegmentMetric prometheus.Gauge
|
currentSegmentMetric prometheus.Gauge
|
||||||
|
|
||||||
quit chan struct{}
|
quit chan struct{}
|
||||||
|
done chan struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewWALWatcher creates a new WAL watcher for a given WriteTo.
|
// NewWALWatcher creates a new WAL watcher for a given WriteTo.
|
||||||
|
@ -167,6 +166,7 @@ func NewWALWatcher(logger log.Logger, name string, writer writeTo, walDir string
|
||||||
startTime: startTime,
|
startTime: startTime,
|
||||||
name: name,
|
name: name,
|
||||||
quit: make(chan struct{}),
|
quit: make(chan struct{}),
|
||||||
|
done: make(chan struct{}),
|
||||||
}
|
}
|
||||||
|
|
||||||
w.samplesReadMetric = watcherSamplesRecordsRead.WithLabelValues(w.name)
|
w.samplesReadMetric = watcherSamplesRecordsRead.WithLabelValues(w.name)
|
||||||
|
@ -189,11 +189,14 @@ func (w *WALWatcher) Start() {
|
||||||
func (w *WALWatcher) Stop() {
|
func (w *WALWatcher) Stop() {
|
||||||
level.Info(w.logger).Log("msg", "stopping WAL watcher", "queue", w.name)
|
level.Info(w.logger).Log("msg", "stopping WAL watcher", "queue", w.name)
|
||||||
close(w.quit)
|
close(w.quit)
|
||||||
|
<-w.done
|
||||||
|
level.Info(w.logger).Log("msg", "WAL watcher stopped", "queue", w.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *WALWatcher) loop() {
|
func (w *WALWatcher) loop() {
|
||||||
// We may encounter failures processing the WAL; we should wait and retry.
|
defer close(w.done)
|
||||||
|
|
||||||
|
// We may encounter failures processing the WAL; we should wait and retry.
|
||||||
for {
|
for {
|
||||||
if err := w.run(); err != nil {
|
if err := w.run(); err != nil {
|
||||||
level.Error(w.logger).Log("msg", "error tailing WAL", "err", err)
|
level.Error(w.logger).Log("msg", "error tailing WAL", "err", err)
|
||||||
|
@ -213,39 +216,42 @@ func (w *WALWatcher) run() error {
|
||||||
return errors.Wrap(err, "wal.New")
|
return errors.Wrap(err, "wal.New")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Backfill from the checkpoint first if it exists.
|
_, last, err := nw.Segments()
|
||||||
var nextIndex int
|
|
||||||
w.lastCheckpoint, nextIndex, err = tsdb.LastCheckpoint(w.walDir)
|
|
||||||
if err != nil && err != tsdb.ErrNotFound {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
level.Debug(w.logger).Log("msg", "reading checkpoint", "dir", w.lastCheckpoint, "startFrom", nextIndex)
|
|
||||||
if err == nil {
|
|
||||||
if err = w.readCheckpoint(w.lastCheckpoint); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
w.currentSegment, err = w.findSegmentForIndex(nextIndex)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
level.Debug(w.logger).Log("msg", "starting from", "currentSegment", w.currentSegment)
|
// Backfill from the checkpoint first if it exists.
|
||||||
|
lastCheckpoint, nextIndex, err := tsdb.LastCheckpoint(w.walDir)
|
||||||
|
if err != nil && err != tsdb.ErrNotFound {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
level.Info(w.logger).Log("msg", "reading checkpoint", "dir", lastCheckpoint, "startFrom", nextIndex)
|
||||||
|
if err == nil {
|
||||||
|
if err = w.readCheckpoint(lastCheckpoint); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
currentSegment, err := w.findSegmentForIndex(nextIndex)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
level.Info(w.logger).Log("msg", "starting from", "currentSegment", currentSegment, "last", last)
|
||||||
for {
|
for {
|
||||||
w.currentSegmentMetric.Set(float64(w.currentSegment))
|
w.currentSegmentMetric.Set(float64(currentSegment))
|
||||||
level.Info(w.logger).Log("msg", "process segment", "segment", w.currentSegment)
|
level.Info(w.logger).Log("msg", "process segment", "segment", currentSegment)
|
||||||
|
|
||||||
// On start, after reading the existing WAL for series records, we have a pointer to what is the latest segment.
|
// On start, after reading the existing WAL for series records, we have a pointer to what is the latest segment.
|
||||||
// On subsequent calls to this function, currentSegment will have been incremented and we should open that segment.
|
// On subsequent calls to this function, currentSegment will have been incremented and we should open that segment.
|
||||||
if err := w.watch(nw, w.currentSegment, true); err != nil {
|
if err := w.watch(nw, currentSegment, currentSegment >= last); err != nil {
|
||||||
level.Error(w.logger).Log("msg", "runWatcher is ending", "err", err)
|
level.Error(w.logger).Log("msg", "runWatcher is ending", "err", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
w.currentSegment++
|
currentSegment++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -307,9 +313,9 @@ func (w *WALWatcher) watch(wl *wal.WAL, segmentNum int, tail bool) error {
|
||||||
segmentTicker.Stop()
|
segmentTicker.Stop()
|
||||||
checkpointTicker.Stop()
|
checkpointTicker.Stop()
|
||||||
var err error
|
var err error
|
||||||
size, err = getSegmentSize(w.walDir, w.currentSegment)
|
size, err = getSegmentSize(w.walDir, segmentNum)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
level.Error(w.logger).Log("msg", "error getting segment size", "segment", w.currentSegment)
|
level.Error(w.logger).Log("msg", "error getting segment size", "segment", segmentNum)
|
||||||
return errors.Wrap(err, "get segment size")
|
return errors.Wrap(err, "get segment size")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -331,31 +337,25 @@ func (w *WALWatcher) watch(wl *wal.WAL, segmentNum int, tail bool) error {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if dir == w.lastCheckpoint {
|
index, err := checkpointNum(dir)
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
level.Info(w.logger).Log("msg", "new checkpoint detected", "last", w.lastCheckpoint, "new", dir)
|
|
||||||
|
|
||||||
d, err := checkpointNum(dir)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
level.Error(w.logger).Log("msg", "error parsing checkpoint", "err", err)
|
level.Error(w.logger).Log("msg", "error parsing checkpoint", "err", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if d >= w.currentSegment {
|
if index >= segmentNum {
|
||||||
level.Info(w.logger).Log("msg", "current segment is behind the checkpoint, skipping reading of checkpoint", "current", fmt.Sprintf("%08d", w.currentSegment), "checkpoint", dir)
|
level.Info(w.logger).Log("msg", "current segment is behind the checkpoint, skipping reading of checkpoint", "current", fmt.Sprintf("%08d", segmentNum), "checkpoint", dir)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
w.lastCheckpoint = dir
|
level.Info(w.logger).Log("msg", "new checkpoint detected", "new", dir, "currentSegment", segmentNum)
|
||||||
// This potentially takes a long time, should we run it in another goroutine?
|
// This potentially takes a long time, should we run it in another goroutine?
|
||||||
err = w.readCheckpoint(w.lastCheckpoint)
|
err = w.readCheckpoint(dir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
level.Error(w.logger).Log("err", err)
|
level.Error(w.logger).Log("err", err)
|
||||||
}
|
}
|
||||||
// Clear series with a checkpoint or segment index # lower than the checkpoint we just read.
|
// Clear series with a checkpoint or segment index # lower than the checkpoint we just read.
|
||||||
w.writer.SeriesReset(d)
|
w.writer.SeriesReset(index)
|
||||||
|
|
||||||
case <-segmentTicker.C:
|
case <-segmentTicker.C:
|
||||||
_, last, err := wl.Segments()
|
_, last, err := wl.Segments()
|
||||||
|
@ -364,35 +364,35 @@ func (w *WALWatcher) watch(wl *wal.WAL, segmentNum int, tail bool) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if new segments exist.
|
// Check if new segments exist.
|
||||||
if last <= w.currentSegment {
|
if last <= segmentNum {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := w.readSegment(reader); err != nil {
|
if err := w.readSegment(reader, segmentNum); err != nil {
|
||||||
// Ignore errors reading to end of segment, as we're going to move to
|
// Ignore errors reading to end of segment, as we're going to move to
|
||||||
// next segment now.
|
// next segment now.
|
||||||
level.Error(w.logger).Log("msg", "error reading to end of segment", "err", err)
|
level.Error(w.logger).Log("msg", "error reading to end of segment", "err", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
level.Info(w.logger).Log("msg", "a new segment exists, we should start reading it", "current", fmt.Sprintf("%08d", w.currentSegment), "new", fmt.Sprintf("%08d", last))
|
level.Info(w.logger).Log("msg", "a new segment exists, we should start reading it", "current", fmt.Sprintf("%08d", segmentNum), "new", fmt.Sprintf("%08d", last))
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
case <-readTicker.C:
|
case <-readTicker.C:
|
||||||
if err := w.readSegment(reader); err != nil && err != io.EOF {
|
if err := w.readSegment(reader, segmentNum); err != nil && err != io.EOF {
|
||||||
level.Error(w.logger).Log("err", err)
|
level.Error(w.logger).Log("err", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if reader.TotalRead() >= size && !tail {
|
if reader.TotalRead() >= size && !tail {
|
||||||
level.Info(w.logger).Log("msg", "done replaying segment", "segment", w.currentSegment, "size", size, "read", reader.TotalRead())
|
level.Info(w.logger).Log("msg", "done replaying segment", "segment", segmentNum, "size", size, "read", reader.TotalRead())
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *WALWatcher) readSegment(r *wal.LiveReader) error {
|
func (w *WALWatcher) readSegment(r *wal.LiveReader, segmentNum int) error {
|
||||||
for r.Next() && !isClosed(w.quit) {
|
for r.Next() && !isClosed(w.quit) {
|
||||||
err := w.decodeRecord(r.Record())
|
err := w.decodeRecord(r.Record(), segmentNum)
|
||||||
|
|
||||||
// Intentionally skip over record decode errors.
|
// Intentionally skip over record decode errors.
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -402,7 +402,7 @@ func (w *WALWatcher) readSegment(r *wal.LiveReader) error {
|
||||||
return r.Err()
|
return r.Err()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *WALWatcher) decodeRecord(rec []byte) error {
|
func (w *WALWatcher) decodeRecord(rec []byte, segmentNum int) error {
|
||||||
var (
|
var (
|
||||||
dec tsdb.RecordDecoder
|
dec tsdb.RecordDecoder
|
||||||
series []tsdb.RefSeries
|
series []tsdb.RefSeries
|
||||||
|
@ -416,7 +416,7 @@ func (w *WALWatcher) decodeRecord(rec []byte) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
w.seriesReadMetric.Add(float64(len(series)))
|
w.seriesReadMetric.Add(float64(len(series)))
|
||||||
w.writer.StoreSeries(series, w.currentSegment)
|
w.writer.StoreSeries(series, segmentNum)
|
||||||
|
|
||||||
case tsdb.RecordSamples:
|
case tsdb.RecordSamples:
|
||||||
samples, err := dec.Samples(rec, samples[:0])
|
samples, err := dec.Samples(rec, samples[:0])
|
||||||
|
@ -455,6 +455,11 @@ func (w *WALWatcher) decodeRecord(rec []byte) error {
|
||||||
// Read all the series records from a Checkpoint directory.
|
// Read all the series records from a Checkpoint directory.
|
||||||
func (w *WALWatcher) readCheckpoint(checkpointDir string) error {
|
func (w *WALWatcher) readCheckpoint(checkpointDir string) error {
|
||||||
level.Info(w.logger).Log("msg", "reading checkpoint", "dir", checkpointDir)
|
level.Info(w.logger).Log("msg", "reading checkpoint", "dir", checkpointDir)
|
||||||
|
index, err := checkpointNum(checkpointDir)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
sr, err := wal.NewSegmentsReader(checkpointDir)
|
sr, err := wal.NewSegmentsReader(checkpointDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return errors.Wrap(err, "open checkpoint")
|
return errors.Wrap(err, "open checkpoint")
|
||||||
|
@ -469,7 +474,7 @@ func (w *WALWatcher) readCheckpoint(checkpointDir string) error {
|
||||||
|
|
||||||
// w.readSeriesRecords(wal.NewLiveReader(sr), i, size)
|
// w.readSeriesRecords(wal.NewLiveReader(sr), i, size)
|
||||||
r := wal.NewLiveReader(sr)
|
r := wal.NewLiveReader(sr)
|
||||||
if err := w.readSegment(r); err != nil {
|
if err := w.readSegment(r, index); err != nil {
|
||||||
return errors.Wrap(err, "readSegment")
|
return errors.Wrap(err, "readSegment")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue