2018-09-07 14:26:04 -07:00
// Copyright 2018 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2019-09-19 02:15:41 -07:00
package wal
2018-09-07 14:26:04 -07:00
import (
"fmt"
2019-01-18 12:31:36 -08:00
"io"
"math"
2018-09-07 14:26:04 -07:00
"os"
"path"
2019-02-13 06:47:35 -08:00
"sort"
2018-09-07 14:26:04 -07:00
"strconv"
"strings"
"time"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
2019-03-25 16:01:12 -07:00
"github.com/prometheus/prometheus/pkg/timestamp"
2019-09-19 02:15:41 -07:00
"github.com/prometheus/prometheus/tsdb/fileutil"
"github.com/prometheus/prometheus/tsdb/record"
2018-09-07 14:26:04 -07:00
)
2019-01-18 04:48:16 -08:00
const (
readPeriod = 10 * time . Millisecond
checkpointPeriod = 5 * time . Second
segmentCheckPeriod = 100 * time . Millisecond
2019-09-19 02:15:41 -07:00
consumer = "consumer"
2019-01-18 04:48:16 -08:00
)
2019-09-19 02:15:41 -07:00
// WriteTo is an interface used by the Watcher to send the samples it's read
// from the WAL on to somewhere else.
type WriteTo interface {
Append ( [ ] record . RefSample ) bool
StoreSeries ( [ ] record . RefSeries , int )
SeriesReset ( int )
2018-09-07 14:26:04 -07:00
}
2019-09-19 02:15:41 -07:00
type WatcherMetrics struct {
recordsRead * prometheus . CounterVec
recordDecodeFails * prometheus . CounterVec
samplesSentPreTailing * prometheus . CounterVec
currentSegment * prometheus . GaugeVec
2018-09-07 14:26:04 -07:00
}
2019-09-19 02:15:41 -07:00
// Watcher watches the TSDB WAL for a given WriteTo.
type Watcher struct {
2019-02-18 22:46:52 -08:00
name string
2019-09-19 02:15:41 -07:00
writer WriteTo
2019-02-18 22:46:52 -08:00
logger log . Logger
walDir string
lastCheckpoint string
2019-09-19 02:15:41 -07:00
metrics * WatcherMetrics
readerMetrics * liveReaderMetrics
2019-01-18 04:48:16 -08:00
2019-09-19 02:15:41 -07:00
StartTime int64
2018-09-07 14:26:04 -07:00
2019-02-13 11:14:15 -08:00
recordsReadMetric * prometheus . CounterVec
2018-09-07 14:26:04 -07:00
recordDecodeFailsMetric prometheus . Counter
samplesSentPreTailing prometheus . Counter
currentSegmentMetric prometheus . Gauge
2019-01-18 04:48:16 -08:00
quit chan struct { }
2019-02-13 09:06:03 -08:00
done chan struct { }
2019-02-19 20:03:41 -08:00
// For testing, stop when we hit this segment.
2019-09-19 02:15:41 -07:00
MaxSegment int
2018-09-07 14:26:04 -07:00
}
2019-09-19 02:15:41 -07:00
func NewWatcherMetrics ( reg prometheus . Registerer ) * WatcherMetrics {
m := & WatcherMetrics {
recordsRead : prometheus . NewCounterVec (
prometheus . CounterOpts {
Namespace : "prometheus" ,
Subsystem : "wal_watcher" ,
Name : "records_read_total" ,
Help : "Number of records read by the WAL watcher from the WAL." ,
} ,
[ ] string { consumer , "type" } ,
) ,
recordDecodeFails : prometheus . NewCounterVec (
prometheus . CounterOpts {
Namespace : "prometheus" ,
Subsystem : "wal_watcher" ,
Name : "record_decode_failures_total" ,
Help : "Number of records read by the WAL watcher that resulted in an error when decoding." ,
} ,
[ ] string { consumer } ,
) ,
samplesSentPreTailing : prometheus . NewCounterVec (
prometheus . CounterOpts {
Namespace : "prometheus" ,
Subsystem : "wal_watcher" ,
Name : "samples_sent_pre_tailing_total" ,
Help : "Number of sample records read by the WAL watcher and sent to remote write during replay of existing WAL." ,
} ,
[ ] string { consumer } ,
) ,
currentSegment : prometheus . NewGaugeVec (
prometheus . GaugeOpts {
Namespace : "prometheus" ,
Subsystem : "wal_watcher" ,
Name : "current_segment" ,
Help : "Current segment the WAL watcher is reading records from." ,
} ,
[ ] string { consumer } ,
) ,
}
if reg != nil {
_ = reg . Register ( m . recordsRead )
_ = reg . Register ( m . recordDecodeFails )
_ = reg . Register ( m . samplesSentPreTailing )
_ = reg . Register ( m . currentSegment )
}
return m
}
// NewWatcher creates a new WAL watcher for a given WriteTo.
func NewWatcher ( reg prometheus . Registerer , metrics * WatcherMetrics , logger log . Logger , name string , writer WriteTo , walDir string ) * Watcher {
2018-09-07 14:26:04 -07:00
if logger == nil {
logger = log . NewNopLogger ( )
}
2019-09-19 02:15:41 -07:00
return & Watcher {
logger : logger ,
writer : writer ,
metrics : metrics ,
readerMetrics : NewLiveReaderMetrics ( reg ) ,
walDir : path . Join ( walDir , "wal" ) ,
name : name ,
quit : make ( chan struct { } ) ,
done : make ( chan struct { } ) ,
MaxSegment : - 1 ,
2019-02-13 11:14:15 -08:00
}
2018-09-07 14:26:04 -07:00
}
2019-09-19 02:15:41 -07:00
func ( w * Watcher ) setMetrics ( ) {
2019-04-23 01:49:17 -07:00
// Setup the WAL Watchers metrics. We do this here rather than in the
// constructor because of the ordering of creating Queue Managers's,
// stopping them, and then starting new ones in storage/remote/storage.go ApplyConfig.
2019-09-19 02:15:41 -07:00
if w . metrics != nil {
w . recordsReadMetric = w . metrics . recordsRead . MustCurryWith ( prometheus . Labels { consumer : w . name } )
w . recordDecodeFailsMetric = w . metrics . recordDecodeFails . WithLabelValues ( w . name )
w . samplesSentPreTailing = w . metrics . samplesSentPreTailing . WithLabelValues ( w . name )
w . currentSegmentMetric = w . metrics . currentSegment . WithLabelValues ( w . name )
}
2019-04-23 01:49:17 -07:00
}
2019-09-19 02:15:41 -07:00
// Start the Watcher.
func ( w * Watcher ) Start ( ) {
2019-04-23 01:49:17 -07:00
w . setMetrics ( )
2019-01-18 04:48:16 -08:00
level . Info ( w . logger ) . Log ( "msg" , "starting WAL watcher" , "queue" , w . name )
2019-04-23 01:49:17 -07:00
2019-02-13 06:47:35 -08:00
go w . loop ( )
2018-09-07 14:26:04 -07:00
}
2019-09-19 02:15:41 -07:00
// Stop the Watcher.
func ( w * Watcher ) Stop ( ) {
2018-09-07 14:26:04 -07:00
close ( w . quit )
2019-02-13 09:06:03 -08:00
<- w . done
2019-04-23 01:49:17 -07:00
// Records read metric has series and samples.
2019-09-19 02:15:41 -07:00
w . metrics . recordsRead . DeleteLabelValues ( w . name , "series" )
w . metrics . recordsRead . DeleteLabelValues ( w . name , "samples" )
w . metrics . recordDecodeFails . DeleteLabelValues ( w . name )
w . metrics . samplesSentPreTailing . DeleteLabelValues ( w . name )
w . metrics . currentSegment . DeleteLabelValues ( w . name )
2019-04-23 01:49:17 -07:00
2019-02-13 09:06:03 -08:00
level . Info ( w . logger ) . Log ( "msg" , "WAL watcher stopped" , "queue" , w . name )
2018-09-07 14:26:04 -07:00
}
2019-09-19 02:15:41 -07:00
func ( w * Watcher ) loop ( ) {
2019-02-13 09:06:03 -08:00
defer close ( w . done )
2019-02-13 06:47:35 -08:00
2019-07-29 02:45:27 -07:00
// We may encounter failures processing the WAL; we should wait and retry.
2019-02-14 02:02:54 -08:00
for ! isClosed ( w . quit ) {
2019-09-19 02:15:41 -07:00
w . StartTime = timestamp . FromTime ( time . Now ( ) )
if err := w . Run ( ) ; err != nil {
2019-02-13 06:47:35 -08:00
level . Error ( w . logger ) . Log ( "msg" , "error tailing WAL" , "err" , err )
2018-09-07 14:26:04 -07:00
}
2019-02-13 06:47:35 -08:00
select {
case <- w . quit :
return
case <- time . After ( 5 * time . Second ) :
}
2018-09-07 14:26:04 -07:00
}
2019-02-13 06:47:35 -08:00
}
2018-09-07 14:26:04 -07:00
2019-09-19 02:15:41 -07:00
// Run the watcher, which will tail the WAL until the quit channel is closed
// or an error case is hit.
func ( w * Watcher ) Run ( ) error {
2019-03-05 04:21:11 -08:00
_ , lastSegment , err := w . firstAndLast ( )
2019-02-13 09:06:03 -08:00
if err != nil {
2019-02-19 08:43:58 -08:00
return errors . Wrap ( err , "wal.Segments" )
2019-02-13 09:06:03 -08:00
}
2019-01-18 12:31:36 -08:00
// Backfill from the checkpoint first if it exists.
2019-09-19 02:15:41 -07:00
lastCheckpoint , checkpointIndex , err := LastCheckpoint ( w . walDir )
if err != nil && err != record . ErrNotFound {
2019-02-19 08:43:58 -08:00
return errors . Wrap ( err , "tsdb.LastCheckpoint" )
2019-01-18 12:31:36 -08:00
}
if err == nil {
2019-02-13 09:06:03 -08:00
if err = w . readCheckpoint ( lastCheckpoint ) ; err != nil {
2019-02-15 01:54:01 -08:00
return errors . Wrap ( err , "readCheckpoint" )
2019-01-18 12:31:36 -08:00
}
}
2019-02-18 22:46:52 -08:00
w . lastCheckpoint = lastCheckpoint
2019-01-18 12:31:36 -08:00
2019-02-19 23:51:08 -08:00
currentSegment , err := w . findSegmentForIndex ( checkpointIndex )
2019-02-13 06:47:35 -08:00
if err != nil {
return err
}
2019-01-18 04:48:16 -08:00
2019-02-19 23:51:08 -08:00
level . Debug ( w . logger ) . Log ( "msg" , "tailing WAL" , "lastCheckpoint" , lastCheckpoint , "checkpointIndex" , checkpointIndex , "currentSegment" , currentSegment , "lastSegment" , lastSegment )
2019-02-15 01:54:01 -08:00
for ! isClosed ( w . quit ) {
2019-02-13 09:06:03 -08:00
w . currentSegmentMetric . Set ( float64 ( currentSegment ) )
2019-03-05 04:21:11 -08:00
level . Debug ( w . logger ) . Log ( "msg" , "processing segment" , "currentSegment" , currentSegment )
2019-01-18 04:48:16 -08:00
// On start, after reading the existing WAL for series records, we have a pointer to what is the latest segment.
// On subsequent calls to this function, currentSegment will have been incremented and we should open that segment.
2019-03-05 04:21:11 -08:00
if err := w . watch ( currentSegment , currentSegment >= lastSegment ) ; err != nil {
2019-02-13 06:47:35 -08:00
return err
2019-01-18 04:48:16 -08:00
}
2019-02-19 23:51:08 -08:00
// For testing: stop when you hit a specific segment.
2019-09-19 02:15:41 -07:00
if currentSegment == w . MaxSegment {
2019-02-19 20:03:41 -08:00
return nil
}
2019-02-13 09:06:03 -08:00
currentSegment ++
2019-01-18 04:48:16 -08:00
}
2019-02-15 01:54:01 -08:00
return nil
2018-09-07 14:26:04 -07:00
}
2019-02-14 02:02:54 -08:00
// findSegmentForIndex finds the first segment greater than or equal to index.
2019-09-19 02:15:41 -07:00
func ( w * Watcher ) findSegmentForIndex ( index int ) ( int , error ) {
2019-04-09 02:52:44 -07:00
refs , err := w . segments ( w . walDir )
2019-03-05 04:21:11 -08:00
if err != nil {
2019-07-15 09:52:03 -07:00
return - 1 , err
2019-03-05 04:21:11 -08:00
}
for _ , r := range refs {
if r >= index {
return r , nil
}
}
return - 1 , errors . New ( "failed to find segment for index" )
}
2019-09-19 02:15:41 -07:00
func ( w * Watcher ) firstAndLast ( ) ( int , int , error ) {
2019-04-09 02:52:44 -07:00
refs , err := w . segments ( w . walDir )
2019-03-05 04:21:11 -08:00
if err != nil {
2019-07-15 09:52:03 -07:00
return - 1 , - 1 , err
2019-03-05 04:21:11 -08:00
}
if len ( refs ) == 0 {
return - 1 , - 1 , nil
}
return refs [ 0 ] , refs [ len ( refs ) - 1 ] , nil
}
// Copied from tsdb/wal/wal.go so we do not have to open a WAL.
// Plan is to move WAL watcher to TSDB and dedupe these implementations.
2019-09-19 02:15:41 -07:00
func ( w * Watcher ) segments ( dir string ) ( [ ] int , error ) {
2019-04-09 02:52:44 -07:00
files , err := fileutil . ReadDir ( dir )
2019-02-13 06:47:35 -08:00
if err != nil {
2019-03-05 04:21:11 -08:00
return nil , err
2019-02-13 06:47:35 -08:00
}
var refs [ ] int
var last int
for _ , fn := range files {
k , err := strconv . Atoi ( fn )
if err != nil {
continue
}
if len ( refs ) > 0 && k > last + 1 {
2019-03-05 04:21:11 -08:00
return nil , errors . New ( "segments are not sequential" )
2019-02-13 06:47:35 -08:00
}
refs = append ( refs , k )
last = k
}
2019-02-13 17:11:17 -08:00
sort . Ints ( refs )
2019-02-13 06:47:35 -08:00
2019-03-05 04:21:11 -08:00
return refs , nil
2019-02-13 06:47:35 -08:00
}
2019-02-19 23:51:08 -08:00
// Use tail true to indicate that the reader is currently on a segment that is
2019-01-18 12:31:36 -08:00
// actively being written to. If false, assume it's a full segment and we're
// replaying it on start to cache the series records.
2019-09-19 02:15:41 -07:00
func ( w * Watcher ) watch ( segmentNum int , tail bool ) error {
segment , err := OpenReadSegment ( SegmentName ( w . walDir , segmentNum ) )
2019-02-12 06:12:37 -08:00
if err != nil {
return err
}
defer segment . Close ( )
2019-09-19 02:15:41 -07:00
reader := NewLiveReader ( w . logger , w . readerMetrics , segment )
2019-01-18 04:48:16 -08:00
readTicker := time . NewTicker ( readPeriod )
2018-09-07 14:26:04 -07:00
defer readTicker . Stop ( )
2019-01-18 04:48:16 -08:00
checkpointTicker := time . NewTicker ( checkpointPeriod )
2018-09-07 14:26:04 -07:00
defer checkpointTicker . Stop ( )
2019-01-18 04:48:16 -08:00
segmentTicker := time . NewTicker ( segmentCheckPeriod )
defer segmentTicker . Stop ( )
2019-02-12 06:12:37 -08:00
2019-01-18 12:31:36 -08:00
// If we're replaying the segment we need to know the size of the file to know
// when to return from watch and move on to the next segment.
size := int64 ( math . MaxInt64 )
if ! tail {
segmentTicker . Stop ( )
checkpointTicker . Stop ( )
var err error
2019-02-13 09:06:03 -08:00
size , err = getSegmentSize ( w . walDir , segmentNum )
2019-01-18 12:31:36 -08:00
if err != nil {
2019-02-19 08:43:58 -08:00
return errors . Wrap ( err , "getSegmentSize" )
2019-01-18 12:31:36 -08:00
}
}
2018-09-07 14:26:04 -07:00
for {
select {
case <- w . quit :
2019-02-15 01:54:01 -08:00
return nil
2019-01-18 04:48:16 -08:00
2018-09-07 14:26:04 -07:00
case <- checkpointTicker . C :
2019-02-13 11:14:15 -08:00
// Periodically check if there is a new checkpoint so we can garbage
// collect labels. As this is considered an optimisation, we ignore
// errors during checkpoint processing.
if err := w . garbageCollectSeries ( segmentNum ) ; err != nil {
level . Warn ( w . logger ) . Log ( "msg" , "error process checkpoint" , "err" , err )
2018-09-07 14:26:04 -07:00
}
2019-01-18 04:48:16 -08:00
2018-09-07 14:26:04 -07:00
case <- segmentTicker . C :
2019-03-05 04:21:11 -08:00
_ , last , err := w . firstAndLast ( )
2018-09-07 14:26:04 -07:00
if err != nil {
2019-01-18 04:48:16 -08:00
return errors . Wrap ( err , "segments" )
}
2019-01-18 12:31:36 -08:00
// Check if new segments exists.
2019-02-13 09:06:03 -08:00
if last <= segmentNum {
2018-09-07 14:26:04 -07:00
continue
}
2019-01-18 04:48:16 -08:00
2019-02-19 20:03:41 -08:00
err = w . readSegment ( reader , segmentNum , tail )
2019-02-15 01:54:01 -08:00
// Ignore errors reading to end of segment whilst replaying the WAL.
if ! tail {
if err != nil && err != io . EOF {
level . Warn ( w . logger ) . Log ( "msg" , "ignoring error reading to end of segment, may have dropped data" , "err" , err )
} else if reader . Offset ( ) != size {
level . Warn ( w . logger ) . Log ( "msg" , "expected to have read whole segment, may have dropped data" , "segment" , segmentNum , "read" , reader . Offset ( ) , "size" , size )
}
return nil
}
// Otherwise, when we are tailing, non-EOFs are fatal.
if err != io . EOF {
return err
2018-09-07 14:26:04 -07:00
}
2019-01-18 04:48:16 -08:00
return nil
2018-09-07 14:26:04 -07:00
case <- readTicker . C :
2019-02-19 20:03:41 -08:00
err = w . readSegment ( reader , segmentNum , tail )
2019-02-15 01:54:01 -08:00
// Ignore all errors reading to end of segment whilst replaying the WAL.
if ! tail {
if err != nil && err != io . EOF {
level . Warn ( w . logger ) . Log ( "msg" , "ignoring error reading to end of segment, may have dropped data" , "segment" , segmentNum , "err" , err )
} else if reader . Offset ( ) != size {
level . Warn ( w . logger ) . Log ( "msg" , "expected to have read whole segment, may have dropped data" , "segment" , segmentNum , "read" , reader . Offset ( ) , "size" , size )
}
2019-01-18 12:31:36 -08:00
return nil
}
2019-02-14 02:02:54 -08:00
2019-02-15 01:54:01 -08:00
// Otherwise, when we are tailing, non-EOFs are fatal.
if err != io . EOF {
2019-02-14 02:02:54 -08:00
return err
}
2018-09-07 14:26:04 -07:00
}
}
}
2019-09-19 02:15:41 -07:00
func ( w * Watcher ) garbageCollectSeries ( segmentNum int ) error {
dir , _ , err := LastCheckpoint ( w . walDir )
if err != nil && err != record . ErrNotFound {
2019-02-13 11:14:15 -08:00
return errors . Wrap ( err , "tsdb.LastCheckpoint" )
}
2019-02-18 22:46:52 -08:00
if dir == "" || dir == w . lastCheckpoint {
2019-02-13 11:14:15 -08:00
return nil
}
2019-02-18 22:46:52 -08:00
w . lastCheckpoint = dir
2019-02-13 11:14:15 -08:00
index , err := checkpointNum ( dir )
if err != nil {
return errors . Wrap ( err , "error parsing checkpoint filename" )
}
if index >= segmentNum {
level . Debug ( w . logger ) . Log ( "msg" , "current segment is behind the checkpoint, skipping reading of checkpoint" , "current" , fmt . Sprintf ( "%08d" , segmentNum ) , "checkpoint" , dir )
return nil
}
level . Debug ( w . logger ) . Log ( "msg" , "new checkpoint detected" , "new" , dir , "currentSegment" , segmentNum )
if err = w . readCheckpoint ( dir ) ; err != nil {
return errors . Wrap ( err , "readCheckpoint" )
}
// Clear series with a checkpoint or segment index # lower than the checkpoint we just read.
w . writer . SeriesReset ( index )
return nil
}
2019-09-19 02:15:41 -07:00
func ( w * Watcher ) readSegment ( r * LiveReader , segmentNum int , tail bool ) error {
2019-02-19 20:03:41 -08:00
var (
2019-09-19 02:15:41 -07:00
dec record . Decoder
series [ ] record . RefSeries
samples [ ] record . RefSample
send [ ] record . RefSample
2019-02-19 20:03:41 -08:00
)
2019-01-18 04:48:16 -08:00
for r . Next ( ) && ! isClosed ( w . quit ) {
2019-02-19 20:03:41 -08:00
rec := r . Record ( )
w . recordsReadMetric . WithLabelValues ( recordType ( dec . Type ( rec ) ) ) . Inc ( )
switch dec . Type ( rec ) {
2019-09-19 02:15:41 -07:00
case record . Series :
2019-02-19 20:03:41 -08:00
series , err := dec . Series ( rec , series [ : 0 ] )
if err != nil {
w . recordDecodeFailsMetric . Inc ( )
return err
}
w . writer . StoreSeries ( series , segmentNum )
2019-09-19 02:15:41 -07:00
case record . Samples :
2019-02-19 20:03:41 -08:00
// If we're not tailing a segment we can ignore any samples records we see.
// This speeds up replay of the WAL by > 10x.
if ! tail {
break
}
samples , err := dec . Samples ( rec , samples [ : 0 ] )
if err != nil {
w . recordDecodeFailsMetric . Inc ( )
return err
}
for _ , s := range samples {
2019-09-19 02:15:41 -07:00
if s . T > w . StartTime {
2019-02-19 20:03:41 -08:00
send = append ( send , s )
}
}
if len ( send ) > 0 {
// Blocks until the sample is sent to all remote write endpoints or closed (because enqueue blocks).
w . writer . Append ( send )
2019-06-27 11:48:21 -07:00
send = send [ : 0 ]
2019-02-19 20:03:41 -08:00
}
2019-09-19 02:15:41 -07:00
case record . Tombstones :
2019-02-19 20:03:41 -08:00
// noop
2019-09-19 02:15:41 -07:00
case record . Invalid :
2019-02-19 20:03:41 -08:00
return errors . New ( "invalid record" )
default :
w . recordDecodeFailsMetric . Inc ( )
return errors . New ( "unknown TSDB record type" )
2018-09-07 14:26:04 -07:00
}
}
2019-01-18 04:48:16 -08:00
return r . Err ( )
}
2018-09-07 14:26:04 -07:00
2019-09-19 02:15:41 -07:00
func recordType ( rt record . Type ) string {
2019-02-13 11:14:15 -08:00
switch rt {
2019-09-19 02:15:41 -07:00
case record . Invalid :
2019-02-13 11:14:15 -08:00
return "invalid"
2019-09-19 02:15:41 -07:00
case record . Series :
2019-02-13 11:14:15 -08:00
return "series"
2019-09-19 02:15:41 -07:00
case record . Samples :
2019-02-13 11:14:15 -08:00
return "samples"
2019-09-19 02:15:41 -07:00
case record . Tombstones :
2019-02-13 11:14:15 -08:00
return "tombstones"
default :
2019-03-04 08:33:35 -08:00
return "unknown"
2019-02-13 11:14:15 -08:00
}
}
2019-01-18 04:48:16 -08:00
// Read all the series records from a Checkpoint directory.
2019-09-19 02:15:41 -07:00
func ( w * Watcher ) readCheckpoint ( checkpointDir string ) error {
2019-02-19 08:43:58 -08:00
level . Debug ( w . logger ) . Log ( "msg" , "reading checkpoint" , "dir" , checkpointDir )
2019-02-13 09:06:03 -08:00
index , err := checkpointNum ( checkpointDir )
if err != nil {
2019-02-19 08:43:58 -08:00
return errors . Wrap ( err , "checkpointNum" )
2019-02-13 09:06:03 -08:00
}
2019-04-09 02:52:44 -07:00
// Ensure we read the whole contents of every segment in the checkpoint dir.
segs , err := w . segments ( checkpointDir )
2018-09-07 14:26:04 -07:00
if err != nil {
2019-04-09 02:52:44 -07:00
return errors . Wrap ( err , "Unable to get segments checkpoint dir" )
2018-09-07 14:26:04 -07:00
}
2019-04-09 02:52:44 -07:00
for _ , seg := range segs {
size , err := getSegmentSize ( checkpointDir , seg )
if err != nil {
return errors . Wrap ( err , "getSegmentSize" )
}
2018-09-07 14:26:04 -07:00
2019-09-19 02:15:41 -07:00
sr , err := OpenReadSegment ( SegmentName ( checkpointDir , seg ) )
2019-04-09 02:52:44 -07:00
if err != nil {
return errors . Wrap ( err , "unable to open segment" )
}
defer sr . Close ( )
2018-09-07 14:26:04 -07:00
2019-09-19 02:15:41 -07:00
r := NewLiveReader ( w . logger , w . readerMetrics , sr )
2019-04-09 02:52:44 -07:00
if err := w . readSegment ( r , index , false ) ; err != io . EOF && err != nil {
return errors . Wrap ( err , "readSegment" )
}
2019-02-13 06:47:35 -08:00
2019-04-09 02:52:44 -07:00
if r . Offset ( ) != size {
return fmt . Errorf ( "readCheckpoint wasn't able to read all data from the checkpoint %s/%08d, size: %d, totalRead: %d" , checkpointDir , seg , size , r . Offset ( ) )
}
2019-01-18 12:31:36 -08:00
}
2019-02-13 06:47:35 -08:00
2019-04-09 02:52:44 -07:00
level . Debug ( w . logger ) . Log ( "msg" , "read series references from checkpoint" , "checkpoint" , checkpointDir )
2019-01-18 04:48:16 -08:00
return nil
2018-09-07 14:26:04 -07:00
}
2019-01-18 04:48:16 -08:00
func checkpointNum ( dir string ) ( int , error ) {
2018-09-07 14:26:04 -07:00
// Checkpoint dir names are in the format checkpoint.000001
2019-07-15 09:53:58 -07:00
// dir may contain a hidden directory, so only check the base directory
chunks := strings . Split ( path . Base ( dir ) , "." )
2018-09-07 14:26:04 -07:00
if len ( chunks ) != 2 {
2019-01-18 04:48:16 -08:00
return 0 , errors . Errorf ( "invalid checkpoint dir string: %s" , dir )
2018-09-07 14:26:04 -07:00
}
2019-01-18 04:48:16 -08:00
result , err := strconv . Atoi ( chunks [ 1 ] )
if err != nil {
return 0 , errors . Errorf ( "invalid checkpoint dir string: %s" , dir )
}
return result , nil
2018-09-07 14:26:04 -07:00
}
// Get size of segment.
func getSegmentSize ( dir string , index int ) ( int64 , error ) {
i := int64 ( - 1 )
2019-09-19 02:15:41 -07:00
fi , err := os . Stat ( SegmentName ( dir , index ) )
2018-09-07 14:26:04 -07:00
if err == nil {
i = fi . Size ( )
}
return i , err
}
func isClosed ( c chan struct { } ) bool {
select {
case <- c :
return true
default :
return false
}
}