storage: Several optimizations of checkpointing

- checkpointSeriesMapAndHeads accepts a context now to allow
  cancelling.

- If a shutdown is initiated, cancel the ongoing checkpoint. (We will
  create a final checkpoint anyway.)

- Always wait for at least as long as the last checkpoint took before
  starting the next checkpoint (to cap the time spent checkpointing at
  50%; see the sketch below).

- If an error has occurred during checkpointing, don't bother to sync
  the write.

- Make sure the temporary checkpoint file is deleted, even if an error
  has occurred.

- Clean up the checkpoint loop a bit. (The concurrent Timer.Reset(0)
  call might have caused a race.)
beorn7 2017-04-06 16:57:25 +02:00
parent 934d86b936
commit f338d791d2
3 changed files with 93 additions and 23 deletions
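
The 50% cap from the commit message is easiest to see in isolation: remember how long the last checkpoint took and wait at least that long, in addition to the regular interval, before starting the next one. The following is a minimal, hypothetical sketch of that idea only; runCheckpoints, checkpoint and stop are made-up names, and this is not the actual storage code (the real loop in the diff below also handles a "checkpoint now" signal and context cancellation).

```go
package main

import (
	"fmt"
	"time"
)

// runCheckpoints sketches the duty-cycle cap: after each checkpoint, wait at
// least as long as that checkpoint took (and at least the regular interval)
// before starting the next one, so checkpointing consumes at most ~50% of
// the loop's time.
func runCheckpoints(interval time.Duration, checkpoint func() error, stop <-chan struct{}) {
	checkpointTimer := time.NewTimer(interval)
	minTimer := time.NewTimer(0) // Fires immediately for the first iteration.
	defer checkpointTimer.Stop()
	defer minTimer.Stop()

	for {
		select {
		case <-stop:
			return
		case <-minTimer.C:
			// The minimum wait has passed; now wait for the regular interval.
			select {
			case <-stop:
				return
			case <-checkpointTimer.C:
			}
			start := time.Now()
			if err := checkpoint(); err != nil {
				fmt.Println("checkpoint error:", err) // Error handling elided in this sketch.
			}
			took := time.Since(start)
			minTimer.Reset(took) // Next checkpoint no sooner than `took` from now.
			checkpointTimer.Reset(interval)
		}
	}
}

func main() {
	stop := make(chan struct{})
	time.AfterFunc(2*time.Second, func() { close(stop) })
	runCheckpoints(300*time.Millisecond, func() error {
		time.Sleep(200 * time.Millisecond) // Simulated checkpoint work.
		return nil
	}, stop)
}
```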


@@ -15,6 +15,7 @@ package local
import (
"bufio"
"context"
"encoding/binary"
"fmt"
"io"
@@ -626,7 +627,9 @@ func (p *persistence) loadChunkDescs(fp model.Fingerprint, offsetFromEnd int) ([
// NOTE: Above, varint encoding is used consistently although uvarint would have
// made more sense in many cases. This was simply a glitch while designing the
// format.
func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap, fpLocker *fingerprintLocker) (err error) {
func (p *persistence) checkpointSeriesMapAndHeads(
ctx context.Context, fingerprintToSeries *seriesMap, fpLocker *fingerprintLocker,
) (err error) {
log.Info("Checkpointing in-memory metrics and chunks...")
p.checkpointing.Set(1)
defer p.checkpointing.Set(0)
@@ -637,11 +640,16 @@ func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap
}
defer func() {
syncErr := f.Sync()
closeErr := f.Close()
defer os.Remove(p.headsTempFileName()) // Just in case it was left behind.
if err != nil {
// If we already had an error, do not bother to sync,
// just close, ignoring any further error.
f.Close()
return
}
syncErr := f.Sync()
closeErr := f.Close()
err = syncErr
if err != nil {
return
@@ -683,6 +691,11 @@ func (p *persistence) checkpointSeriesMapAndHeads(fingerprintToSeries *seriesMap
var realNumberOfSeries uint64
for m := range iter {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
func() { // Wrapped in function to use defer for unlocking the fp.
fpLocker.Lock(m.fp)
defer fpLocker.Unlock(m.fp)
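
The defer added above combines two of the commit-message bullets: if an earlier error occurred, don't bother to sync, just close; otherwise propagate the sync/close error; and in every case remove the temporary checkpoint file. Below is a rough standalone sketch of the same pattern, with hypothetical names (writeCheckpoint, tmpName, finalName) and an assumed final rename that is not part of the hunk shown above.

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// writeCheckpoint sketches the error handling: write into a temporary file,
// and let a single deferred function decide what cleanup is needed.
func writeCheckpoint(tmpName, finalName string, write func(*os.File) error) (err error) {
	f, err := os.Create(tmpName)
	if err != nil {
		return err
	}
	defer func() {
		// Whatever happens below, make sure the temporary file is gone.
		// After a successful rename this is a harmless no-op.
		defer os.Remove(tmpName)

		if err != nil {
			// Writing already failed: do not bother to sync, just close,
			// ignoring any further error.
			f.Close()
			return
		}
		if err = f.Sync(); err != nil {
			f.Close()
			return
		}
		if err = f.Close(); err != nil {
			return
		}
		// Assumed here for completeness: atomically replace the old file.
		err = os.Rename(tmpName, finalName)
	}()

	return write(f)
}

func main() {
	dir := os.TempDir()
	err := writeCheckpoint(
		filepath.Join(dir, "heads.db.tmp"), // Hypothetical file names.
		filepath.Join(dir, "heads.db"),
		func(f *os.File) error {
			_, werr := f.WriteString("checkpoint payload\n")
			return werr
		},
	)
	fmt.Println("checkpoint result:", err)
}
```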


@@ -15,6 +15,7 @@ package local
import (
"bufio"
"context"
"errors"
"os"
"path/filepath"
@@ -547,6 +548,27 @@ func TestPersistLoadDropChunksType1(t *testing.T) {
testPersistLoadDropChunks(t, 1)
}
func TestCancelCheckpoint(t *testing.T) {
p, closer := newTestPersistence(t, 2)
defer closer.Close()
fpLocker := newFingerprintLocker(10)
sm := newSeriesMap()
s, _ := newMemorySeries(m1, nil, time.Time{})
sm.put(m1.FastFingerprint(), s)
sm.put(m2.FastFingerprint(), s)
sm.put(m3.FastFingerprint(), s)
sm.put(m4.FastFingerprint(), s)
sm.put(m5.FastFingerprint(), s)
ctx, cancel := context.WithCancel(context.Background())
// Cancel right now to avoid races.
cancel()
if err := p.checkpointSeriesMapAndHeads(ctx, sm, fpLocker); err != context.Canceled {
t.Fatalf("expected error %v, got %v", context.Canceled, err)
}
}
func testCheckpointAndLoadSeriesMapAndHeads(t *testing.T, encoding chunk.Encoding) {
p, closer := newTestPersistence(t, encoding)
defer closer.Close()
@@ -584,7 +606,7 @@ func testCheckpointAndLoadSeriesMapAndHeads(t *testing.T, encoding chunk.Encodin
sm.put(m4.FastFingerprint(), s4)
sm.put(m5.FastFingerprint(), s5)
if err := p.checkpointSeriesMapAndHeads(sm, fpLocker); err != nil {
if err := p.checkpointSeriesMapAndHeads(context.Background(), sm, fpLocker); err != nil {
t.Fatal(err)
}


@@ -462,7 +462,9 @@ func (s *MemorySeriesStorage) Stop() error {
<-s.evictStopped
// One final checkpoint of the series map and the head chunks.
if err := s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker); err != nil {
if err := s.persistence.checkpointSeriesMapAndHeads(
context.Background(), s.fpToSeries, s.fpLocker,
); err != nil {
return err
}
if err := s.mapper.checkpoint(); err != nil {
@@ -1421,11 +1423,13 @@ func (s *MemorySeriesStorage) cycleThroughArchivedFingerprints() chan model.Fing
func (s *MemorySeriesStorage) loop() {
checkpointTimer := time.NewTimer(s.checkpointInterval)
checkpointMinTimer := time.NewTimer(0)
var dirtySeriesCount int64
defer func() {
checkpointTimer.Stop()
checkpointMinTimer.Stop()
log.Info("Maintenance loop stopped.")
close(s.loopStopped)
}()
@@ -1433,32 +1437,57 @@ func (s *MemorySeriesStorage) loop() {
memoryFingerprints := s.cycleThroughMemoryFingerprints()
archivedFingerprints := s.cycleThroughArchivedFingerprints()
checkpointCtx, checkpointCancel := context.WithCancel(context.Background())
checkpointNow := make(chan struct{}, 1)
doCheckpoint := func() time.Duration {
start := time.Now()
// We clear this before the checkpoint so that dirtySeriesCount
// is an upper bound.
atomic.StoreInt64(&dirtySeriesCount, 0)
s.dirtySeries.Set(0)
select {
case <-checkpointNow:
// Signal cleared.
default:
// No signal pending.
}
err := s.persistence.checkpointSeriesMapAndHeads(
checkpointCtx, s.fpToSeries, s.fpLocker,
)
if err == context.Canceled {
log.Info("Checkpoint canceled.")
} else if err != nil {
s.persistErrors.Inc()
log.Errorln("Error while checkpointing:", err)
}
return time.Since(start)
}
// Checkpoints can happen concurrently with maintenance so even with heavy
// checkpointing there will still be sufficient progress on maintenance.
checkpointLoopStopped := make(chan struct{})
go func() {
for {
select {
case <-s.loopStopping:
case <-checkpointCtx.Done():
checkpointLoopStopped <- struct{}{}
return
case <-checkpointTimer.C:
// We clear this before the checkpoint so that dirtySeriesCount
// is an upper bound.
atomic.StoreInt64(&dirtySeriesCount, 0)
s.dirtySeries.Set(0)
err := s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker)
if err != nil {
s.persistErrors.Inc()
log.Errorln("Error while checkpointing:", err)
}
// If a checkpoint takes longer than checkpointInterval, unluckily timed
// combination with the Reset(0) call below can lead to a case where a
// time is lurking in C leading to repeated checkpointing without break.
case <-checkpointMinTimer.C:
var took time.Duration
select {
case <-checkpointTimer.C: // Get rid of the lurking time.
default:
case <-checkpointCtx.Done():
checkpointLoopStopped <- struct{}{}
return
case <-checkpointTimer.C:
took = doCheckpoint()
case <-checkpointNow:
if !checkpointTimer.Stop() {
<-checkpointTimer.C
}
took = doCheckpoint()
}
checkpointMinTimer.Reset(took)
checkpointTimer.Reset(s.checkpointInterval)
}
}
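
Two small idioms carry the loop cleanup mentioned in the last commit-message bullet: a Timer that may already have fired has to be stopped and drained before it is reused, and an immediate checkpoint is requested with a non-blocking send on a buffered channel rather than the old, potentially racy Timer.Reset(0). A hedged, standalone sketch of both follows; requestCheckpoint and stopForReuse are illustrative helpers, not functions from the storage package.

```go
package main

import "time"

// requestCheckpoint asks a checkpoint loop to run as soon as its minimum
// wait allows. The channel must have capacity 1; if a request is already
// pending, the extra send is dropped instead of blocking.
func requestCheckpoint(checkpointNow chan<- struct{}) {
	select {
	case checkpointNow <- struct{}{}:
		// Signal sent.
	default:
		// Signal already pending.
	}
}

// stopForReuse stops a timer before it is Reset. If Stop reports that the
// timer already fired, any value still sitting in its channel is drained
// (non-blocking, in case another select already consumed it) so a later
// Reset does not deliver a stale tick.
func stopForReuse(t *time.Timer) {
	if !t.Stop() {
		select {
		case <-t.C:
		default:
		}
	}
}

func main() {
	checkpointNow := make(chan struct{}, 1)
	requestCheckpoint(checkpointNow)
	requestCheckpoint(checkpointNow) // Dropped: one request is already queued.

	t := time.NewTimer(time.Millisecond)
	time.Sleep(5 * time.Millisecond) // Let the timer fire without being read.
	stopForReuse(t)
	t.Reset(time.Hour) // Safe: no stale value left in t.C.
	t.Stop()
}
```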
@@ -1468,6 +1497,7 @@ loop:
for {
select {
case <-s.loopStopping:
checkpointCancel()
break loop
case fp := <-memoryFingerprints:
if s.maintainMemorySeries(fp, model.Now().Add(-s.dropAfter)) {
@@ -1478,10 +1508,15 @@ loop:
// would be counterproductive, as it would slow down chunk persisting even more,
// while in a situation like that, where we are clearly lacking speed of disk
// maintenance, the best we can do for crash recovery is to persist chunks as
// quickly as possible. So only checkpoint if the urgency score is < 1.
// quickly as possible. So only checkpoint if we are not in rushed mode.
if _, rushed := s.getPersistenceUrgencyScore(); !rushed &&
dirty >= int64(s.checkpointDirtySeriesLimit) {
checkpointTimer.Reset(0)
select {
case checkpointNow <- struct{}{}:
// Signal sent.
default:
// Signal already pending.
}
}
}
case fp := <-archivedFingerprints: