diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b95d5e4a4..ee9a6dd775 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,14 @@ +## 2.4.2 / 2018-09-21 + + The last release didn't have the bugfix included due to a vendoring error. + + * [BUGFIX] Handle WAL corruptions properly prometheus/tsdb#389 + * [BUGFIX] Handle WAL migrations correctly on Windows prometheus/tsdb#392 + ## 2.4.1 / 2018-09-19 * [ENHANCEMENT] New TSDB metrics prometheus/tsdb#375 prometheus/tsdb#363 * [BUGFIX] Render UI correctly for Windows #4616 -* [BUGFIX] Handle WAL corruptions properly prometheus/tsdb#389 ## 2.4.0 / 2018-09-11 diff --git a/VERSION b/VERSION index 005119baaa..8e8299dcc0 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.4.1 +2.4.2 diff --git a/vendor/github.com/prometheus/tsdb/block.go b/vendor/github.com/prometheus/tsdb/block.go index 342a8d0205..7c4ccf0dce 100644 --- a/vendor/github.com/prometheus/tsdb/block.go +++ b/vendor/github.com/prometheus/tsdb/block.go @@ -504,10 +504,13 @@ Outer: func (pb *Block) CleanTombstones(dest string, c Compactor) (*ulid.ULID, error) { numStones := 0 - pb.tombstones.Iter(func(id uint64, ivs Intervals) error { + if err := pb.tombstones.Iter(func(id uint64, ivs Intervals) error { numStones += len(ivs) return nil - }) + }); err != nil { + // This should never happen, as the iteration function only returns nil. + panic(err) + } if numStones == 0 { return nil, nil } diff --git a/vendor/github.com/prometheus/tsdb/checkpoint.go b/vendor/github.com/prometheus/tsdb/checkpoint.go index d988d35615..f45f3791f0 100644 --- a/vendor/github.com/prometheus/tsdb/checkpoint.go +++ b/vendor/github.com/prometheus/tsdb/checkpoint.go @@ -109,6 +109,10 @@ func Checkpoint(logger log.Logger, w *wal.WAL, m, n int, keep func(id uint64) bo stats := &CheckpointStats{} var sr io.Reader + // We close everything explicitly because Windows needs files to be + // closed before being deleted. But we also have defer so that we close + // files if there is an error somewhere. 
+ var closers []io.Closer { lastFn, k, err := LastCheckpoint(w.Dir()) if err != nil && err != ErrNotFound { @@ -126,6 +130,7 @@ func Checkpoint(logger log.Logger, w *wal.WAL, m, n int, keep func(id uint64) bo return nil, errors.Wrap(err, "open last checkpoint") } defer last.Close() + closers = append(closers, last) sr = last } @@ -134,6 +139,7 @@ func Checkpoint(logger log.Logger, w *wal.WAL, m, n int, keep func(id uint64) bo return nil, errors.Wrap(err, "create segment reader") } defer segsr.Close() + closers = append(closers, segsr) if sr != nil { sr = io.MultiReader(sr, segsr) @@ -263,6 +269,9 @@ func Checkpoint(logger log.Logger, w *wal.WAL, m, n int, keep func(id uint64) bo if err := fileutil.Replace(cpdirtmp, cpdir); err != nil { return nil, errors.Wrap(err, "rename checkpoint directory") } + if err := closeAll(closers...); err != nil { + return stats, errors.Wrap(err, "close opened files") + } if err := w.Truncate(n + 1); err != nil { // If truncating fails, we'll just try again at the next checkpoint. // Leftover segments will just be ignored in the future if there's a checkpoint diff --git a/vendor/github.com/prometheus/tsdb/compact.go b/vendor/github.com/prometheus/tsdb/compact.go index 1b8f20fa53..3f5fa367c6 100644 --- a/vendor/github.com/prometheus/tsdb/compact.go +++ b/vendor/github.com/prometheus/tsdb/compact.go @@ -452,6 +452,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe if err != nil { return errors.Wrap(err, "open chunk writer") } + defer chunkw.Close() // Record written chunk sizes on level 1 compactions. 
if meta.Compaction.Level == 1 { chunkw = &instrumentedChunkWriter{ @@ -466,6 +467,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe if err != nil { return errors.Wrap(err, "open index writer") } + defer indexw.Close() if err := c.populateBlock(blocks, meta, indexw, chunkw); err != nil { return errors.Wrap(err, "write compaction") @@ -475,6 +477,10 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe return errors.Wrap(err, "write merged meta") } + // We are explicitly closing them here to check for error even + // though these are covered under defer. This is because in Windows, + // you cannot delete these unless they are closed and the defer is to + // make sure they are closed if the function exits due to an error above. if err = chunkw.Close(); err != nil { return errors.Wrap(err, "close chunk writer") } @@ -626,7 +632,9 @@ func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta, } for _, chk := range chks { - c.chunkPool.Put(chk.Chunk) + if err := c.chunkPool.Put(chk.Chunk); err != nil { + return errors.Wrap(err, "put chunk") + } } for _, l := range lset { diff --git a/vendor/github.com/prometheus/tsdb/fileutil/fileutil.go b/vendor/github.com/prometheus/tsdb/fileutil/fileutil.go index 2158bfd265..1154e7307b 100644 --- a/vendor/github.com/prometheus/tsdb/fileutil/fileutil.go +++ b/vendor/github.com/prometheus/tsdb/fileutil/fileutil.go @@ -48,7 +48,7 @@ func Rename(from, to string) error { // It is not atomic. 
func Replace(from, to string) error { if err := os.RemoveAll(to); err != nil { - return nil + return err } if err := os.Rename(from, to); err != nil { return err diff --git a/vendor/github.com/prometheus/tsdb/head.go b/vendor/github.com/prometheus/tsdb/head.go index b342c8f57a..bc8cdfbe4b 100644 --- a/vendor/github.com/prometheus/tsdb/head.go +++ b/vendor/github.com/prometheus/tsdb/head.go @@ -793,7 +793,7 @@ func (h *Head) gc() { symbols := make(map[string]struct{}) values := make(map[string]stringset, len(h.values)) - h.postings.Iter(func(t labels.Label, _ index.Postings) error { + if err := h.postings.Iter(func(t labels.Label, _ index.Postings) error { symbols[t.Name] = struct{}{} symbols[t.Value] = struct{}{} @@ -804,7 +804,10 @@ func (h *Head) gc() { } ss.set(t.Value) return nil - }) + }); err != nil { + // This should never happen, as the iteration function only returns nil. + panic(err) + } h.symMtx.Lock() diff --git a/vendor/github.com/prometheus/tsdb/index/index.go b/vendor/github.com/prometheus/tsdb/index/index.go index c58ff6ea83..c75796d734 100644 --- a/vendor/github.com/prometheus/tsdb/index/index.go +++ b/vendor/github.com/prometheus/tsdb/index/index.go @@ -271,7 +271,9 @@ func (w *Writer) AddSeries(ref uint64, lset labels.Labels, chunks ...chunks.Meta } // We add padding to 16 bytes to increase the addressable space we get through 4 byte // series references. 
- w.addPadding(16) + if err := w.addPadding(16); err != nil { + return errors.Errorf("failed to write padding bytes: %v", err) + } if w.pos%16 != 0 { return errors.Errorf("series write not 16-byte aligned at %d", w.pos) diff --git a/vendor/github.com/prometheus/tsdb/tombstones.go b/vendor/github.com/prometheus/tsdb/tombstones.go index d4a3d0ef1b..ad820a05fb 100644 --- a/vendor/github.com/prometheus/tsdb/tombstones.go +++ b/vendor/github.com/prometheus/tsdb/tombstones.go @@ -16,12 +16,13 @@ package tsdb import ( "encoding/binary" "fmt" - "github.com/pkg/errors" "io" "io/ioutil" "os" "path/filepath" "sync" + + "github.com/pkg/errors" ) const tombstoneFilename = "tombstones" @@ -72,7 +73,7 @@ func writeTombstoneFile(dir string, tr TombstoneReader) error { mw := io.MultiWriter(f, hash) - tr.Iter(func(ref uint64, ivs Intervals) error { + if err := tr.Iter(func(ref uint64, ivs Intervals) error { for _, iv := range ivs { buf.reset() @@ -86,7 +87,9 @@ func writeTombstoneFile(dir string, tr TombstoneReader) error { } } return nil - }) + }); err != nil { + return fmt.Errorf("error writing tombstones: %v", err) + } _, err = f.Write(hash.Sum(nil)) if err != nil { diff --git a/vendor/github.com/prometheus/tsdb/wal.go b/vendor/github.com/prometheus/tsdb/wal.go index ff978766cc..59206d8d07 100644 --- a/vendor/github.com/prometheus/tsdb/wal.go +++ b/vendor/github.com/prometheus/tsdb/wal.go @@ -723,6 +723,13 @@ func (w *SegmentWAL) run(interval time.Duration) { // Close syncs all data and closes the underlying resources. func (w *SegmentWAL) Close() error { + // Make sure you can call Close() multiple times. + select { + case <-w.stopc: + return nil // Already closed. + default: + } + close(w.stopc) <-w.donec @@ -735,10 +742,12 @@ func (w *SegmentWAL) Close() error { // On opening, a WAL must be fully consumed once. Afterwards // only the current segment will still be open. 
if hf := w.head(); hf != nil { - return errors.Wrapf(hf.Close(), "closing WAL head %s", hf.Name()) + if err := hf.Close(); err != nil { + return errors.Wrapf(err, "closing WAL head %s", hf.Name()) + } } - return w.dirFile.Close() + return errors.Wrapf(w.dirFile.Close(), "closing WAL dir %s", w.dirFile.Name()) } const ( @@ -1260,6 +1269,7 @@ func MigrateWAL(logger log.Logger, dir string) (err error) { if err != nil { return errors.Wrap(err, "open new WAL") } + // It should've already been closed as part of the previous finalization. // Do it once again in case of prior errors. defer func() { @@ -1306,6 +1316,12 @@ func MigrateWAL(logger log.Logger, dir string) (err error) { if err != nil { return errors.Wrap(err, "write new entries") } + // We explicitly close even when there is a defer for Windows to be + // able to delete it. The defer is in place to close it in-case there + // are errors above. + if err := w.Close(); err != nil { + return errors.Wrap(err, "close old WAL") + } if err := repl.Close(); err != nil { return errors.Wrap(err, "close new WAL") } diff --git a/vendor/github.com/prometheus/tsdb/wal/wal.go b/vendor/github.com/prometheus/tsdb/wal/wal.go index 0e95ba2a79..aa52738fa2 100644 --- a/vendor/github.com/prometheus/tsdb/wal/wal.go +++ b/vendor/github.com/prometheus/tsdb/wal/wal.go @@ -255,15 +255,17 @@ Loop: // Repair attempts to repair the WAL based on the error. // It discards all data after the corruption. -func (w *WAL) Repair(err error) error { +func (w *WAL) Repair(origErr error) error { // We could probably have a mode that only discards torn records right around // the corruption to preserve as data much as possible. // But that's not generally applicable if the records have any kind of causality. // Maybe as an extra mode in the future if mid-WAL corruptions become // a frequent concern. + err := errors.Cause(origErr) // So that we can pick up errors even if wrapped. 
+ cerr, ok := err.(*CorruptionErr) if !ok { - return errors.New("cannot handle error") + return errors.Wrap(origErr, "cannot handle error") } if cerr.Segment < 0 { return errors.New("corruption error does not specify position") @@ -283,6 +285,15 @@ func (w *WAL) Repair(err error) error { if s.n <= cerr.Segment { continue } + if w.segment.i == s.n { + // The active segment needs to be removed, + // close it first (Windows!). Can be closed safely + // as we set the current segment to repaired file + // below. + if err := w.segment.Close(); err != nil { + return errors.Wrap(err, "close active segment") + } + } if err := os.Remove(filepath.Join(w.dir, s.s)); err != nil { return errors.Wrap(err, "delete segment") } @@ -310,6 +321,7 @@ func (w *WAL) Repair(err error) error { return errors.Wrap(err, "open segment") } defer f.Close() + r := NewReader(bufio.NewReader(f)) for r.Next() { @@ -317,8 +329,14 @@ func (w *WAL) Repair(err error) error { return errors.Wrap(err, "insert record") } } - // We expect an error here, so nothing to handle. + // We expect an error here from r.Err(), so nothing to handle. + // We explicitly close even when there is a defer for Windows to be + // able to delete it. The defer is in place to close it in-case there + // are errors above. 
+ if err := f.Close(); err != nil { + return errors.Wrap(err, "close corrupted file") + } if err := os.Remove(tmpfn); err != nil { return errors.Wrap(err, "delete corrupted segment") } diff --git a/vendor/vendor.json b/vendor/vendor.json index c145f77eed..8f555776d5 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -841,46 +841,46 @@ "revisionTime": "2016-04-11T19:08:41Z" }, { - "checksumSHA1": "E6jWeQZVEyeMLNq4ceHx9NRw4B8=", + "checksumSHA1": "JLELG+wyXa02O8G/RTLpfPijOE8=", "path": "github.com/prometheus/tsdb", - "revision": "dfcb7d0d5034b97de2abdf5369f4813e7fb7c07c", - "revisionTime": "2018-09-19T06:47:24Z" + "revision": "9c8ca47399a7f53f57b440464c103f5067e9b7b6", + "revisionTime": "2018-09-21T05:31:22Z" }, { "checksumSHA1": "QI0UME2olSr4kH6Z8UkpffM59Mc=", "path": "github.com/prometheus/tsdb/chunkenc", - "revision": "06f01d45ad2ca2853c9dc1a0d5db6c75c8af6a5a", - "revisionTime": "2018-08-07T11:25:08Z" + "revision": "9c8ca47399a7f53f57b440464c103f5067e9b7b6", + "revisionTime": "2018-09-21T05:31:22Z" }, { "checksumSHA1": "+5bPifRe479zdFeTYhZ+CZRLMgw=", "path": "github.com/prometheus/tsdb/chunks", - "revision": "06f01d45ad2ca2853c9dc1a0d5db6c75c8af6a5a", - "revisionTime": "2018-08-07T11:25:08Z" + "revision": "9c8ca47399a7f53f57b440464c103f5067e9b7b6", + "revisionTime": "2018-09-21T05:31:22Z" }, { - "checksumSHA1": "bL3t5K2Q8e1GuM6gy5PAJ05go14=", + "checksumSHA1": "gOTpgLnuc2Ygwof6pCpLVedz00I=", "path": "github.com/prometheus/tsdb/fileutil", - "revision": "06f01d45ad2ca2853c9dc1a0d5db6c75c8af6a5a", - "revisionTime": "2018-08-07T11:25:08Z" + "revision": "9c8ca47399a7f53f57b440464c103f5067e9b7b6", + "revisionTime": "2018-09-21T05:31:22Z" }, { - "checksumSHA1": "AZGFK4UtJe8/j8pHqGTNQ8wu27g=", + "checksumSHA1": "dd5JqbMKbRKI6XlGzrvEAuMUJY4=", "path": "github.com/prometheus/tsdb/index", - "revision": "06f01d45ad2ca2853c9dc1a0d5db6c75c8af6a5a", - "revisionTime": "2018-08-07T11:25:08Z" + "revision": "9c8ca47399a7f53f57b440464c103f5067e9b7b6", + "revisionTime": 
"2018-09-21T05:31:22Z" }, { "checksumSHA1": "Va8HWvOFTwFeewZFadMAOzNGDps=", "path": "github.com/prometheus/tsdb/labels", - "revision": "06f01d45ad2ca2853c9dc1a0d5db6c75c8af6a5a", - "revisionTime": "2018-08-07T11:25:08Z" + "revision": "9c8ca47399a7f53f57b440464c103f5067e9b7b6", + "revisionTime": "2018-09-21T05:31:22Z" }, { - "checksumSHA1": "6GXK7RnUngyM9OT/M2uzv8T3DOY=", + "checksumSHA1": "/NURBB5JOqg7fhWrVq8R8Sl3dAI=", "path": "github.com/prometheus/tsdb/wal", - "revision": "06f01d45ad2ca2853c9dc1a0d5db6c75c8af6a5a", - "revisionTime": "2018-08-07T11:25:08Z" + "revision": "9c8ca47399a7f53f57b440464c103f5067e9b7b6", + "revisionTime": "2018-09-21T05:31:22Z" }, { "checksumSHA1": "5SYLEhADhdBVZAGPVHWggQl7H8k=",