diff --git a/CHANGELOG.md b/CHANGELOG.md index 49a3affef..cbd749a9e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,17 +1,17 @@ -## v2.0.0-beta.4 / 2017-09-14 +## v2.0.0-beta.5 / 2017-09-21 This release includes numerous changes to the new storage layer. The main changes are: -* [CHANGES] Single, compacted write ahead log -* [CHANGES] Single in-memory block with garbage collection -* [ENHANCEMENTS] Cache series dropped via `metric_relabel_configs` -* [ENHANCEMENTS] Pool byte buffers for scraping +* [BUGFIX] Remove deadlock on startup when restoring WAL +* [BUGFIX] Fix semantical races resulting in invalid persisted files +* [BUGFIX] Correctly read back WAL in certain edge cases +* [BUGFIX] Prevent crashes caused by changing metric representations in target's /metrics +* [ENHANCEMENT] Overall memory usage reduction +* [ENHANCEMENT] Serve debugging endpoints while TSDB is loading +* [ENHANCEMENT] Healthy endpoint correctly reflects liveness during startup +* [ENHANCEMENT] Switch to consistent usage of go-kit/log -Overall the changes achieve a baseline reduction in memory consumption and reduce -peak memory usage by 30-40% compared to the 2.0.0-beta.2 release. - -This release requires a clean storage directory and is not compatible with files -created by previous beta releases. +This release may have issues with files written by previous beta releases. 
## 1.7.1 / 2017-06-12 diff --git a/VERSION b/VERSION index 20c4e2607..ec7f186d3 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.0-beta.4 +2.0.0-beta.5 diff --git a/vendor/github.com/prometheus/tsdb/compact.go b/vendor/github.com/prometheus/tsdb/compact.go index a3bc7d17a..17a4da041 100644 --- a/vendor/github.com/prometheus/tsdb/compact.go +++ b/vendor/github.com/prometheus/tsdb/compact.go @@ -457,7 +457,7 @@ func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta, indexr := b.Index() - all, err := indexr.Postings("", "") + all, err := indexr.Postings(allPostingsKey.Name, allPostingsKey.Value) if err != nil { return err } diff --git a/vendor/github.com/prometheus/tsdb/head.go b/vendor/github.com/prometheus/tsdb/head.go index a5ce94e45..72973c46c 100644 --- a/vendor/github.com/prometheus/tsdb/head.go +++ b/vendor/github.com/prometheus/tsdb/head.go @@ -190,6 +190,10 @@ func (h *Head) ReadWAL() error { r := h.wal.Reader() mint := h.MinTime() + // Track number of samples that referenced a series we don't know about + // for error reporting. 
+ var unknownRefs int + seriesFunc := func(series []RefSeries) error { for _, s := range series { h.getOrCreateWithID(s.Ref, s.Labels.Hash(), s.Labels) @@ -207,7 +211,7 @@ func (h *Head) ReadWAL() error { } ms := h.series.getByID(s.Ref) if ms == nil { - h.logger.Log("msg", "unknown series reference in WAL", "ref", s.Ref) + unknownRefs++ continue } _, chunkCreated := ms.append(s.T, s.V) @@ -230,6 +234,10 @@ func (h *Head) ReadWAL() error { return nil } + if unknownRefs > 0 { + h.logger.Log("msg", "unknown series references in WAL samples", "count", unknownRefs) + } + if err := r.Read(seriesFunc, samplesFunc, deletesFunc); err != nil { return errors.Wrap(err, "consume WAL") } @@ -267,12 +275,10 @@ func (h *Head) Truncate(mint int64) error { start = time.Now() - p, err := h.indexRange(mint, math.MaxInt64).Postings("", "") - if err != nil { - return err + keep := func(id uint64) bool { + return h.series.getByID(id) != nil } - - if err := h.wal.Truncate(mint, p); err == nil { + if err := h.wal.Truncate(mint, keep); err == nil { h.logger.Log("msg", "WAL truncation completed", "duration", time.Since(start)) } else { h.logger.Log("msg", "WAL truncation failed", "err", err, "duration", time.Since(start)) @@ -1038,8 +1044,6 @@ func (s *stripeSeries) getOrSet(hash uint64, series *memSeries) (*memSeries, boo return prev, false } s.hashes[i].set(hash, series) - - s.hashes[i][hash] = append(s.hashes[i][hash], series) s.locks[i].Unlock() i = series.ref & stripeMask diff --git a/vendor/github.com/prometheus/tsdb/postings.go b/vendor/github.com/prometheus/tsdb/postings.go index 97a29ab19..0e51b221b 100644 --- a/vendor/github.com/prometheus/tsdb/postings.go +++ b/vendor/github.com/prometheus/tsdb/postings.go @@ -45,7 +45,7 @@ func (p *memPostings) get(name, value string) Postings { return newListPostings(l) } -var allLabel = labels.Label{} +var allPostingsKey = labels.Label{} // add adds a document to the index. The caller has to ensure that no // term argument appears twice. 
@@ -53,13 +53,36 @@ func (p *memPostings) add(id uint64, lset labels.Labels) { p.mtx.Lock() for _, l := range lset { - p.m[l] = append(p.m[l], id) + p.addFor(id, l) } - p.m[allLabel] = append(p.m[allLabel], id) + p.addFor(id, allPostingsKey) p.mtx.Unlock() } +func (p *memPostings) addFor(id uint64, l labels.Label) { + list := append(p.m[l], id) + p.m[l] = list + + // There is no guarantee that no higher ID was inserted before as they may + // be generated independently before adding them to postings. + // We repair order violations on insert. The invariant is that the first n-1 + // items in the list are already sorted. + for i := len(list) - 1; i >= 1; i-- { + if list[i] >= list[i-1] { + break + } + list[i], list[i-1] = list[i-1], list[i] + } +} + +func expandPostings(p Postings) (res []uint64, err error) { + for p.Next() { + res = append(res, p.At()) + } + return res, p.Err() +} + // Postings provides iterative access over a postings list. type Postings interface { // Next advances the iterator and returns true if another value was found. diff --git a/vendor/github.com/prometheus/tsdb/wal.go b/vendor/github.com/prometheus/tsdb/wal.go index 27984ea0c..c52bca86d 100644 --- a/vendor/github.com/prometheus/tsdb/wal.go +++ b/vendor/github.com/prometheus/tsdb/wal.go @@ -71,7 +71,7 @@ type WAL interface { LogSeries([]RefSeries) error LogSamples([]RefSample) error LogDeletes([]Stone) error - Truncate(int64, Postings) error + Truncate(mint int64, keep func(uint64) bool) error Close() error } @@ -87,7 +87,7 @@ func (w nopWAL) Reader() WALReader { return w } func (nopWAL) LogSeries([]RefSeries) error { return nil } func (nopWAL) LogSamples([]RefSample) error { return nil } func (nopWAL) LogDeletes([]Stone) error { return nil } -func (nopWAL) Truncate(int64, Postings) error { return nil } +func (nopWAL) Truncate(int64, func(uint64) bool) error { return nil } func (nopWAL) Close() error { return nil } // WALReader reads entries from a WAL. 
@@ -272,8 +272,9 @@ func (w *SegmentWAL) putBuffer(b *encbuf) { w.buffers.Put(b) } -// Truncate deletes the values prior to mint and the series entries not in p. -func (w *SegmentWAL) Truncate(mint int64, p Postings) error { +// Truncate deletes the values prior to mint and the series which the keep function +// does not indicate to preserve. +func (w *SegmentWAL) Truncate(mint int64, keep func(uint64) bool) error { // The last segment is always active. if len(w.files) < 2 { return nil @@ -314,7 +315,6 @@ func (w *SegmentWAL) Truncate(mint int64, p Postings) error { activeSeries = []RefSeries{} ) -Loop: for r.next() { rt, flag, byt := r.at() @@ -328,10 +328,7 @@ Loop: activeSeries = activeSeries[:0] for _, s := range series { - if !p.Seek(s.Ref) { - break Loop - } - if p.At() == s.Ref { + if keep(s.Ref) { activeSeries = append(activeSeries, s) } } diff --git a/vendor/vendor.json b/vendor/vendor.json index 2b639724e..2ab1a537a 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -871,22 +871,22 @@ "revisionTime": "2016-04-11T19:08:41Z" }, { - "checksumSHA1": "B5ndMoK8lqgFJ8xUZ/0V4zCpUw0=", + "checksumSHA1": "evkeOdR0mTFS7yyREas6oa1QvHY=", "path": "github.com/prometheus/tsdb", - "revision": "162a48e4f2c6e486a0ebf61cf9cea73a8023ef0a", - "revisionTime": "2017-09-19T08:20:19Z" + "revision": "69f105f4f9478e929ef2a7d7553a7558b1de5c84", + "revisionTime": "2017-09-21T12:57:51Z" }, { "checksumSHA1": "Gua979gmISm4cJP/fR2hL8m5To8=", "path": "github.com/prometheus/tsdb/chunks", - "revision": "162a48e4f2c6e486a0ebf61cf9cea73a8023ef0a", - "revisionTime": "2017-09-19T08:20:19Z" + "revision": "69f105f4f9478e929ef2a7d7553a7558b1de5c84", + "revisionTime": "2017-09-21T12:57:51Z" }, { "checksumSHA1": "zhmlvc322RH1L3l9DaA9d/HVVWs=", "path": "github.com/prometheus/tsdb/labels", - "revision": "162a48e4f2c6e486a0ebf61cf9cea73a8023ef0a", - "revisionTime": "2017-09-19T08:20:19Z" + "revision": "69f105f4f9478e929ef2a7d7553a7558b1de5c84", + "revisionTime": "2017-09-21T12:57:51Z" }, { 
"checksumSHA1": "5SYLEhADhdBVZAGPVHWggQl7H8k=",