From 5579efbd5bb3a478ae1d849ab3ae6024b4ab43eb Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Sun, 14 May 2017 14:36:26 +0530 Subject: [PATCH 01/25] Initial implentation of Deletes on persistedBlock Very much a WIP Signed-off-by: Goutham Veeramachaneni --- block.go | 162 ++++++++++++++++++++++++++++++++++++++++++++ chunks.go | 64 +++++++++++++++++ compact.go | 9 +++ encoding_helpers.go | 6 +- head.go | 20 +++++- index.go | 29 ++++++++ 6 files changed, 285 insertions(+), 5 deletions(-) diff --git a/block.go b/block.go index 72ebb1f8a8..40126a3624 100644 --- a/block.go +++ b/block.go @@ -14,14 +14,17 @@ package tsdb import ( + "bufio" "encoding/json" "fmt" "io/ioutil" "os" "path/filepath" + "sort" "github.com/oklog/ulid" "github.com/pkg/errors" + "github.com/prometheus/tsdb/labels" ) // DiskBlock handles reads against a Block of time series data. @@ -38,6 +41,9 @@ type DiskBlock interface { // Chunks returns a ChunkReader over the block's data. Chunks() ChunkReader + // Delete deletes data from the block. + Delete(mint, maxt int64, ms ...labels.Matcher) error + // Close releases all underlying resources of the block. Close() error } @@ -106,6 +112,7 @@ type blockMeta struct { } const metaFilename = "meta.json" +const tombstoneFilename = "tombstones" func readMetaFile(dir string) (*BlockMeta, error) { b, err := ioutil.ReadFile(filepath.Join(dir, metaFilename)) @@ -207,6 +214,161 @@ func (pb *persistedBlock) Index() IndexReader { return pb.indexr } func (pb *persistedBlock) Chunks() ChunkReader { return pb.chunkr } func (pb *persistedBlock) Meta() BlockMeta { return pb.meta } +func (pb *persistedBlock) Delete(mint, maxt int64, ms ...labels.Matcher) error { + pr := newPostingsReader(pb.indexr) + p, absent := pr.Select(ms...) + + ir := pb.indexr + + // Choose only valid postings which have chunks in the time-range. 
+ vPostings := []uint32{} + +Outer: + for p.Next() { + lset, chunks, err := ir.Series(p.At()) + + for _, abs := range absent { + if lset.Get(abs) != "" { + continue Outer + } + } + + // XXX(gouthamve): Adjust mint and maxt to match the time-range in the chunks? + for _, chk := range chunks { + if (mint <= chk.MinTime && maxt >= MinTime) || + (mint > chk.MinTime && mint <= chk.MaxTime) { + vPostings = append(vPostings, p.At()) + continue + } + } + } + + if p.Err() != nil { + return p.Err() + } + + // Merge the current and new tombstones. + tr := ir.tombstones() + stones := make([]rip, 0, len(vPostings)) + i := 0 + for tr.Next() { + stone := tr.At() + for stone.ref > vPostings[i] { + stones = append(stones, rip{ref: vPostings[i], mint: mint, maxt: maxt}) + i++ + } + + if stone.ref == vPostings[i] { + if stone.mint > mint { + stone.mint = mint + } + if stone.maxt < maxt { + stone.maxt = maxt + } + + stones = append(stones, stone) + continue + } + + stones = append(stones, stone) + } + + path := filepath.Join(pb.dir, tombstoneFilename) + tmp := path + ".tmp" + + f, err := os.Create(tmp) + if err != nil { + return err + } + + // TODO: Proper format and all. + buf := encbuf{b: make([]byte, 0, 20)} + fbuf := bufio.NewWriterSize(f, 20) + + for _, stone := range stones { + buf.reset() + buf.putBE32(stone.ref) + buf.putBE64int64(stone.mint) + buf.putBE64int64(stone.maxt) + + _, err := fbuf.Write(buf.get()) + if err != nil { + return err + } + } + + if err := fbuf.Flush(); err != nil { + return err + } + if err := f.Close(); err != nil { + return err + } + + return renameFile(tmp, path) +} + +// rip (after rest-in-peace) holds the information on the posting and time-range +// that is deleted. +type rip struct { + ref uint32 + mint, maxt int64 +} + +// TODO(gouthamve): Move to cur and reduction in byte-array vis-a-vis BEPostings. 
+type tombstoneReader struct { + data []byte + idx int + len int +} + +func newTombStoneReader(data []byte) *tombstoneReader { + // TODO(gouthamve): Error handling. + return &tombstoneReader{data: data, idx: -1, len: len(data) / 20} +} + +func (t *tombstoneReader) Next() bool { + t.idx++ + + return t.idx < t.len +} + +func (t *tombstoneReader) At() rip { + bytIdx := t.idx * (4 + 8 + 8) + dat := t.data[bytIdx : bytIdx+20] + + db := &decbuf{b: dat} + ref := db.be32() + mint := db.be64int64() + maxt := db.be64int64() + + // TODO(gouthamve): Handle errors. + return rip{ref: ref, mint: mint, maxt: maxt} +} + +func (t *tombstoneReader) Seek(ref uint32) bool { + if s := t.At(); s.ref >= ref { + return true + } + + i := sort.Search(t.len-t.idx, func(i int) bool { + bytIdx := (t.idx + i) * 20 + dat := t.data[bytIdx : bytIdx+20] + + db := &decbuf{b: dat} + ref2 := db.be32() + if ref >= ref2 { + return true + } + }) + + t.idx += idx + return t.idx < t.len +} + +func (t *tombstoneReader) Err() error { + return nil +} + func chunkDir(dir string) string { return filepath.Join(dir, "chunks") } func walDir(dir string) string { return filepath.Join(dir, "wal") } diff --git a/chunks.go b/chunks.go index 77663359cf..1edb6d2ea1 100644 --- a/chunks.go +++ b/chunks.go @@ -41,6 +41,18 @@ type ChunkMeta struct { Chunk chunks.Chunk MinTime, MaxTime int64 // time range the data covers + + // To handle deleted time-ranges. + deleted bool + dranges []trange +} + +type trange struct { + mint, maxt int64 +} + +func (tr trange) inBounds(t int64) bool { + return t >= tr.mint && t <= tr.maxt } // writeHash writes the chunk encoding and raw data into the provided hash. @@ -54,6 +66,58 @@ func (cm *ChunkMeta) writeHash(h hash.Hash) error { return nil } +// Iterator returns a chunks.Iterator that honors any deleted ranges. +// If there is no deleted range then the underlying iterator is returned. 
+func (cm *ChunkMeta) Iterator() chunks.Iterator { + if cm.Chunk == nil { + return nil + } + + if cm.deleted { + return &deletedIterator{it: cm.Chunk.Iterator(), dranges: cm.dranges} + } + + return cm.Chunk.Iterator() +} + +// deletedIterator wraps an Iterator and makes sure any deleted metrics are not +// returned. +type deletedIterator struct { + it chunks.Iterator + + dranges []trange +} + +func (it *deletedIterator) At() (int64, float64) { + return it.it.At() +} + +func (it *deletedIterator) Next() bool { +Outer: + for it.it.Next() { + ts, _ := it.it.At() + for _, tr := range it.dranges { + if tr.inBounds(ts) { + continue Outer + } + if ts > tr.maxt { + it.dranges = it.dranges[1:] + continue + } + + return true + } + + return true + } + + return false +} + +func (it *deletedIterator) Err() { + return it.Err() +} + // ChunkWriter serializes a time block of chunked series data. type ChunkWriter interface { // WriteChunks writes several chunks. The Chunk field of the ChunkMetas diff --git a/compact.go b/compact.go index 938697419f..39e780b03e 100644 --- a/compact.go +++ b/compact.go @@ -240,6 +240,15 @@ func (c *compactor) write(dir string, blocks ...Block) (err error) { return errors.Wrap(err, "close index writer") } + // Create an empty tombstones file. + tf, err := os.Create(filepath.Join(tmp, tombstoneFilename)) + if err != nil { + return errors.Wrap(err, "touch tombstones file") + } + if err := tf.Close(); err != nil { + return errors.Wrap(err, "close tombstones file") + } + // Block successfully written, make visible and remove old ones. 
if err := renameFile(tmp, dir); err != nil { return errors.Wrap(err, "rename block dir") diff --git a/encoding_helpers.go b/encoding_helpers.go index 91f73a54c7..486930d222 100644 --- a/encoding_helpers.go +++ b/encoding_helpers.go @@ -22,6 +22,7 @@ func (e *encbuf) putByte(c byte) { e.b = append(e.b, c) } func (e *encbuf) putBE32int(x int) { e.putBE32(uint32(x)) } func (e *encbuf) putBE64int(x int) { e.putBE64(uint64(x)) } +func (e *encbuf) putBE64int64(x int64) { e.putBE64(uint64(x)) } func (e *encbuf) putUvarint32(x uint32) { e.putUvarint64(uint64(x)) } func (e *encbuf) putUvarint(x int) { e.putUvarint64(uint64(x)) } @@ -71,8 +72,9 @@ type decbuf struct { e error } -func (d *decbuf) uvarint() int { return int(d.uvarint64()) } -func (d *decbuf) be32int() int { return int(d.be32()) } +func (d *decbuf) uvarint() int { return int(d.uvarint64()) } +func (d *decbuf) be32int() int { return int(d.be32()) } +func (d *decbuf) be64int64() int { return int64(d.be64()) } func (d *decbuf) uvarintStr() string { l := d.uvarint64() diff --git a/head.go b/head.go index b71bbafc09..1c7e90d97e 100644 --- a/head.go +++ b/head.go @@ -176,6 +176,7 @@ func (h *HeadBlock) Close() error { return nil } +// Meta implements headBlock func (h *HeadBlock) Meta() BlockMeta { m := BlockMeta{ ULID: h.meta.ULID, @@ -192,11 +193,22 @@ func (h *HeadBlock) Meta() BlockMeta { return m } -func (h *HeadBlock) Dir() string { return h.dir } -func (h *HeadBlock) Persisted() bool { return false } -func (h *HeadBlock) Index() IndexReader { return &headIndexReader{h} } +// Dir implements headBlock +func (h *HeadBlock) Dir() string { return h.dir } + +// Persisted implements headBlock +func (h *HeadBlock) Persisted() bool { return false } + +// Index implements headBlock +func (h *HeadBlock) Index() IndexReader { return &headIndexReader{h} } + +// Chunks implements headBlock func (h *HeadBlock) Chunks() ChunkReader { return &headChunkReader{h} } +// Delete implements headBlock +func (h *HeadBlock) 
Delete(int64, int64, ...labels.Matcher) error { return nil } + +// Querier implements Queryable and headBlock func (h *HeadBlock) Querier(mint, maxt int64) Querier { h.mtx.RLock() defer h.mtx.RUnlock() @@ -236,6 +248,7 @@ func (h *HeadBlock) Querier(mint, maxt int64) Querier { } } +// Appender implements headBlock func (h *HeadBlock) Appender() Appender { atomic.AddUint64(&h.activeWriters, 1) @@ -247,6 +260,7 @@ func (h *HeadBlock) Appender() Appender { return &headAppender{HeadBlock: h, samples: getHeadAppendBuffer()} } +// Busy implements headBlock func (h *HeadBlock) Busy() bool { return atomic.LoadUint64(&h.activeWriters) > 0 } diff --git a/index.go b/index.go index c0e96381f3..051555dd21 100644 --- a/index.go +++ b/index.go @@ -537,6 +537,9 @@ type indexReader struct { // Cached hashmaps of section offsets. labels map[string]uint32 postings map[string]uint32 + + // The underlying byte slice holding the tombstone data. + tomb []byte } var ( @@ -573,6 +576,12 @@ func newIndexReader(dir string) (*indexReader, error) { return nil, errors.Wrap(err, "read postings table") } + tf, err := openMmapFile(filepath.Join(dir, tombstoneFilename)) + if err != nil { + return err + } + r.tomb = tf.b + return r, nil } @@ -741,6 +750,18 @@ func (r *indexReader) Series(ref uint32) (labels.Labels, []*ChunkMeta, error) { lbls = append(lbls, labels.Label{Name: ln, Value: lv}) } + // TODO: This sucks! Put tombstones in map. + tr := r.tombstones() + dmint, dmaxt := 0 + del := false + if tr.Seek(ref) { + s := tr.At() + if s.ref == ref { + del = true + dmint, dmaxt = s.mint, s.maxt + } + } + // Read the chunks meta data. k = int(d2.uvarint()) chunks := make([]*ChunkMeta, 0, k) @@ -754,10 +775,14 @@ func (r *indexReader) Series(ref uint32) (labels.Labels, []*ChunkMeta, error) { return nil, nil, errors.Wrapf(d2.err(), "read meta for chunk %d", i) } + // TODO(gouthamve): Donot add the chunk if its completely deleted. 
chunks = append(chunks, &ChunkMeta{ Ref: off, MinTime: mint, MaxTime: maxt, + + deleted: del, + dranges: []trange{{dmint, dmaxt}}, }) } @@ -789,6 +814,10 @@ func (r *indexReader) Postings(name, value string) (Postings, error) { return newBigEndianPostings(d2.get()), nil } +func (r *indexReader) tombstones() *tombstoneReader { + return newTombStoneReader(r.tomb[:]) +} + type stringTuples struct { l int // tuple length s []string // flattened tuple entries From 4f1d857590c110ba60e2c3a4b3c7e5b1bb44eaf9 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Mon, 15 May 2017 23:28:14 +0530 Subject: [PATCH 02/25] Implement Delete on HeadBlock Signed-off-by: Goutham Veeramachaneni --- block.go | 378 +++++++++++++++++++++++++++++++++----------- chunks.go | 55 ++++++- encoding_helpers.go | 6 +- head.go | 78 +++++++-- index.go | 39 ++--- 5 files changed, 425 insertions(+), 131 deletions(-) diff --git a/block.go b/block.go index 40126a3624..365d2f6dc6 100644 --- a/block.go +++ b/block.go @@ -14,7 +14,7 @@ package tsdb import ( - "bufio" + "encoding/binary" "encoding/json" "fmt" "io/ioutil" @@ -155,6 +155,79 @@ func writeMetaFile(dir string, meta *BlockMeta) error { return renameFile(tmp, path) } +func readTombstoneFile(dir string) (TombstoneReader, error) { + return newTombStoneReader(dir) +} + +func writeTombstoneFile(dir string, tr TombstoneReader) error { + path := filepath.Join(dir, tombstoneFilename) + tmp := path + ".tmp" + + f, err := os.Create(tmp) + if err != nil { + return err + } + + stoneOff := make(map[uint32]int64) // The map that holds the ref to offset vals. + refs := []uint32{} // Sorted refs. + + pos := int64(0) + buf := encbuf{b: make([]byte, 2*binary.MaxVarintLen64)} + for tr.Next() { + s := tr.At() + + refs = append(refs, s.ref) + stoneOff[s.ref] = pos + + // Write the ranges. 
+ buf.reset() + buf.putVarint64(int64(len(s.ranges))) + n, err := f.Write(buf.get()) + if err != nil { + return err + } + pos += int64(n) + + for _, r := range s.ranges { + buf.reset() + buf.putVarint64(r.mint) + buf.putVarint64(r.maxt) + n, err = f.Write(buf.get()) + if err != nil { + return err + } + pos += int64(n) + } + } + + // Write the offset table. + buf.reset() + buf.putBE32int(len(refs)) + for _, ref := range refs { + buf.reset() + buf.putBE32(ref) + buf.putBE64int64(stoneOff[ref]) + _, err = f.Write(buf.get()) + if err != nil { + return err + } + } + + // Write the offset to the offset table. + buf.reset() + buf.putBE64int64(pos) + _, err = f.Write(buf.get()) + if err != nil { + return err + } + + if err := f.Close(); err != nil { + return err + } + + return renameFile(tmp, path) +} + type persistedBlock struct { dir string meta BlockMeta @@ -226,6 +299,9 @@ func (pb *persistedBlock) Delete(mint, maxt int64, ms ...labels.Matcher) error { Outer: for p.Next() { lset, chunks, err := ir.Series(p.At()) + if err != nil { + return err + } for _, abs := range absent { if lset.Get(abs) != "" { @@ -235,10 +311,10 @@ Outer: // XXX(gouthamve): Adjust mint and maxt to match the time-range in the chunks? for _, chk := range chunks { - if (mint <= chk.MinTime && maxt >= MinTime) || + if (mint <= chk.MinTime && maxt >= chk.MinTime) || (mint > chk.MinTime && mint <= chk.MaxTime) { vPostings = append(vPostings, p.At()) - continue + continue Outer } } } @@ -248,127 +324,239 @@ Outer: } // Merge the current and new tombstones. 
- tr := ir.tombstones() - stones := make([]rip, 0, len(vPostings)) - i := 0 - for tr.Next() { - stone := tr.At() - for stone.ref > vPostings[i] { - stones = append(stones, rip{ref: vPostings[i], mint: mint, maxt: maxt}) - i++ - } + tr := newMapTombstoneReader(ir.tombstones) + str := newSimpleTombstoneReader(vPostings, []trange{mint, maxt}) + tombreader := newMergedTombstoneReader(tr, str) - if stone.ref == vPostings[i] { - if stone.mint > mint { - stone.mint = mint - } - if stone.maxt < maxt { - stone.maxt = maxt - } - - stones = append(stones, stone) - continue - } - - stones = append(stones, stone) - } - - path := filepath.Join(pb.dir, tombstoneFilename) - tmp := path + ".tmp" - - f, err := os.Create(tmp) - if err != nil { - return err - } - - // TODO: Proper format and all. - buf := encbuf{b: make([]byte, 0, 20)} - fbuf := bufio.NewWriterSize(f, 20) - - for _, stone := range stones { - buf.reset() - buf.putBE32(stone.ref) - buf.putBE64int64(stone.mint) - buf.putBE64int64(stone.maxt) - - _, err := fbuf.Write(buf.get()) - if err != nil { - return err - } - } - - if err := fbuf.Flush(); err != nil { - return err - } - if err := f.Close(); err != nil { - return err - } - - return renameFile(tmp, path) + return writeTombstoneFile(pb.dir, tombreader) } -// rip (after rest-in-peace) holds the information on the posting and time-range +// stone holds the information on the posting and time-range // that is deleted. -type rip struct { - ref uint32 - mint, maxt int64 +type stone struct { + ref uint32 + ranges []trange } -// TODO(gouthamve): Move to cur and reduction in byte-array vis-a-vis BEPostings. +// TombstoneReader is the iterator over tombstones. 
+type TombstoneReader interface { + Next() bool + At() stone + Err() error +} + +var emptyTombstoneReader = newMapTombstoneReader(make(map[uint32][]trange)) + type tombstoneReader struct { - data []byte - idx int - len int + stones []byte + idx int + len int + + b []byte + err error } -func newTombStoneReader(data []byte) *tombstoneReader { - // TODO(gouthamve): Error handling. - return &tombstoneReader{data: data, idx: -1, len: len(data) / 20} +func newTombStoneReader(dir string) (*tombstoneReader, error) { + // TODO(gouthamve): MMAP? + b, err := ioutil.ReadFile(filepath.Join(dir, tombstoneFilename)) + if err != nil { + return nil, err + } + + offsetBytes := b[len(b)-8:] + d := &decbuf{b: offsetBytes} + off := d.be64int64() + if err := d.err(); err != nil { + return nil, err + } + + d = &decbuf{b: b[off:]} + numStones := d.be64int64() + if err := d.err(); err != nil { + return nil, err + } + + return &tombstoneReader{ + stones: b[off+8 : (off+8)+(numStones*12)], + idx: -1, + len: int(numStones), + + b: b, + }, nil } func (t *tombstoneReader) Next() bool { + if t.err != nil { + return false + } + t.idx++ return t.idx < t.len } -func (t *tombstoneReader) At() rip { - bytIdx := t.idx * (4 + 8 + 8) - dat := t.data[bytIdx : bytIdx+20] +func (t *tombstoneReader) At() stone { + bytIdx := t.idx * (4 + 8) + dat := t.stones[bytIdx : bytIdx+12] - db := &decbuf{b: dat} - ref := db.be32() - mint := db.be64int64() - maxt := db.be64int64() + d := &decbuf{b: dat} + ref := d.be32() + off := d.be64int64() - // TODO(gouthamve): Handle errors. 
- return rip{ref: ref, mint: mint, maxt: maxt} -} - -func (t *tombstoneReader) Seek(ref uint32) bool { - if s := t.At(); s.ref >= ref { - return true + d = &decbuf{b: t.b[off:]} + numRanges := d.varint64() + if err := d.err(); err != nil { + t.err = err + return stone{ref: ref} } - i := sort.Search(t.len-t.idx, func(i int) bool { - bytIdx := (t.idx + i) * 20 - dat := t.data[bytIdx : bytIdx+20] - - db := &decbuf{b: dat} - ref2 := db.be32() - if ref >= ref2 { - return true + dranges := make([]trange, 0, numRanges) + for i := 0; i < int(numRanges); i++ { + mint := d.varint64() + maxt := d.varint64() + if err := d.err(); err != nil { + t.err = err + return stone{ref: ref, ranges: dranges} } - }) - t.idx += idx - return t.idx < t.len + dranges = append(dranges, trange{mint, maxt}) + } + + return stone{ref: ref, ranges: dranges} } func (t *tombstoneReader) Err() error { + return t.err +} + +type mapTombstoneReader struct { + refs []uint32 + cur uint32 + + stones map[uint32][]trange +} + +func newMapTombstoneReader(ts map[uint32][]trange) *mapTombstoneReader { + refs := make([]uint32, 0, len(ts)) + for k := range ts { + refs = append(refs, k) + } + sort.Sort(uint32slice(refs)) + return &mapTombstoneReader{stones: ts, refs: refs} +} + +func (t *mapTombstoneReader) Next() bool { + if len(t.refs) > 0 { + t.cur = t.refs[0] + return true + } + + t.cur = 0 + return false +} + +func (t *mapTombstoneReader) At() stone { + return stone{ref: t.cur, ranges: t.stones[t.cur]} +} + +func (t *mapTombstoneReader) Err() error { return nil } +type simpleTombstoneReader struct { + refs []uint32 + cur uint32 + + ranges []trange +} + +func newSimpleTombstoneReader(refs []uint32, drange []trange) *simpleTombstoneReader { + return &simpleTombstoneReader{refs: refs, ranges: drange} +} + +func (t *simpleTombstoneReader) Next() bool { + if len(t.refs) > 0 { + t.cur = t.refs[0] + return true + } + + t.cur = 0 + return false +} + +func (t *simpleTombstoneReader) At() stone { + return stone{ref: 
t.cur, ranges: t.ranges} +} + +func (t *simpleTombstoneReader) Err() error { + return nil +} + +type mergedTombstoneReader struct { + a, b TombstoneReader + cur stone + + initialized bool + aok, bok bool +} + +func newMergedTombstoneReader(a, b TombstoneReader) *mergedTombstoneReader { + return &mergedTombstoneReader{a: a, b: b} +} + +func (t *mergedTombstoneReader) Next() bool { + if !t.initialized { + t.aok = t.a.Next() + t.bok = t.b.Next() + t.initialized = true + } + + if !t.aok && !t.bok { + return false + } + + if !t.aok { + t.cur = t.b.At() + t.bok = t.b.Next() + return true + } + if !t.bok { + t.cur = t.a.At() + t.aok = t.a.Next() + return true + } + + acur, bcur := t.a.At(), t.b.At() + + if acur.ref < bcur.ref { + t.cur = acur + t.aok = t.a.Next() + } else if acur.ref > bcur.ref { + t.cur = bcur + t.bok = t.b.Next() + } else { + t.cur = acur + // Merge time ranges. + for _, r := range bcur.ranges { + acur.ranges = addNewInterval(acur.ranges, r) + } + t.aok = t.a.Next() + t.bok = t.b.Next() + } + return true +} + +func (t *mergedTombstoneReader) At() stone { + return t.cur +} + +func (t *mergedTombstoneReader) Err() error { + if t.a.Err() != nil { + return t.a.Err() + } + return t.b.Err() +} + func chunkDir(dir string) string { return filepath.Join(dir, "chunks") } func walDir(dir string) string { return filepath.Join(dir, "wal") } diff --git a/chunks.go b/chunks.go index 1edb6d2ea1..5e89267e2b 100644 --- a/chunks.go +++ b/chunks.go @@ -55,6 +55,38 @@ func (tr trange) inBounds(t int64) bool { return t >= tr.mint && t <= tr.maxt } +// This adds the new time-range to the existing ones. +// The existing ones must be sorted. 
+func addNewInterval(existing []trange, n trange) []trange { + for i, r := range existing { + if r.inBounds(n.mint) { + if n.maxt > r.maxt { + existing[i].maxt = n.maxt + } + + return existing + } + if r.inBounds(n.maxt) { + if n.mint < r.maxt { + existing[i].mint = n.mint + } + + return existing + } + + if n.mint < r.mint { + newRange := existing[:i] + newRange = append(newRange, n) + newRange = append(newRange, existing[i:]...) + + return newRange + } + } + + existing = append(existing, n) + return existing +} + // writeHash writes the chunk encoding and raw data into the provided hash. func (cm *ChunkMeta) writeHash(h hash.Hash) error { if _, err := h.Write([]byte{byte(cm.Chunk.Encoding())}); err != nil { @@ -114,7 +146,7 @@ Outer: return false } -func (it *deletedIterator) Err() { +func (it *deletedIterator) Err() error { return it.Err() } @@ -252,6 +284,27 @@ func (w *chunkWriter) WriteChunks(chks ...*ChunkMeta) error { maxLen := int64(binary.MaxVarintLen32) // The number of chunks. for _, c := range chks { maxLen += binary.MaxVarintLen32 + 1 // The number of bytes in the chunk and its encoding. + + // Remove the deleted parts. + if c.deleted { + // TODO(gouthamve): Try to do it in-place somehow? 
+ chk := chunks.NewXORChunk() + app, err := chk.Appender() + if err != nil { + return err + } + it := c.Iterator() + for it.Next() { + ts, v := it.At() + app.Append(ts, v) + } + + if err := it.Err(); err != nil { + return err + } + c.Chunk = chk + } + maxLen += int64(len(c.Chunk.Bytes())) } newsz := w.n + maxLen diff --git a/encoding_helpers.go b/encoding_helpers.go index 486930d222..50189e0bbe 100644 --- a/encoding_helpers.go +++ b/encoding_helpers.go @@ -72,9 +72,9 @@ type decbuf struct { e error } -func (d *decbuf) uvarint() int { return int(d.uvarint64()) } -func (d *decbuf) be32int() int { return int(d.be32()) } -func (d *decbuf) be64int64() int { return int64(d.be64()) } +func (d *decbuf) uvarint() int { return int(d.uvarint64()) } +func (d *decbuf) be32int() int { return int(d.be32()) } +func (d *decbuf) be64int64() int64 { return int64(d.be64()) } func (d *decbuf) uvarintStr() string { l := d.uvarint64() diff --git a/head.go b/head.go index 1c7e90d97e..4b59a9d749 100644 --- a/head.go +++ b/head.go @@ -66,6 +66,8 @@ type HeadBlock struct { values map[string]stringset // label names to possible values postings *memPostings // postings lists for terms + tombstones map[uint32][]trange + meta BlockMeta } @@ -94,6 +96,7 @@ func TouchHeadBlock(dir string, seq int, mint, maxt int64) error { }); err != nil { return err } + return renameFile(tmp, dir) } @@ -105,13 +108,14 @@ func OpenHeadBlock(dir string, l log.Logger, wal WAL) (*HeadBlock, error) { } h := &HeadBlock{ - dir: dir, - wal: wal, - series: []*memSeries{nil}, // 0 is not a valid posting, filled with nil. - hashes: map[uint64][]*memSeries{}, - values: map[string]stringset{}, - postings: &memPostings{m: make(map[term][]uint32)}, - meta: *meta, + dir: dir, + wal: wal, + series: []*memSeries{nil}, // 0 is not a valid posting, filled with nil. 
+ hashes: map[uint64][]*memSeries{}, + values: map[string]stringset{}, + postings: &memPostings{m: make(map[term][]uint32)}, + meta: *meta, + tombstones: make(map[uint32][]trange), } return h, h.init() } @@ -138,7 +142,20 @@ func (h *HeadBlock) init() error { h.meta.Stats.NumSamples++ } } - return errors.Wrap(r.Err(), "consume WAL") + if err := r.Err(); err != nil { + return errors.Wrap(err, "consume WAL") + } + + tr, err := readTombstoneFile(h.dir) + if err != nil { + return errors.Wrap(err, "read tombstones file") + } + + for tr.Next() { + s := tr.At() + h.tombstones[s.ref] = s.ranges + } + return errors.Wrap(err, "tombstones reader iteration") } // inBounds returns true if the given timestamp is within the valid @@ -206,7 +223,44 @@ func (h *HeadBlock) Index() IndexReader { return &headIndexReader{h} } func (h *HeadBlock) Chunks() ChunkReader { return &headChunkReader{h} } // Delete implements headBlock -func (h *HeadBlock) Delete(int64, int64, ...labels.Matcher) error { return nil } +func (h *HeadBlock) Delete(mint int64, maxt int64, ms ...labels.Matcher) error { + h.mtx.RLock() + + ir := h.Index() + + pr := newPostingsReader(ir) + p, absent := pr.Select(ms...) + + h.mtx.RUnlock() + + h.mtx.Lock() // We are modifying the tombstones here. 
+ defer h.mtx.Unlock() + +Outer: + for p.Next() { + ref := p.At() + lset := h.series[ref].lset + for _, abs := range absent { + if lset.Get(abs) != "" { + continue Outer + } + } + + rs, ok := h.tombstones[ref] + if !ok { + h.tombstones[ref] = []trange{{mint, maxt}} + continue + } + + h.tombstones[ref] = addNewInterval(rs, trange{mint, maxt}) + } + + if p.Err() != nil { + return p.Err() + } + + return writeTombstoneFile(h.dir, newMapTombstoneReader(h.tombstones)) +} // Querier implements Queryable and headBlock func (h *HeadBlock) Querier(mint, maxt int64) Querier { @@ -527,6 +581,9 @@ func (h *headIndexReader) Series(ref uint32) (labels.Labels, []*ChunkMeta, error if int(ref) >= len(h.series) { return nil, nil, ErrNotFound } + + dranges, deleted := h.tombstones[ref] + s := h.series[ref] metas := make([]*ChunkMeta, 0, len(s.chunks)) @@ -538,6 +595,9 @@ func (h *headIndexReader) Series(ref uint32) (labels.Labels, []*ChunkMeta, error MinTime: c.minTime, MaxTime: c.maxTime, Ref: (uint64(ref) << 32) | uint64(i), + + deleted: deleted, + dranges: dranges, }) } diff --git a/index.go b/index.go index 051555dd21..3536468185 100644 --- a/index.go +++ b/index.go @@ -165,6 +165,10 @@ func newIndexWriter(dir string) (*indexWriter, error) { if err := iw.writeMeta(); err != nil { return nil, err } + // TODO(gouthamve): Figure out where this function goes, index or block. + if err := writeTombstoneFile(dir, emptyTombstoneReader); err != nil { + return nil, err + } return iw, nil } @@ -538,8 +542,7 @@ type indexReader struct { labels map[string]uint32 postings map[string]uint32 - // The underlying byte slice holding the tombstone data. 
- tomb []byte + tombstones map[uint32][]trange } var ( @@ -576,13 +579,17 @@ func newIndexReader(dir string) (*indexReader, error) { return nil, errors.Wrap(err, "read postings table") } - tf, err := openMmapFile(filepath.Join(dir, tombstoneFilename)) + tr, err := readTombstoneFile(dir) if err != nil { - return err + return r, err + } + r.tombstones = make(map[uint32][]trange) + for tr.Next() { + s := tr.At() + r.tombstones[s.ref] = s.ranges } - r.tomb = tf.b - return r, nil + return r, tr.Err() } func (r *indexReader) readTOC() error { @@ -750,17 +757,7 @@ func (r *indexReader) Series(ref uint32) (labels.Labels, []*ChunkMeta, error) { lbls = append(lbls, labels.Label{Name: ln, Value: lv}) } - // TODO: This sucks! Put tombstones in map. - tr := r.tombstones() - dmint, dmaxt := 0 - del := false - if tr.Seek(ref) { - s := tr.At() - if s.ref == ref { - del = true - dmint, dmaxt = s.mint, s.maxt - } - } + s, deleted := r.tombstones[ref] // Read the chunks meta data. k = int(d2.uvarint()) @@ -781,8 +778,8 @@ func (r *indexReader) Series(ref uint32) (labels.Labels, []*ChunkMeta, error) { MinTime: mint, MaxTime: maxt, - deleted: del, - dranges: []trange{{dmint, dmaxt}}, + deleted: deleted, + dranges: s, }) } @@ -814,10 +811,6 @@ func (r *indexReader) Postings(name, value string) (Postings, error) { return newBigEndianPostings(d2.get()), nil } -func (r *indexReader) tombstones() *tombstoneReader { - return newTombStoneReader(r.tomb[:]) -} - type stringTuples struct { l int // tuple length s []string // flattened tuple entries From 3de55171d326004ed64046594cd9fca5a0b0c4ee Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Tue, 16 May 2017 12:43:33 +0530 Subject: [PATCH 03/25] Add tests for tombstones and deletedIterator Signed-off-by: Goutham Veeramachaneni --- block.go | 14 +++++-- block_test.go | 47 +++++++++++++++++++++ chunks.go | 85 +++++++++++++++++++------------------- chunks_test.go | 109 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 
211 insertions(+), 44 deletions(-) create mode 100644 block_test.go diff --git a/block.go b/block.go index 365d2f6dc6..94f57f2060 100644 --- a/block.go +++ b/block.go @@ -199,10 +199,17 @@ func writeTombstoneFile(dir string, tr TombstoneReader) error { pos += int64(n) } } + if err := tr.Err(); err != nil { + return err + } // Write the offset table. buf.reset() buf.putBE32int(len(refs)) + if _, err := f.Write(buf.get()); err != nil { + return err + } + for _, ref := range refs { buf.reset() buf.putBE32(ref) @@ -325,7 +332,7 @@ Outer: // Merge the current and new tombstones. tr := newMapTombstoneReader(ir.tombstones) - str := newSimpleTombstoneReader(vPostings, []trange{mint, maxt}) + str := newSimpleTombstoneReader(vPostings, []trange{{mint, maxt}}) tombreader := newMergedTombstoneReader(tr, str) return writeTombstoneFile(pb.dir, tombreader) @@ -371,13 +378,13 @@ func newTombStoneReader(dir string) (*tombstoneReader, error) { } d = &decbuf{b: b[off:]} - numStones := d.be64int64() + numStones := d.be32int() if err := d.err(); err != nil { return nil, err } return &tombstoneReader{ - stones: b[off+8 : (off+8)+(numStones*12)], + stones: b[off+4:], idx: -1, len: int(numStones), @@ -448,6 +455,7 @@ func newMapTombstoneReader(ts map[uint32][]trange) *mapTombstoneReader { func (t *mapTombstoneReader) Next() bool { if len(t.refs) > 0 { t.cur = t.refs[0] + t.refs = t.refs[1:] return true } diff --git a/block_test.go b/block_test.go new file mode 100644 index 0000000000..491c1c1387 --- /dev/null +++ b/block_test.go @@ -0,0 +1,47 @@ +package tsdb + +import ( + "io/ioutil" + "math/rand" + "os" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestWriteAndReadbackTombStones(t *testing.T) { + tmpdir, _ := ioutil.TempDir("", "test") + defer os.RemoveAll(tmpdir) + + ref := uint32(0) + + stones := make(map[uint32][]trange) + // Generate the tombstones. 
+ for i := 0; i < 100; i++ { + ref += uint32(rand.Int31n(10)) + 1 + numRanges := rand.Intn(5) + dranges := make([]trange, numRanges) + mint := rand.Int63n(time.Now().UnixNano()) + for j := 0; j < numRanges; j++ { + dranges[j] = trange{mint, mint + rand.Int63n(1000)} + mint += rand.Int63n(1000) + 1 + } + stones[ref] = dranges + } + + require.NoError(t, writeTombstoneFile(tmpdir, newMapTombstoneReader(stones))) + + restr, err := readTombstoneFile(tmpdir) + require.NoError(t, err) + exptr := newMapTombstoneReader(stones) + // Compare the two readers. + for restr.Next() { + require.True(t, exptr.Next()) + + require.Equal(t, exptr.At(), restr.At()) + } + require.False(t, exptr.Next()) + require.NoError(t, restr.Err()) + require.NoError(t, exptr.Err()) +} diff --git a/chunks.go b/chunks.go index 5e89267e2b..ca030b76d9 100644 --- a/chunks.go +++ b/chunks.go @@ -47,46 +47,6 @@ type ChunkMeta struct { dranges []trange } -type trange struct { - mint, maxt int64 -} - -func (tr trange) inBounds(t int64) bool { - return t >= tr.mint && t <= tr.maxt -} - -// This adds the new time-range to the existing ones. -// The existing ones must be sorted. -func addNewInterval(existing []trange, n trange) []trange { - for i, r := range existing { - if r.inBounds(n.mint) { - if n.maxt > r.maxt { - existing[i].maxt = n.maxt - } - - return existing - } - if r.inBounds(n.maxt) { - if n.mint < r.maxt { - existing[i].mint = n.mint - } - - return existing - } - - if n.mint < r.mint { - newRange := existing[:i] - newRange = append(newRange, n) - newRange = append(newRange, existing[i:]...) - - return newRange - } - } - - existing = append(existing, n) - return existing -} - // writeHash writes the chunk encoding and raw data into the provided hash. 
func (cm *ChunkMeta) writeHash(h hash.Hash) error { if _, err := h.Write([]byte{byte(cm.Chunk.Encoding())}); err != nil { @@ -112,6 +72,47 @@ func (cm *ChunkMeta) Iterator() chunks.Iterator { return cm.Chunk.Iterator() } +type trange struct { + mint, maxt int64 +} + +func (tr trange) inBounds(t int64) bool { + return t >= tr.mint && t <= tr.maxt +} + +// This adds the new time-range to the existing ones. +// The existing ones must be sorted and should not be nil. +func addNewInterval(existing []trange, n trange) []trange { + for i, r := range existing { + if r.inBounds(n.mint) { + if n.maxt > r.maxt { + existing[i].maxt = n.maxt + } + + return existing + } + if r.inBounds(n.maxt) { + if n.mint < r.maxt { + existing[i].mint = n.mint + } + + return existing + } + + if n.mint < r.mint { + newRange := make([]trange, i, len(existing[:i])+1) + copy(newRange, existing[:i]) + newRange = append(newRange, n) + newRange = append(newRange, existing[i:]...) + + return newRange + } + } + + existing = append(existing, n) + return existing +} + // deletedIterator wraps an Iterator and makes sure any deleted metrics are not // returned. type deletedIterator struct { @@ -128,10 +129,12 @@ func (it *deletedIterator) Next() bool { Outer: for it.it.Next() { ts, _ := it.it.At() + for _, tr := range it.dranges { if tr.inBounds(ts) { continue Outer } + if ts > tr.maxt { it.dranges = it.dranges[1:] continue @@ -147,7 +150,7 @@ Outer: } func (it *deletedIterator) Err() error { - return it.Err() + return it.it.Err() } // ChunkWriter serializes a time block of chunked series data. 
diff --git a/chunks_test.go b/chunks_test.go index ae9d698768..2837259f89 100644 --- a/chunks_test.go +++ b/chunks_test.go @@ -14,8 +14,12 @@ package tsdb import ( + "math/rand" + "testing" + "github.com/pkg/errors" "github.com/prometheus/tsdb/chunks" + "github.com/stretchr/testify/require" ) type mockChunkReader map[uint64]chunks.Chunk @@ -32,3 +36,108 @@ func (cr mockChunkReader) Chunk(ref uint64) (chunks.Chunk, error) { func (cr mockChunkReader) Close() error { return nil } + +func TestAddingNewIntervals(t *testing.T) { + cases := []struct { + exist []trange + new trange + + exp []trange + }{ + { + new: trange{1, 2}, + exp: []trange{{1, 2}}, + }, + { + exist: []trange{{1, 10}, {12, 20}, {25, 30}}, + new: trange{21, 23}, + exp: []trange{{1, 10}, {12, 20}, {21, 23}, {25, 30}}, + }, + { + exist: []trange{{1, 10}, {12, 20}, {25, 30}}, + new: trange{21, 25}, + exp: []trange{{1, 10}, {12, 20}, {21, 30}}, + }, + { + exist: []trange{{1, 10}, {12, 20}, {25, 30}}, + new: trange{18, 23}, + exp: []trange{{1, 10}, {12, 23}, {25, 30}}, + }, + // TODO(gouthamve): (below) This is technically right, but fix it in the future. + { + exist: []trange{{1, 10}, {12, 20}, {25, 30}}, + new: trange{9, 23}, + exp: []trange{{1, 23}, {12, 20}, {25, 30}}, + }, + { + exist: []trange{{5, 10}, {12, 20}, {25, 30}}, + new: trange{1, 4}, + exp: []trange{{1, 4}, {5, 10}, {12, 20}, {25, 30}}, + }, + } + + for _, c := range cases { + require.Equal(t, c.exp, addNewInterval(c.exist, c.new)) + } + return +} + +func TestDeletedIterator(t *testing.T) { + chk := chunks.NewXORChunk() + app, err := chk.Appender() + require.NoError(t, err) + // Insert random stuff from (0, 1000). 
+ act := make([]sample, 1000) + for i := 0; i < 1000; i++ { + act[i].t = int64(i) + act[i].v = rand.Float64() + app.Append(act[i].t, act[i].v) + } + + cases := []struct { + r []trange + }{ + {r: []trange{{1, 20}}}, + {r: []trange{{1, 10}, {12, 20}, {21, 23}, {25, 30}}}, + {r: []trange{{1, 10}, {12, 20}, {20, 30}}}, + {r: []trange{{1, 10}, {12, 23}, {25, 30}}}, + {r: []trange{{1, 23}, {12, 20}, {25, 30}}}, + {r: []trange{{1, 23}, {12, 20}, {25, 3000}}}, + {r: []trange{{0, 2000}}}, + {r: []trange{{500, 2000}}}, + {r: []trange{{0, 200}}}, + {r: []trange{{1000, 20000}}}, + } + + for _, c := range cases { + i := int64(-1) + it := &deletedIterator{it: chk.Iterator(), dranges: c.r[:]} + ranges := c.r[:] + for it.Next() { + i++ + for _, tr := range ranges { + if tr.inBounds(i) { + i = tr.maxt + 1 + ranges = ranges[1:] + } + } + + require.True(t, i < 1000) + + ts, v := it.At() + require.Equal(t, act[i].t, ts) + require.Equal(t, act[i].v, v) + } + // There has been an extra call to Next(). + i++ + for _, tr := range ranges { + if tr.inBounds(i) { + i = tr.maxt + 1 + ranges = ranges[1:] + } + } + + require.False(t, i < 1000) + require.NoError(t, it.Err()) + } +} From cea3c88f170dd071d445501f89ed4fe69c54f7be Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Tue, 16 May 2017 19:48:28 +0530 Subject: [PATCH 04/25] Add Tombstones() method to Block. Also add Seek() to TombstoneReader Signed-off-by: Goutham Veeramachaneni --- block.go | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++- head.go | 25 ++++++++-------- querier.go | 5 ++-- 3 files changed, 99 insertions(+), 16 deletions(-) diff --git a/block.go b/block.go index 94f57f2060..7843f232d7 100644 --- a/block.go +++ b/block.go @@ -41,6 +41,9 @@ type DiskBlock interface { // Chunks returns a ChunkReader over the block's data. Chunks() ChunkReader + // Tombstones returns a TombstoneReader over the block's deleted data. + Tombstones() TombstoneReader + // Delete deletes data from the block. 
Delete(mint, maxt int64, ms ...labels.Matcher) error @@ -241,6 +244,10 @@ type persistedBlock struct { chunkr *chunkReader indexr *indexReader + + // For tombstones. + stones []uint32 + tombstones map[uint32][]trange } func newPersistedBlock(dir string) (*persistedBlock, error) { @@ -258,11 +265,23 @@ func newPersistedBlock(dir string) (*persistedBlock, error) { return nil, err } + tr, err := readTombstoneFile(dir) + if err != nil { + return nil, err + } + ts := make(map[uint32][]trange) + for tr.Next() { + s := tr.At() + ts[s.ref] = s.ranges + } + pb := &persistedBlock{ dir: dir, meta: *meta, chunkr: cr, indexr: ir, + + tombstones: ts, } return pb, nil } @@ -292,7 +311,10 @@ func (pb *persistedBlock) Querier(mint, maxt int64) Querier { func (pb *persistedBlock) Dir() string { return pb.dir } func (pb *persistedBlock) Index() IndexReader { return pb.indexr } func (pb *persistedBlock) Chunks() ChunkReader { return pb.chunkr } -func (pb *persistedBlock) Meta() BlockMeta { return pb.meta } +func (pb *persistedBlock) Tombstones() TombstoneReader { + return newMapTombstoneReader(pb.tombstones) +} +func (pb *persistedBlock) Meta() BlockMeta { return pb.meta } func (pb *persistedBlock) Delete(mint, maxt int64, ms ...labels.Matcher) error { pr := newPostingsReader(pb.indexr) @@ -348,6 +370,7 @@ type stone struct { // TombstoneReader is the iterator over tombstones. 
type TombstoneReader interface { Next() bool + Seek(ref uint32) bool At() stone Err() error } @@ -402,6 +425,16 @@ func (t *tombstoneReader) Next() bool { return t.idx < t.len } +func (t *tombstoneReader) Seek(ref uint32) bool { + bytIdx := t.idx * 12 + + t.idx += sort.Search(t.len-t.idx, func(i int) bool { + return binary.BigEndian.Uint32(t.b[bytIdx+i*12:]) >= ref + }) + + return t.idx < t.len +} + func (t *tombstoneReader) At() stone { bytIdx := t.idx * (4 + 8) dat := t.stones[bytIdx : bytIdx+12] @@ -443,6 +476,7 @@ type mapTombstoneReader struct { stones map[uint32][]trange } +// TODO(gouthamve): Take pre-sorted refs. func newMapTombstoneReader(ts map[uint32][]trange) *mapTombstoneReader { refs := make([]uint32, 0, len(ts)) for k := range ts { @@ -463,6 +497,25 @@ func (t *mapTombstoneReader) Next() bool { return false } +func (t *mapTombstoneReader) Seek(ref uint32) bool { + // If the current value satisfies, then return. + if t.cur >= ref { + return true + } + + // Do binary search between current position and end. + i := sort.Search(len(t.refs), func(i int) bool { + return t.refs[i] >= ref + }) + if i < len(t.refs) { + t.cur = t.refs[i] + t.refs = t.refs[i+1:] + return true + } + t.refs = nil + return false +} + func (t *mapTombstoneReader) At() stone { return stone{ref: t.cur, ranges: t.stones[t.cur]} } @@ -492,6 +545,25 @@ func (t *simpleTombstoneReader) Next() bool { return false } +func (t *simpleTombstoneReader) Seek(ref uint32) bool { + // If the current value satisfies, then return. + if t.cur >= ref { + return true + } + + // Do binary search between current position and end. 
+ i := sort.Search(len(t.refs), func(i int) bool { + return t.refs[i] >= ref + }) + if i < len(t.refs) { + t.cur = t.refs[i] + t.refs = t.refs[i+1:] + return true + } + t.refs = nil + return false +} + func (t *simpleTombstoneReader) At() stone { return stone{ref: t.cur, ranges: t.ranges} } @@ -554,6 +626,17 @@ func (t *mergedTombstoneReader) Next() bool { return true } +func (t *mergedTombstoneReader) Seek(ref uint32) bool { + if t.cur.ref >= ref { + return true + } + + t.aok = t.a.Seek(ref) + t.bok = t.b.Seek(ref) + t.initialized = true + + return t.Next() +} func (t *mergedTombstoneReader) At() stone { return t.cur } diff --git a/head.go b/head.go index 4b59a9d749..559d1100c5 100644 --- a/head.go +++ b/head.go @@ -210,19 +210,24 @@ func (h *HeadBlock) Meta() BlockMeta { return m } -// Dir implements headBlock +// Dir implements headBlock. func (h *HeadBlock) Dir() string { return h.dir } -// Persisted implements headBlock +// Persisted implements headBlock. func (h *HeadBlock) Persisted() bool { return false } -// Index implements headBlock +// Index implements headBlock. func (h *HeadBlock) Index() IndexReader { return &headIndexReader{h} } -// Chunks implements headBlock +// Chunks implements headBlock. func (h *HeadBlock) Chunks() ChunkReader { return &headChunkReader{h} } -// Delete implements headBlock +// Tombstones implements headBlock. +func (h *HeadBlock) Tombstones() TombstoneReader { + return newMapTombstoneReader(h.tombstones) +} + +// Delete implements headBlock. 
func (h *HeadBlock) Delete(mint int64, maxt int64, ms ...labels.Matcher) error { h.mtx.RLock() @@ -246,13 +251,7 @@ Outer: } } - rs, ok := h.tombstones[ref] - if !ok { - h.tombstones[ref] = []trange{{mint, maxt}} - continue - } - - h.tombstones[ref] = addNewInterval(rs, trange{mint, maxt}) + h.tombstones[ref] = addNewInterval(h.tombstones[ref], trange{mint, maxt}) } if p.Err() != nil { @@ -262,7 +261,7 @@ Outer: return writeTombstoneFile(h.dir, newMapTombstoneReader(h.tombstones)) } -// Querier implements Queryable and headBlock +// Querier implements Queryable and headBlock. func (h *HeadBlock) Querier(mint, maxt int64) Querier { h.mtx.RLock() defer h.mtx.RUnlock() diff --git a/querier.go b/querier.go index 86dd76b99e..a7b67ea2f0 100644 --- a/querier.go +++ b/querier.go @@ -126,8 +126,9 @@ func (q *querier) Close() error { // blockQuerier provides querying access to a single block database. type blockQuerier struct { - index IndexReader - chunks ChunkReader + index IndexReader + chunks ChunkReader + tombstones TombstoneReader postingsMapper func(Postings) Postings From 34a86af3c6d627f5607e308380e3b2c5563d897f Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Wed, 17 May 2017 08:36:56 +0530 Subject: [PATCH 05/25] Move tombstones to their own thing. 
Signed-off-by: Goutham Veeramachaneni --- block.go | 373 +------------------------------------------- block_test.go | 46 ------ db_test.go | 2 +- head.go | 5 + index.go | 28 +--- tombstones.go | 379 +++++++++++++++++++++++++++++++++++++++++++++ tombstones_test.go | 47 ++++++ 7 files changed, 434 insertions(+), 446 deletions(-) create mode 100644 tombstones.go create mode 100644 tombstones_test.go diff --git a/block.go b/block.go index 7843f232d7..132da1c735 100644 --- a/block.go +++ b/block.go @@ -14,13 +14,11 @@ package tsdb import ( - "encoding/binary" "encoding/json" "fmt" "io/ioutil" "os" "path/filepath" - "sort" "github.com/oklog/ulid" "github.com/pkg/errors" @@ -115,7 +113,6 @@ type blockMeta struct { } const metaFilename = "meta.json" -const tombstoneFilename = "tombstones" func readMetaFile(dir string) (*BlockMeta, error) { b, err := ioutil.ReadFile(filepath.Join(dir, metaFilename)) @@ -158,86 +155,6 @@ func writeMetaFile(dir string, meta *BlockMeta) error { return renameFile(tmp, path) } -func readTombstoneFile(dir string) (TombstoneReader, error) { - return newTombStoneReader(dir) -} - -func writeTombstoneFile(dir string, tr TombstoneReader) error { - path := filepath.Join(dir, tombstoneFilename) - tmp := path + ".tmp" - - f, err := os.Create(tmp) - if err != nil { - return err - } - - stoneOff := make(map[uint32]int64) // The map that holds the ref to offset vals. - refs := []uint32{} // Sorted refs. - - pos := int64(0) - buf := encbuf{b: make([]byte, 2*binary.MaxVarintLen64)} - for tr.Next() { - s := tr.At() - - refs = append(refs, s.ref) - stoneOff[s.ref] = pos - - // Write the ranges. 
- buf.reset() - buf.putVarint64(int64(len(s.ranges))) - n, err := f.Write(buf.get()) - if err != nil { - return err - } - pos += int64(n) - - for _, r := range s.ranges { - buf.reset() - buf.putVarint64(r.mint) - buf.putVarint64(r.maxt) - n, err = f.Write(buf.get()) - if err != nil { - return err - } - pos += int64(n) - } - } - if err := tr.Err(); err != nil { - return err - } - - // Write the offset table. - buf.reset() - buf.putBE32int(len(refs)) - if _, err := f.Write(buf.get()); err != nil { - return err - } - - for _, ref := range refs { - buf.reset() - buf.putBE32(ref) - buf.putBE64int64(stoneOff[ref]) - _, err = f.Write(buf.get()) - if err != nil { - return err - } - } - - // Write the offset to the offset table. - buf.reset() - buf.putBE64int64(pos) - _, err = f.Write(buf.get()) - if err != nil { - return err - } - - if err := f.Close(); err != nil { - return err - } - - return renameFile(tmp, path) -} - type persistedBlock struct { dir string meta BlockMeta @@ -353,301 +270,13 @@ Outer: } // Merge the current and new tombstones. - tr := newMapTombstoneReader(ir.tombstones) + tr := newMapTombstoneReader(pb.tombstones) str := newSimpleTombstoneReader(vPostings, []trange{{mint, maxt}}) tombreader := newMergedTombstoneReader(tr, str) return writeTombstoneFile(pb.dir, tombreader) } -// stone holds the information on the posting and time-range -// that is deleted. -type stone struct { - ref uint32 - ranges []trange -} - -// TombstoneReader is the iterator over tombstones. -type TombstoneReader interface { - Next() bool - Seek(ref uint32) bool - At() stone - Err() error -} - -var emptyTombstoneReader = newMapTombstoneReader(make(map[uint32][]trange)) - -type tombstoneReader struct { - stones []byte - idx int - len int - - b []byte - err error -} - -func newTombStoneReader(dir string) (*tombstoneReader, error) { - // TODO(gouthamve): MMAP? 
- b, err := ioutil.ReadFile(filepath.Join(dir, tombstoneFilename)) - if err != nil { - return nil, err - } - - offsetBytes := b[len(b)-8:] - d := &decbuf{b: offsetBytes} - off := d.be64int64() - if err := d.err(); err != nil { - return nil, err - } - - d = &decbuf{b: b[off:]} - numStones := d.be32int() - if err := d.err(); err != nil { - return nil, err - } - - return &tombstoneReader{ - stones: b[off+4:], - idx: -1, - len: int(numStones), - - b: b, - }, nil -} - -func (t *tombstoneReader) Next() bool { - if t.err != nil { - return false - } - - t.idx++ - - return t.idx < t.len -} - -func (t *tombstoneReader) Seek(ref uint32) bool { - bytIdx := t.idx * 12 - - t.idx += sort.Search(t.len-t.idx, func(i int) bool { - return binary.BigEndian.Uint32(t.b[bytIdx+i*12:]) >= ref - }) - - return t.idx < t.len -} - -func (t *tombstoneReader) At() stone { - bytIdx := t.idx * (4 + 8) - dat := t.stones[bytIdx : bytIdx+12] - - d := &decbuf{b: dat} - ref := d.be32() - off := d.be64int64() - - d = &decbuf{b: t.b[off:]} - numRanges := d.varint64() - if err := d.err(); err != nil { - t.err = err - return stone{ref: ref} - } - - dranges := make([]trange, 0, numRanges) - for i := 0; i < int(numRanges); i++ { - mint := d.varint64() - maxt := d.varint64() - if err := d.err(); err != nil { - t.err = err - return stone{ref: ref, ranges: dranges} - } - - dranges = append(dranges, trange{mint, maxt}) - } - - return stone{ref: ref, ranges: dranges} -} - -func (t *tombstoneReader) Err() error { - return t.err -} - -type mapTombstoneReader struct { - refs []uint32 - cur uint32 - - stones map[uint32][]trange -} - -// TODO(gouthamve): Take pre-sorted refs. 
-func newMapTombstoneReader(ts map[uint32][]trange) *mapTombstoneReader { - refs := make([]uint32, 0, len(ts)) - for k := range ts { - refs = append(refs, k) - } - sort.Sort(uint32slice(refs)) - return &mapTombstoneReader{stones: ts, refs: refs} -} - -func (t *mapTombstoneReader) Next() bool { - if len(t.refs) > 0 { - t.cur = t.refs[0] - t.refs = t.refs[1:] - return true - } - - t.cur = 0 - return false -} - -func (t *mapTombstoneReader) Seek(ref uint32) bool { - // If the current value satisfies, then return. - if t.cur >= ref { - return true - } - - // Do binary search between current position and end. - i := sort.Search(len(t.refs), func(i int) bool { - return t.refs[i] >= ref - }) - if i < len(t.refs) { - t.cur = t.refs[i] - t.refs = t.refs[i+1:] - return true - } - t.refs = nil - return false -} - -func (t *mapTombstoneReader) At() stone { - return stone{ref: t.cur, ranges: t.stones[t.cur]} -} - -func (t *mapTombstoneReader) Err() error { - return nil -} - -type simpleTombstoneReader struct { - refs []uint32 - cur uint32 - - ranges []trange -} - -func newSimpleTombstoneReader(refs []uint32, drange []trange) *simpleTombstoneReader { - return &simpleTombstoneReader{refs: refs, ranges: drange} -} - -func (t *simpleTombstoneReader) Next() bool { - if len(t.refs) > 0 { - t.cur = t.refs[0] - return true - } - - t.cur = 0 - return false -} - -func (t *simpleTombstoneReader) Seek(ref uint32) bool { - // If the current value satisfies, then return. - if t.cur >= ref { - return true - } - - // Do binary search between current position and end. 
- i := sort.Search(len(t.refs), func(i int) bool { - return t.refs[i] >= ref - }) - if i < len(t.refs) { - t.cur = t.refs[i] - t.refs = t.refs[i+1:] - return true - } - t.refs = nil - return false -} - -func (t *simpleTombstoneReader) At() stone { - return stone{ref: t.cur, ranges: t.ranges} -} - -func (t *simpleTombstoneReader) Err() error { - return nil -} - -type mergedTombstoneReader struct { - a, b TombstoneReader - cur stone - - initialized bool - aok, bok bool -} - -func newMergedTombstoneReader(a, b TombstoneReader) *mergedTombstoneReader { - return &mergedTombstoneReader{a: a, b: b} -} - -func (t *mergedTombstoneReader) Next() bool { - if !t.initialized { - t.aok = t.a.Next() - t.bok = t.b.Next() - t.initialized = true - } - - if !t.aok && !t.bok { - return false - } - - if !t.aok { - t.cur = t.b.At() - t.bok = t.b.Next() - return true - } - if !t.bok { - t.cur = t.a.At() - t.aok = t.a.Next() - return true - } - - acur, bcur := t.a.At(), t.b.At() - - if acur.ref < bcur.ref { - t.cur = acur - t.aok = t.a.Next() - } else if acur.ref > bcur.ref { - t.cur = bcur - t.bok = t.b.Next() - } else { - t.cur = acur - // Merge time ranges. 
- for _, r := range bcur.ranges { - acur.ranges = addNewInterval(acur.ranges, r) - } - t.aok = t.a.Next() - t.bok = t.b.Next() - } - return true -} - -func (t *mergedTombstoneReader) Seek(ref uint32) bool { - if t.cur.ref >= ref { - return true - } - - t.aok = t.a.Seek(ref) - t.bok = t.b.Seek(ref) - t.initialized = true - - return t.Next() -} -func (t *mergedTombstoneReader) At() stone { - return t.cur -} - -func (t *mergedTombstoneReader) Err() error { - if t.a.Err() != nil { - return t.a.Err() - } - return t.b.Err() -} - func chunkDir(dir string) string { return filepath.Join(dir, "chunks") } func walDir(dir string) string { return filepath.Join(dir, "wal") } diff --git a/block_test.go b/block_test.go index 491c1c1387..35178ff493 100644 --- a/block_test.go +++ b/block_test.go @@ -1,47 +1 @@ package tsdb - -import ( - "io/ioutil" - "math/rand" - "os" - "testing" - "time" - - "github.com/stretchr/testify/require" -) - -func TestWriteAndReadbackTombStones(t *testing.T) { - tmpdir, _ := ioutil.TempDir("", "test") - defer os.RemoveAll(tmpdir) - - ref := uint32(0) - - stones := make(map[uint32][]trange) - // Generate the tombstones. - for i := 0; i < 100; i++ { - ref += uint32(rand.Int31n(10)) + 1 - numRanges := rand.Intn(5) - dranges := make([]trange, numRanges) - mint := rand.Int63n(time.Now().UnixNano()) - for j := 0; j < numRanges; j++ { - dranges[j] = trange{mint, mint + rand.Int63n(1000)} - mint += rand.Int63n(1000) + 1 - } - stones[ref] = dranges - } - - require.NoError(t, writeTombstoneFile(tmpdir, newMapTombstoneReader(stones))) - - restr, err := readTombstoneFile(tmpdir) - require.NoError(t, err) - exptr := newMapTombstoneReader(stones) - // Compare the two readers. 
- for restr.Next() { - require.True(t, exptr.Next()) - - require.Equal(t, exptr.At(), restr.At()) - } - require.False(t, exptr.Next()) - require.NoError(t, restr.Err()) - require.NoError(t, exptr.Err()) -} diff --git a/db_test.go b/db_test.go index f3b2dca2a1..ce714bbdbf 100644 --- a/db_test.go +++ b/db_test.go @@ -100,7 +100,7 @@ func TestDataNotAvailableAfterRollback(t *testing.T) { } func TestDBAppenderAddRef(t *testing.T) { - tmpdir, _ := ioutil.TempDir("", "test") + tmpdir, _ := ioutil.TempDir("", "test1") defer os.RemoveAll(tmpdir) db, err := Open(tmpdir, nil, nil, nil) diff --git a/head.go b/head.go index 559d1100c5..ab9614c4ba 100644 --- a/head.go +++ b/head.go @@ -97,6 +97,11 @@ func TouchHeadBlock(dir string, seq int, mint, maxt int64) error { return err } + // Write an empty tombstones file. + if err := writeTombstoneFile(tmp, emptyTombstoneReader); err != nil { + return err + } + return renameFile(tmp, dir) } diff --git a/index.go b/index.go index 3536468185..b36eb49bd9 100644 --- a/index.go +++ b/index.go @@ -165,10 +165,6 @@ func newIndexWriter(dir string) (*indexWriter, error) { if err := iw.writeMeta(); err != nil { return nil, err } - // TODO(gouthamve): Figure out where this function goes, index or block. - if err := writeTombstoneFile(dir, emptyTombstoneReader); err != nil { - return nil, err - } return iw, nil } @@ -541,8 +537,6 @@ type indexReader struct { // Cached hashmaps of section offsets. 
labels map[string]uint32 postings map[string]uint32 - - tombstones map[uint32][]trange } var ( @@ -575,21 +569,7 @@ func newIndexReader(dir string) (*indexReader, error) { return nil, errors.Wrap(err, "read label index table") } r.postings, err = r.readOffsetTable(r.toc.postingsTable) - if err != nil { - return nil, errors.Wrap(err, "read postings table") - } - - tr, err := readTombstoneFile(dir) - if err != nil { - return r, err - } - r.tombstones = make(map[uint32][]trange) - for tr.Next() { - s := tr.At() - r.tombstones[s.ref] = s.ranges - } - - return r, tr.Err() + return r, errors.Wrap(err, "read postings table") } func (r *indexReader) readTOC() error { @@ -757,8 +737,6 @@ func (r *indexReader) Series(ref uint32) (labels.Labels, []*ChunkMeta, error) { lbls = append(lbls, labels.Label{Name: ln, Value: lv}) } - s, deleted := r.tombstones[ref] - // Read the chunks meta data. k = int(d2.uvarint()) chunks := make([]*ChunkMeta, 0, k) @@ -772,14 +750,10 @@ func (r *indexReader) Series(ref uint32) (labels.Labels, []*ChunkMeta, error) { return nil, nil, errors.Wrapf(d2.err(), "read meta for chunk %d", i) } - // TODO(gouthamve): Donot add the chunk if its completely deleted. chunks = append(chunks, &ChunkMeta{ Ref: off, MinTime: mint, MaxTime: maxt, - - deleted: deleted, - dranges: s, }) } diff --git a/tombstones.go b/tombstones.go new file mode 100644 index 0000000000..6136c306c1 --- /dev/null +++ b/tombstones.go @@ -0,0 +1,379 @@ +package tsdb + +import ( + "encoding/binary" + "io/ioutil" + "os" + "path/filepath" + "sort" +) + +const tombstoneFilename = "tombstones" + +func readTombstoneFile(dir string) (TombstoneReader, error) { + return newTombStoneReader(dir) +} + +func writeTombstoneFile(dir string, tr TombstoneReader) error { + path := filepath.Join(dir, tombstoneFilename) + tmp := path + ".tmp" + + f, err := os.Create(tmp) + if err != nil { + return err + } + + stoneOff := make(map[uint32]int64) // The map that holds the ref to offset vals. 
+ refs := []uint32{} // Sorted refs. + + pos := int64(0) + buf := encbuf{b: make([]byte, 2*binary.MaxVarintLen64)} + for tr.Next() { + s := tr.At() + + refs = append(refs, s.ref) + stoneOff[s.ref] = pos + + // Write the ranges. + buf.reset() + buf.putVarint64(int64(len(s.ranges))) + n, err := f.Write(buf.get()) + if err != nil { + return err + } + pos += int64(n) + + for _, r := range s.ranges { + buf.reset() + buf.putVarint64(r.mint) + buf.putVarint64(r.maxt) + n, err = f.Write(buf.get()) + if err != nil { + return err + } + pos += int64(n) + } + } + if err := tr.Err(); err != nil { + return err + } + + // Write the offset table. + buf.reset() + buf.putBE32int(len(refs)) + if _, err := f.Write(buf.get()); err != nil { + return err + } + + for _, ref := range refs { + buf.reset() + buf.putBE32(ref) + buf.putBE64int64(stoneOff[ref]) + _, err = f.Write(buf.get()) + if err != nil { + return err + } + } + + // Write the offset to the offset table. + buf.reset() + buf.putBE64int64(pos) + _, err = f.Write(buf.get()) + if err != nil { + return err + } + + if err := f.Close(); err != nil { + return err + } + + return renameFile(tmp, path) +} + +// stone holds the information on the posting and time-range +// that is deleted. +type stone struct { + ref uint32 + ranges []trange +} + +// TombstoneReader is the iterator over tombstones. +type TombstoneReader interface { + Next() bool + Seek(ref uint32) bool + At() stone + Err() error +} + +var emptyTombstoneReader = newMapTombstoneReader(make(map[uint32][]trange)) + +type tombstoneReader struct { + stones []byte + idx int + len int + + b []byte + err error +} + +func newTombStoneReader(dir string) (*tombstoneReader, error) { + // TODO(gouthamve): MMAP? 
+ b, err := ioutil.ReadFile(filepath.Join(dir, tombstoneFilename)) + if err != nil { + return nil, err + } + + offsetBytes := b[len(b)-8:] + d := &decbuf{b: offsetBytes} + off := d.be64int64() + if err := d.err(); err != nil { + return nil, err + } + + d = &decbuf{b: b[off:]} + numStones := d.be32int() + if err := d.err(); err != nil { + return nil, err + } + + return &tombstoneReader{ + stones: b[off+4:], + idx: -1, + len: int(numStones), + + b: b, + }, nil +} + +func (t *tombstoneReader) Next() bool { + if t.err != nil { + return false + } + + t.idx++ + + return t.idx < t.len +} + +func (t *tombstoneReader) Seek(ref uint32) bool { + bytIdx := t.idx * 12 + + t.idx += sort.Search(t.len-t.idx, func(i int) bool { + return binary.BigEndian.Uint32(t.b[bytIdx+i*12:]) >= ref + }) + + return t.idx < t.len +} + +func (t *tombstoneReader) At() stone { + bytIdx := t.idx * (4 + 8) + dat := t.stones[bytIdx : bytIdx+12] + + d := &decbuf{b: dat} + ref := d.be32() + off := d.be64int64() + + d = &decbuf{b: t.b[off:]} + numRanges := d.varint64() + if err := d.err(); err != nil { + t.err = err + return stone{ref: ref} + } + + dranges := make([]trange, 0, numRanges) + for i := 0; i < int(numRanges); i++ { + mint := d.varint64() + maxt := d.varint64() + if err := d.err(); err != nil { + t.err = err + return stone{ref: ref, ranges: dranges} + } + + dranges = append(dranges, trange{mint, maxt}) + } + + return stone{ref: ref, ranges: dranges} +} + +func (t *tombstoneReader) Err() error { + return t.err +} + +type mapTombstoneReader struct { + refs []uint32 + cur uint32 + + stones map[uint32][]trange +} + +// TODO(gouthamve): Take pre-sorted refs. 
+func newMapTombstoneReader(ts map[uint32][]trange) *mapTombstoneReader { + refs := make([]uint32, 0, len(ts)) + for k := range ts { + refs = append(refs, k) + } + sort.Sort(uint32slice(refs)) + return &mapTombstoneReader{stones: ts, refs: refs} +} + +func (t *mapTombstoneReader) Next() bool { + if len(t.refs) > 0 { + t.cur = t.refs[0] + t.refs = t.refs[1:] + return true + } + + t.cur = 0 + return false +} + +func (t *mapTombstoneReader) Seek(ref uint32) bool { + // If the current value satisfies, then return. + if t.cur >= ref { + return true + } + + // Do binary search between current position and end. + i := sort.Search(len(t.refs), func(i int) bool { + return t.refs[i] >= ref + }) + if i < len(t.refs) { + t.cur = t.refs[i] + t.refs = t.refs[i+1:] + return true + } + t.refs = nil + return false +} + +func (t *mapTombstoneReader) At() stone { + return stone{ref: t.cur, ranges: t.stones[t.cur]} +} + +func (t *mapTombstoneReader) Err() error { + return nil +} + +type simpleTombstoneReader struct { + refs []uint32 + cur uint32 + + ranges []trange +} + +func newSimpleTombstoneReader(refs []uint32, drange []trange) *simpleTombstoneReader { + return &simpleTombstoneReader{refs: refs, ranges: drange} +} + +func (t *simpleTombstoneReader) Next() bool { + if len(t.refs) > 0 { + t.cur = t.refs[0] + return true + } + + t.cur = 0 + return false +} + +func (t *simpleTombstoneReader) Seek(ref uint32) bool { + // If the current value satisfies, then return. + if t.cur >= ref { + return true + } + + // Do binary search between current position and end. 
+ i := sort.Search(len(t.refs), func(i int) bool { + return t.refs[i] >= ref + }) + if i < len(t.refs) { + t.cur = t.refs[i] + t.refs = t.refs[i+1:] + return true + } + t.refs = nil + return false +} + +func (t *simpleTombstoneReader) At() stone { + return stone{ref: t.cur, ranges: t.ranges} +} + +func (t *simpleTombstoneReader) Err() error { + return nil +} + +type mergedTombstoneReader struct { + a, b TombstoneReader + cur stone + + initialized bool + aok, bok bool +} + +func newMergedTombstoneReader(a, b TombstoneReader) *mergedTombstoneReader { + return &mergedTombstoneReader{a: a, b: b} +} + +func (t *mergedTombstoneReader) Next() bool { + if !t.initialized { + t.aok = t.a.Next() + t.bok = t.b.Next() + t.initialized = true + } + + if !t.aok && !t.bok { + return false + } + + if !t.aok { + t.cur = t.b.At() + t.bok = t.b.Next() + return true + } + if !t.bok { + t.cur = t.a.At() + t.aok = t.a.Next() + return true + } + + acur, bcur := t.a.At(), t.b.At() + + if acur.ref < bcur.ref { + t.cur = acur + t.aok = t.a.Next() + } else if acur.ref > bcur.ref { + t.cur = bcur + t.bok = t.b.Next() + } else { + t.cur = acur + // Merge time ranges. 
+ for _, r := range bcur.ranges { + acur.ranges = addNewInterval(acur.ranges, r) + } + t.aok = t.a.Next() + t.bok = t.b.Next() + } + return true +} + +func (t *mergedTombstoneReader) Seek(ref uint32) bool { + if t.cur.ref >= ref { + return true + } + + t.aok = t.a.Seek(ref) + t.bok = t.b.Seek(ref) + t.initialized = true + + return t.Next() +} +func (t *mergedTombstoneReader) At() stone { + return t.cur +} + +func (t *mergedTombstoneReader) Err() error { + if t.a.Err() != nil { + return t.a.Err() + } + return t.b.Err() +} diff --git a/tombstones_test.go b/tombstones_test.go new file mode 100644 index 0000000000..491c1c1387 --- /dev/null +++ b/tombstones_test.go @@ -0,0 +1,47 @@ +package tsdb + +import ( + "io/ioutil" + "math/rand" + "os" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestWriteAndReadbackTombStones(t *testing.T) { + tmpdir, _ := ioutil.TempDir("", "test") + defer os.RemoveAll(tmpdir) + + ref := uint32(0) + + stones := make(map[uint32][]trange) + // Generate the tombstones. + for i := 0; i < 100; i++ { + ref += uint32(rand.Int31n(10)) + 1 + numRanges := rand.Intn(5) + dranges := make([]trange, numRanges) + mint := rand.Int63n(time.Now().UnixNano()) + for j := 0; j < numRanges; j++ { + dranges[j] = trange{mint, mint + rand.Int63n(1000)} + mint += rand.Int63n(1000) + 1 + } + stones[ref] = dranges + } + + require.NoError(t, writeTombstoneFile(tmpdir, newMapTombstoneReader(stones))) + + restr, err := readTombstoneFile(tmpdir) + require.NoError(t, err) + exptr := newMapTombstoneReader(stones) + // Compare the two readers. + for restr.Next() { + require.True(t, exptr.Next()) + + require.Equal(t, exptr.At(), restr.At()) + } + require.False(t, exptr.Next()) + require.NoError(t, restr.Err()) + require.NoError(t, exptr.Err()) +} From 22c1b5b4926d411e57cde8e4ad7298c9771cadce Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Wed, 17 May 2017 14:49:42 +0530 Subject: [PATCH 06/25] Make SeriesSets use tombstones. 
Signed-off-by: Goutham Veeramachaneni --- chunks.go | 80 -------------------------------- chunks_test.go | 45 ------------------ compact.go | 111 +++++++++++++++++++++++++++++++++------------ db_test.go | 2 +- head.go | 15 +++--- querier.go | 90 +++++++++++++++++++++++++++--------- querier_test.go | 20 ++++---- tombstones.go | 102 +++++++++++++++++++++++++++++++++++++++++ tombstones_test.go | 49 ++++++++++++++++++++ 9 files changed, 319 insertions(+), 195 deletions(-) diff --git a/chunks.go b/chunks.go index ca030b76d9..0cc64cb469 100644 --- a/chunks.go +++ b/chunks.go @@ -41,10 +41,6 @@ type ChunkMeta struct { Chunk chunks.Chunk MinTime, MaxTime int64 // time range the data covers - - // To handle deleted time-ranges. - deleted bool - dranges []trange } // writeHash writes the chunk encoding and raw data into the provided hash. @@ -58,61 +54,6 @@ func (cm *ChunkMeta) writeHash(h hash.Hash) error { return nil } -// Iterator returns a chunks.Iterator that honors any deleted ranges. -// If there is no deleted range then the underlying iterator is returned. -func (cm *ChunkMeta) Iterator() chunks.Iterator { - if cm.Chunk == nil { - return nil - } - - if cm.deleted { - return &deletedIterator{it: cm.Chunk.Iterator(), dranges: cm.dranges} - } - - return cm.Chunk.Iterator() -} - -type trange struct { - mint, maxt int64 -} - -func (tr trange) inBounds(t int64) bool { - return t >= tr.mint && t <= tr.maxt -} - -// This adds the new time-range to the existing ones. -// The existing ones must be sorted and should not be nil. 
-func addNewInterval(existing []trange, n trange) []trange { - for i, r := range existing { - if r.inBounds(n.mint) { - if n.maxt > r.maxt { - existing[i].maxt = n.maxt - } - - return existing - } - if r.inBounds(n.maxt) { - if n.mint < r.maxt { - existing[i].mint = n.mint - } - - return existing - } - - if n.mint < r.mint { - newRange := make([]trange, i, len(existing[:i])+1) - copy(newRange, existing[:i]) - newRange = append(newRange, n) - newRange = append(newRange, existing[i:]...) - - return newRange - } - } - - existing = append(existing, n) - return existing -} - // deletedIterator wraps an Iterator and makes sure any deleted metrics are not // returned. type deletedIterator struct { @@ -287,27 +228,6 @@ func (w *chunkWriter) WriteChunks(chks ...*ChunkMeta) error { maxLen := int64(binary.MaxVarintLen32) // The number of chunks. for _, c := range chks { maxLen += binary.MaxVarintLen32 + 1 // The number of bytes in the chunk and its encoding. - - // Remove the deleted parts. - if c.deleted { - // TODO(gouthamve): Try to do it in-place somehow? 
- chk := chunks.NewXORChunk() - app, err := chk.Appender() - if err != nil { - return err - } - it := c.Iterator() - for it.Next() { - ts, v := it.At() - app.Append(ts, v) - } - - if err := it.Err(); err != nil { - return err - } - c.Chunk = chk - } - maxLen += int64(len(c.Chunk.Bytes())) } newsz := w.n + maxLen diff --git a/chunks_test.go b/chunks_test.go index 2837259f89..4633dc7f06 100644 --- a/chunks_test.go +++ b/chunks_test.go @@ -37,51 +37,6 @@ func (cr mockChunkReader) Close() error { return nil } -func TestAddingNewIntervals(t *testing.T) { - cases := []struct { - exist []trange - new trange - - exp []trange - }{ - { - new: trange{1, 2}, - exp: []trange{{1, 2}}, - }, - { - exist: []trange{{1, 10}, {12, 20}, {25, 30}}, - new: trange{21, 23}, - exp: []trange{{1, 10}, {12, 20}, {21, 23}, {25, 30}}, - }, - { - exist: []trange{{1, 10}, {12, 20}, {25, 30}}, - new: trange{21, 25}, - exp: []trange{{1, 10}, {12, 20}, {21, 30}}, - }, - { - exist: []trange{{1, 10}, {12, 20}, {25, 30}}, - new: trange{18, 23}, - exp: []trange{{1, 10}, {12, 23}, {25, 30}}, - }, - // TODO(gouthamve): (below) This is technically right, but fix it in the future. 
- { - exist: []trange{{1, 10}, {12, 20}, {25, 30}}, - new: trange{9, 23}, - exp: []trange{{1, 23}, {12, 20}, {25, 30}}, - }, - { - exist: []trange{{5, 10}, {12, 20}, {25, 30}}, - new: trange{1, 4}, - exp: []trange{{1, 4}, {5, 10}, {12, 20}, {25, 30}}, - }, - } - - for _, c := range cases { - require.Equal(t, c.exp, addNewInterval(c.exist, c.new)) - } - return -} - func TestDeletedIterator(t *testing.T) { chk := chunks.NewXORChunk() app, err := chk.Appender() diff --git a/compact.go b/compact.go index 39e780b03e..d63f5cec7e 100644 --- a/compact.go +++ b/compact.go @@ -25,6 +25,7 @@ import ( "github.com/oklog/ulid" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/labels" ) @@ -280,7 +281,7 @@ func (c *compactor) populate(blocks []Block, indexw IndexWriter, chunkw ChunkWri if err != nil { return nil, err } - s := newCompactionSeriesSet(b.Index(), b.Chunks(), all) + s := newCompactionSeriesSet(b.Index(), b.Chunks(), b.Tombstones(), all) if i == 0 { set = s @@ -301,14 +302,37 @@ func (c *compactor) populate(blocks []Block, indexw IndexWriter, chunkw ChunkWri ) for set.Next() { - lset, chunks := set.At() - if err := chunkw.WriteChunks(chunks...); err != nil { + lset, chks, ranges := set.At() // The chunks here are not fully deleted. + + if len(ranges) > 0 { + // Re-encode the chunk to not have deleted values. + for _, chk := range chks { + // Checks Overlap: http://stackoverflow.com/questions/3269434/ + if ranges[0].mint <= chk.MaxTime && chk.MinTime <= ranges[len(ranges)-1].maxt { + newChunk := chunks.NewXORChunk() + app, err := newChunk.Appender() + if err != nil { + return nil, err + } + + it := &deletedIterator{it: chk.Chunk.Iterator(), dranges: ranges} + for it.Next() { + ts, v := it.At() + app.Append(ts, v) + } + + chk.Chunk = newChunk + } + } + } + + if err := chunkw.WriteChunks(chks...); err != nil { return nil, err } - indexw.AddSeries(i, lset, chunks...) 
+ indexw.AddSeries(i, lset, chks...) - meta.Stats.NumChunks += uint64(len(chunks)) + meta.Stats.NumChunks += uint64(len(chks)) meta.Stats.NumSeries++ for _, l := range lset { @@ -358,25 +382,28 @@ func (c *compactor) populate(blocks []Block, indexw IndexWriter, chunkw ChunkWri type compactionSet interface { Next() bool - At() (labels.Labels, []*ChunkMeta) + At() (labels.Labels, []*ChunkMeta, []trange) Err() error } type compactionSeriesSet struct { - p Postings - index IndexReader - chunks ChunkReader + p Postings + index IndexReader + chunks ChunkReader + tombstones TombstoneReader - l labels.Labels - c []*ChunkMeta - err error + l labels.Labels + c []*ChunkMeta + dranges []trange + err error } -func newCompactionSeriesSet(i IndexReader, c ChunkReader, p Postings) *compactionSeriesSet { +func newCompactionSeriesSet(i IndexReader, c ChunkReader, t TombstoneReader, p Postings) *compactionSeriesSet { return &compactionSeriesSet{ - index: i, - chunks: c, - p: p, + index: i, + chunks: c, + tombstones: t, + p: p, } } @@ -385,15 +412,34 @@ func (c *compactionSeriesSet) Next() bool { return false } + if c.tombstones.Seek(c.p.At()) { + s := c.tombstones.At() + if c.p.At() == s.ref { + c.dranges = s.ranges + } else { + c.dranges = nil + } + } c.l, c.c, c.err = c.index.Series(c.p.At()) if c.err != nil { return false } - for _, chk := range c.c { - chk.Chunk, c.err = c.chunks.Chunk(chk.Ref) - if c.err != nil { - return false + + // Remove completely deleted chunks and re-encode partial ones. 
+ if len(c.dranges) > 0 { + chks := make([]*ChunkMeta, 0, len(c.c)) + for _, chk := range c.c { + if !(trange{chk.MinTime, chk.MaxTime}.isSubrange(c.dranges)) { + chk.Chunk, c.err = c.chunks.Chunk(chk.Ref) + if c.err != nil { + return false + } + + chks = append(chks, chk) + } } + + c.c = chks } return true @@ -406,8 +452,8 @@ func (c *compactionSeriesSet) Err() error { return c.p.Err() } -func (c *compactionSeriesSet) At() (labels.Labels, []*ChunkMeta) { - return c.l, c.c +func (c *compactionSeriesSet) At() (labels.Labels, []*ChunkMeta, []trange) { + return c.l, c.c, c.dranges } type compactionMerger struct { @@ -416,6 +462,7 @@ type compactionMerger struct { aok, bok bool l labels.Labels c []*ChunkMeta + dranges []trange } type compactionSeries struct { @@ -443,8 +490,8 @@ func (c *compactionMerger) compare() int { if !c.bok { return -1 } - a, _ := c.a.At() - b, _ := c.b.At() + a, _, _ := c.a.At() + b, _, _ := c.b.At() return labels.Compare(a, b) } @@ -456,17 +503,21 @@ func (c *compactionMerger) Next() bool { d := c.compare() // Both sets contain the current series. Chain them into a single one. if d > 0 { - c.l, c.c = c.b.At() + c.l, c.c, c.dranges = c.b.At() c.bok = c.b.Next() } else if d < 0 { - c.l, c.c = c.a.At() + c.l, c.c, c.dranges = c.a.At() c.aok = c.a.Next() } else { - l, ca := c.a.At() - _, cb := c.b.At() + l, ca, ra := c.a.At() + _, cb, rb := c.b.At() + for _, r := range rb { + ra = addNewInterval(ra, r) + } c.l = l c.c = append(ca, cb...) 
+ c.dranges = ra c.aok = c.a.Next() c.bok = c.b.Next() @@ -481,8 +532,8 @@ func (c *compactionMerger) Err() error { return c.b.Err() } -func (c *compactionMerger) At() (labels.Labels, []*ChunkMeta) { - return c.l, c.c +func (c *compactionMerger) At() (labels.Labels, []*ChunkMeta, []trange) { + return c.l, c.c, c.dranges } func renameFile(from, to string) error { diff --git a/db_test.go b/db_test.go index ce714bbdbf..f3b2dca2a1 100644 --- a/db_test.go +++ b/db_test.go @@ -100,7 +100,7 @@ func TestDataNotAvailableAfterRollback(t *testing.T) { } func TestDBAppenderAddRef(t *testing.T) { - tmpdir, _ := ioutil.TempDir("", "test1") + tmpdir, _ := ioutil.TempDir("", "test") defer os.RemoveAll(tmpdir) db, err := Open(tmpdir, nil, nil, nil) diff --git a/head.go b/head.go index ab9614c4ba..5adda74c02 100644 --- a/head.go +++ b/head.go @@ -279,10 +279,12 @@ func (h *HeadBlock) Querier(mint, maxt int64) Querier { series := h.series[:] return &blockQuerier{ - mint: mint, - maxt: maxt, - index: h.Index(), - chunks: h.Chunks(), + mint: mint, + maxt: maxt, + index: h.Index(), + chunks: h.Chunks(), + tombstones: h.Tombstones(), + postingsMapper: func(p Postings) Postings { ep := make([]uint32, 0, 64) @@ -586,8 +588,6 @@ func (h *headIndexReader) Series(ref uint32) (labels.Labels, []*ChunkMeta, error return nil, nil, ErrNotFound } - dranges, deleted := h.tombstones[ref] - s := h.series[ref] metas := make([]*ChunkMeta, 0, len(s.chunks)) @@ -599,9 +599,6 @@ func (h *headIndexReader) Series(ref uint32) (labels.Labels, []*ChunkMeta, error MinTime: c.minTime, MaxTime: c.maxTime, Ref: (uint64(ref) << 32) | uint64(i), - - deleted: deleted, - dranges: dranges, }) } diff --git a/querier.go b/querier.go index a7b67ea2f0..a2f0159fbd 100644 --- a/querier.go +++ b/querier.go @@ -150,6 +150,8 @@ func (q *blockQuerier) Select(ms ...labels.Matcher) SeriesSet { p: p, index: q.index, absent: absent, + + tombstones: q.tombstones.Copy(), }, chunks: q.chunks, mint: q.mint, @@ -367,29 +369,35 @@ func (s 
*mergedSeriesSet) Next() bool { type chunkSeriesSet interface { Next() bool - At() (labels.Labels, []*ChunkMeta) + At() (labels.Labels, []*ChunkMeta, stone) Err() error } // baseChunkSeries loads the label set and chunk references for a postings // list from an index. It filters out series that have labels set that should be unset. type baseChunkSeries struct { - p Postings - index IndexReader - absent []string // labels that must be unset in results. + p Postings + index IndexReader + tombstones TombstoneReader + absent []string // labels that must be unset in results. - lset labels.Labels - chks []*ChunkMeta - err error + lset labels.Labels + chks []*ChunkMeta + stone stone + err error } -func (s *baseChunkSeries) At() (labels.Labels, []*ChunkMeta) { return s.lset, s.chks } -func (s *baseChunkSeries) Err() error { return s.err } +func (s *baseChunkSeries) At() (labels.Labels, []*ChunkMeta, stone) { + return s.lset, s.chks, s.stone +} + +func (s *baseChunkSeries) Err() error { return s.err } func (s *baseChunkSeries) Next() bool { Outer: for s.p.Next() { - lset, chunks, err := s.index.Series(s.p.At()) + ref := s.p.At() + lset, chunks, err := s.index.Series(ref) if err != nil { s.err = err return false @@ -404,6 +412,19 @@ Outer: s.lset = lset s.chks = chunks + if s.tombstones.Seek(ref) && s.tombstones.At().ref == ref { + s.stone = s.tombstones.At() + + // Only those chunks that are not entirely deleted. 
+ chks := make([]*ChunkMeta, 0, len(s.chks)) + for _, chk := range s.chks { + if !(trange{chk.MinTime, chk.MaxTime}.isSubrange(s.stone.ranges)) { + chks = append(chks, chk) + } + } + + s.chks = chks + } return true } @@ -421,17 +442,20 @@ type populatedChunkSeries struct { chunks ChunkReader mint, maxt int64 - err error - chks []*ChunkMeta - lset labels.Labels + err error + chks []*ChunkMeta + lset labels.Labels + stone stone } -func (s *populatedChunkSeries) At() (labels.Labels, []*ChunkMeta) { return s.lset, s.chks } -func (s *populatedChunkSeries) Err() error { return s.err } +func (s *populatedChunkSeries) At() (labels.Labels, []*ChunkMeta, stone) { + return s.lset, s.chks, stone{} +} +func (s *populatedChunkSeries) Err() error { return s.err } func (s *populatedChunkSeries) Next() bool { for s.set.Next() { - lset, chks := s.set.At() + lset, chks, stn := s.set.At() for len(chks) > 0 { if chks[0].MaxTime >= s.mint { @@ -458,6 +482,7 @@ func (s *populatedChunkSeries) Next() bool { s.lset = lset s.chks = chks + s.stone = stn return true } @@ -478,8 +503,15 @@ type blockSeriesSet struct { func (s *blockSeriesSet) Next() bool { for s.set.Next() { - lset, chunks := s.set.At() - s.cur = &chunkSeries{labels: lset, chunks: chunks, mint: s.mint, maxt: s.maxt} + lset, chunks, stn := s.set.At() + s.cur = &chunkSeries{ + labels: lset, + chunks: chunks, + mint: s.mint, + maxt: s.maxt, + + stone: stn, + } return true } if s.set.Err() != nil { @@ -498,6 +530,8 @@ type chunkSeries struct { chunks []*ChunkMeta // in-order chunk refs mint, maxt int64 + + stone stone } func (s *chunkSeries) Labels() labels.Labels { @@ -505,7 +539,7 @@ func (s *chunkSeries) Labels() labels.Labels { } func (s *chunkSeries) Iterator() SeriesIterator { - return newChunkSeriesIterator(s.chunks, s.mint, s.maxt) + return newChunkSeriesIterator(s.chunks, s.stone, s.mint, s.maxt) } // SeriesIterator iterates over the data of a time series. 
@@ -602,16 +636,24 @@ type chunkSeriesIterator struct { cur chunks.Iterator maxt, mint int64 + + stone stone } -func newChunkSeriesIterator(cs []*ChunkMeta, mint, maxt int64) *chunkSeriesIterator { +func newChunkSeriesIterator(cs []*ChunkMeta, s stone, mint, maxt int64) *chunkSeriesIterator { + it := cs[0].Chunk.Iterator() + if len(s.ranges) > 0 { + it = &deletedIterator{it: it, dranges: s.ranges} + } return &chunkSeriesIterator{ chunks: cs, i: 0, - cur: cs[0].Chunk.Iterator(), + cur: it, mint: mint, maxt: maxt, + + stone: s, } } @@ -646,6 +688,9 @@ func (it *chunkSeriesIterator) Seek(t int64) (ok bool) { it.i = x it.cur = it.chunks[x].Chunk.Iterator() + if len(it.stone.ranges) > 0 { + it.cur = &deletedIterator{it: it.cur, dranges: it.stone.ranges} + } for it.cur.Next() { t0, _ := it.cur.At() @@ -677,6 +722,9 @@ func (it *chunkSeriesIterator) Next() bool { it.i++ it.cur = it.chunks[it.i].Chunk.Iterator() + if len(it.stone.ranges) > 0 { + it.cur = &deletedIterator{it: it.cur, dranges: it.stone.ranges} + } return it.Next() } diff --git a/querier_test.go b/querier_test.go index 864e76cbba..9502b5b4f9 100644 --- a/querier_test.go +++ b/querier_test.go @@ -378,8 +378,9 @@ Outer: for _, c := range cases.queries { ir, cr := createIdxChkReaders(cases.data) querier := &blockQuerier{ - index: ir, - chunks: cr, + index: ir, + chunks: cr, + tombstones: emptyTombstoneReader, mint: c.mint, maxt: c.maxt, @@ -487,13 +488,14 @@ func TestBaseChunkSeries(t *testing.T) { } bcs := &baseChunkSeries{ - p: newListPostings(tc.postings), - index: mi, + p: newListPostings(tc.postings), + index: mi, + tombstones: emptyTombstoneReader, } i := 0 for bcs.Next() { - lset, chks := bcs.At() + lset, chks, _ := bcs.At() idx := tc.expIdxs[i] @@ -701,7 +703,7 @@ func TestSeriesIterator(t *testing.T) { chunkFromSamples(tc.b), chunkFromSamples(tc.c), } - res := newChunkSeriesIterator(chkMetas, tc.mint, tc.maxt) + res := newChunkSeriesIterator(chkMetas, stone{}, tc.mint, tc.maxt) smplValid := 
make([]sample, 0) for _, s := range tc.exp { @@ -772,7 +774,7 @@ func TestSeriesIterator(t *testing.T) { chunkFromSamples(tc.b), chunkFromSamples(tc.c), } - res := newChunkSeriesIterator(chkMetas, tc.mint, tc.maxt) + res := newChunkSeriesIterator(chkMetas, stone{}, tc.mint, tc.maxt) smplValid := make([]sample, 0) for _, s := range tc.exp { @@ -919,8 +921,8 @@ func (m *mockChunkSeriesSet) Next() bool { return m.i < len(m.l) } -func (m *mockChunkSeriesSet) At() (labels.Labels, []*ChunkMeta) { - return m.l[m.i], m.cm[m.i] +func (m *mockChunkSeriesSet) At() (labels.Labels, []*ChunkMeta, stone) { + return m.l[m.i], m.cm[m.i], stone{} } func (m *mockChunkSeriesSet) Err() error { diff --git a/tombstones.go b/tombstones.go index 6136c306c1..4f8131c12c 100644 --- a/tombstones.go +++ b/tombstones.go @@ -102,6 +102,8 @@ type TombstoneReader interface { Next() bool Seek(ref uint32) bool At() stone + // A copy of the current instance. Changes to the copy will not affect parent. + Copy() TombstoneReader Err() error } @@ -195,6 +197,16 @@ func (t *tombstoneReader) At() stone { return stone{ref: ref, ranges: dranges} } +func (t *tombstoneReader) Copy() TombstoneReader { + return &tombstoneReader{ + stones: t.stones[:], + idx: t.idx, + len: t.len, + + b: t.b, + } +} + func (t *tombstoneReader) Err() error { return t.err } @@ -250,6 +262,15 @@ func (t *mapTombstoneReader) At() stone { return stone{ref: t.cur, ranges: t.stones[t.cur]} } +func (t *mapTombstoneReader) Copy() TombstoneReader { + return &mapTombstoneReader{ + refs: t.refs[:], + cur: t.cur, + + stones: t.stones, + } +} + func (t *mapTombstoneReader) Err() error { return nil } @@ -298,6 +319,10 @@ func (t *simpleTombstoneReader) At() stone { return stone{ref: t.cur, ranges: t.ranges} } +func (t *simpleTombstoneReader) Copy() TombstoneReader { + return &simpleTombstoneReader{refs: t.refs[:], cur: t.cur, ranges: t.ranges} +} + func (t *simpleTombstoneReader) Err() error { return nil } @@ -371,9 +396,86 @@ func (t 
*mergedTombstoneReader) At() stone { return t.cur } +func (t *mergedTombstoneReader) Copy() TombstoneReader { + return &mergedTombstoneReader{ + a: t.a.Copy(), + b: t.b.Copy(), + + cur: t.cur, + + initialized: t.initialized, + aok: t.aok, + bok: t.bok, + } +} + func (t *mergedTombstoneReader) Err() error { if t.a.Err() != nil { return t.a.Err() } return t.b.Err() } + +type trange struct { + mint, maxt int64 +} + +func (tr trange) inBounds(t int64) bool { + return t >= tr.mint && t <= tr.maxt +} + +func (tr trange) isSubrange(ranges []trange) bool { + for _, r := range ranges { + if r.inBounds(tr.mint) && r.inBounds(tr.maxt) { + return true + } + } + + return false +} + +// This adds the new time-range to the existing ones. +// The existing ones must be sorted and should not be nil. +func addNewInterval(existing []trange, n trange) []trange { + for i, r := range existing { + // TODO(gouthamve): Make this codepath easier to digest. + if r.inBounds(n.mint) { + if n.maxt > r.maxt { + existing[i].maxt = n.maxt + } + + j := 0 + for _, r2 := range existing[i+1:] { + if n.maxt < r2.mint { + break + } + j++ + } + if j != 0 { + if existing[i+j].maxt > n.maxt { + existing[i].maxt = existing[i+j].maxt + } + existing = append(existing[:i+1], existing[i+j+1:]...) + } + return existing + } + + if r.inBounds(n.maxt) { + if n.mint < r.maxt { + existing[i].mint = n.mint + } + return existing + } + if n.mint < r.mint { + newRange := make([]trange, i, len(existing[:i])+1) + copy(newRange, existing[:i]) + newRange = append(newRange, n) + newRange = append(newRange, existing[i:]...) 
+ + return newRange + } + } + + existing = append(existing, n) + return existing +} diff --git a/tombstones_test.go b/tombstones_test.go index 491c1c1387..8a3460a95c 100644 --- a/tombstones_test.go +++ b/tombstones_test.go @@ -45,3 +45,52 @@ func TestWriteAndReadbackTombStones(t *testing.T) { require.NoError(t, restr.Err()) require.NoError(t, exptr.Err()) } + +func TestAddingNewIntervals(t *testing.T) { + cases := []struct { + exist []trange + new trange + + exp []trange + }{ + { + new: trange{1, 2}, + exp: []trange{{1, 2}}, + }, + { + exist: []trange{{1, 10}, {12, 20}, {25, 30}}, + new: trange{21, 23}, + exp: []trange{{1, 10}, {12, 20}, {21, 23}, {25, 30}}, + }, + { + exist: []trange{{1, 10}, {12, 20}, {25, 30}}, + new: trange{21, 25}, + exp: []trange{{1, 10}, {12, 20}, {21, 30}}, + }, + { + exist: []trange{{1, 10}, {12, 20}, {25, 30}}, + new: trange{18, 23}, + exp: []trange{{1, 10}, {12, 23}, {25, 30}}, + }, + { + exist: []trange{{1, 10}, {12, 20}, {25, 30}}, + new: trange{9, 23}, + exp: []trange{{1, 23}, {25, 30}}, + }, + { + exist: []trange{{1, 10}, {12, 20}, {25, 30}}, + new: trange{9, 230}, + exp: []trange{{1, 230}}, + }, + { + exist: []trange{{5, 10}, {12, 20}, {25, 30}}, + new: trange{1, 4}, + exp: []trange{{1, 4}, {5, 10}, {12, 20}, {25, 30}}, + }, + } + + for _, c := range cases { + require.Equal(t, c.exp, addNewInterval(c.exist, c.new)) + } + return +} From 45d3db4e9ed3475ca499dec07fad757d364f8b72 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Fri, 19 May 2017 11:52:15 +0530 Subject: [PATCH 07/25] Use a *mapTombstoneReader instead of map We need to recalculate the sorted ref list everytime we make a Tombstones() call. This avoids that. 
Signed-off-by: Goutham Veeramachaneni --- block.go | 10 +++++----- head.go | 21 +++++++++------------ tombstones.go | 1 - 3 files changed, 14 insertions(+), 18 deletions(-) diff --git a/block.go b/block.go index 132da1c735..9f35744649 100644 --- a/block.go +++ b/block.go @@ -163,8 +163,7 @@ type persistedBlock struct { indexr *indexReader // For tombstones. - stones []uint32 - tombstones map[uint32][]trange + tombstones *mapTombstoneReader } func newPersistedBlock(dir string) (*persistedBlock, error) { @@ -186,6 +185,7 @@ func newPersistedBlock(dir string) (*persistedBlock, error) { if err != nil { return nil, err } + ts := make(map[uint32][]trange) for tr.Next() { s := tr.At() @@ -198,7 +198,7 @@ func newPersistedBlock(dir string) (*persistedBlock, error) { chunkr: cr, indexr: ir, - tombstones: ts, + tombstones: newMapTombstoneReader(ts), } return pb, nil } @@ -229,7 +229,7 @@ func (pb *persistedBlock) Dir() string { return pb.dir } func (pb *persistedBlock) Index() IndexReader { return pb.indexr } func (pb *persistedBlock) Chunks() ChunkReader { return pb.chunkr } func (pb *persistedBlock) Tombstones() TombstoneReader { - return newMapTombstoneReader(pb.tombstones) + return pb.tombstones.Copy() } func (pb *persistedBlock) Meta() BlockMeta { return pb.meta } @@ -270,7 +270,7 @@ Outer: } // Merge the current and new tombstones. 
- tr := newMapTombstoneReader(pb.tombstones) + tr := pb.tombstones.Copy() str := newSimpleTombstoneReader(vPostings, []trange{{mint, maxt}}) tombreader := newMergedTombstoneReader(tr, str) diff --git a/head.go b/head.go index 5adda74c02..e4899cb68f 100644 --- a/head.go +++ b/head.go @@ -66,7 +66,7 @@ type HeadBlock struct { values map[string]stringset // label names to possible values postings *memPostings // postings lists for terms - tombstones map[uint32][]trange + tombstones *mapTombstoneReader meta BlockMeta } @@ -120,7 +120,7 @@ func OpenHeadBlock(dir string, l log.Logger, wal WAL) (*HeadBlock, error) { values: map[string]stringset{}, postings: &memPostings{m: make(map[term][]uint32)}, meta: *meta, - tombstones: make(map[uint32][]trange), + tombstones: emptyTombstoneReader, } return h, h.init() } @@ -158,7 +158,8 @@ func (h *HeadBlock) init() error { for tr.Next() { s := tr.At() - h.tombstones[s.ref] = s.ranges + h.tombstones.refs = append(h.tombstones.refs, s.ref) + h.tombstones.stones[s.ref] = s.ranges } return errors.Wrap(err, "tombstones reader iteration") } @@ -229,23 +230,19 @@ func (h *HeadBlock) Chunks() ChunkReader { return &headChunkReader{h} } // Tombstones implements headBlock. func (h *HeadBlock) Tombstones() TombstoneReader { - return newMapTombstoneReader(h.tombstones) + return h.tombstones.Copy() } // Delete implements headBlock. func (h *HeadBlock) Delete(mint int64, maxt int64, ms ...labels.Matcher) error { - h.mtx.RLock() + h.mtx.Lock() // We are modifying the tombstones here. + defer h.mtx.Unlock() ir := h.Index() pr := newPostingsReader(ir) p, absent := pr.Select(ms...) - h.mtx.RUnlock() - - h.mtx.Lock() // We are modifying the tombstones here. 
- defer h.mtx.Unlock() - Outer: for p.Next() { ref := p.At() @@ -256,14 +253,14 @@ Outer: } } - h.tombstones[ref] = addNewInterval(h.tombstones[ref], trange{mint, maxt}) + h.tombstones.stones[ref] = addNewInterval(h.tombstones.stones[ref], trange{mint, maxt}) } if p.Err() != nil { return p.Err() } - return writeTombstoneFile(h.dir, newMapTombstoneReader(h.tombstones)) + return writeTombstoneFile(h.dir, newMapTombstoneReader(h.tombstones.stones)) } // Querier implements Queryable and headBlock. diff --git a/tombstones.go b/tombstones.go index 4f8131c12c..02c3ad1ec9 100644 --- a/tombstones.go +++ b/tombstones.go @@ -218,7 +218,6 @@ type mapTombstoneReader struct { stones map[uint32][]trange } -// TODO(gouthamve): Take pre-sorted refs. func newMapTombstoneReader(ts map[uint32][]trange) *mapTombstoneReader { refs := make([]uint32, 0, len(ts)) for k := range ts { From d6bd64357b18aca15e3f3c32a107d58d4429b313 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Fri, 19 May 2017 22:54:29 +0530 Subject: [PATCH 08/25] Fix Delete on HeadBlock Signed-off-by: Goutham Veeramachaneni --- head.go | 12 +- head_test.go | 305 ++++++++++++++++++++++++++++++++++++++++++++++++ querier.go | 2 +- querier_test.go | 4 +- tombstones.go | 7 +- 5 files changed, 318 insertions(+), 12 deletions(-) diff --git a/head.go b/head.go index e4899cb68f..84434753d2 100644 --- a/head.go +++ b/head.go @@ -98,7 +98,7 @@ func TouchHeadBlock(dir string, seq int, mint, maxt int64) error { } // Write an empty tombstones file. 
- if err := writeTombstoneFile(tmp, emptyTombstoneReader); err != nil { + if err := writeTombstoneFile(tmp, newEmptyTombstoneReader()); err != nil { return err } @@ -120,7 +120,7 @@ func OpenHeadBlock(dir string, l log.Logger, wal WAL) (*HeadBlock, error) { values: map[string]stringset{}, postings: &memPostings{m: make(map[term][]uint32)}, meta: *meta, - tombstones: emptyTombstoneReader, + tombstones: newEmptyTombstoneReader(), } return h, h.init() } @@ -235,9 +235,6 @@ func (h *HeadBlock) Tombstones() TombstoneReader { // Delete implements headBlock. func (h *HeadBlock) Delete(mint int64, maxt int64, ms ...labels.Matcher) error { - h.mtx.Lock() // We are modifying the tombstones here. - defer h.mtx.Unlock() - ir := h.Index() pr := newPostingsReader(ir) @@ -260,7 +257,8 @@ Outer: return p.Err() } - return writeTombstoneFile(h.dir, newMapTombstoneReader(h.tombstones.stones)) + h.tombstones = newMapTombstoneReader(h.tombstones.stones) + return writeTombstoneFile(h.dir, h.tombstones.Copy()) } // Querier implements Queryable and headBlock. 
@@ -280,7 +278,7 @@ func (h *HeadBlock) Querier(mint, maxt int64) Querier { maxt: maxt, index: h.Index(), chunks: h.Chunks(), - tombstones: h.Tombstones(), + tombstones: h.Tombstones().Copy(), postingsMapper: func(p Postings) Postings { ep := make([]uint32, 0, 64) diff --git a/head_test.go b/head_test.go index aa9138060d..36f6a49df6 100644 --- a/head_test.go +++ b/head_test.go @@ -36,6 +36,10 @@ func createTestHeadBlock(t testing.TB, dir string, mint, maxt int64) *HeadBlock err := TouchHeadBlock(dir, 0, mint, maxt) require.NoError(t, err) + return openTestHeadBlock(t, dir) +} + +func openTestHeadBlock(t testing.TB, dir string) *HeadBlock { wal, err := OpenSegmentWAL(dir, nil, 5*time.Second) require.NoError(t, err) @@ -378,6 +382,291 @@ func TestHeadBlock_e2e(t *testing.T) { return } +func TestDelete_simple(t *testing.T) { + numSamples := int64(10) + + dir, _ := ioutil.TempDir("", "test") + defer os.RemoveAll(dir) + + hb := createTestHeadBlock(t, dir, 0, numSamples) + app := hb.Appender() + + smpls := make([]float64, numSamples) + for i := int64(0); i < numSamples; i++ { + smpls[i] = rand.Float64() + app.Add(labels.Labels{{"a", "b"}}, i, smpls[i]) + } + + require.NoError(t, app.Commit()) + cases := []struct { + dranges []trange + remaint []int64 + }{ + { + dranges: []trange{{0, 3}}, + remaint: []int64{4, 5, 6, 7, 8, 9}, + }, + { + dranges: []trange{{1, 3}}, + remaint: []int64{0, 4, 5, 6, 7, 8, 9}, + }, + { + dranges: []trange{{1, 3}, {4, 7}}, + remaint: []int64{0, 8, 9}, + }, + { + dranges: []trange{{1, 3}, {4, 700}}, + remaint: []int64{0}, + }, + { + dranges: []trange{{0, 9}}, + remaint: []int64{}, + }, + } + +Outer: + for _, c := range cases { + // Reset the tombstones. + writeTombstoneFile(hb.dir, newEmptyTombstoneReader()) + hb.tombstones = newEmptyTombstoneReader() + + // Delete the ranges. + for _, r := range c.dranges { + require.NoError(t, hb.Delete(r.mint, r.maxt, labels.NewEqualMatcher("a", "b"))) + } + + // Compare the result. 
+ q := hb.Querier(0, numSamples) + res := q.Select(labels.NewEqualMatcher("a", "b")) + + expSamples := make([]sample, 0, len(c.remaint)) + for _, ts := range c.remaint { + expSamples = append(expSamples, sample{ts, smpls[ts]}) + } + + expss := newListSeriesSet([]Series{ + newSeries(map[string]string{"a": "b"}, expSamples), + }) + + if len(expSamples) == 0 { + require.False(t, res.Next()) + continue + } + + for { + eok, rok := expss.Next(), res.Next() + require.Equal(t, eok, rok, "next") + + if !eok { + continue Outer + } + sexp := expss.At() + sres := res.At() + + require.Equal(t, sexp.Labels(), sres.Labels(), "labels") + + smplExp, errExp := expandSeriesIterator(sexp.Iterator()) + smplRes, errRes := expandSeriesIterator(sres.Iterator()) + + require.Equal(t, errExp, errRes, "samples error") + require.Equal(t, smplExp, smplRes, "samples") + } + } +} + +func TestDelete_e2e(t *testing.T) { + numDatapoints := 1000 + numRanges := 1000 + timeInterval := int64(2) + maxTime := int64(2 * 1000) + minTime := int64(200) + // Create 8 series with 1000 data-points of different ranges, delete and run queries. 
+ lbls := [][]labels.Label{ + { + {"a", "b"}, + {"instance", "localhost:9090"}, + {"job", "prometheus"}, + }, + { + {"a", "b"}, + {"instance", "127.0.0.1:9090"}, + {"job", "prometheus"}, + }, + { + {"a", "b"}, + {"instance", "127.0.0.1:9090"}, + {"job", "prom-k8s"}, + }, + { + {"a", "b"}, + {"instance", "localhost:9090"}, + {"job", "prom-k8s"}, + }, + { + {"a", "c"}, + {"instance", "localhost:9090"}, + {"job", "prometheus"}, + }, + { + {"a", "c"}, + {"instance", "127.0.0.1:9090"}, + {"job", "prometheus"}, + }, + { + {"a", "c"}, + {"instance", "127.0.0.1:9090"}, + {"job", "prom-k8s"}, + }, + { + {"a", "c"}, + {"instance", "localhost:9090"}, + {"job", "prom-k8s"}, + }, + } + + seriesMap := map[string][]sample{} + for _, l := range lbls { + seriesMap[labels.New(l...).String()] = []sample{} + } + + dir, _ := ioutil.TempDir("", "test") + defer os.RemoveAll(dir) + + hb := createTestHeadBlock(t, dir, minTime, maxTime) + app := hb.Appender() + + for _, l := range lbls { + ls := labels.New(l...) + series := []sample{} + + ts := rand.Int63n(300) + for i := 0; i < numDatapoints; i++ { + v := rand.Float64() + if ts >= minTime && ts <= maxTime { + series = append(series, sample{ts, v}) + } + + _, err := app.Add(ls, ts, v) + if ts >= minTime && ts <= maxTime { + require.NoError(t, err) + } else { + require.Error(t, ErrOutOfBounds, err) + } + + ts += rand.Int63n(timeInterval) + 1 + } + + seriesMap[labels.New(l...).String()] = series + } + + require.NoError(t, app.Commit()) + + // Delete a time-range from each-selector. 
+ dels := []struct { + ms []labels.Matcher + drange []trange + }{ + { + ms: []labels.Matcher{labels.NewEqualMatcher("a", "b")}, + drange: []trange{{300, 500}, {600, 670}}, + }, + { + ms: []labels.Matcher{ + labels.NewEqualMatcher("a", "b"), + labels.NewEqualMatcher("job", "prom-k8s"), + }, + drange: []trange{{300, 500}, {100, 670}}, + }, + { + ms: []labels.Matcher{ + labels.NewEqualMatcher("a", "c"), + labels.NewEqualMatcher("instance", "localhost:9090"), + labels.NewEqualMatcher("job", "prometheus"), + }, + drange: []trange{{300, 400}, {100, 6700}}, + }, + // TODO: Add Regexp Matchers. + } + + for _, del := range dels { + // Reset the deletes everytime. + writeTombstoneFile(hb.dir, newEmptyTombstoneReader()) + hb.tombstones = newEmptyTombstoneReader() + + for _, r := range del.drange { + require.NoError(t, hb.Delete(r.mint, r.maxt, del.ms...)) + } + + matched := labels.Slice{} + for _, ls := range lbls { + s := labels.Selector(del.ms) + if s.Matches(ls) { + matched = append(matched, ls) + } + } + + sort.Sort(matched) + + for i := 0; i < numRanges; i++ { + mint := rand.Int63n(200) + maxt := mint + rand.Int63n(timeInterval*int64(numDatapoints)) + + q := hb.Querier(mint, maxt) + ss := q.Select(del.ms...) + + // Build the mockSeriesSet. + matchedSeries := make([]Series, 0, len(matched)) + for _, m := range matched { + smpls := boundedSamples(seriesMap[m.String()], mint, maxt) + smpls = deletedSamples(smpls, del.drange) + + // Only append those series for which samples exist as mockSeriesSet + // doesn't skip series with no samples. + // TODO: But sometimes SeriesSet returns an empty SeriesIterator + if len(smpls) > 0 { + matchedSeries = append(matchedSeries, newSeries( + m.Map(), + smpls, + )) + } + } + expSs := newListSeriesSet(matchedSeries) + + // Compare both SeriesSets. + for { + eok, rok := expSs.Next(), ss.Next() + + // Skip a series if iterator is empty. 
+ if rok { + for !ss.At().Iterator().Next() { + rok = ss.Next() + if !rok { + break + } + } + } + require.Equal(t, eok, rok, "next") + + if !eok { + break + } + sexp := expSs.At() + sres := ss.At() + + require.Equal(t, sexp.Labels(), sres.Labels(), "labels") + + smplExp, errExp := expandSeriesIterator(sexp.Iterator()) + smplRes, errRes := expandSeriesIterator(sres.Iterator()) + + require.Equal(t, errExp, errRes, "samples error") + require.Equal(t, smplExp, smplRes, "samples") + } + } + } + + return +} + func boundedSamples(full []sample, mint, maxt int64) []sample { for len(full) > 0 { if full[0].t >= mint { @@ -394,3 +683,19 @@ func boundedSamples(full []sample, mint, maxt int64) []sample { // maxt is after highest sample. return full } + +func deletedSamples(full []sample, dranges []trange) []sample { + ds := make([]sample, 0, len(full)) +Outer: + for _, s := range full { + for _, r := range dranges { + if r.inBounds(s.t) { + continue Outer + } + } + + ds = append(ds, s) + } + + return ds +} diff --git a/querier.go b/querier.go index a2f0159fbd..c67091cc64 100644 --- a/querier.go +++ b/querier.go @@ -449,7 +449,7 @@ type populatedChunkSeries struct { } func (s *populatedChunkSeries) At() (labels.Labels, []*ChunkMeta, stone) { - return s.lset, s.chks, stone{} + return s.lset, s.chks, s.stone } func (s *populatedChunkSeries) Err() error { return s.err } diff --git a/querier_test.go b/querier_test.go index 9502b5b4f9..c10536afc7 100644 --- a/querier_test.go +++ b/querier_test.go @@ -380,7 +380,7 @@ Outer: querier := &blockQuerier{ index: ir, chunks: cr, - tombstones: emptyTombstoneReader, + tombstones: newEmptyTombstoneReader(), mint: c.mint, maxt: c.maxt, @@ -490,7 +490,7 @@ func TestBaseChunkSeries(t *testing.T) { bcs := &baseChunkSeries{ p: newListPostings(tc.postings), index: mi, - tombstones: emptyTombstoneReader, + tombstones: newEmptyTombstoneReader(), } i := 0 diff --git a/tombstones.go b/tombstones.go index 02c3ad1ec9..d8182c706b 100644 --- a/tombstones.go 
+++ b/tombstones.go @@ -107,8 +107,6 @@ type TombstoneReader interface { Err() error } -var emptyTombstoneReader = newMapTombstoneReader(make(map[uint32][]trange)) - type tombstoneReader struct { stones []byte idx int @@ -223,10 +221,15 @@ func newMapTombstoneReader(ts map[uint32][]trange) *mapTombstoneReader { for k := range ts { refs = append(refs, k) } + sort.Sort(uint32slice(refs)) return &mapTombstoneReader{stones: ts, refs: refs} } +func newEmptyTombstoneReader() *mapTombstoneReader { + return &mapTombstoneReader{stones: make(map[uint32][]trange)} +} + func (t *mapTombstoneReader) Next() bool { if len(t.refs) > 0 { t.cur = t.refs[0] From 3dbb400bef8196306f57aff45b7116ae6eefb4a6 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Sat, 20 May 2017 00:35:50 +0530 Subject: [PATCH 09/25] Initial implementation of Delete on DB. Signed-off-by: Goutham Veeramachaneni --- block.go | 13 ++++++------- compact.go | 3 +-- db.go | 32 ++++++++++++++++++++++++++++---- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/block.go b/block.go index 9f35744649..69c351ccc8 100644 --- a/block.go +++ b/block.go @@ -218,10 +218,11 @@ func (pb *persistedBlock) String() string { func (pb *persistedBlock) Querier(mint, maxt int64) Querier { return &blockQuerier{ - mint: mint, - maxt: maxt, - index: pb.Index(), - chunks: pb.Chunks(), + mint: mint, + maxt: maxt, + index: pb.Index(), + chunks: pb.Chunks(), + tombstones: pb.tombstones.Copy(), } } @@ -255,10 +256,8 @@ Outer: } } - // XXX(gouthamve): Adjust mint and maxt to match the time-range in the chunks? 
for _, chk := range chunks { - if (mint <= chk.MinTime && maxt >= chk.MinTime) || - (mint > chk.MinTime && mint <= chk.MaxTime) { + if intervalOverlap(mint, maxt, chk.MinTime, chk.MaxTime) { vPostings = append(vPostings, p.At()) continue Outer } diff --git a/compact.go b/compact.go index d63f5cec7e..7ba22f5f75 100644 --- a/compact.go +++ b/compact.go @@ -307,8 +307,7 @@ func (c *compactor) populate(blocks []Block, indexw IndexWriter, chunkw ChunkWri if len(ranges) > 0 { // Re-encode the chunk to not have deleted values. for _, chk := range chks { - // Checks Overlap: http://stackoverflow.com/questions/3269434/ - if ranges[0].mint <= chk.MaxTime && chk.MinTime <= ranges[len(ranges)-1].maxt { + if intervalOverlap(ranges[0].mint, ranges[len(ranges)-1].maxt, chk.MinTime, chk.MaxTime) { newChunk := chunks.NewXORChunk() app, err := newChunk.Appender() if err != nil { diff --git a/db.go b/db.go index c4e97abcc1..855a5af6ec 100644 --- a/db.go +++ b/db.go @@ -669,6 +669,32 @@ func (a *dbAppender) Rollback() error { return g.Wait() } +// Delete implements deletion of metrics. +func (db *DB) Delete(mint, maxt int64, ms ...labels.Matcher) error { + s.mtx.RLock() + + s.headmtx.RLock() + blocks := s.blocksForInterval(mint, maxt) + s.headmtx.RUnlock() + + // TODO(gouthamve): Wait for pending compactions and stop compactions until + // delete finishes. + var g errgroup.Group + + for _, b := range blocks { + f := func() error { + return b.Delete(mint, maxt, ms...) + } + g.Go(f) + } + + if err := g.Wait(); err != nil { + return err + } + + return db.reloadBlocks() +} + // appendable returns a copy of a slice of HeadBlocks that can still be appended to. 
func (db *DB) appendable() []headBlock { var i int @@ -681,10 +707,8 @@ func (db *DB) appendable() []headBlock { } func intervalOverlap(amin, amax, bmin, bmax int64) bool { - if bmin >= amin && bmin <= amax { - return true - } - if amin >= bmin && amin <= bmax { + // Checks Overlap: http://stackoverflow.com/questions/3269434/ + if amin <= bmax && bmin <= amax { return true } return false From d32eb25662b6e58042566aebbd0f53661b935353 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Sat, 20 May 2017 13:21:10 +0530 Subject: [PATCH 10/25] Synchronise deletes and compactions. Signed-off-by: Goutham Veeramachaneni --- db.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/db.go b/db.go index 855a5af6ec..b3b0558445 100644 --- a/db.go +++ b/db.go @@ -125,6 +125,9 @@ type DB struct { compactc chan struct{} donec chan struct{} stopc chan struct{} + + // compMtx is used to control compactions and deletions. + cmtx sync.Mutex } type dbMetrics struct { @@ -271,6 +274,9 @@ func (db *DB) retentionCutoff() (bool, error) { } func (db *DB) compact() (changes bool, err error) { + db.cmtx.Lock() + defer db.cmtx.Unlock() + db.headmtx.RLock() // Check whether we have pending head blocks that are ready to be persisted. @@ -671,14 +677,13 @@ func (a *dbAppender) Rollback() error { // Delete implements deletion of metrics. func (db *DB) Delete(mint, maxt int64, ms ...labels.Matcher) error { - s.mtx.RLock() + db.cmtx.Lock() + defer db.cmtx.Unlock() s.headmtx.RLock() blocks := s.blocksForInterval(mint, maxt) s.headmtx.RUnlock() - // TODO(gouthamve): Wait for pending compactions and stop compactions until - // delete finishes. var g errgroup.Group for _, b := range blocks { From 0b70333ef6d5588e0ad1816dc1d698d5a6f19419 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Sun, 21 May 2017 23:20:05 +0530 Subject: [PATCH 11/25] Add tests for tombstones. 
Signed-off-by: Goutham Veeramachaneni --- block.go | 5 ++- compact.go | 2 +- db.go | 6 +-- tombstones.go | 68 ++++++++++++++++------------- tombstones_test.go | 105 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 149 insertions(+), 37 deletions(-) diff --git a/block.go b/block.go index 69c351ccc8..03a2e569aa 100644 --- a/block.go +++ b/block.go @@ -198,6 +198,7 @@ func newPersistedBlock(dir string) (*persistedBlock, error) { chunkr: cr, indexr: ir, + // TODO(gouthamve): We will be sorting the refs again internally, is it a big deal? tombstones: newMapTombstoneReader(ts), } return pb, nil @@ -222,7 +223,7 @@ func (pb *persistedBlock) Querier(mint, maxt int64) Querier { maxt: maxt, index: pb.Index(), chunks: pb.Chunks(), - tombstones: pb.tombstones.Copy(), + tombstones: pb.Tombstones(), } } @@ -269,7 +270,7 @@ Outer: } // Merge the current and new tombstones. - tr := pb.tombstones.Copy() + tr := pb.Tombstones() str := newSimpleTombstoneReader(vPostings, []trange{{mint, maxt}}) tombreader := newMergedTombstoneReader(tr, str) diff --git a/compact.go b/compact.go index 7ba22f5f75..8419f1e06f 100644 --- a/compact.go +++ b/compact.go @@ -424,7 +424,7 @@ func (c *compactionSeriesSet) Next() bool { return false } - // Remove completely deleted chunks and re-encode partial ones. + // Remove completely deleted chunks. 
if len(c.dranges) > 0 { chks := make([]*ChunkMeta, 0, len(c.c)) for _, chk := range c.c { diff --git a/db.go b/db.go index b3b0558445..28506498f2 100644 --- a/db.go +++ b/db.go @@ -680,9 +680,9 @@ func (db *DB) Delete(mint, maxt int64, ms ...labels.Matcher) error { db.cmtx.Lock() defer db.cmtx.Unlock() - s.headmtx.RLock() - blocks := s.blocksForInterval(mint, maxt) - s.headmtx.RUnlock() + db.headmtx.RLock() + blocks := db.blocksForInterval(mint, maxt) + db.headmtx.RUnlock() var g errgroup.Group diff --git a/tombstones.go b/tombstones.go index d8182c706b..b0d8eca41f 100644 --- a/tombstones.go +++ b/tombstones.go @@ -102,15 +102,15 @@ type TombstoneReader interface { Next() bool Seek(ref uint32) bool At() stone - // A copy of the current instance. Changes to the copy will not affect parent. + // Copy copies the current reader state. Changes to the copy will not affect parent. Copy() TombstoneReader Err() error } type tombstoneReader struct { stones []byte - idx int - len int + + cur stone b []byte err error @@ -135,11 +135,10 @@ func newTombStoneReader(dir string) (*tombstoneReader, error) { if err := d.err(); err != nil { return nil, err } + off += 4 // For the numStones which has been read. 
return &tombstoneReader{ - stones: b[off+4:], - idx: -1, - len: int(numStones), + stones: b[off : off+int64(numStones*12)], b: b, }, nil @@ -150,26 +149,11 @@ func (t *tombstoneReader) Next() bool { return false } - t.idx++ + if len(t.stones) < 12 { + return false + } - return t.idx < t.len -} - -func (t *tombstoneReader) Seek(ref uint32) bool { - bytIdx := t.idx * 12 - - t.idx += sort.Search(t.len-t.idx, func(i int) bool { - return binary.BigEndian.Uint32(t.b[bytIdx+i*12:]) >= ref - }) - - return t.idx < t.len -} - -func (t *tombstoneReader) At() stone { - bytIdx := t.idx * (4 + 8) - dat := t.stones[bytIdx : bytIdx+12] - - d := &decbuf{b: dat} + d := &decbuf{b: t.stones[:12]} ref := d.be32() off := d.be64int64() @@ -177,7 +161,7 @@ func (t *tombstoneReader) At() stone { numRanges := d.varint64() if err := d.err(); err != nil { t.err = err - return stone{ref: ref} + return false } dranges := make([]trange, 0, numRanges) @@ -186,20 +170,40 @@ func (t *tombstoneReader) At() stone { maxt := d.varint64() if err := d.err(); err != nil { t.err = err - return stone{ref: ref, ranges: dranges} + return false } dranges = append(dranges, trange{mint, maxt}) } - return stone{ref: ref, ranges: dranges} + t.stones = t.stones[12:] + t.cur = stone{ref: ref, ranges: dranges} + return true +} + +func (t *tombstoneReader) Seek(ref uint32) bool { + i := sort.Search(len(t.stones)/12, func(i int) bool { + x := binary.BigEndian.Uint32(t.stones[i*12:]) + return x >= ref + }) + + if i*12 < len(t.stones) { + t.stones = t.stones[i*12:] + return t.Next() + } + + t.stones = nil + return false +} + +func (t *tombstoneReader) At() stone { + return t.cur } func (t *tombstoneReader) Copy() TombstoneReader { return &tombstoneReader{ stones: t.stones[:], - idx: t.idx, - len: t.len, + cur: t.cur, b: t.b, } @@ -291,6 +295,7 @@ func newSimpleTombstoneReader(refs []uint32, drange []trange) *simpleTombstoneRe func (t *simpleTombstoneReader) Next() bool { if len(t.refs) > 0 { t.cur = t.refs[0] + t.refs = 
t.refs[1:] return true } @@ -437,7 +442,8 @@ func (tr trange) isSubrange(ranges []trange) bool { } // This adds the new time-range to the existing ones. -// The existing ones must be sorted and should not be nil. +// The existing ones must be sorted. +// TODO(gouthamve): {1, 2}, {3, 4} can be merged into {1, 4}. func addNewInterval(existing []trange, n trange) []trange { for i, r := range existing { // TODO(gouthamve): Make this codepath easier to digest. diff --git a/tombstones_test.go b/tombstones_test.go index 8a3460a95c..1506d74def 100644 --- a/tombstones_test.go +++ b/tombstones_test.go @@ -4,6 +4,7 @@ import ( "io/ioutil" "math/rand" "os" + "sort" "testing" "time" @@ -94,3 +95,107 @@ func TestAddingNewIntervals(t *testing.T) { } return } + +func TestTombstoneReadersSeek(t *testing.T) { + // This is assuming that the listPostings is perfect. + table := struct { + m map[uint32][]trange + + cases []uint32 + }{ + m: map[uint32][]trange{ + 2: []trange{{1, 2}}, + 3: []trange{{1, 4}, {5, 6}}, + 4: []trange{{10, 15}, {16, 20}}, + 5: []trange{{1, 4}, {5, 6}}, + 50: []trange{{10, 20}, {35, 50}}, + 600: []trange{{100, 2000}}, + 1000: []trange{}, + 1500: []trange{{10000, 500000}}, + 1600: []trange{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, + }, + + cases: []uint32{1, 10, 20, 40, 30, 20, 50, 599, 601, 1000, 1600, 1601, 2000}, + } + + testFunc := func(t *testing.T, tr TombstoneReader) { + for _, ref := range table.cases { + // Create the listPostings. + refs := make([]uint32, 0, len(table.m)) + for k := range table.m { + refs = append(refs, k) + } + sort.Sort(uint32slice(refs)) + pr := newListPostings(refs) + + // Compare both. 
+ trc := tr.Copy() + require.Equal(t, pr.Seek(ref), trc.Seek(ref)) + if pr.Seek(ref) { + require.Equal(t, pr.At(), trc.At().ref) + require.Equal(t, table.m[pr.At()], trc.At().ranges) + } + + for pr.Next() { + require.True(t, trc.Next()) + require.Equal(t, pr.At(), trc.At().ref) + require.Equal(t, table.m[pr.At()], trc.At().ranges) + } + + require.False(t, trc.Next()) + require.NoError(t, pr.Err()) + require.NoError(t, tr.Err()) + } + } + + t.Run("tombstoneReader", func(t *testing.T) { + tmpdir, _ := ioutil.TempDir("", "test") + defer os.RemoveAll(tmpdir) + + mtr := newMapTombstoneReader(table.m) + writeTombstoneFile(tmpdir, mtr) + tr, err := readTombstoneFile(tmpdir) + require.NoError(t, err) + + testFunc(t, tr) + return + }) + t.Run("mapTombstoneReader", func(t *testing.T) { + mtr := newMapTombstoneReader(table.m) + + testFunc(t, mtr) + return + }) + t.Run("simpleTombstoneReader", func(t *testing.T) { + ranges := []trange{{1, 2}, {3, 4}, {5, 6}} + + for _, ref := range table.cases { + // Create the listPostings. + refs := make([]uint32, 0, len(table.m)) + for k := range table.m { + refs = append(refs, k) + } + sort.Sort(uint32slice(refs)) + pr := newListPostings(refs[:]) + tr := newSimpleTombstoneReader(refs[:], ranges) + + // Compare both. 
+ trc := tr.Copy() + require.Equal(t, pr.Seek(ref), trc.Seek(ref)) + if pr.Seek(ref) { + require.Equal(t, pr.At(), trc.At().ref) + require.Equal(t, ranges, tr.At().ranges) + } + for pr.Next() { + require.True(t, trc.Next()) + require.Equal(t, pr.At(), trc.At().ref, "refs") + require.Equal(t, ranges, trc.At().ranges) + } + + require.False(t, trc.Next()) + require.NoError(t, pr.Err()) + require.NoError(t, tr.Err()) + } + return + }) +} From 009dd2cde506e5b0ae7191a08ec2867fdb5a2eab Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Mon, 22 May 2017 10:31:50 +0530 Subject: [PATCH 12/25] Add test for mergedTombstoneReader Signed-off-by: Goutham Veeramachaneni --- tombstones.go | 3 +- tombstones_test.go | 116 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 1 deletion(-) diff --git a/tombstones.go b/tombstones.go index b0d8eca41f..76ef2daccd 100644 --- a/tombstones.go +++ b/tombstones.go @@ -377,11 +377,12 @@ func (t *mergedTombstoneReader) Next() bool { t.cur = bcur t.bok = t.b.Next() } else { - t.cur = acur // Merge time ranges. 
for _, r := range bcur.ranges { acur.ranges = addNewInterval(acur.ranges, r) } + + t.cur = acur t.aok = t.a.Next() t.bok = t.b.Next() } diff --git a/tombstones_test.go b/tombstones_test.go index 1506d74def..0793a1cb32 100644 --- a/tombstones_test.go +++ b/tombstones_test.go @@ -63,6 +63,11 @@ func TestAddingNewIntervals(t *testing.T) { new: trange{21, 23}, exp: []trange{{1, 10}, {12, 20}, {21, 23}, {25, 30}}, }, + { + exist: []trange{{1, 2}, {3, 5}, {6, 7}}, + new: trange{6, 7}, + exp: []trange{{1, 2}, {3, 5}, {6, 7}}, + }, { exist: []trange{{1, 10}, {12, 20}, {25, 30}}, new: trange{21, 25}, @@ -199,3 +204,114 @@ func TestTombstoneReadersSeek(t *testing.T) { return }) } + +func TestMergedTombstoneReader(t *testing.T) { + cases := []struct { + a, b TombstoneReader + + exp TombstoneReader + }{ + { + a: newMapTombstoneReader( + map[uint32][]trange{ + 2: []trange{{1, 2}}, + 3: []trange{{1, 4}, {5, 6}}, + 4: []trange{{10, 15}, {16, 20}}, + 5: []trange{{1, 4}, {5, 6}}, + 50: []trange{{10, 20}, {35, 50}}, + 600: []trange{{100, 2000}}, + 1000: []trange{}, + 1500: []trange{{10000, 500000}}, + 1600: []trange{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, + }, + ), + b: newMapTombstoneReader( + map[uint32][]trange{ + 2: []trange{{1, 2}}, + 3: []trange{{1, 4}, {5, 6}}, + 4: []trange{{10, 15}, {16, 20}}, + 5: []trange{{1, 4}, {5, 6}}, + 50: []trange{{10, 20}, {35, 50}}, + 600: []trange{{100, 2000}}, + 1000: []trange{}, + 1500: []trange{{10000, 500000}}, + 1600: []trange{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, + }, + ), + + exp: newMapTombstoneReader( + map[uint32][]trange{ + 2: []trange{{1, 2}}, + 3: []trange{{1, 4}, {5, 6}}, + 4: []trange{{10, 15}, {16, 20}}, + 5: []trange{{1, 4}, {5, 6}}, + 50: []trange{{10, 20}, {35, 50}}, + 600: []trange{{100, 2000}}, + 1000: []trange{}, + 1500: []trange{{10000, 500000}}, + 1600: []trange{{1, 2}, {3, 5}, {6, 7}}, + }, + ), + }, + { + a: newMapTombstoneReader( + map[uint32][]trange{ + 2: []trange{{1, 2}}, + 3: []trange{{1, 4}, {5, 6}}, + 4: []trange{{10, 15}, 
{16, 20}}, + 5: []trange{{1, 4}, {5, 6}}, + 50: []trange{{10, 20}, {35, 50}}, + 600: []trange{{100, 2000}}, + 1000: []trange{}, + 1500: []trange{{10000, 500000}}, + 1600: []trange{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, + }, + ), + b: newMapTombstoneReader( + map[uint32][]trange{ + 20: []trange{{1, 2}}, + 30: []trange{{1, 4}, {5, 6}}, + 40: []trange{{10, 15}, {16, 20}}, + 60: []trange{{1, 4}, {5, 6}}, + 500: []trange{{10, 20}, {35, 50}}, + 6000: []trange{{100, 2000}}, + 10000: []trange{}, + 15000: []trange{{10000, 500000}}, + 1600: []trange{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, + }, + ), + + exp: newMapTombstoneReader( + map[uint32][]trange{ + 2: []trange{{1, 2}}, + 3: []trange{{1, 4}, {5, 6}}, + 4: []trange{{10, 15}, {16, 20}}, + 5: []trange{{1, 4}, {5, 6}}, + 50: []trange{{10, 20}, {35, 50}}, + 600: []trange{{100, 2000}}, + 1000: []trange{}, + 1500: []trange{{10000, 500000}}, + 20: []trange{{1, 2}}, + 30: []trange{{1, 4}, {5, 6}}, + 40: []trange{{10, 15}, {16, 20}}, + 60: []trange{{1, 4}, {5, 6}}, + 500: []trange{{10, 20}, {35, 50}}, + 6000: []trange{{100, 2000}}, + 10000: []trange{}, + 15000: []trange{{10000, 500000}}, + 1600: []trange{{1, 2}, {3, 5}, {6, 7}}, + }, + ), + }, + } + + for _, c := range cases { + res := newMergedTombstoneReader(c.a, c.b) + for c.exp.Next() { + require.True(t, res.Next()) + require.Equal(t, c.exp.At(), res.At()) + } + require.False(t, res.Next()) + } + return +} From 662d8173fedc1b5bb8c2701a22f0b08a38b395e0 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Mon, 22 May 2017 11:28:24 +0530 Subject: [PATCH 13/25] Make Appends after Delete visible. 
Signed-off-by: Goutham Veeramachaneni --- block.go | 11 ++++++++--- head.go | 7 ++++++- head_test.go | 35 ++++++++++++++++++++++++++++++++++- 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/block.go b/block.go index 03a2e569aa..296b119ebb 100644 --- a/block.go +++ b/block.go @@ -242,7 +242,7 @@ func (pb *persistedBlock) Delete(mint, maxt int64, ms ...labels.Matcher) error { ir := pb.indexr // Choose only valid postings which have chunks in the time-range. - vPostings := []uint32{} + delStones := map[uint32][]trange{} Outer: for p.Next() { @@ -259,7 +259,12 @@ Outer: for _, chk := range chunks { if intervalOverlap(mint, maxt, chk.MinTime, chk.MaxTime) { - vPostings = append(vPostings, p.At()) + // Delete only until the current maxtime and not beyond. + maxtime := chunks[len(chunks)-1].MaxTime + if maxtime > maxt { + maxtime = maxt + } + delStones[p.At()] = []trange{{mint, maxtime}} continue Outer } } @@ -271,7 +276,7 @@ Outer: // Merge the current and new tombstones. tr := pb.Tombstones() - str := newSimpleTombstoneReader(vPostings, []trange{{mint, maxt}}) + str := newMapTombstoneReader(delStones) tombreader := newMergedTombstoneReader(tr, str) return writeTombstoneFile(pb.dir, tombreader) diff --git a/head.go b/head.go index 84434753d2..c98a4deff9 100644 --- a/head.go +++ b/head.go @@ -250,7 +250,12 @@ Outer: } } - h.tombstones.stones[ref] = addNewInterval(h.tombstones.stones[ref], trange{mint, maxt}) + // Delete only until the current values and not beyond. 
+ maxtime := h.series[ref].head().maxTime + if maxtime > maxt { + maxtime = maxt + } + h.tombstones.stones[ref] = addNewInterval(h.tombstones.stones[ref], trange{mint, maxtime}) } if p.Err() != nil { diff --git a/head_test.go b/head_test.go index 36f6a49df6..ef841cdb05 100644 --- a/head_test.go +++ b/head_test.go @@ -382,7 +382,7 @@ func TestHeadBlock_e2e(t *testing.T) { return } -func TestDelete_simple(t *testing.T) { +func TestDeleteSimple(t *testing.T) { numSamples := int64(10) dir, _ := ioutil.TempDir("", "test") @@ -474,6 +474,39 @@ Outer: } } +func TestDeleteUntilCurMax(t *testing.T) { + numSamples := int64(10) + + dir, _ := ioutil.TempDir("", "test") + defer os.RemoveAll(dir) + + hb := createTestHeadBlock(t, dir, 0, 2*numSamples) + app := hb.Appender() + + smpls := make([]float64, numSamples) + for i := int64(0); i < numSamples; i++ { + smpls[i] = rand.Float64() + app.Add(labels.Labels{{"a", "b"}}, i, smpls[i]) + } + + require.NoError(t, app.Commit()) + require.NoError(t, hb.Delete(0, 10000, labels.NewEqualMatcher("a", "b"))) + app = hb.Appender() + _, err := app.Add(labels.Labels{{"a", "b"}}, 11, 1) + require.NoError(t, err) + require.NoError(t, app.Commit()) + + q := hb.Querier(0, 100000) + res := q.Select(labels.NewEqualMatcher("a", "b")) + + require.True(t, res.Next()) + exps := res.At() + it := exps.Iterator() + ressmpls, err := expandSeriesIterator(it) + require.NoError(t, err) + require.Equal(t, []sample{{11, 1}}, ressmpls) +} + func TestDelete_e2e(t *testing.T) { numDatapoints := 1000 numRanges := 1000 From 7ccf9d0211cc8fe64c3668f035f53e260f2bc21b Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Mon, 22 May 2017 13:31:57 +0530 Subject: [PATCH 14/25] Add test on blockQuerier which uses tombstones. 
Signed-off-by: Goutham Veeramachaneni --- querier_test.go | 173 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) diff --git a/querier_test.go b/querier_test.go index c10536afc7..71b741340c 100644 --- a/querier_test.go +++ b/querier_test.go @@ -232,6 +232,7 @@ func createIdxChkReaders(tc []struct { mi := newMockIndex() for i, s := range tc { + i = i + 1 // 0 is not a valid posting. metas := make([]*ChunkMeta, 0, len(s.chunks)) for _, chk := range s.chunks { // Collisions can be there, but for tests, its fine. @@ -411,6 +412,178 @@ Outer: return } +func TestBlockQuerierDelete(t *testing.T) { + newSeries := func(l map[string]string, s []sample) Series { + return &mockSeries{ + labels: func() labels.Labels { return labels.FromMap(l) }, + iterator: func() SeriesIterator { return newListSeriesIterator(s) }, + } + } + + type query struct { + mint, maxt int64 + ms []labels.Matcher + exp SeriesSet + } + + cases := struct { + data []struct { + lset map[string]string + chunks [][]sample + } + + tombstones *mapTombstoneReader + queries []query + }{ + data: []struct { + lset map[string]string + chunks [][]sample + }{ + { + lset: map[string]string{ + "a": "a", + }, + chunks: [][]sample{ + { + {1, 2}, {2, 3}, {3, 4}, + }, + { + {5, 2}, {6, 3}, {7, 4}, + }, + }, + }, + { + lset: map[string]string{ + "a": "a", + "b": "b", + }, + chunks: [][]sample{ + { + {1, 1}, {2, 2}, {3, 3}, + }, + { + {4, 15}, {5, 3}, {6, 6}, + }, + }, + }, + { + lset: map[string]string{ + "b": "b", + }, + chunks: [][]sample{ + { + {1, 3}, {2, 2}, {3, 6}, + }, + { + {5, 1}, {6, 7}, {7, 2}, + }, + }, + }, + }, + tombstones: newMapTombstoneReader( + map[uint32][]trange{ + 1: []trange{{1, 3}}, + 2: []trange{{1, 3}, {6, 10}}, + 3: []trange{{6, 10}}, + }, + ), + + queries: []query{ + { + mint: 2, + maxt: 7, + ms: []labels.Matcher{labels.NewEqualMatcher("a", "a")}, + exp: newListSeriesSet([]Series{ + newSeries(map[string]string{ + "a": "a", + }, + []sample{{5, 2}, {6, 3}, {7, 4}}, + 
), + newSeries(map[string]string{ + "a": "a", + "b": "b", + }, + []sample{{4, 15}, {5, 3}}, + ), + }), + }, + { + mint: 2, + maxt: 7, + ms: []labels.Matcher{labels.NewEqualMatcher("b", "b")}, + exp: newListSeriesSet([]Series{ + newSeries(map[string]string{ + "a": "a", + "b": "b", + }, + []sample{{4, 15}, {5, 3}}, + ), + newSeries(map[string]string{ + "b": "b", + }, + []sample{{2, 2}, {3, 6}, {5, 1}}, + ), + }), + }, + { + mint: 1, + maxt: 4, + ms: []labels.Matcher{labels.NewEqualMatcher("a", "a")}, + exp: newListSeriesSet([]Series{ + newSeries(map[string]string{ + "a": "a", + "b": "b", + }, + []sample{{4, 15}}, + ), + }), + }, + { + mint: 1, + maxt: 3, + ms: []labels.Matcher{labels.NewEqualMatcher("a", "a")}, + exp: newListSeriesSet([]Series{}), + }, + }, + } + +Outer: + for _, c := range cases.queries { + ir, cr := createIdxChkReaders(cases.data) + querier := &blockQuerier{ + index: ir, + chunks: cr, + tombstones: cases.tombstones.Copy(), + + mint: c.mint, + maxt: c.maxt, + } + + res := querier.Select(c.ms...) + + for { + eok, rok := c.exp.Next(), res.Next() + require.Equal(t, eok, rok, "next") + + if !eok { + continue Outer + } + sexp := c.exp.At() + sres := res.At() + + require.Equal(t, sexp.Labels(), sres.Labels(), "labels") + + smplExp, errExp := expandSeriesIterator(sexp.Iterator()) + smplRes, errRes := expandSeriesIterator(sres.Iterator()) + + require.Equal(t, errExp, errRes, "samples error") + require.Equal(t, smplExp, smplRes, "samples") + } + } + + return +} + func TestBaseChunkSeries(t *testing.T) { type refdSeries struct { lset labels.Labels From 244b73fce16831ab094144ae730ce229aec4f24d Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Mon, 22 May 2017 16:42:36 +0530 Subject: [PATCH 15/25] Rename for clarity and consistency. Misc. changes for code cleanliness. 
Signed-off-by: Goutham Veeramachaneni --- block.go | 8 +- chunks.go | 6 +- chunks_test.go | 24 ++--- compact.go | 50 ++++----- head.go | 4 +- head_test.go | 36 +++---- querier.go | 60 +++++------ querier_test.go | 16 +-- tombstones.go | 82 +++++++-------- tombstones_test.go | 246 ++++++++++++++++++++++++--------------------- 10 files changed, 275 insertions(+), 257 deletions(-) diff --git a/block.go b/block.go index dcb4c7b4fb..a134acf435 100644 --- a/block.go +++ b/block.go @@ -182,10 +182,10 @@ func newPersistedBlock(dir string) (*persistedBlock, error) { return nil, err } - ts := make(map[uint32][]trange) + ts := make(map[uint32]intervals) for tr.Next() { s := tr.At() - ts[s.ref] = s.ranges + ts[s.ref] = s.intervals } pb := &persistedBlock{ @@ -238,7 +238,7 @@ func (pb *persistedBlock) Delete(mint, maxt int64, ms ...labels.Matcher) error { ir := pb.indexr // Choose only valid postings which have chunks in the time-range. - delStones := map[uint32][]trange{} + delStones := map[uint32]intervals{} Outer: for p.Next() { @@ -260,7 +260,7 @@ Outer: if maxtime > maxt { maxtime = maxt } - delStones[p.At()] = []trange{{mint, maxtime}} + delStones[p.At()] = intervals{{mint, maxtime}} continue Outer } } diff --git a/chunks.go b/chunks.go index 0cc64cb469..075384cd59 100644 --- a/chunks.go +++ b/chunks.go @@ -59,7 +59,7 @@ func (cm *ChunkMeta) writeHash(h hash.Hash) error { type deletedIterator struct { it chunks.Iterator - dranges []trange + intervals intervals } func (it *deletedIterator) At() (int64, float64) { @@ -71,13 +71,13 @@ Outer: for it.it.Next() { ts, _ := it.it.At() - for _, tr := range it.dranges { + for _, tr := range it.intervals { if tr.inBounds(ts) { continue Outer } if ts > tr.maxt { - it.dranges = it.dranges[1:] + it.intervals = it.intervals[1:] continue } diff --git a/chunks_test.go b/chunks_test.go index 4633dc7f06..2a722d8fd1 100644 --- a/chunks_test.go +++ b/chunks_test.go @@ -50,23 +50,23 @@ func TestDeletedIterator(t *testing.T) { } cases := 
[]struct { - r []trange + r intervals }{ - {r: []trange{{1, 20}}}, - {r: []trange{{1, 10}, {12, 20}, {21, 23}, {25, 30}}}, - {r: []trange{{1, 10}, {12, 20}, {20, 30}}}, - {r: []trange{{1, 10}, {12, 23}, {25, 30}}}, - {r: []trange{{1, 23}, {12, 20}, {25, 30}}}, - {r: []trange{{1, 23}, {12, 20}, {25, 3000}}}, - {r: []trange{{0, 2000}}}, - {r: []trange{{500, 2000}}}, - {r: []trange{{0, 200}}}, - {r: []trange{{1000, 20000}}}, + {r: intervals{{1, 20}}}, + {r: intervals{{1, 10}, {12, 20}, {21, 23}, {25, 30}}}, + {r: intervals{{1, 10}, {12, 20}, {20, 30}}}, + {r: intervals{{1, 10}, {12, 23}, {25, 30}}}, + {r: intervals{{1, 23}, {12, 20}, {25, 30}}}, + {r: intervals{{1, 23}, {12, 20}, {25, 3000}}}, + {r: intervals{{0, 2000}}}, + {r: intervals{{500, 2000}}}, + {r: intervals{{0, 200}}}, + {r: intervals{{1000, 20000}}}, } for _, c := range cases { i := int64(-1) - it := &deletedIterator{it: chk.Iterator(), dranges: c.r[:]} + it := &deletedIterator{it: chk.Iterator(), intervals: c.r[:]} ranges := c.r[:] for it.Next() { i++ diff --git a/compact.go b/compact.go index 4cc716605c..948e2f8245 100644 --- a/compact.go +++ b/compact.go @@ -323,19 +323,19 @@ func (c *compactor) populate(blocks []Block, indexw IndexWriter, chunkw ChunkWri ) for set.Next() { - lset, chks, ranges := set.At() // The chunks here are not fully deleted. + lset, chks, dranges := set.At() // The chunks here are not fully deleted. - if len(ranges) > 0 { + if len(dranges) > 0 { // Re-encode the chunk to not have deleted values. 
for _, chk := range chks { - if intervalOverlap(ranges[0].mint, ranges[len(ranges)-1].maxt, chk.MinTime, chk.MaxTime) { + if intervalOverlap(dranges[0].mint, dranges[len(dranges)-1].maxt, chk.MinTime, chk.MaxTime) { newChunk := chunks.NewXORChunk() app, err := newChunk.Appender() if err != nil { return nil, err } - it := &deletedIterator{it: chk.Chunk.Iterator(), dranges: ranges} + it := &deletedIterator{it: chk.Chunk.Iterator(), intervals: dranges} for it.Next() { ts, v := it.At() app.Append(ts, v) @@ -402,7 +402,7 @@ func (c *compactor) populate(blocks []Block, indexw IndexWriter, chunkw ChunkWri type compactionSet interface { Next() bool - At() (labels.Labels, []*ChunkMeta, []trange) + At() (labels.Labels, []*ChunkMeta, intervals) Err() error } @@ -412,10 +412,10 @@ type compactionSeriesSet struct { chunks ChunkReader tombstones TombstoneReader - l labels.Labels - c []*ChunkMeta - dranges []trange - err error + l labels.Labels + c []*ChunkMeta + intervals intervals + err error } func newCompactionSeriesSet(i IndexReader, c ChunkReader, t TombstoneReader, p Postings) *compactionSeriesSet { @@ -435,9 +435,9 @@ func (c *compactionSeriesSet) Next() bool { if c.tombstones.Seek(c.p.At()) { s := c.tombstones.At() if c.p.At() == s.ref { - c.dranges = s.ranges + c.intervals = s.intervals } else { - c.dranges = nil + c.intervals = nil } } c.l, c.c, c.err = c.index.Series(c.p.At()) @@ -446,10 +446,10 @@ func (c *compactionSeriesSet) Next() bool { } // Remove completely deleted chunks. 
- if len(c.dranges) > 0 { + if len(c.intervals) > 0 { chks := make([]*ChunkMeta, 0, len(c.c)) for _, chk := range c.c { - if !(trange{chk.MinTime, chk.MaxTime}.isSubrange(c.dranges)) { + if !(interval{chk.MinTime, chk.MaxTime}.isSubrange(c.intervals)) { chk.Chunk, c.err = c.chunks.Chunk(chk.Ref) if c.err != nil { return false @@ -472,17 +472,17 @@ func (c *compactionSeriesSet) Err() error { return c.p.Err() } -func (c *compactionSeriesSet) At() (labels.Labels, []*ChunkMeta, []trange) { - return c.l, c.c, c.dranges +func (c *compactionSeriesSet) At() (labels.Labels, []*ChunkMeta, intervals) { + return c.l, c.c, c.intervals } type compactionMerger struct { a, b compactionSet - aok, bok bool - l labels.Labels - c []*ChunkMeta - dranges []trange + aok, bok bool + l labels.Labels + c []*ChunkMeta + intervals intervals } type compactionSeries struct { @@ -523,21 +523,21 @@ func (c *compactionMerger) Next() bool { d := c.compare() // Both sets contain the current series. Chain them into a single one. if d > 0 { - c.l, c.c, c.dranges = c.b.At() + c.l, c.c, c.intervals = c.b.At() c.bok = c.b.Next() } else if d < 0 { - c.l, c.c, c.dranges = c.a.At() + c.l, c.c, c.intervals = c.a.At() c.aok = c.a.Next() } else { l, ca, ra := c.a.At() _, cb, rb := c.b.At() for _, r := range rb { - ra = addNewInterval(ra, r) + ra = ra.add(r) } c.l = l c.c = append(ca, cb...) 
- c.dranges = ra + c.intervals = ra c.aok = c.a.Next() c.bok = c.b.Next() @@ -552,8 +552,8 @@ func (c *compactionMerger) Err() error { return c.b.Err() } -func (c *compactionMerger) At() (labels.Labels, []*ChunkMeta, []trange) { - return c.l, c.c, c.dranges +func (c *compactionMerger) At() (labels.Labels, []*ChunkMeta, intervals) { + return c.l, c.c, c.intervals } func renameFile(from, to string) error { diff --git a/head.go b/head.go index 179c700f4d..db7becfb8a 100644 --- a/head.go +++ b/head.go @@ -162,7 +162,7 @@ func (h *HeadBlock) init() error { for tr.Next() { s := tr.At() h.tombstones.refs = append(h.tombstones.refs, s.ref) - h.tombstones.stones[s.ref] = s.ranges + h.tombstones.stones[s.ref] = s.intervals } return errors.Wrap(err, "tombstones reader iteration") } @@ -245,7 +245,7 @@ Outer: if maxtime > maxt { maxtime = maxt } - h.tombstones.stones[ref] = addNewInterval(h.tombstones.stones[ref], trange{mint, maxtime}) + h.tombstones.stones[ref] = h.tombstones.stones[ref].add(interval{mint, maxtime}) } if p.Err() != nil { diff --git a/head_test.go b/head_test.go index 8036c13533..07fbe377e8 100644 --- a/head_test.go +++ b/head_test.go @@ -399,28 +399,28 @@ func TestDeleteSimple(t *testing.T) { require.NoError(t, app.Commit()) cases := []struct { - dranges []trange - remaint []int64 + intervals intervals + remaint []int64 }{ { - dranges: []trange{{0, 3}}, - remaint: []int64{4, 5, 6, 7, 8, 9}, + intervals: intervals{{0, 3}}, + remaint: []int64{4, 5, 6, 7, 8, 9}, }, { - dranges: []trange{{1, 3}}, - remaint: []int64{0, 4, 5, 6, 7, 8, 9}, + intervals: intervals{{1, 3}}, + remaint: []int64{0, 4, 5, 6, 7, 8, 9}, }, { - dranges: []trange{{1, 3}, {4, 7}}, - remaint: []int64{0, 8, 9}, + intervals: intervals{{1, 3}, {4, 7}}, + remaint: []int64{0, 8, 9}, }, { - dranges: []trange{{1, 3}, {4, 700}}, - remaint: []int64{0}, + intervals: intervals{{1, 3}, {4, 700}}, + remaint: []int64{0}, }, { - dranges: []trange{{0, 9}}, - remaint: []int64{}, + intervals: intervals{{0, 9}}, 
+ remaint: []int64{}, }, } @@ -431,7 +431,7 @@ Outer: hb.tombstones = newEmptyTombstoneReader() // Delete the ranges. - for _, r := range c.dranges { + for _, r := range c.intervals { require.NoError(t, hb.Delete(r.mint, r.maxt, labels.NewEqualMatcher("a", "b"))) } @@ -597,18 +597,18 @@ func TestDelete_e2e(t *testing.T) { // Delete a time-range from each-selector. dels := []struct { ms []labels.Matcher - drange []trange + drange intervals }{ { ms: []labels.Matcher{labels.NewEqualMatcher("a", "b")}, - drange: []trange{{300, 500}, {600, 670}}, + drange: intervals{{300, 500}, {600, 670}}, }, { ms: []labels.Matcher{ labels.NewEqualMatcher("a", "b"), labels.NewEqualMatcher("job", "prom-k8s"), }, - drange: []trange{{300, 500}, {100, 670}}, + drange: intervals{{300, 500}, {100, 670}}, }, { ms: []labels.Matcher{ @@ -616,7 +616,7 @@ func TestDelete_e2e(t *testing.T) { labels.NewEqualMatcher("instance", "localhost:9090"), labels.NewEqualMatcher("job", "prometheus"), }, - drange: []trange{{300, 400}, {100, 6700}}, + drange: intervals{{300, 400}, {100, 6700}}, }, // TODO: Add Regexp Matchers. } @@ -717,7 +717,7 @@ func boundedSamples(full []sample, mint, maxt int64) []sample { return full } -func deletedSamples(full []sample, dranges []trange) []sample { +func deletedSamples(full []sample, dranges intervals) []sample { ds := make([]sample, 0, len(full)) Outer: for _, s := range full { diff --git a/querier.go b/querier.go index c67091cc64..28bf3d21a0 100644 --- a/querier.go +++ b/querier.go @@ -369,7 +369,7 @@ func (s *mergedSeriesSet) Next() bool { type chunkSeriesSet interface { Next() bool - At() (labels.Labels, []*ChunkMeta, stone) + At() (labels.Labels, []*ChunkMeta, intervals) Err() error } @@ -381,14 +381,14 @@ type baseChunkSeries struct { tombstones TombstoneReader absent []string // labels that must be unset in results. 
- lset labels.Labels - chks []*ChunkMeta - stone stone - err error + lset labels.Labels + chks []*ChunkMeta + intervals intervals + err error } -func (s *baseChunkSeries) At() (labels.Labels, []*ChunkMeta, stone) { - return s.lset, s.chks, s.stone +func (s *baseChunkSeries) At() (labels.Labels, []*ChunkMeta, intervals) { + return s.lset, s.chks, s.intervals } func (s *baseChunkSeries) Err() error { return s.err } @@ -413,12 +413,12 @@ Outer: s.lset = lset s.chks = chunks if s.tombstones.Seek(ref) && s.tombstones.At().ref == ref { - s.stone = s.tombstones.At() + s.intervals = s.tombstones.At().intervals // Only those chunks that are not entirely deleted. chks := make([]*ChunkMeta, 0, len(s.chks)) for _, chk := range s.chks { - if !(trange{chk.MinTime, chk.MaxTime}.isSubrange(s.stone.ranges)) { + if !(interval{chk.MinTime, chk.MaxTime}.isSubrange(s.intervals)) { chks = append(chks, chk) } } @@ -442,20 +442,20 @@ type populatedChunkSeries struct { chunks ChunkReader mint, maxt int64 - err error - chks []*ChunkMeta - lset labels.Labels - stone stone + err error + chks []*ChunkMeta + lset labels.Labels + intervals intervals } -func (s *populatedChunkSeries) At() (labels.Labels, []*ChunkMeta, stone) { - return s.lset, s.chks, s.stone +func (s *populatedChunkSeries) At() (labels.Labels, []*ChunkMeta, intervals) { + return s.lset, s.chks, s.intervals } func (s *populatedChunkSeries) Err() error { return s.err } func (s *populatedChunkSeries) Next() bool { for s.set.Next() { - lset, chks, stn := s.set.At() + lset, chks, dranges := s.set.At() for len(chks) > 0 { if chks[0].MaxTime >= s.mint { @@ -482,7 +482,7 @@ func (s *populatedChunkSeries) Next() bool { s.lset = lset s.chks = chks - s.stone = stn + s.intervals = dranges return true } @@ -503,14 +503,14 @@ type blockSeriesSet struct { func (s *blockSeriesSet) Next() bool { for s.set.Next() { - lset, chunks, stn := s.set.At() + lset, chunks, dranges := s.set.At() s.cur = &chunkSeries{ labels: lset, chunks: chunks, mint: 
s.mint, maxt: s.maxt, - stone: stn, + intervals: dranges, } return true } @@ -531,7 +531,7 @@ type chunkSeries struct { mint, maxt int64 - stone stone + intervals intervals } func (s *chunkSeries) Labels() labels.Labels { @@ -539,7 +539,7 @@ func (s *chunkSeries) Labels() labels.Labels { } func (s *chunkSeries) Iterator() SeriesIterator { - return newChunkSeriesIterator(s.chunks, s.stone, s.mint, s.maxt) + return newChunkSeriesIterator(s.chunks, s.intervals, s.mint, s.maxt) } // SeriesIterator iterates over the data of a time series. @@ -637,13 +637,13 @@ type chunkSeriesIterator struct { maxt, mint int64 - stone stone + intervals intervals } -func newChunkSeriesIterator(cs []*ChunkMeta, s stone, mint, maxt int64) *chunkSeriesIterator { +func newChunkSeriesIterator(cs []*ChunkMeta, dranges intervals, mint, maxt int64) *chunkSeriesIterator { it := cs[0].Chunk.Iterator() - if len(s.ranges) > 0 { - it = &deletedIterator{it: it, dranges: s.ranges} + if len(dranges) > 0 { + it = &deletedIterator{it: it, intervals: dranges} } return &chunkSeriesIterator{ chunks: cs, @@ -653,7 +653,7 @@ func newChunkSeriesIterator(cs []*ChunkMeta, s stone, mint, maxt int64) *chunkSe mint: mint, maxt: maxt, - stone: s, + intervals: dranges, } } @@ -688,8 +688,8 @@ func (it *chunkSeriesIterator) Seek(t int64) (ok bool) { it.i = x it.cur = it.chunks[x].Chunk.Iterator() - if len(it.stone.ranges) > 0 { - it.cur = &deletedIterator{it: it.cur, dranges: it.stone.ranges} + if len(it.intervals) > 0 { + it.cur = &deletedIterator{it: it.cur, intervals: it.intervals} } for it.cur.Next() { @@ -722,8 +722,8 @@ func (it *chunkSeriesIterator) Next() bool { it.i++ it.cur = it.chunks[it.i].Chunk.Iterator() - if len(it.stone.ranges) > 0 { - it.cur = &deletedIterator{it: it.cur, dranges: it.stone.ranges} + if len(it.intervals) > 0 { + it.cur = &deletedIterator{it: it.cur, intervals: it.intervals} } return it.Next() diff --git a/querier_test.go b/querier_test.go index 71b741340c..d63c36d930 100644 --- 
a/querier_test.go +++ b/querier_test.go @@ -481,10 +481,10 @@ func TestBlockQuerierDelete(t *testing.T) { }, }, tombstones: newMapTombstoneReader( - map[uint32][]trange{ - 1: []trange{{1, 3}}, - 2: []trange{{1, 3}, {6, 10}}, - 3: []trange{{6, 10}}, + map[uint32]intervals{ + 1: intervals{{1, 3}}, + 2: intervals{{1, 3}, {6, 10}}, + 3: intervals{{6, 10}}, }, ), @@ -876,7 +876,7 @@ func TestSeriesIterator(t *testing.T) { chunkFromSamples(tc.b), chunkFromSamples(tc.c), } - res := newChunkSeriesIterator(chkMetas, stone{}, tc.mint, tc.maxt) + res := newChunkSeriesIterator(chkMetas, nil, tc.mint, tc.maxt) smplValid := make([]sample, 0) for _, s := range tc.exp { @@ -947,7 +947,7 @@ func TestSeriesIterator(t *testing.T) { chunkFromSamples(tc.b), chunkFromSamples(tc.c), } - res := newChunkSeriesIterator(chkMetas, stone{}, tc.mint, tc.maxt) + res := newChunkSeriesIterator(chkMetas, nil, tc.mint, tc.maxt) smplValid := make([]sample, 0) for _, s := range tc.exp { @@ -1094,8 +1094,8 @@ func (m *mockChunkSeriesSet) Next() bool { return m.i < len(m.l) } -func (m *mockChunkSeriesSet) At() (labels.Labels, []*ChunkMeta, stone) { - return m.l[m.i], m.cm[m.i], stone{} +func (m *mockChunkSeriesSet) At() (labels.Labels, []*ChunkMeta, intervals) { + return m.l[m.i], m.cm[m.i], nil } func (m *mockChunkSeriesSet) Err() error { diff --git a/tombstones.go b/tombstones.go index 76ef2daccd..96916d23ab 100644 --- a/tombstones.go +++ b/tombstones.go @@ -36,14 +36,14 @@ func writeTombstoneFile(dir string, tr TombstoneReader) error { // Write the ranges. 
buf.reset() - buf.putVarint64(int64(len(s.ranges))) + buf.putVarint64(int64(len(s.intervals))) n, err := f.Write(buf.get()) if err != nil { return err } pos += int64(n) - for _, r := range s.ranges { + for _, r := range s.intervals { buf.reset() buf.putVarint64(r.mint) buf.putVarint64(r.maxt) @@ -93,8 +93,8 @@ func writeTombstoneFile(dir string, tr TombstoneReader) error { // stone holds the information on the posting and time-range // that is deleted. type stone struct { - ref uint32 - ranges []trange + ref uint32 + intervals intervals } // TombstoneReader is the iterator over tombstones. @@ -164,7 +164,7 @@ func (t *tombstoneReader) Next() bool { return false } - dranges := make([]trange, 0, numRanges) + dranges := make(intervals, 0, numRanges) for i := 0; i < int(numRanges); i++ { mint := d.varint64() maxt := d.varint64() @@ -173,11 +173,11 @@ func (t *tombstoneReader) Next() bool { return false } - dranges = append(dranges, trange{mint, maxt}) + dranges = append(dranges, interval{mint, maxt}) } t.stones = t.stones[12:] - t.cur = stone{ref: ref, ranges: dranges} + t.cur = stone{ref: ref, intervals: dranges} return true } @@ -217,10 +217,10 @@ type mapTombstoneReader struct { refs []uint32 cur uint32 - stones map[uint32][]trange + stones map[uint32]intervals } -func newMapTombstoneReader(ts map[uint32][]trange) *mapTombstoneReader { +func newMapTombstoneReader(ts map[uint32]intervals) *mapTombstoneReader { refs := make([]uint32, 0, len(ts)) for k := range ts { refs = append(refs, k) @@ -231,7 +231,7 @@ func newMapTombstoneReader(ts map[uint32][]trange) *mapTombstoneReader { } func newEmptyTombstoneReader() *mapTombstoneReader { - return &mapTombstoneReader{stones: make(map[uint32][]trange)} + return &mapTombstoneReader{stones: make(map[uint32]intervals)} } func (t *mapTombstoneReader) Next() bool { @@ -265,7 +265,7 @@ func (t *mapTombstoneReader) Seek(ref uint32) bool { } func (t *mapTombstoneReader) At() stone { - return stone{ref: t.cur, ranges: 
t.stones[t.cur]} + return stone{ref: t.cur, intervals: t.stones[t.cur]} } func (t *mapTombstoneReader) Copy() TombstoneReader { @@ -285,11 +285,11 @@ type simpleTombstoneReader struct { refs []uint32 cur uint32 - ranges []trange + intervals intervals } -func newSimpleTombstoneReader(refs []uint32, drange []trange) *simpleTombstoneReader { - return &simpleTombstoneReader{refs: refs, ranges: drange} +func newSimpleTombstoneReader(refs []uint32, dranges intervals) *simpleTombstoneReader { + return &simpleTombstoneReader{refs: refs, intervals: dranges} } func (t *simpleTombstoneReader) Next() bool { @@ -323,11 +323,11 @@ func (t *simpleTombstoneReader) Seek(ref uint32) bool { } func (t *simpleTombstoneReader) At() stone { - return stone{ref: t.cur, ranges: t.ranges} + return stone{ref: t.cur, intervals: t.intervals} } func (t *simpleTombstoneReader) Copy() TombstoneReader { - return &simpleTombstoneReader{refs: t.refs[:], cur: t.cur, ranges: t.ranges} + return &simpleTombstoneReader{refs: t.refs[:], cur: t.cur, intervals: t.intervals} } func (t *simpleTombstoneReader) Err() error { @@ -378,8 +378,8 @@ func (t *mergedTombstoneReader) Next() bool { t.bok = t.b.Next() } else { // Merge time ranges. 
- for _, r := range bcur.ranges { - acur.ranges = addNewInterval(acur.ranges, r) + for _, r := range bcur.intervals { + acur.intervals = acur.intervals.add(r) } t.cur = acur @@ -424,16 +424,16 @@ func (t *mergedTombstoneReader) Err() error { return t.b.Err() } -type trange struct { +type interval struct { mint, maxt int64 } -func (tr trange) inBounds(t int64) bool { +func (tr interval) inBounds(t int64) bool { return t >= tr.mint && t <= tr.maxt } -func (tr trange) isSubrange(ranges []trange) bool { - for _, r := range ranges { +func (tr interval) isSubrange(dranges intervals) bool { + for _, r := range dranges { if r.inBounds(tr.mint) && r.inBounds(tr.maxt) { return true } @@ -442,49 +442,51 @@ func (tr trange) isSubrange(ranges []trange) bool { return false } +type intervals []interval + // This adds the new time-range to the existing ones. // The existing ones must be sorted. -// TODO(gouthamve): {1, 2}, {3, 4} can be merged into {1, 4}. -func addNewInterval(existing []trange, n trange) []trange { - for i, r := range existing { +func (itvs intervals) add(n interval) intervals { + for i, r := range itvs { // TODO(gouthamve): Make this codepath easier to digest. - if r.inBounds(n.mint) { + if r.inBounds(n.mint-1) || r.inBounds(n.mint) { if n.maxt > r.maxt { - existing[i].maxt = n.maxt + itvs[i].maxt = n.maxt } j := 0 - for _, r2 := range existing[i+1:] { + for _, r2 := range itvs[i+1:] { if n.maxt < r2.mint { break } j++ } if j != 0 { - if existing[i+j].maxt > n.maxt { - existing[i].maxt = existing[i+j].maxt + if itvs[i+j].maxt > n.maxt { + itvs[i].maxt = itvs[i+j].maxt } - existing = append(existing[:i+1], existing[i+j+1:]...) + itvs = append(itvs[:i+1], itvs[i+j+1:]...) 
} - return existing + return itvs } - if r.inBounds(n.maxt) { + if r.inBounds(n.maxt+1) || r.inBounds(n.maxt) { if n.mint < r.maxt { - existing[i].mint = n.mint + itvs[i].mint = n.mint } - return existing + return itvs } + if n.mint < r.mint { - newRange := make([]trange, i, len(existing[:i])+1) - copy(newRange, existing[:i]) + newRange := make(intervals, i, len(itvs[:i])+1) + copy(newRange, itvs[:i]) newRange = append(newRange, n) - newRange = append(newRange, existing[i:]...) + newRange = append(newRange, itvs[i:]...) return newRange } } - existing = append(existing, n) - return existing + itvs = append(itvs, n) + return itvs } diff --git a/tombstones_test.go b/tombstones_test.go index 0793a1cb32..1807651c09 100644 --- a/tombstones_test.go +++ b/tombstones_test.go @@ -17,15 +17,15 @@ func TestWriteAndReadbackTombStones(t *testing.T) { ref := uint32(0) - stones := make(map[uint32][]trange) + stones := make(map[uint32]intervals) // Generate the tombstones. for i := 0; i < 100; i++ { ref += uint32(rand.Int31n(10)) + 1 numRanges := rand.Intn(5) - dranges := make([]trange, numRanges) + dranges := make(intervals, numRanges) mint := rand.Int63n(time.Now().UnixNano()) for j := 0; j < numRanges; j++ { - dranges[j] = trange{mint, mint + rand.Int63n(1000)} + dranges[j] = interval{mint, mint + rand.Int63n(1000)} mint += rand.Int63n(1000) + 1 } stones[ref] = dranges @@ -49,54 +49,70 @@ func TestWriteAndReadbackTombStones(t *testing.T) { func TestAddingNewIntervals(t *testing.T) { cases := []struct { - exist []trange - new trange + exist intervals + new interval - exp []trange + exp intervals }{ { - new: trange{1, 2}, - exp: []trange{{1, 2}}, + new: interval{1, 2}, + exp: intervals{{1, 2}}, }, { - exist: []trange{{1, 10}, {12, 20}, {25, 30}}, - new: trange{21, 23}, - exp: []trange{{1, 10}, {12, 20}, {21, 23}, {25, 30}}, + exist: intervals{{1, 2}}, + new: interval{1, 2}, + exp: intervals{{1, 2}}, }, { - exist: []trange{{1, 2}, {3, 5}, {6, 7}}, - new: trange{6, 7}, - exp: 
[]trange{{1, 2}, {3, 5}, {6, 7}}, + exist: intervals{{1, 4}, {6, 6}}, + new: interval{5, 6}, + exp: intervals{{1, 6}}, }, { - exist: []trange{{1, 10}, {12, 20}, {25, 30}}, - new: trange{21, 25}, - exp: []trange{{1, 10}, {12, 20}, {21, 30}}, + exist: intervals{{1, 10}, {12, 20}, {25, 30}}, + new: interval{21, 23}, + exp: intervals{{1, 10}, {12, 23}, {25, 30}}, }, { - exist: []trange{{1, 10}, {12, 20}, {25, 30}}, - new: trange{18, 23}, - exp: []trange{{1, 10}, {12, 23}, {25, 30}}, + exist: intervals{{1, 2}, {3, 5}, {7, 7}}, + new: interval{6, 7}, + exp: intervals{{1, 2}, {3, 7}}, }, { - exist: []trange{{1, 10}, {12, 20}, {25, 30}}, - new: trange{9, 23}, - exp: []trange{{1, 23}, {25, 30}}, + exist: intervals{{1, 10}, {12, 20}, {25, 30}}, + new: interval{21, 25}, + exp: intervals{{1, 10}, {12, 30}}, }, { - exist: []trange{{1, 10}, {12, 20}, {25, 30}}, - new: trange{9, 230}, - exp: []trange{{1, 230}}, + exist: intervals{{1, 10}, {12, 20}, {25, 30}}, + new: interval{18, 23}, + exp: intervals{{1, 10}, {12, 23}, {25, 30}}, }, { - exist: []trange{{5, 10}, {12, 20}, {25, 30}}, - new: trange{1, 4}, - exp: []trange{{1, 4}, {5, 10}, {12, 20}, {25, 30}}, + exist: intervals{{1, 10}, {12, 20}, {25, 30}}, + new: interval{9, 23}, + exp: intervals{{1, 23}, {25, 30}}, + }, + { + exist: intervals{{1, 10}, {12, 20}, {25, 30}}, + new: interval{9, 230}, + exp: intervals{{1, 230}}, + }, + { + exist: intervals{{5, 10}, {12, 20}, {25, 30}}, + new: interval{1, 4}, + exp: intervals{{1, 10}, {12, 20}, {25, 30}}, + }, + { + exist: intervals{{5, 10}, {12, 20}, {25, 30}}, + new: interval{11, 14}, + exp: intervals{{5, 20}, {25, 30}}, }, } for _, c := range cases { - require.Equal(t, c.exp, addNewInterval(c.exist, c.new)) + + require.Equal(t, c.exp, c.exist.add(c.new)) } return } @@ -104,20 +120,20 @@ func TestAddingNewIntervals(t *testing.T) { func TestTombstoneReadersSeek(t *testing.T) { // This is assuming that the listPostings is perfect. 
table := struct { - m map[uint32][]trange + m map[uint32]intervals cases []uint32 }{ - m: map[uint32][]trange{ - 2: []trange{{1, 2}}, - 3: []trange{{1, 4}, {5, 6}}, - 4: []trange{{10, 15}, {16, 20}}, - 5: []trange{{1, 4}, {5, 6}}, - 50: []trange{{10, 20}, {35, 50}}, - 600: []trange{{100, 2000}}, - 1000: []trange{}, - 1500: []trange{{10000, 500000}}, - 1600: []trange{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, + m: map[uint32]intervals{ + 2: intervals{{1, 2}}, + 3: intervals{{1, 4}, {5, 6}}, + 4: intervals{{10, 15}, {16, 20}}, + 5: intervals{{1, 4}, {5, 6}}, + 50: intervals{{10, 20}, {35, 50}}, + 600: intervals{{100, 2000}}, + 1000: intervals{}, + 1500: intervals{{10000, 500000}}, + 1600: intervals{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, }, cases: []uint32{1, 10, 20, 40, 30, 20, 50, 599, 601, 1000, 1600, 1601, 2000}, @@ -138,13 +154,13 @@ func TestTombstoneReadersSeek(t *testing.T) { require.Equal(t, pr.Seek(ref), trc.Seek(ref)) if pr.Seek(ref) { require.Equal(t, pr.At(), trc.At().ref) - require.Equal(t, table.m[pr.At()], trc.At().ranges) + require.Equal(t, table.m[pr.At()], trc.At().intervals) } for pr.Next() { require.True(t, trc.Next()) require.Equal(t, pr.At(), trc.At().ref) - require.Equal(t, table.m[pr.At()], trc.At().ranges) + require.Equal(t, table.m[pr.At()], trc.At().intervals) } require.False(t, trc.Next()) @@ -172,7 +188,7 @@ func TestTombstoneReadersSeek(t *testing.T) { return }) t.Run("simpleTombstoneReader", func(t *testing.T) { - ranges := []trange{{1, 2}, {3, 4}, {5, 6}} + dranges := intervals{{1, 2}, {3, 4}, {5, 6}} for _, ref := range table.cases { // Create the listPostings. @@ -182,19 +198,19 @@ func TestTombstoneReadersSeek(t *testing.T) { } sort.Sort(uint32slice(refs)) pr := newListPostings(refs[:]) - tr := newSimpleTombstoneReader(refs[:], ranges) + tr := newSimpleTombstoneReader(refs[:], dranges) // Compare both. 
trc := tr.Copy() require.Equal(t, pr.Seek(ref), trc.Seek(ref)) if pr.Seek(ref) { require.Equal(t, pr.At(), trc.At().ref) - require.Equal(t, ranges, tr.At().ranges) + require.Equal(t, dranges, tr.At().intervals) } for pr.Next() { require.True(t, trc.Next()) require.Equal(t, pr.At(), trc.At().ref, "refs") - require.Equal(t, ranges, trc.At().ranges) + require.Equal(t, dranges, trc.At().intervals) } require.False(t, trc.Next()) @@ -213,93 +229,93 @@ func TestMergedTombstoneReader(t *testing.T) { }{ { a: newMapTombstoneReader( - map[uint32][]trange{ - 2: []trange{{1, 2}}, - 3: []trange{{1, 4}, {5, 6}}, - 4: []trange{{10, 15}, {16, 20}}, - 5: []trange{{1, 4}, {5, 6}}, - 50: []trange{{10, 20}, {35, 50}}, - 600: []trange{{100, 2000}}, - 1000: []trange{}, - 1500: []trange{{10000, 500000}}, - 1600: []trange{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, + map[uint32]intervals{ + 2: intervals{{1, 2}}, + 3: intervals{{1, 4}, {6, 6}}, + 4: intervals{{10, 15}, {16, 20}}, + 5: intervals{{1, 4}, {5, 6}}, + 50: intervals{{10, 20}, {35, 50}}, + 600: intervals{{100, 2000}}, + 1000: intervals{}, + 1500: intervals{{10000, 500000}}, + 1600: intervals{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, }, ), b: newMapTombstoneReader( - map[uint32][]trange{ - 2: []trange{{1, 2}}, - 3: []trange{{1, 4}, {5, 6}}, - 4: []trange{{10, 15}, {16, 20}}, - 5: []trange{{1, 4}, {5, 6}}, - 50: []trange{{10, 20}, {35, 50}}, - 600: []trange{{100, 2000}}, - 1000: []trange{}, - 1500: []trange{{10000, 500000}}, - 1600: []trange{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, + map[uint32]intervals{ + 2: intervals{{1, 2}}, + 3: intervals{{5, 6}}, + 4: intervals{{10, 15}, {16, 20}}, + 5: intervals{{1, 4}, {5, 6}}, + 50: intervals{{10, 20}, {35, 50}}, + 600: intervals{{100, 2000}}, + 1000: intervals{}, + 1500: intervals{{10000, 500000}}, + 1600: intervals{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, }, ), exp: newMapTombstoneReader( - map[uint32][]trange{ - 2: []trange{{1, 2}}, - 3: []trange{{1, 4}, {5, 6}}, - 4: []trange{{10, 15}, {16, 20}}, - 5: []trange{{1, 4}, {5, 
6}}, - 50: []trange{{10, 20}, {35, 50}}, - 600: []trange{{100, 2000}}, - 1000: []trange{}, - 1500: []trange{{10000, 500000}}, - 1600: []trange{{1, 2}, {3, 5}, {6, 7}}, + map[uint32]intervals{ + 2: intervals{{1, 2}}, + 3: intervals{{1, 6}}, + 4: intervals{{10, 20}}, + 5: intervals{{1, 6}}, + 50: intervals{{10, 20}, {35, 50}}, + 600: intervals{{100, 2000}}, + 1000: intervals{}, + 1500: intervals{{10000, 500000}}, + 1600: intervals{{1, 7}}, }, ), }, { a: newMapTombstoneReader( - map[uint32][]trange{ - 2: []trange{{1, 2}}, - 3: []trange{{1, 4}, {5, 6}}, - 4: []trange{{10, 15}, {16, 20}}, - 5: []trange{{1, 4}, {5, 6}}, - 50: []trange{{10, 20}, {35, 50}}, - 600: []trange{{100, 2000}}, - 1000: []trange{}, - 1500: []trange{{10000, 500000}}, - 1600: []trange{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, + map[uint32]intervals{ + 2: intervals{{1, 2}}, + 3: intervals{{1, 4}, {6, 6}}, + 4: intervals{{10, 15}, {17, 20}}, + 5: intervals{{1, 6}}, + 50: intervals{{10, 20}, {35, 50}}, + 600: intervals{{100, 2000}}, + 1000: intervals{}, + 1500: intervals{{10000, 500000}}, + 1600: intervals{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, }, ), b: newMapTombstoneReader( - map[uint32][]trange{ - 20: []trange{{1, 2}}, - 30: []trange{{1, 4}, {5, 6}}, - 40: []trange{{10, 15}, {16, 20}}, - 60: []trange{{1, 4}, {5, 6}}, - 500: []trange{{10, 20}, {35, 50}}, - 6000: []trange{{100, 2000}}, - 10000: []trange{}, - 15000: []trange{{10000, 500000}}, - 1600: []trange{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, + map[uint32]intervals{ + 20: intervals{{1, 2}}, + 30: intervals{{1, 4}, {5, 6}}, + 40: intervals{{10, 15}, {16, 20}}, + 60: intervals{{1, 4}, {5, 6}}, + 500: intervals{{10, 20}, {35, 50}}, + 6000: intervals{{100, 2000}}, + 10000: intervals{}, + 15000: intervals{{10000, 500000}}, + 1600: intervals{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, }, ), exp: newMapTombstoneReader( - map[uint32][]trange{ - 2: []trange{{1, 2}}, - 3: []trange{{1, 4}, {5, 6}}, - 4: []trange{{10, 15}, {16, 20}}, - 5: []trange{{1, 4}, {5, 6}}, - 50: []trange{{10, 20}, 
{35, 50}}, - 600: []trange{{100, 2000}}, - 1000: []trange{}, - 1500: []trange{{10000, 500000}}, - 20: []trange{{1, 2}}, - 30: []trange{{1, 4}, {5, 6}}, - 40: []trange{{10, 15}, {16, 20}}, - 60: []trange{{1, 4}, {5, 6}}, - 500: []trange{{10, 20}, {35, 50}}, - 6000: []trange{{100, 2000}}, - 10000: []trange{}, - 15000: []trange{{10000, 500000}}, - 1600: []trange{{1, 2}, {3, 5}, {6, 7}}, + map[uint32]intervals{ + 2: intervals{{1, 2}}, + 3: intervals{{1, 4}, {6, 6}}, + 4: intervals{{10, 15}, {17, 20}}, + 5: intervals{{1, 6}}, + 50: intervals{{10, 20}, {35, 50}}, + 600: intervals{{100, 2000}}, + 1000: intervals{}, + 1500: intervals{{10000, 500000}}, + 20: intervals{{1, 2}}, + 30: intervals{{1, 4}, {5, 6}}, + 40: intervals{{10, 15}, {16, 20}}, + 60: intervals{{1, 4}, {5, 6}}, + 500: intervals{{10, 20}, {35, 50}}, + 6000: intervals{{100, 2000}}, + 10000: intervals{}, + 15000: intervals{{10000, 500000}}, + 1600: intervals{{1, 7}}, }, ), }, From 3eb4119ab1e643b4f9277a4dd18a52acab906eac Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Tue, 23 May 2017 16:15:16 +0530 Subject: [PATCH 16/25] Make HeadBlock use WAL. Signed-off-by: Goutham Veeramachaneni --- block.go | 3 + head.go | 72 ++++++++++++++---------- wal.go | 154 ++++++++++++++++++++++++++++++++++++++++++---------- wal_test.go | 96 ++++++++++++++++++++++---------- 4 files changed, 238 insertions(+), 87 deletions(-) diff --git a/block.go b/block.go index a134acf435..41e5c8b39d 100644 --- a/block.go +++ b/block.go @@ -260,6 +260,9 @@ Outer: if maxtime > maxt { maxtime = maxt } + if mint < chunks[0].MinTime { + mint = chunks[0].MinTime + } delStones[p.At()] = intervals{{mint, maxtime}} continue Outer } diff --git a/head.go b/head.go index db7becfb8a..2eae0952ec 100644 --- a/head.go +++ b/head.go @@ -100,11 +100,6 @@ func TouchHeadBlock(dir string, mint, maxt int64) (string, error) { return "", err } - // Write an empty tombstones file. 
- if err := writeTombstoneFile(tmp, newEmptyTombstoneReader()); err != nil { - return "", err - } - return dir, renameFile(tmp, dir) } @@ -131,16 +126,19 @@ func OpenHeadBlock(dir string, l log.Logger, wal WAL) (*HeadBlock, error) { func (h *HeadBlock) init() error { r := h.wal.Reader() - for r.Next() { - series, samples := r.At() - + seriesFunc := func(series []labels.Labels) error { for _, lset := range series { h.create(lset.Hash(), lset) h.meta.Stats.NumSeries++ } + + return nil + } + samplesFunc := func(samples []RefSample) error { for _, s := range samples { if int(s.Ref) >= len(h.series) { - return errors.Errorf("unknown series reference %d (max %d); abort WAL restore", s.Ref, len(h.series)) + return errors.Errorf("unknown series reference %d (max %d); abort WAL restore", + s.Ref, len(h.series)) } h.series[s.Ref].append(s.T, s.V) @@ -149,22 +147,26 @@ func (h *HeadBlock) init() error { } h.meta.Stats.NumSamples++ } + + return nil } - if err := r.Err(); err != nil { + deletesFunc := func(stones []stone) error { + for _, s := range stones { + for _, itv := range s.intervals { + // TODO(gouthamve): Recheck. + h.tombstones.stones[s.ref].add(itv) + } + } + + return nil + } + + if err := r.Read(seriesFunc, samplesFunc, deletesFunc); err != nil { return errors.Wrap(err, "consume WAL") } + h.tombstones = newMapTombstoneReader(h.tombstones.stones) - tr, err := readTombstoneFile(h.dir) - if err != nil { - return errors.Wrap(err, "read tombstones file") - } - - for tr.Next() { - s := tr.At() - h.tombstones.refs = append(h.tombstones.refs, s.ref) - h.tombstones.stones[s.ref] = s.intervals - } - return errors.Wrap(err, "tombstones reader iteration") + return nil } // inBounds returns true if the given timestamp is within the valid @@ -230,6 +232,7 @@ func (h *HeadBlock) Delete(mint int64, maxt int64, ms ...labels.Matcher) error { pr := newPostingsReader(ir) p, absent := pr.Select(ms...) 
+ newStones := make(map[uint32]intervals) Outer: for p.Next() { ref := p.At() @@ -245,15 +248,26 @@ Outer: if maxtime > maxt { maxtime = maxt } - h.tombstones.stones[ref] = h.tombstones.stones[ref].add(interval{mint, maxtime}) + if mint < h.series[ref].chunks[0].minTime { + mint = h.series[ref].chunks[0].minTime + } + + newStones[ref] = intervals{{mint, maxtime}} } if p.Err() != nil { return p.Err() } + if err := h.wal.LogDeletes(newMapTombstoneReader(newStones)); err != nil { + return err + } + for k, v := range newStones { + h.tombstones.stones[k] = h.tombstones.stones[k].add(v[0]) + } h.tombstones = newMapTombstoneReader(h.tombstones.stones) - return writeTombstoneFile(h.dir, h.tombstones.Copy()) + + return nil } // Dir returns the directory of the block. @@ -486,6 +500,7 @@ func (a *headAppender) createSeries() { func (a *headAppender) Commit() error { defer atomic.AddUint64(&a.activeWriters, ^uint64(0)) defer putHeadAppendBuffer(a.samples) + defer a.mtx.RUnlock() a.createSeries() @@ -497,11 +512,14 @@ func (a *headAppender) Commit() error { } } + var err MultiError + // Write all new series and samples to the WAL and add it to the // in-mem database on success. - if err := a.wal.Log(a.newLabels, a.samples); err != nil { - a.mtx.RUnlock() - return err + err.Add(a.wal.LogSeries(a.newLabels)) + err.Add(a.wal.LogSamples(a.samples)) + if err.Err() != nil { + return err.Err() } total := uint64(len(a.samples)) @@ -512,8 +530,6 @@ func (a *headAppender) Commit() error { } } - a.mtx.RUnlock() - atomic.AddUint64(&a.meta.Stats.NumSamples, total) atomic.AddUint64(&a.meta.Stats.NumSeries, uint64(len(a.newSeries))) diff --git a/wal.go b/wal.go index 63975de2a1..6de9f73adf 100644 --- a/wal.go +++ b/wal.go @@ -46,8 +46,18 @@ const ( WALEntrySymbols WALEntryType = 1 WALEntrySeries WALEntryType = 2 WALEntrySamples WALEntryType = 3 + WALEntryDeletes WALEntryType = 4 ) +// SamplesCB yolo. +type SamplesCB func([]RefSample) error + +// SeriesCB yolo. 
+type SeriesCB func([]labels.Labels) error + +// DeletesCB yolo. +type DeletesCB func([]stone) error + // SegmentWAL is a write ahead log for series data. type SegmentWAL struct { mtx sync.Mutex @@ -71,15 +81,15 @@ type SegmentWAL struct { // It must be completely read before new entries are logged. type WAL interface { Reader() WALReader - Log([]labels.Labels, []RefSample) error + LogSeries([]labels.Labels) error + LogSamples([]RefSample) error + LogDeletes(TombstoneReader) error Close() error } // WALReader reads entries from a WAL. type WALReader interface { - At() ([]labels.Labels, []RefSample) - Next() bool - Err() error + Read(SeriesCB, SamplesCB, DeletesCB) error } // RefSample is a timestamp/value pair associated with a reference to a series. @@ -141,13 +151,40 @@ func (w *SegmentWAL) Reader() WALReader { } // Log writes a batch of new series labels and samples to the log. -func (w *SegmentWAL) Log(series []labels.Labels, samples []RefSample) error { +//func (w *SegmentWAL) Log(series []labels.Labels, samples []RefSample) error { +//return nil +//} + +// LogSeries writes a batch of new series labels to the log. +func (w *SegmentWAL) LogSeries(series []labels.Labels) error { if err := w.encodeSeries(series); err != nil { return err } + + if w.flushInterval <= 0 { + return w.Sync() + } + return nil +} + +// LogSamples writes a batch of new samples to the log. +func (w *SegmentWAL) LogSamples(samples []RefSample) error { if err := w.encodeSamples(samples); err != nil { return err } + + if w.flushInterval <= 0 { + return w.Sync() + } + return nil +} + +// LogDeletes write a batch of new deletes to the log. 
+func (w *SegmentWAL) LogDeletes(tr TombstoneReader) error { + if err := w.encodeDeletes(tr); err != nil { + return err + } + if w.flushInterval <= 0 { return w.Sync() } @@ -369,6 +406,7 @@ func (w *SegmentWAL) entry(et WALEntryType, flag byte, buf []byte) error { const ( walSeriesSimple = 1 walSamplesSimple = 1 + walDeletesSimple = 1 ) var walBuffers = sync.Pool{} @@ -445,6 +483,27 @@ func (w *SegmentWAL) encodeSamples(samples []RefSample) error { return w.entry(WALEntrySamples, walSamplesSimple, buf) } +func (w *SegmentWAL) encodeDeletes(tr TombstoneReader) error { + b := make([]byte, 2*binary.MaxVarintLen64) + eb := &encbuf{b: b} + buf := getWALBuffer() + for tr.Next() { + eb.reset() + s := tr.At() + eb.putUvarint32(s.ref) + eb.putUvarint(len(s.intervals)) + buf = append(buf, eb.get()...) + for _, itv := range s.intervals { + eb.reset() + eb.putVarint64(itv.mint) + eb.putVarint64(itv.maxt) + buf = append(buf, eb.get()...) + } + } + + return w.entry(WALEntryDeletes, walDeletesSimple, buf) +} + // walReader decodes and emits write ahead log entries. type walReader struct { logger log.Logger @@ -454,9 +513,15 @@ type walReader struct { buf []byte crc32 hash.Hash32 - err error - labels []labels.Labels samples []RefSample + series []labels.Labels + stones []stone + + samplesFunc SamplesCB + seriesFunc SeriesCB + deletesFunc DeletesCB + + err error } func newWALReader(w *SegmentWAL, l log.Logger) *walReader { @@ -471,18 +536,22 @@ func newWALReader(w *SegmentWAL, l log.Logger) *walReader { } } -// At returns the last decoded entry of labels or samples. -// The returned slices are only valid until the next call to Next(). Their elements -// have to be copied to preserve them. -func (r *walReader) At() ([]labels.Labels, []RefSample) { - return r.labels, r.samples -} - // Err returns the last error the reader encountered. 
func (r *walReader) Err() error { return r.err } +func (r *walReader) Read(seriesf SeriesCB, samplesf SamplesCB, deletesf DeletesCB) error { + r.samplesFunc = samplesf + r.seriesFunc = seriesf + r.deletesFunc = deletesf + + for r.next() { + } + + return r.Err() +} + // nextEntry retrieves the next entry. It is also used as a testing hook. func (r *walReader) nextEntry() (WALEntryType, byte, []byte, error) { if r.cur >= len(r.wal.files) { @@ -505,11 +574,12 @@ func (r *walReader) nextEntry() (WALEntryType, byte, []byte, error) { return et, flag, b, err } -// Next returns decodes the next entry pair and returns true +// next returns decodes the next entry pair and returns true // if it was succesful. -func (r *walReader) Next() bool { - r.labels = r.labels[:0] +func (r *walReader) next() bool { + r.series = r.series[:0] r.samples = r.samples[:0] + r.stones = r.stones[:0] if r.cur >= len(r.wal.files) { return false @@ -537,7 +607,7 @@ func (r *walReader) Next() bool { return false } r.cur++ - return r.Next() + return r.next() } if err != nil { r.err = err @@ -550,16 +620,13 @@ func (r *walReader) Next() bool { // In decoding below we never return a walCorruptionErr for now. // Those should generally be catched by entry decoding before. 
- switch et { - case WALEntrySamples: - if err := r.decodeSamples(flag, b); err != nil { - r.err = err - } case WALEntrySeries: - if err := r.decodeSeries(flag, b); err != nil { - r.err = err - } + r.err = r.decodeSeries(flag, b) + case WALEntrySamples: + r.err = r.decodeSamples(flag, b) + case WALEntryDeletes: + r.err = r.decodeDeletes(flag, b) } return r.err == nil } @@ -617,7 +684,7 @@ func (r *walReader) entry(cr io.Reader) (WALEntryType, byte, []byte, error) { if etype == 0 { return 0, 0, nil, io.EOF } - if etype != WALEntrySeries && etype != WALEntrySamples { + if etype != WALEntrySeries && etype != WALEntrySamples && etype != WALEntryDeletes { return 0, 0, nil, walCorruptionErrf("invalid entry type %d", etype) } @@ -669,12 +736,14 @@ func (r *walReader) decodeSeries(flag byte, b []byte) error { b = b[n+int(vl):] } - r.labels = append(r.labels, lset) + r.series = append(r.series, lset) } - return nil + return r.seriesFunc(r.series) } func (r *walReader) decodeSamples(flag byte, b []byte) error { + r.samples = r.samples[:] + if len(b) < 16 { return errors.Wrap(errInvalidSize, "header length") } @@ -710,5 +779,30 @@ func (r *walReader) decodeSamples(flag byte, b []byte) error { r.samples = append(r.samples, smpl) } - return nil + return r.samplesFunc(r.samples) +} + +func (r *walReader) decodeDeletes(flag byte, b []byte) error { + db := &decbuf{b: b} + r.samples = r.samples[:] + + for db.len() > 0 { + var s stone + s.ref = uint32(db.uvarint()) + l := db.uvarint() + if db.err() != nil { + return db.err() + } + + for i := 0; i < l; i++ { + s.intervals = append(s.intervals, interval{db.varint64(), db.varint64()}) + if db.err() != nil { + return db.err() + } + } + + r.stones = append(r.stones, s) + } + + return r.deletesFunc(r.stones) } diff --git a/wal_test.go b/wal_test.go index c2988e7e04..3f622df722 100644 --- a/wal_test.go +++ b/wal_test.go @@ -149,6 +149,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { var ( recordedSeries [][]labels.Labels 
recordedSamples [][]RefSample + recordedDeletes [][]stone ) var totalSamples int @@ -166,32 +167,51 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { var ( resultSeries [][]labels.Labels resultSamples [][]RefSample + resultDeletes [][]stone ) - for r.Next() { - lsets, smpls := r.At() - + serf := func(lsets []labels.Labels) error { if len(lsets) > 0 { clsets := make([]labels.Labels, len(lsets)) copy(clsets, lsets) resultSeries = append(resultSeries, clsets) } + + return nil + } + smplf := func(smpls []RefSample) error { if len(smpls) > 0 { csmpls := make([]RefSample, len(smpls)) copy(csmpls, smpls) resultSamples = append(resultSamples, csmpls) } + + return nil } - require.NoError(t, r.Err()) + + // TODO: Add this. + delf := func(stones []stone) error { + if len(stones) > 0 { + cstones := make([]stone, len(stones)) + copy(cstones, stones) + resultDeletes = append(resultDeletes, cstones) + } + + return nil + } + + require.NoError(t, r.Read(serf, smplf, delf)) require.Equal(t, recordedSamples, resultSamples) require.Equal(t, recordedSeries, resultSeries) + require.Equal(t, recordedDeletes, resultDeletes) series := series[k : k+(numMetrics/iterations)] // Insert in batches and generate different amounts of samples for each. 
for i := 0; i < len(series); i += stepSize { var samples []RefSample + stones := map[uint32]intervals{} for j := 0; j < i*10; j++ { samples = append(samples, RefSample{ @@ -201,9 +221,16 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { }) } + for j := 0; j < i*20; j++ { + ts := rand.Int63() + stones[rand.Uint32()] = intervals{{ts, ts + rand.Int63n(10000)}} + } + lbls := series[i : i+stepSize] - require.NoError(t, w.Log(lbls, samples)) + require.NoError(t, w.LogSeries(lbls)) + require.NoError(t, w.LogSamples(samples)) + require.NoError(t, w.LogDeletes(newMapTombstoneReader(stones))) if len(lbls) > 0 { recordedSeries = append(recordedSeries, lbls) @@ -212,6 +239,16 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { recordedSamples = append(recordedSamples, samples) totalSamples += len(samples) } + if len(stones) > 0 { + tr := newMapTombstoneReader(stones) + newdels := []stone{} + for tr.Next() { + newdels = append(newdels, tr.At()) + } + require.NoError(t, tr.Err()) + + recordedDeletes = append(recordedDeletes, newdels) + } } require.NoError(t, w.Close()) @@ -292,13 +329,13 @@ func TestWALRestoreCorrupted(t *testing.T) { w, err := OpenSegmentWAL(dir, nil, 0) require.NoError(t, err) - require.NoError(t, w.Log(nil, []RefSample{{T: 1, V: 2}})) - require.NoError(t, w.Log(nil, []RefSample{{T: 2, V: 3}})) + require.NoError(t, w.LogSamples([]RefSample{{T: 1, V: 2}})) + require.NoError(t, w.LogSamples([]RefSample{{T: 2, V: 3}})) require.NoError(t, w.cut()) - require.NoError(t, w.Log(nil, []RefSample{{T: 3, V: 4}})) - require.NoError(t, w.Log(nil, []RefSample{{T: 5, V: 6}})) + require.NoError(t, w.LogSamples([]RefSample{{T: 3, V: 4}})) + require.NoError(t, w.LogSamples([]RefSample{{T: 5, V: 6}})) require.NoError(t, w.Close()) @@ -314,17 +351,28 @@ func TestWALRestoreCorrupted(t *testing.T) { require.NoError(t, err) r := w2.Reader() + serf := func(l []labels.Labels) error { + require.Equal(t, 0, len(l)) + return nil + } + delf := func([]stone) error { return nil } - 
require.True(t, r.Next()) - l, s := r.At() - require.Equal(t, 0, len(l)) - require.Equal(t, []RefSample{{T: 1, V: 2}}, s) + // Weird hack to check order of reads. + i := 0 + samplf := func(s []RefSample) error { + if i == 0 { + require.Equal(t, []RefSample{{T: 1, V: 2}}, s) + i++ + } else { + require.Equal(t, []RefSample{{T: 99, V: 100}}, s) + } - // Truncation should happen transparently and not cause an error. - require.False(t, r.Next()) - require.Nil(t, r.Err()) + return nil + } - require.NoError(t, w2.Log(nil, []RefSample{{T: 99, V: 100}})) + require.NoError(t, r.Read(serf, samplf, delf)) + + require.NoError(t, w2.LogSamples([]RefSample{{T: 99, V: 100}})) require.NoError(t, w2.Close()) // We should see the first valid entry and the new one, everything after @@ -334,18 +382,8 @@ func TestWALRestoreCorrupted(t *testing.T) { r = w3.Reader() - require.True(t, r.Next()) - l, s = r.At() - require.Equal(t, 0, len(l)) - require.Equal(t, []RefSample{{T: 1, V: 2}}, s) - - require.True(t, r.Next()) - l, s = r.At() - require.Equal(t, 0, len(l)) - require.Equal(t, []RefSample{{T: 99, V: 100}}, s) - - require.False(t, r.Next()) - require.Nil(t, r.Err()) + i = 0 + require.NoError(t, r.Read(serf, samplf, delf)) }) } } From 9d388941ef94233926e09ab89ca01f94c69899aa Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Tue, 23 May 2017 17:27:45 +0530 Subject: [PATCH 17/25] Add checksums and magic number to the tombstones. 
Signed-off-by: Goutham Veeramachaneni --- Documentation/format/tombstones.md | 55 ++++++++++++++++++++++++++++++ tombstones.go | 55 +++++++++++++++++++++++++----- 2 files changed, 101 insertions(+), 9 deletions(-) create mode 100644 Documentation/format/tombstones.md diff --git a/Documentation/format/tombstones.md b/Documentation/format/tombstones.md new file mode 100644 index 0000000000..e8da95aecd --- /dev/null +++ b/Documentation/format/tombstones.md @@ -0,0 +1,55 @@ +# Tombstones Disk Format + +The following describes the format of a tombstones file, which is placed in the directory of a block. + +The last 8 bytes specify the offset to the start of the Stones section. +The stones section is 0 padded to a multiple of 4 for fast scans. + +``` +┌────────────────────────────┬─────────────────────┐ +│ magic(0x130BA30) <4b> │ version(1) <1 byte> │ +├────────────────────────────┴─────────────────────┤ +│ ┌──────────────────────────────────────────────┐ │ +│ │ Ranges 1 │ │ +│ ├──────────────────────────────────────────────┤ │ +│ │ ... │ │ +│ ├──────────────────────────────────────────────┤ │ +│ │ Ranges N │ │ +│ ├──────────────────────────────────────────────┤ │ +│ │ Stones │ │ +│ ├──────────────────────────────────────────────┤ │ +│ │ Ref(stones start)<8b> │ │ +│ └──────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────┘ +``` + +# Ranges + +``` +┌──────────────────────────────────────────┐ +│ ┌──────────────────────────────────────┐ │ +│ │ #ranges │ │ +│ ├───────────────────┬──────────────────┤ │ +│ │ mint │ maxt │ │ +│ ├───────────────────┴──────────────────┤ │ +│ │ . . . 
│ │ +│ ├──────────────────────────────────────┤ │ +│ │ CRC32 <4b> │ │ +│ └──────────────────────────────────────┘ │ +└──────────────────────────────────────────┘ +``` + +# Stones +``` +┌──────────────────────────────────────────┐ +│ ┌──────────────────────────────────────┐ │ +│ │ #stones <4b> │ │ +│ ├───────────────────┬──────────────────┤ │ +│ │ ref <4b> │ offset <8b> │ │ +│ ├───────────────────┴──────────────────┤ │ +│ │ . . . │ │ +│ └──────────────────────────────────────┘ │ +└──────────────────────────────────────────┘ +``` + +The offset here is the offset to the relevant ranges. diff --git a/tombstones.go b/tombstones.go index 96916d23ab..ef72b4c289 100644 --- a/tombstones.go +++ b/tombstones.go @@ -2,6 +2,8 @@ package tsdb import ( "encoding/binary" + "fmt" + "hash/crc32" "io/ioutil" "os" "path/filepath" @@ -10,6 +12,13 @@ import ( const tombstoneFilename = "tombstones" +const ( + // MagicTombstone is 4 bytes at the head of a tombstone file. + MagicTombstone = 0x130BA30 + + tombstoneFormatV1 = 1 +) + func readTombstoneFile(dir string) (TombstoneReader, error) { return newTombStoneReader(dir) } @@ -17,6 +26,7 @@ func readTombstoneFile(dir string) (TombstoneReader, error) { func writeTombstoneFile(dir string, tr TombstoneReader) error { path := filepath.Join(dir, tombstoneFilename) tmp := path + ".tmp" + hash := crc32.New(crc32.MakeTable(crc32.Castagnoli)) f, err := os.Create(tmp) if err != nil { @@ -28,6 +38,16 @@ func writeTombstoneFile(dir string, tr TombstoneReader) error { pos := int64(0) buf := encbuf{b: make([]byte, 2*binary.MaxVarintLen64)} + buf.reset() + // Write the meta. + buf.putBE32(MagicTombstone) + buf.putByte(tombstoneFormatV1) + n, err := f.Write(buf.get()) + if err != nil { + return err + } + pos += int64(n) + for tr.Next() { s := tr.At() @@ -36,29 +56,40 @@ func writeTombstoneFile(dir string, tr TombstoneReader) error { // Write the ranges. 
buf.reset() - buf.putVarint64(int64(len(s.intervals))) + buf.putUvarint(len(s.intervals)) n, err := f.Write(buf.get()) if err != nil { return err } pos += int64(n) + buf.reset() for _, r := range s.intervals { - buf.reset() buf.putVarint64(r.mint) buf.putVarint64(r.maxt) - n, err = f.Write(buf.get()) - if err != nil { - return err - } - pos += int64(n) } + buf.putHash(hash) + + n, err = f.Write(buf.get()) + if err != nil { + return err + } + pos += int64(n) } if err := tr.Err(); err != nil { return err } // Write the offset table. + // Pad first. + if p := 4 - (int(pos) % 4); p != 0 { + if _, err := f.Write(make([]byte, p)); err != nil { + return err + } + + pos += int64(p) + } + buf.reset() buf.putBE32int(len(refs)) if _, err := f.Write(buf.get()); err != nil { @@ -123,8 +154,13 @@ func newTombStoneReader(dir string) (*tombstoneReader, error) { return nil, err } + d := &decbuf{b: b} + if mg := d.be32(); mg != MagicTombstone { + return nil, fmt.Errorf("invalid magic number %x", mg) + } + offsetBytes := b[len(b)-8:] - d := &decbuf{b: offsetBytes} + d = &decbuf{b: offsetBytes} off := d.be64int64() if err := d.err(); err != nil { return nil, err @@ -158,7 +194,7 @@ func (t *tombstoneReader) Next() bool { off := d.be64int64() d = &decbuf{b: t.b[off:]} - numRanges := d.varint64() + numRanges := d.uvarint() if err := d.err(); err != nil { t.err = err return false @@ -176,6 +212,7 @@ func (t *tombstoneReader) Next() bool { dranges = append(dranges, interval{mint, maxt}) } + // TODO(gouthamve): Verify checksum. t.stones = t.stones[12:] t.cur = stone{ref: ref, intervals: dranges} return true From 31cf9394482507a721374264d76b70e4aea9270d Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Tue, 23 May 2017 17:37:04 +0530 Subject: [PATCH 18/25] Add NumTombstones to BlockMeta. 
Signed-off-by: Goutham Veeramachaneni --- block.go | 11 ++++++++++- head.go | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/block.go b/block.go index 41e5c8b39d..45a63ae5ed 100644 --- a/block.go +++ b/block.go @@ -95,6 +95,9 @@ type BlockMeta struct { Compaction struct { Generation int `json:"generation"` } `json:"compaction"` + + // The number of tombstones. + NumTombstones int64 `json:"numTombstones"` } const ( @@ -278,7 +281,13 @@ Outer: str := newMapTombstoneReader(delStones) tombreader := newMergedTombstoneReader(tr, str) - return writeTombstoneFile(pb.dir, tombreader) + if err := writeTombstoneFile(pb.dir, tombreader); err != nil { + return err + } + + // TODO(gouthamve): This counts any common tombstones too. But gives the same heuristic. + pb.meta.NumTombstones += int64(len(delStones)) + return writeMetaFile(pb.dir, &pb.meta) } func chunkDir(dir string) string { return filepath.Join(dir, "chunks") } diff --git a/head.go b/head.go index 2eae0952ec..8c2cf49052 100644 --- a/head.go +++ b/head.go @@ -267,6 +267,7 @@ Outer: } h.tombstones = newMapTombstoneReader(h.tombstones.stones) + h.meta.NumTombstones = int64(len(h.tombstones.stones)) return nil } From 9bf7aa9af1f70f26b9316fc6d28ea5ec53039ce1 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Tue, 23 May 2017 18:13:30 +0530 Subject: [PATCH 19/25] Misc. fixes incorporating feedback. Signed-off-by: Goutham Veeramachaneni --- db.go | 16 ++++------- db_test.go | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++ head.go | 3 +-- head_test.go | 3 +-- wal.go | 6 ++--- 5 files changed, 85 insertions(+), 18 deletions(-) diff --git a/db.go b/db.go index 03335c7101..ae05157e91 100644 --- a/db.go +++ b/db.go @@ -120,7 +120,7 @@ type DB struct { donec chan struct{} stopc chan struct{} - // compMtx is used to control compactions and deletions. + // cmtx is used to control compactions and deletions. 
cmtx sync.Mutex } @@ -671,17 +671,14 @@ func (db *DB) Delete(mint, maxt int64, ms ...labels.Matcher) error { db.cmtx.Lock() defer db.cmtx.Unlock() - db.headmtx.RLock() + db.mtx.RLock() blocks := db.blocksForInterval(mint, maxt) - db.headmtx.RUnlock() + db.mtx.RUnlock() var g errgroup.Group for _, b := range blocks { - f := func() error { - return b.Delete(mint, maxt, ms...) - } - g.Go(f) + g.Go(func() error { return b.Delete(mint, maxt, ms...) }) } if err := g.Wait(); err != nil { @@ -705,10 +702,7 @@ func (db *DB) appendable() (r []headBlock) { func intervalOverlap(amin, amax, bmin, bmax int64) bool { // Checks Overlap: http://stackoverflow.com/questions/3269434/ - if amin <= bmax && bmin <= amax { - return true - } - return false + return amin <= bmax && bmin <= amax } func intervalContains(min, max, t int64) bool { diff --git a/db_test.go b/db_test.go index 1fa4d72706..e410dfceb7 100644 --- a/db_test.go +++ b/db_test.go @@ -15,6 +15,7 @@ package tsdb import ( "io/ioutil" + "math/rand" "os" "testing" @@ -141,3 +142,77 @@ func TestDBAppenderAddRef(t *testing.T) { err = app2.AddFast(string(refb), 1, 1) require.EqualError(t, errors.Cause(err), ErrNotFound.Error()) } + +func TestDeleteSimple(t *testing.T) { + numSamples := int64(10) + + tmpdir, _ := ioutil.TempDir("", "test") + defer os.RemoveAll(tmpdir) + + db, err := Open(tmpdir, nil, nil, nil) + require.NoError(t, err) + app := db.Appender() + + smpls := make([]float64, numSamples) + for i := int64(0); i < numSamples; i++ { + smpls[i] = rand.Float64() + app.Add(labels.Labels{{"a", "b"}}, i, smpls[i]) + } + + require.NoError(t, app.Commit()) + cases := []struct { + intervals intervals + remaint []int64 + }{ + { + intervals: intervals{{1, 3}, {4, 7}}, + remaint: []int64{0, 8, 9}, + }, + } + +Outer: + for _, c := range cases { + // TODO(gouthamve): Reset the tombstones somehow. + // Delete the ranges. 
+ for _, r := range c.intervals { + require.NoError(t, db.Delete(r.mint, r.maxt, labels.NewEqualMatcher("a", "b"))) + } + + // Compare the result. + q := db.Querier(0, numSamples) + res := q.Select(labels.NewEqualMatcher("a", "b")) + + expSamples := make([]sample, 0, len(c.remaint)) + for _, ts := range c.remaint { + expSamples = append(expSamples, sample{ts, smpls[ts]}) + } + + expss := newListSeriesSet([]Series{ + newSeries(map[string]string{"a": "b"}, expSamples), + }) + + if len(expSamples) == 0 { + require.False(t, res.Next()) + continue + } + + for { + eok, rok := expss.Next(), res.Next() + require.Equal(t, eok, rok, "next") + + if !eok { + continue Outer + } + sexp := expss.At() + sres := res.At() + + require.Equal(t, sexp.Labels(), sres.Labels(), "labels") + + smplExp, errExp := expandSeriesIterator(sexp.Iterator()) + smplRes, errRes := expandSeriesIterator(sres.Iterator()) + + require.Equal(t, errExp, errRes, "samples error") + require.Equal(t, smplExp, smplRes, "samples") + } + } +} diff --git a/head.go b/head.go index 8c2cf49052..72aa67a6da 100644 --- a/head.go +++ b/head.go @@ -153,8 +153,7 @@ func (h *HeadBlock) init() error { deletesFunc := func(stones []stone) error { for _, s := range stones { for _, itv := range s.intervals { - // TODO(gouthamve): Recheck. - h.tombstones.stones[s.ref].add(itv) + h.tombstones.stones[s.ref] = h.tombstones.stones[s.ref].add(itv) } } diff --git a/head_test.go b/head_test.go index 07fbe377e8..0463d84306 100644 --- a/head_test.go +++ b/head_test.go @@ -382,7 +382,7 @@ func TestHeadBlock_e2e(t *testing.T) { return } -func TestDeleteSimple(t *testing.T) { +func TestHBDeleteSimple(t *testing.T) { numSamples := int64(10) dir, _ := ioutil.TempDir("", "test") @@ -427,7 +427,6 @@ func TestDeleteSimple(t *testing.T) { Outer: for _, c := range cases { // Reset the tombstones. - writeTombstoneFile(hb.dir, newEmptyTombstoneReader()) hb.tombstones = newEmptyTombstoneReader() // Delete the ranges. 
diff --git a/wal.go b/wal.go index 6de9f73adf..0e503e7b8d 100644 --- a/wal.go +++ b/wal.go @@ -49,13 +49,13 @@ const ( WALEntryDeletes WALEntryType = 4 ) -// SamplesCB yolo. +// SamplesCB is the callback after reading samples. type SamplesCB func([]RefSample) error -// SeriesCB yolo. +// SeriesCB is the callback after reading series. type SeriesCB func([]labels.Labels) error -// DeletesCB yolo. +// DeletesCB is the callback after reading deletes. type DeletesCB func([]stone) error // SegmentWAL is a write ahead log for series data. From f29fb62fbaf61c8c9430e8f0a4a04fa508f89b15 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Wed, 24 May 2017 11:24:24 +0530 Subject: [PATCH 20/25] Make TombstoneReader a Getter. Signed-off-by: Goutham Veeramachaneni --- block.go | 27 ++-- compact.go | 10 +- head.go | 16 +-- querier.go | 6 +- querier_test.go | 6 +- tombstones.go | 343 +++++---------------------------------------- tombstones_test.go | 231 +----------------------------- wal.go | 15 +- wal_test.go | 10 +- 9 files changed, 79 insertions(+), 585 deletions(-) diff --git a/block.go b/block.go index 45a63ae5ed..82dfb65942 100644 --- a/block.go +++ b/block.go @@ -162,7 +162,7 @@ type persistedBlock struct { indexr *indexReader // For tombstones. - tombstones *mapTombstoneReader + tombstones tombstoneReader } func newPersistedBlock(dir string) (*persistedBlock, error) { @@ -180,25 +180,18 @@ func newPersistedBlock(dir string) (*persistedBlock, error) { return nil, err } - tr, err := readTombstoneFile(dir) + tr, err := readTombstones(dir) if err != nil { return nil, err } - ts := make(map[uint32]intervals) - for tr.Next() { - s := tr.At() - ts[s.ref] = s.intervals - } - pb := &persistedBlock{ dir: dir, meta: *meta, chunkr: cr, indexr: ir, - // TODO(gouthamve): We will be sorting the refs again internally, is it a big deal? 
- tombstones: newMapTombstoneReader(ts), + tombstones: tr, } return pb, nil } @@ -230,7 +223,7 @@ func (pb *persistedBlock) Dir() string { return pb.dir } func (pb *persistedBlock) Index() IndexReader { return pb.indexr } func (pb *persistedBlock) Chunks() ChunkReader { return pb.chunkr } func (pb *persistedBlock) Tombstones() TombstoneReader { - return pb.tombstones.Copy() + return pb.tombstones } func (pb *persistedBlock) Meta() BlockMeta { return pb.meta } @@ -277,16 +270,18 @@ Outer: } // Merge the current and new tombstones. - tr := pb.Tombstones() - str := newMapTombstoneReader(delStones) - tombreader := newMergedTombstoneReader(tr, str) + for k, v := range pb.tombstones { + for _, itv := range v { + delStones[k] = delStones[k].add(itv) + } + } + tombreader := newTombstoneReader(delStones) if err := writeTombstoneFile(pb.dir, tombreader); err != nil { return err } - // TODO(gouthamve): This counts any common tombstones too. But gives the same heuristic. - pb.meta.NumTombstones += int64(len(delStones)) + pb.meta.NumTombstones = int64(len(delStones)) return writeMetaFile(pb.dir, &pb.meta) } diff --git a/compact.go b/compact.go index 948e2f8245..d139be0268 100644 --- a/compact.go +++ b/compact.go @@ -432,14 +432,8 @@ func (c *compactionSeriesSet) Next() bool { return false } - if c.tombstones.Seek(c.p.At()) { - s := c.tombstones.At() - if c.p.At() == s.ref { - c.intervals = s.intervals - } else { - c.intervals = nil - } - } + c.intervals = c.tombstones.At(c.p.At()) + c.l, c.c, c.err = c.index.Series(c.p.At()) if c.err != nil { return false diff --git a/head.go b/head.go index 72aa67a6da..704c0b222b 100644 --- a/head.go +++ b/head.go @@ -69,7 +69,7 @@ type HeadBlock struct { values map[string]stringset // label names to possible values postings *memPostings // postings lists for terms - tombstones *mapTombstoneReader + tombstones tombstoneReader meta BlockMeta } @@ -153,7 +153,7 @@ func (h *HeadBlock) init() error { deletesFunc := func(stones []stone) error { for 
_, s := range stones { for _, itv := range s.intervals { - h.tombstones.stones[s.ref] = h.tombstones.stones[s.ref].add(itv) + h.tombstones[s.ref] = h.tombstones[s.ref].add(itv) } } @@ -163,7 +163,6 @@ func (h *HeadBlock) init() error { if err := r.Read(seriesFunc, samplesFunc, deletesFunc); err != nil { return errors.Wrap(err, "consume WAL") } - h.tombstones = newMapTombstoneReader(h.tombstones.stones) return nil } @@ -221,7 +220,7 @@ func (h *HeadBlock) Meta() BlockMeta { // Tombstones returns the TombstoneReader against the block. func (h *HeadBlock) Tombstones() TombstoneReader { - return h.tombstones.Copy() + return h.tombstones } // Delete implements headBlock. @@ -257,16 +256,15 @@ Outer: if p.Err() != nil { return p.Err() } - if err := h.wal.LogDeletes(newMapTombstoneReader(newStones)); err != nil { + if err := h.wal.LogDeletes(newTombstoneReader(newStones)); err != nil { return err } for k, v := range newStones { - h.tombstones.stones[k] = h.tombstones.stones[k].add(v[0]) + h.tombstones[k] = h.tombstones[k].add(v[0]) } - h.tombstones = newMapTombstoneReader(h.tombstones.stones) - h.meta.NumTombstones = int64(len(h.tombstones.stones)) + h.meta.NumTombstones = int64(len(h.tombstones)) return nil } @@ -296,7 +294,7 @@ func (h *HeadBlock) Querier(mint, maxt int64) Querier { maxt: maxt, index: h.Index(), chunks: h.Chunks(), - tombstones: h.Tombstones().Copy(), + tombstones: h.Tombstones(), postingsMapper: func(p Postings) Postings { ep := make([]uint32, 0, 64) diff --git a/querier.go b/querier.go index 28bf3d21a0..49cd013f43 100644 --- a/querier.go +++ b/querier.go @@ -151,7 +151,7 @@ func (q *blockQuerier) Select(ms ...labels.Matcher) SeriesSet { index: q.index, absent: absent, - tombstones: q.tombstones.Copy(), + tombstones: q.tombstones, }, chunks: q.chunks, mint: q.mint, @@ -412,9 +412,9 @@ Outer: s.lset = lset s.chks = chunks - if s.tombstones.Seek(ref) && s.tombstones.At().ref == ref { - s.intervals = s.tombstones.At().intervals + s.intervals = 
s.tombstones.At(s.p.At()) + if len(s.intervals) > 0 { // Only those chunks that are not entirely deleted. chks := make([]*ChunkMeta, 0, len(s.chks)) for _, chk := range s.chks { diff --git a/querier_test.go b/querier_test.go index d63c36d930..e03570fad6 100644 --- a/querier_test.go +++ b/querier_test.go @@ -432,7 +432,7 @@ func TestBlockQuerierDelete(t *testing.T) { chunks [][]sample } - tombstones *mapTombstoneReader + tombstones tombstoneReader queries []query }{ data: []struct { @@ -480,7 +480,7 @@ func TestBlockQuerierDelete(t *testing.T) { }, }, }, - tombstones: newMapTombstoneReader( + tombstones: newTombstoneReader( map[uint32]intervals{ 1: intervals{{1, 3}}, 2: intervals{{1, 3}, {6, 10}}, @@ -553,7 +553,7 @@ Outer: querier := &blockQuerier{ index: ir, chunks: cr, - tombstones: cases.tombstones.Copy(), + tombstones: cases.tombstones, mint: c.mint, maxt: c.maxt, diff --git a/tombstones.go b/tombstones.go index ef72b4c289..9e6c39931b 100644 --- a/tombstones.go +++ b/tombstones.go @@ -7,7 +7,6 @@ import ( "io/ioutil" "os" "path/filepath" - "sort" ) const tombstoneFilename = "tombstones" @@ -19,11 +18,7 @@ const ( tombstoneFormatV1 = 1 ) -func readTombstoneFile(dir string) (TombstoneReader, error) { - return newTombStoneReader(dir) -} - -func writeTombstoneFile(dir string, tr TombstoneReader) error { +func writeTombstoneFile(dir string, tr tombstoneReader) error { path := filepath.Join(dir, tombstoneFilename) tmp := path + ".tmp" hash := crc32.New(crc32.MakeTable(crc32.Castagnoli)) @@ -48,15 +43,13 @@ func writeTombstoneFile(dir string, tr TombstoneReader) error { } pos += int64(n) - for tr.Next() { - s := tr.At() - - refs = append(refs, s.ref) - stoneOff[s.ref] = pos + for k, v := range tr { + refs = append(refs, k) + stoneOff[k] = pos // Write the ranges. 
buf.reset() - buf.putUvarint(len(s.intervals)) + buf.putUvarint(len(v)) n, err := f.Write(buf.get()) if err != nil { return err @@ -64,7 +57,7 @@ func writeTombstoneFile(dir string, tr TombstoneReader) error { pos += int64(n) buf.reset() - for _, r := range s.intervals { + for _, r := range v { buf.putVarint64(r.mint) buf.putVarint64(r.maxt) } @@ -76,9 +69,6 @@ func writeTombstoneFile(dir string, tr TombstoneReader) error { } pos += int64(n) } - if err := tr.Err(); err != nil { - return err - } // Write the offset table. // Pad first. @@ -130,25 +120,10 @@ type stone struct { // TombstoneReader is the iterator over tombstones. type TombstoneReader interface { - Next() bool - Seek(ref uint32) bool - At() stone - // Copy copies the current reader state. Changes to the copy will not affect parent. - Copy() TombstoneReader - Err() error + At(ref uint32) intervals } -type tombstoneReader struct { - stones []byte - - cur stone - - b []byte - err error -} - -func newTombStoneReader(dir string) (*tombstoneReader, error) { - // TODO(gouthamve): MMAP? +func readTombstones(dir string) (tombstoneReader, error) { b, err := ioutil.ReadFile(filepath.Join(dir, tombstoneFilename)) if err != nil { return nil, err @@ -173,292 +148,50 @@ func newTombStoneReader(dir string) (*tombstoneReader, error) { } off += 4 // For the numStones which has been read. 
- return &tombstoneReader{ - stones: b[off : off+int64(numStones*12)], + stones := b[off : off+int64(numStones*12)] + stonesMap := make(map[uint32]intervals) + for len(stones) >= 12 { + d := &decbuf{b: stones[:12]} + ref := d.be32() + off := d.be64int64() - b: b, - }, nil -} - -func (t *tombstoneReader) Next() bool { - if t.err != nil { - return false - } - - if len(t.stones) < 12 { - return false - } - - d := &decbuf{b: t.stones[:12]} - ref := d.be32() - off := d.be64int64() - - d = &decbuf{b: t.b[off:]} - numRanges := d.uvarint() - if err := d.err(); err != nil { - t.err = err - return false - } - - dranges := make(intervals, 0, numRanges) - for i := 0; i < int(numRanges); i++ { - mint := d.varint64() - maxt := d.varint64() + d = &decbuf{b: b[off:]} + numRanges := d.uvarint() if err := d.err(); err != nil { - t.err = err - return false + return nil, err } - dranges = append(dranges, interval{mint, maxt}) - } + dranges := make(intervals, 0, numRanges) + for i := 0; i < int(numRanges); i++ { + mint := d.varint64() + maxt := d.varint64() + if err := d.err(); err != nil { + return nil, err + } - // TODO(gouthamve): Verify checksum. 
- t.stones = t.stones[12:] - t.cur = stone{ref: ref, intervals: dranges} - return true -} - -func (t *tombstoneReader) Seek(ref uint32) bool { - i := sort.Search(len(t.stones)/12, func(i int) bool { - x := binary.BigEndian.Uint32(t.stones[i*12:]) - return x >= ref - }) - - if i*12 < len(t.stones) { - t.stones = t.stones[i*12:] - return t.Next() - } - - t.stones = nil - return false -} - -func (t *tombstoneReader) At() stone { - return t.cur -} - -func (t *tombstoneReader) Copy() TombstoneReader { - return &tombstoneReader{ - stones: t.stones[:], - cur: t.cur, - - b: t.b, - } -} - -func (t *tombstoneReader) Err() error { - return t.err -} - -type mapTombstoneReader struct { - refs []uint32 - cur uint32 - - stones map[uint32]intervals -} - -func newMapTombstoneReader(ts map[uint32]intervals) *mapTombstoneReader { - refs := make([]uint32, 0, len(ts)) - for k := range ts { - refs = append(refs, k) - } - - sort.Sort(uint32slice(refs)) - return &mapTombstoneReader{stones: ts, refs: refs} -} - -func newEmptyTombstoneReader() *mapTombstoneReader { - return &mapTombstoneReader{stones: make(map[uint32]intervals)} -} - -func (t *mapTombstoneReader) Next() bool { - if len(t.refs) > 0 { - t.cur = t.refs[0] - t.refs = t.refs[1:] - return true - } - - t.cur = 0 - return false -} - -func (t *mapTombstoneReader) Seek(ref uint32) bool { - // If the current value satisfies, then return. - if t.cur >= ref { - return true - } - - // Do binary search between current position and end. 
- i := sort.Search(len(t.refs), func(i int) bool { - return t.refs[i] >= ref - }) - if i < len(t.refs) { - t.cur = t.refs[i] - t.refs = t.refs[i+1:] - return true - } - t.refs = nil - return false -} - -func (t *mapTombstoneReader) At() stone { - return stone{ref: t.cur, intervals: t.stones[t.cur]} -} - -func (t *mapTombstoneReader) Copy() TombstoneReader { - return &mapTombstoneReader{ - refs: t.refs[:], - cur: t.cur, - - stones: t.stones, - } -} - -func (t *mapTombstoneReader) Err() error { - return nil -} - -type simpleTombstoneReader struct { - refs []uint32 - cur uint32 - - intervals intervals -} - -func newSimpleTombstoneReader(refs []uint32, dranges intervals) *simpleTombstoneReader { - return &simpleTombstoneReader{refs: refs, intervals: dranges} -} - -func (t *simpleTombstoneReader) Next() bool { - if len(t.refs) > 0 { - t.cur = t.refs[0] - t.refs = t.refs[1:] - return true - } - - t.cur = 0 - return false -} - -func (t *simpleTombstoneReader) Seek(ref uint32) bool { - // If the current value satisfies, then return. - if t.cur >= ref { - return true - } - - // Do binary search between current position and end. 
- i := sort.Search(len(t.refs), func(i int) bool { - return t.refs[i] >= ref - }) - if i < len(t.refs) { - t.cur = t.refs[i] - t.refs = t.refs[i+1:] - return true - } - t.refs = nil - return false -} - -func (t *simpleTombstoneReader) At() stone { - return stone{ref: t.cur, intervals: t.intervals} -} - -func (t *simpleTombstoneReader) Copy() TombstoneReader { - return &simpleTombstoneReader{refs: t.refs[:], cur: t.cur, intervals: t.intervals} -} - -func (t *simpleTombstoneReader) Err() error { - return nil -} - -type mergedTombstoneReader struct { - a, b TombstoneReader - cur stone - - initialized bool - aok, bok bool -} - -func newMergedTombstoneReader(a, b TombstoneReader) *mergedTombstoneReader { - return &mergedTombstoneReader{a: a, b: b} -} - -func (t *mergedTombstoneReader) Next() bool { - if !t.initialized { - t.aok = t.a.Next() - t.bok = t.b.Next() - t.initialized = true - } - - if !t.aok && !t.bok { - return false - } - - if !t.aok { - t.cur = t.b.At() - t.bok = t.b.Next() - return true - } - if !t.bok { - t.cur = t.a.At() - t.aok = t.a.Next() - return true - } - - acur, bcur := t.a.At(), t.b.At() - - if acur.ref < bcur.ref { - t.cur = acur - t.aok = t.a.Next() - } else if acur.ref > bcur.ref { - t.cur = bcur - t.bok = t.b.Next() - } else { - // Merge time ranges. - for _, r := range bcur.intervals { - acur.intervals = acur.intervals.add(r) + dranges = append(dranges, interval{mint, maxt}) } - t.cur = acur - t.aok = t.a.Next() - t.bok = t.b.Next() - } - return true -} - -func (t *mergedTombstoneReader) Seek(ref uint32) bool { - if t.cur.ref >= ref { - return true + // TODO(gouthamve): Verify checksum. 
+ stones = stones[12:] + stonesMap[ref] = dranges } - t.aok = t.a.Seek(ref) - t.bok = t.b.Seek(ref) - t.initialized = true - - return t.Next() -} -func (t *mergedTombstoneReader) At() stone { - return t.cur + return newTombstoneReader(stonesMap), nil } -func (t *mergedTombstoneReader) Copy() TombstoneReader { - return &mergedTombstoneReader{ - a: t.a.Copy(), - b: t.b.Copy(), +type tombstoneReader map[uint32]intervals - cur: t.cur, - - initialized: t.initialized, - aok: t.aok, - bok: t.bok, - } +func newTombstoneReader(ts map[uint32]intervals) tombstoneReader { + return tombstoneReader(ts) } -func (t *mergedTombstoneReader) Err() error { - if t.a.Err() != nil { - return t.a.Err() - } - return t.b.Err() +func newEmptyTombstoneReader() tombstoneReader { + return tombstoneReader(make(map[uint32]intervals)) +} + +func (t tombstoneReader) At(ref uint32) intervals { + return t[ref] } type interval struct { diff --git a/tombstones_test.go b/tombstones_test.go index 1807651c09..525d825f7d 100644 --- a/tombstones_test.go +++ b/tombstones_test.go @@ -4,7 +4,6 @@ import ( "io/ioutil" "math/rand" "os" - "sort" "testing" "time" @@ -31,20 +30,13 @@ func TestWriteAndReadbackTombStones(t *testing.T) { stones[ref] = dranges } - require.NoError(t, writeTombstoneFile(tmpdir, newMapTombstoneReader(stones))) + require.NoError(t, writeTombstoneFile(tmpdir, newTombstoneReader(stones))) - restr, err := readTombstoneFile(tmpdir) + restr, err := readTombstones(tmpdir) require.NoError(t, err) - exptr := newMapTombstoneReader(stones) + exptr := newTombstoneReader(stones) // Compare the two readers. 
- for restr.Next() { - require.True(t, exptr.Next()) - - require.Equal(t, exptr.At(), restr.At()) - } - require.False(t, exptr.Next()) - require.NoError(t, restr.Err()) - require.NoError(t, exptr.Err()) + require.Equal(t, restr, exptr) } func TestAddingNewIntervals(t *testing.T) { @@ -116,218 +108,3 @@ func TestAddingNewIntervals(t *testing.T) { } return } - -func TestTombstoneReadersSeek(t *testing.T) { - // This is assuming that the listPostings is perfect. - table := struct { - m map[uint32]intervals - - cases []uint32 - }{ - m: map[uint32]intervals{ - 2: intervals{{1, 2}}, - 3: intervals{{1, 4}, {5, 6}}, - 4: intervals{{10, 15}, {16, 20}}, - 5: intervals{{1, 4}, {5, 6}}, - 50: intervals{{10, 20}, {35, 50}}, - 600: intervals{{100, 2000}}, - 1000: intervals{}, - 1500: intervals{{10000, 500000}}, - 1600: intervals{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, - }, - - cases: []uint32{1, 10, 20, 40, 30, 20, 50, 599, 601, 1000, 1600, 1601, 2000}, - } - - testFunc := func(t *testing.T, tr TombstoneReader) { - for _, ref := range table.cases { - // Create the listPostings. - refs := make([]uint32, 0, len(table.m)) - for k := range table.m { - refs = append(refs, k) - } - sort.Sort(uint32slice(refs)) - pr := newListPostings(refs) - - // Compare both. 
- trc := tr.Copy() - require.Equal(t, pr.Seek(ref), trc.Seek(ref)) - if pr.Seek(ref) { - require.Equal(t, pr.At(), trc.At().ref) - require.Equal(t, table.m[pr.At()], trc.At().intervals) - } - - for pr.Next() { - require.True(t, trc.Next()) - require.Equal(t, pr.At(), trc.At().ref) - require.Equal(t, table.m[pr.At()], trc.At().intervals) - } - - require.False(t, trc.Next()) - require.NoError(t, pr.Err()) - require.NoError(t, tr.Err()) - } - } - - t.Run("tombstoneReader", func(t *testing.T) { - tmpdir, _ := ioutil.TempDir("", "test") - defer os.RemoveAll(tmpdir) - - mtr := newMapTombstoneReader(table.m) - writeTombstoneFile(tmpdir, mtr) - tr, err := readTombstoneFile(tmpdir) - require.NoError(t, err) - - testFunc(t, tr) - return - }) - t.Run("mapTombstoneReader", func(t *testing.T) { - mtr := newMapTombstoneReader(table.m) - - testFunc(t, mtr) - return - }) - t.Run("simpleTombstoneReader", func(t *testing.T) { - dranges := intervals{{1, 2}, {3, 4}, {5, 6}} - - for _, ref := range table.cases { - // Create the listPostings. - refs := make([]uint32, 0, len(table.m)) - for k := range table.m { - refs = append(refs, k) - } - sort.Sort(uint32slice(refs)) - pr := newListPostings(refs[:]) - tr := newSimpleTombstoneReader(refs[:], dranges) - - // Compare both. 
- trc := tr.Copy() - require.Equal(t, pr.Seek(ref), trc.Seek(ref)) - if pr.Seek(ref) { - require.Equal(t, pr.At(), trc.At().ref) - require.Equal(t, dranges, tr.At().intervals) - } - for pr.Next() { - require.True(t, trc.Next()) - require.Equal(t, pr.At(), trc.At().ref, "refs") - require.Equal(t, dranges, trc.At().intervals) - } - - require.False(t, trc.Next()) - require.NoError(t, pr.Err()) - require.NoError(t, tr.Err()) - } - return - }) -} - -func TestMergedTombstoneReader(t *testing.T) { - cases := []struct { - a, b TombstoneReader - - exp TombstoneReader - }{ - { - a: newMapTombstoneReader( - map[uint32]intervals{ - 2: intervals{{1, 2}}, - 3: intervals{{1, 4}, {6, 6}}, - 4: intervals{{10, 15}, {16, 20}}, - 5: intervals{{1, 4}, {5, 6}}, - 50: intervals{{10, 20}, {35, 50}}, - 600: intervals{{100, 2000}}, - 1000: intervals{}, - 1500: intervals{{10000, 500000}}, - 1600: intervals{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, - }, - ), - b: newMapTombstoneReader( - map[uint32]intervals{ - 2: intervals{{1, 2}}, - 3: intervals{{5, 6}}, - 4: intervals{{10, 15}, {16, 20}}, - 5: intervals{{1, 4}, {5, 6}}, - 50: intervals{{10, 20}, {35, 50}}, - 600: intervals{{100, 2000}}, - 1000: intervals{}, - 1500: intervals{{10000, 500000}}, - 1600: intervals{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, - }, - ), - - exp: newMapTombstoneReader( - map[uint32]intervals{ - 2: intervals{{1, 2}}, - 3: intervals{{1, 6}}, - 4: intervals{{10, 20}}, - 5: intervals{{1, 6}}, - 50: intervals{{10, 20}, {35, 50}}, - 600: intervals{{100, 2000}}, - 1000: intervals{}, - 1500: intervals{{10000, 500000}}, - 1600: intervals{{1, 7}}, - }, - ), - }, - { - a: newMapTombstoneReader( - map[uint32]intervals{ - 2: intervals{{1, 2}}, - 3: intervals{{1, 4}, {6, 6}}, - 4: intervals{{10, 15}, {17, 20}}, - 5: intervals{{1, 6}}, - 50: intervals{{10, 20}, {35, 50}}, - 600: intervals{{100, 2000}}, - 1000: intervals{}, - 1500: intervals{{10000, 500000}}, - 1600: intervals{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, - }, - ), - b: newMapTombstoneReader( - 
map[uint32]intervals{ - 20: intervals{{1, 2}}, - 30: intervals{{1, 4}, {5, 6}}, - 40: intervals{{10, 15}, {16, 20}}, - 60: intervals{{1, 4}, {5, 6}}, - 500: intervals{{10, 20}, {35, 50}}, - 6000: intervals{{100, 2000}}, - 10000: intervals{}, - 15000: intervals{{10000, 500000}}, - 1600: intervals{{1, 2}, {3, 4}, {4, 5}, {6, 7}}, - }, - ), - - exp: newMapTombstoneReader( - map[uint32]intervals{ - 2: intervals{{1, 2}}, - 3: intervals{{1, 4}, {6, 6}}, - 4: intervals{{10, 15}, {17, 20}}, - 5: intervals{{1, 6}}, - 50: intervals{{10, 20}, {35, 50}}, - 600: intervals{{100, 2000}}, - 1000: intervals{}, - 1500: intervals{{10000, 500000}}, - 20: intervals{{1, 2}}, - 30: intervals{{1, 4}, {5, 6}}, - 40: intervals{{10, 15}, {16, 20}}, - 60: intervals{{1, 4}, {5, 6}}, - 500: intervals{{10, 20}, {35, 50}}, - 6000: intervals{{100, 2000}}, - 10000: intervals{}, - 15000: intervals{{10000, 500000}}, - 1600: intervals{{1, 7}}, - }, - ), - }, - } - - for _, c := range cases { - res := newMergedTombstoneReader(c.a, c.b) - for c.exp.Next() { - require.True(t, res.Next()) - require.Equal(t, c.exp.At(), res.At()) - } - require.False(t, res.Next()) - } - return -} diff --git a/wal.go b/wal.go index 0e503e7b8d..831a6f7e55 100644 --- a/wal.go +++ b/wal.go @@ -83,7 +83,7 @@ type WAL interface { Reader() WALReader LogSeries([]labels.Labels) error LogSamples([]RefSample) error - LogDeletes(TombstoneReader) error + LogDeletes(tombstoneReader) error Close() error } @@ -180,7 +180,7 @@ func (w *SegmentWAL) LogSamples(samples []RefSample) error { } // LogDeletes write a batch of new deletes to the log. 
-func (w *SegmentWAL) LogDeletes(tr TombstoneReader) error { +func (w *SegmentWAL) LogDeletes(tr tombstoneReader) error { if err := w.encodeDeletes(tr); err != nil { return err } @@ -483,17 +483,16 @@ func (w *SegmentWAL) encodeSamples(samples []RefSample) error { return w.entry(WALEntrySamples, walSamplesSimple, buf) } -func (w *SegmentWAL) encodeDeletes(tr TombstoneReader) error { +func (w *SegmentWAL) encodeDeletes(tr tombstoneReader) error { b := make([]byte, 2*binary.MaxVarintLen64) eb := &encbuf{b: b} buf := getWALBuffer() - for tr.Next() { + for k, v := range tr { eb.reset() - s := tr.At() - eb.putUvarint32(s.ref) - eb.putUvarint(len(s.intervals)) + eb.putUvarint32(k) + eb.putUvarint(len(v)) buf = append(buf, eb.get()...) - for _, itv := range s.intervals { + for _, itv := range v { eb.reset() eb.putVarint64(itv.mint) eb.putVarint64(itv.maxt) diff --git a/wal_test.go b/wal_test.go index 3f622df722..605f3d8e62 100644 --- a/wal_test.go +++ b/wal_test.go @@ -189,7 +189,6 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { return nil } - // TODO: Add this. 
delf := func(stones []stone) error { if len(stones) > 0 { cstones := make([]stone, len(stones)) @@ -230,7 +229,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { require.NoError(t, w.LogSeries(lbls)) require.NoError(t, w.LogSamples(samples)) - require.NoError(t, w.LogDeletes(newMapTombstoneReader(stones))) + require.NoError(t, w.LogDeletes(newTombstoneReader(stones))) if len(lbls) > 0 { recordedSeries = append(recordedSeries, lbls) @@ -240,12 +239,11 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { totalSamples += len(samples) } if len(stones) > 0 { - tr := newMapTombstoneReader(stones) + tr := newTombstoneReader(stones) newdels := []stone{} - for tr.Next() { - newdels = append(newdels, tr.At()) + for k, v := range tr { + newdels = append(newdels, stone{k, v}) } - require.NoError(t, tr.Err()) recordedDeletes = append(recordedDeletes, newdels) } From bacb143b7e7a3820436002e7c79b659cbd3479e1 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Wed, 24 May 2017 13:42:56 +0530 Subject: [PATCH 21/25] Simplify tombstone and WAL Delete formats. Signed-off-by: Goutham Veeramachaneni --- Documentation/format/tombstones.md | 39 ++------ encoding_helpers.go | 21 ++++- tombstones.go | 144 ++++++++--------------------- tombstones_test.go | 8 +- wal.go | 16 +--- wal_test.go | 19 ++-- 6 files changed, 80 insertions(+), 167 deletions(-) diff --git a/Documentation/format/tombstones.md b/Documentation/format/tombstones.md index e8da95aecd..059d1ace5d 100644 --- a/Documentation/format/tombstones.md +++ b/Documentation/format/tombstones.md @@ -10,46 +10,21 @@ The stones section is 0 padded to a multiple of 4 for fast scans. │ magic(0x130BA30) <4b> │ version(1) <1 byte> │ ├────────────────────────────┴─────────────────────┤ │ ┌──────────────────────────────────────────────┐ │ -│ │ Ranges 1 │ │ +│ │ Tombstone 1 │ │ │ ├──────────────────────────────────────────────┤ │ │ │ ... 
│ │ │ ├──────────────────────────────────────────────┤ │ -│ │ Ranges N │ │ +│ │ Tombstone N │ │ │ ├──────────────────────────────────────────────┤ │ -│ │ Stones │ │ -│ ├──────────────────────────────────────────────┤ │ -│ │ Ref(stones start)<8b> │ │ +│ │ CRC<4b> │ │ │ └──────────────────────────────────────────────┘ │ └──────────────────────────────────────────────────┘ ``` -# Ranges +# Tombstone ``` -┌──────────────────────────────────────────┐ -│ ┌──────────────────────────────────────┐ │ -│ │ #ranges │ │ -│ ├───────────────────┬──────────────────┤ │ -│ │ mint │ maxt │ │ -│ ├───────────────────┴──────────────────┤ │ -│ │ . . . │ │ -│ ├──────────────────────────────────────┤ │ -│ │ CRC32 <4b> │ │ -│ └──────────────────────────────────────┘ │ -└──────────────────────────────────────────┘ +┌─────────────┬───────────────┬──────────────┐ +│ref │ mint │ maxt │ +└─────────────┴───────────────┴──────────────┘ ``` - -# Stones -``` -┌──────────────────────────────────────────┐ -│ ┌──────────────────────────────────────┐ │ -│ │ #stones <4b> │ │ -│ ├───────────────────┬──────────────────┤ │ -│ │ ref <4b> │ offset <8b> │ │ -│ ├───────────────────┴──────────────────┤ │ -│ │ . . . │ │ -│ └──────────────────────────────────────┘ │ -└──────────────────────────────────────────┘ -``` - -The offset here is the offset to the relevant ranges. 
diff --git a/encoding_helpers.go b/encoding_helpers.go index 50189e0bbe..c1ea902a79 100644 --- a/encoding_helpers.go +++ b/encoding_helpers.go @@ -72,9 +72,10 @@ type decbuf struct { e error } -func (d *decbuf) uvarint() int { return int(d.uvarint64()) } -func (d *decbuf) be32int() int { return int(d.be32()) } -func (d *decbuf) be64int64() int64 { return int64(d.be64()) } +func (d *decbuf) uvarint() int { return int(d.uvarint64()) } +func (d *decbuf) uvarint32() uint32 { return uint32(d.uvarint64()) } +func (d *decbuf) be32int() int { return int(d.be32()) } +func (d *decbuf) be64int64() int64 { return int64(d.be64()) } func (d *decbuf) uvarintStr() string { l := d.uvarint64() @@ -142,6 +143,20 @@ func (d *decbuf) be32() uint32 { return x } +func (d *decbuf) byte() byte { + if d.e != nil { + return 0 + } + if len(d.b) < 1 { + d.e = errInvalidSize + return 0 + } + x := d.b[0] + d.b = d.b[1:] + return x + +} + func (d *decbuf) decbuf(l int) decbuf { if d.e != nil { return decbuf{e: d.e} diff --git a/tombstones.go b/tombstones.go index 9e6c39931b..7abb025a1b 100644 --- a/tombstones.go +++ b/tombstones.go @@ -4,9 +4,12 @@ import ( "encoding/binary" "fmt" "hash/crc32" + "io" "io/ioutil" "os" "path/filepath" + + "github.com/pkg/errors" ) const tombstoneFilename = "tombstones" @@ -27,84 +30,36 @@ func writeTombstoneFile(dir string, tr tombstoneReader) error { if err != nil { return err } + defer f.Close() - stoneOff := make(map[uint32]int64) // The map that holds the ref to offset vals. - refs := []uint32{} // Sorted refs. - - pos := int64(0) - buf := encbuf{b: make([]byte, 2*binary.MaxVarintLen64)} + buf := encbuf{b: make([]byte, 3*binary.MaxVarintLen64)} buf.reset() // Write the meta. buf.putBE32(MagicTombstone) buf.putByte(tombstoneFormatV1) - n, err := f.Write(buf.get()) - if err != nil { - return err - } - pos += int64(n) - - for k, v := range tr { - refs = append(refs, k) - stoneOff[k] = pos - - // Write the ranges. 
- buf.reset() - buf.putUvarint(len(v)) - n, err := f.Write(buf.get()) - if err != nil { - return err - } - pos += int64(n) - - buf.reset() - for _, r := range v { - buf.putVarint64(r.mint) - buf.putVarint64(r.maxt) - } - buf.putHash(hash) - - n, err = f.Write(buf.get()) - if err != nil { - return err - } - pos += int64(n) - } - - // Write the offset table. - // Pad first. - if p := 4 - (int(pos) % 4); p != 0 { - if _, err := f.Write(make([]byte, p)); err != nil { - return err - } - - pos += int64(p) - } - - buf.reset() - buf.putBE32int(len(refs)) - if _, err := f.Write(buf.get()); err != nil { - return err - } - - for _, ref := range refs { - buf.reset() - buf.putBE32(ref) - buf.putBE64int64(stoneOff[ref]) - _, err = f.Write(buf.get()) - if err != nil { - return err - } - } - - // Write the offset to the offset table. - buf.reset() - buf.putBE64int64(pos) _, err = f.Write(buf.get()) if err != nil { return err } - if err := f.Close(); err != nil { + mw := io.MultiWriter(f, hash) + + for k, v := range tr { + for _, itv := range v { + buf.reset() + buf.putUvarint32(k) + buf.putVarint64(itv.mint) + buf.putVarint64(itv.maxt) + + _, err = mw.Write(buf.get()) + if err != nil { + return err + } + } + } + + _, err = f.Write(hash.Sum(nil)) + if err != nil { return err } @@ -129,52 +84,33 @@ func readTombstones(dir string) (tombstoneReader, error) { return nil, err } - d := &decbuf{b: b} + d := &decbuf{b: b[:len(b)-4]} // 4 for the checksum. 
if mg := d.be32(); mg != MagicTombstone { return nil, fmt.Errorf("invalid magic number %x", mg) } - - offsetBytes := b[len(b)-8:] - d = &decbuf{b: offsetBytes} - off := d.be64int64() - if err := d.err(); err != nil { - return nil, err + if flag := d.byte(); flag != tombstoneFormatV1 { + return nil, fmt.Errorf("invalid tombstone format %x", flag) } - d = &decbuf{b: b[off:]} - numStones := d.be32int() - if err := d.err(); err != nil { - return nil, err + // Verify checksum + hash := crc32.New(crc32.MakeTable(crc32.Castagnoli)) + if _, err := hash.Write(d.get()); err != nil { + return nil, errors.Wrap(err, "write to hash") + } + if binary.BigEndian.Uint32(b[len(b)-4:]) != hash.Sum32() { + return nil, errors.New("checksum did not match") } - off += 4 // For the numStones which has been read. - stones := b[off : off+int64(numStones*12)] stonesMap := make(map[uint32]intervals) - for len(stones) >= 12 { - d := &decbuf{b: stones[:12]} - ref := d.be32() - off := d.be64int64() - - d = &decbuf{b: b[off:]} - numRanges := d.uvarint() - if err := d.err(); err != nil { - return nil, err + for d.len() > 0 { + k := d.uvarint32() + mint := d.varint64() + maxt := d.varint64() + if d.err() != nil { + return nil, d.err() } - dranges := make(intervals, 0, numRanges) - for i := 0; i < int(numRanges); i++ { - mint := d.varint64() - maxt := d.varint64() - if err := d.err(); err != nil { - return nil, err - } - - dranges = append(dranges, interval{mint, maxt}) - } - - // TODO(gouthamve): Verify checksum. - stones = stones[12:] - stonesMap[ref] = dranges + stonesMap[k] = stonesMap[k].add(interval{mint, maxt}) } return newTombstoneReader(stonesMap), nil diff --git a/tombstones_test.go b/tombstones_test.go index 525d825f7d..6469d0fbe2 100644 --- a/tombstones_test.go +++ b/tombstones_test.go @@ -20,11 +20,11 @@ func TestWriteAndReadbackTombStones(t *testing.T) { // Generate the tombstones. 
for i := 0; i < 100; i++ { ref += uint32(rand.Int31n(10)) + 1 - numRanges := rand.Intn(5) - dranges := make(intervals, numRanges) + numRanges := rand.Intn(5) + 1 + dranges := make(intervals, 0, numRanges) mint := rand.Int63n(time.Now().UnixNano()) for j := 0; j < numRanges; j++ { - dranges[j] = interval{mint, mint + rand.Int63n(1000)} + dranges = dranges.add(interval{mint, mint + rand.Int63n(1000)}) mint += rand.Int63n(1000) + 1 } stones[ref] = dranges @@ -36,7 +36,7 @@ func TestWriteAndReadbackTombStones(t *testing.T) { require.NoError(t, err) exptr := newTombstoneReader(stones) // Compare the two readers. - require.Equal(t, restr, exptr) + require.Equal(t, exptr, restr) } func TestAddingNewIntervals(t *testing.T) { diff --git a/wal.go b/wal.go index 831a6f7e55..bf0b1af871 100644 --- a/wal.go +++ b/wal.go @@ -488,12 +488,9 @@ func (w *SegmentWAL) encodeDeletes(tr tombstoneReader) error { eb := &encbuf{b: b} buf := getWALBuffer() for k, v := range tr { - eb.reset() - eb.putUvarint32(k) - eb.putUvarint(len(v)) - buf = append(buf, eb.get()...) for _, itv := range v { eb.reset() + eb.putUvarint32(k) eb.putVarint64(itv.mint) eb.putVarint64(itv.maxt) buf = append(buf, eb.get()...) 
@@ -787,19 +784,12 @@ func (r *walReader) decodeDeletes(flag byte, b []byte) error { for db.len() > 0 { var s stone - s.ref = uint32(db.uvarint()) - l := db.uvarint() + s.ref = db.uvarint32() + s.intervals = intervals{{db.varint64(), db.varint64()}} if db.err() != nil { return db.err() } - for i := 0; i < l; i++ { - s.intervals = append(s.intervals, interval{db.varint64(), db.varint64()}) - if db.err() != nil { - return db.err() - } - } - r.stones = append(r.stones, s) } diff --git a/wal_test.go b/wal_test.go index 605f3d8e62..f706849d44 100644 --- a/wal_test.go +++ b/wal_test.go @@ -149,7 +149,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { var ( recordedSeries [][]labels.Labels recordedSamples [][]RefSample - recordedDeletes [][]stone + recordedDeletes []tombstoneReader ) var totalSamples int @@ -167,7 +167,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { var ( resultSeries [][]labels.Labels resultSamples [][]RefSample - resultDeletes [][]stone + resultDeletes []tombstoneReader ) serf := func(lsets []labels.Labels) error { @@ -191,9 +191,11 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { delf := func(stones []stone) error { if len(stones) > 0 { - cstones := make([]stone, len(stones)) - copy(cstones, stones) - resultDeletes = append(resultDeletes, cstones) + dels := make(map[uint32]intervals) + for _, s := range stones { + dels[s.ref] = s.intervals + } + resultDeletes = append(resultDeletes, newTombstoneReader(dels)) } return nil @@ -240,12 +242,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { } if len(stones) > 0 { tr := newTombstoneReader(stones) - newdels := []stone{} - for k, v := range tr { - newdels = append(newdels, stone{k, v}) - } - - recordedDeletes = append(recordedDeletes, newdels) + recordedDeletes = append(recordedDeletes, tr) } } From c211ec4f492c47ec2673c36dac687a717b709b7e Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Wed, 24 May 2017 16:58:04 +0530 Subject: [PATCH 22/25] Fix concurrent map access. 
Signed-off-by: Goutham Veeramachaneni --- head.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/head.go b/head.go index 704c0b222b..dd441f7973 100644 --- a/head.go +++ b/head.go @@ -260,9 +260,12 @@ Outer: return err } + // Map is accessed in other places also, so protect it. + h.mtx.Lock() for k, v := range newStones { h.tombstones[k] = h.tombstones[k].add(v[0]) } + h.mtx.Unlock() h.meta.NumTombstones = int64(len(h.tombstones)) return nil From 6febabeb285c89df45613881cca617c2aa65a206 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Fri, 26 May 2017 16:31:45 +0530 Subject: [PATCH 23/25] Final delete fixes. * Make sure no reads happen on the block when delete is in progress. * Fix bugs in compaction. Signed-off-by: Goutham Veeramachaneni --- block.go | 15 ++++++--------- compact.go | 20 +++++++++----------- db.go | 11 +++++++---- head.go | 3 --- 4 files changed, 22 insertions(+), 27 deletions(-) diff --git a/block.go b/block.go index 82dfb65942..b0f88512f9 100644 --- a/block.go +++ b/block.go @@ -234,7 +234,7 @@ func (pb *persistedBlock) Delete(mint, maxt int64, ms ...labels.Matcher) error { ir := pb.indexr // Choose only valid postings which have chunks in the time-range. - delStones := map[uint32]intervals{} + newStones := map[uint32]intervals{} Outer: for p.Next() { @@ -259,7 +259,7 @@ Outer: if mint < chunks[0].MinTime { mint = chunks[0].MinTime } - delStones[p.At()] = intervals{{mint, maxtime}} + newStones[p.At()] = intervals{{mint, maxtime}} continue Outer } } @@ -270,18 +270,15 @@ Outer: } // Merge the current and new tombstones. 
- for k, v := range pb.tombstones { - for _, itv := range v { - delStones[k] = delStones[k].add(itv) - } + for k, v := range newStones { + pb.tombstones[k] = pb.tombstones[k].add(v[0]) } - tombreader := newTombstoneReader(delStones) - if err := writeTombstoneFile(pb.dir, tombreader); err != nil { + if err := writeTombstoneFile(pb.dir, pb.tombstones); err != nil { return err } - pb.meta.NumTombstones = int64(len(delStones)) + pb.meta.NumTombstones = int64(len(pb.tombstones)) return writeMetaFile(pb.dir, &pb.meta) } diff --git a/compact.go b/compact.go index d139be0268..6bf7988339 100644 --- a/compact.go +++ b/compact.go @@ -263,12 +263,8 @@ func (c *compactor) write(uid ulid.ULID, blocks ...Block) (err error) { } // Create an empty tombstones file. - tf, err := os.Create(filepath.Join(tmp, tombstoneFilename)) - if err != nil { - return errors.Wrap(err, "touch tombstones file") - } - if err := tf.Close(); err != nil { - return errors.Wrap(err, "close tombstones file") + if err := writeTombstoneFile(tmp, newEmptyTombstoneReader()); err != nil { + return errors.Wrap(err, "write new tombstones file") } // Block successfully written, make visible and remove old ones. 
@@ -444,11 +440,6 @@ func (c *compactionSeriesSet) Next() bool { chks := make([]*ChunkMeta, 0, len(c.c)) for _, chk := range c.c { if !(interval{chk.MinTime, chk.MaxTime}.isSubrange(c.intervals)) { - chk.Chunk, c.err = c.chunks.Chunk(chk.Ref) - if c.err != nil { - return false - } - chks = append(chks, chk) } } @@ -456,6 +447,13 @@ func (c *compactionSeriesSet) Next() bool { c.c = chks } + for _, chk := range c.c { + chk.Chunk, c.err = c.chunks.Chunk(chk.Ref) + if c.err != nil { + return false + } + } + return true } diff --git a/db.go b/db.go index ae05157e91..2747b8a8fc 100644 --- a/db.go +++ b/db.go @@ -424,6 +424,7 @@ func (db *DB) reloadBlocks() error { if err := validateBlockSequence(blocks); err != nil { return errors.Wrap(err, "invalid block sequence") } + // Close all opened blocks that no longer exist after we returned all locks. for _, b := range db.blocks { if _, ok := exist[b.Meta().ULID]; !ok { @@ -670,22 +671,24 @@ func (a *dbAppender) Rollback() error { func (db *DB) Delete(mint, maxt int64, ms ...labels.Matcher) error { db.cmtx.Lock() defer db.cmtx.Unlock() + db.mtx.Lock() + defer db.mtx.Unlock() - db.mtx.RLock() blocks := db.blocksForInterval(mint, maxt) - db.mtx.RUnlock() var g errgroup.Group for _, b := range blocks { - g.Go(func() error { return b.Delete(mint, maxt, ms...) }) + g.Go(func(b Block) func() error { + return func() error { return b.Delete(mint, maxt, ms...) } + }(b)) } if err := g.Wait(); err != nil { return err } - return db.reloadBlocks() + return nil } // appendable returns a copy of a slice of HeadBlocks that can still be appended to. diff --git a/head.go b/head.go index dd441f7973..704c0b222b 100644 --- a/head.go +++ b/head.go @@ -260,12 +260,9 @@ Outer: return err } - // Map is accessed in other places also, so protect it. 
- h.mtx.Lock() for k, v := range newStones { h.tombstones[k] = h.tombstones[k].add(v[0]) } - h.mtx.Unlock() h.meta.NumTombstones = int64(len(h.tombstones)) return nil From 44e9ae38b50004afa6abedc0e2b4aee73fb9d371 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Fri, 26 May 2017 21:26:31 +0530 Subject: [PATCH 24/25] Incorporate PR feedback. * Expose Stone as it is used in an exported method. * Move from tombstoneReader to []Stone for the same reason as above. * Make WAL reading a little cleaner. Signed-off-by: Goutham Veeramachaneni --- Documentation/format/tombstones.md | 3 +- block.go | 51 ++++++------- block_test.go | 13 ++++ compact.go | 2 +- encoding_helpers.go | 1 - head.go | 37 ++++------ querier.go | 2 +- tombstones.go | 37 ++++++++-- tombstones_test.go | 13 ++++ wal.go | 115 ++++++++++++++++------------- wal_test.go | 23 +++--- 11 files changed, 173 insertions(+), 124 deletions(-) diff --git a/Documentation/format/tombstones.md b/Documentation/format/tombstones.md index 059d1ace5d..2af0ac98c5 100644 --- a/Documentation/format/tombstones.md +++ b/Documentation/format/tombstones.md @@ -1,6 +1,7 @@ # Tombstones Disk Format -The following describes the format of a tombstones file, which is the directory of a block. +The following describes the format of a tombstones file, which is placed +at the top level directory of a block. The last 8 bytes specifies the offset to the start of Stones section. The stones section is 0 padded to a multiple of 4 for fast scans. diff --git a/block.go b/block.go index b0f88512f9..6f21164909 100644 --- a/block.go +++ b/block.go @@ -86,18 +86,16 @@ type BlockMeta struct { // Stats about the contents of the block. 
Stats struct { - NumSamples uint64 `json:"numSamples,omitempty"` - NumSeries uint64 `json:"numSeries,omitempty"` - NumChunks uint64 `json:"numChunks,omitempty"` + NumSamples uint64 `json:"numSamples,omitempty"` + NumSeries uint64 `json:"numSeries,omitempty"` + NumChunks uint64 `json:"numChunks,omitempty"` + NumTombstones uint64 `json:"numTombstones,omitempty"` } `json:"stats,omitempty"` // Information on compactions the block was created from. Compaction struct { Generation int `json:"generation"` } `json:"compaction"` - - // The number of tombstones. - NumTombstones int64 `json:"numTombstones"` } const ( @@ -161,7 +159,6 @@ type persistedBlock struct { chunkr *chunkReader indexr *indexReader - // For tombstones. tombstones tombstoneReader } @@ -186,11 +183,10 @@ func newPersistedBlock(dir string) (*persistedBlock, error) { } pb := &persistedBlock{ - dir: dir, - meta: *meta, - chunkr: cr, - indexr: ir, - + dir: dir, + meta: *meta, + chunkr: cr, + indexr: ir, tombstones: tr, } return pb, nil @@ -234,7 +230,7 @@ func (pb *persistedBlock) Delete(mint, maxt int64, ms ...labels.Matcher) error { ir := pb.indexr // Choose only valid postings which have chunks in the time-range. - newStones := map[uint32]intervals{} + stones := map[uint32]intervals{} Outer: for p.Next() { @@ -251,15 +247,9 @@ Outer: for _, chk := range chunks { if intervalOverlap(mint, maxt, chk.MinTime, chk.MaxTime) { - // Delete only until the current maxtime and not beyond. - maxtime := chunks[len(chunks)-1].MaxTime - if maxtime > maxt { - maxtime = maxt - } - if mint < chunks[0].MinTime { - mint = chunks[0].MinTime - } - newStones[p.At()] = intervals{{mint, maxtime}} + // Delete only until the current values and not beyond. + mint, maxt = clampInterval(mint, maxt, chunks[0].MinTime, chunks[len(chunks)-1].MaxTime) + stones[p.At()] = intervals{{mint, maxt}} continue Outer } } @@ -270,21 +260,32 @@ Outer: } // Merge the current and new tombstones.
- for k, v := range newStones { - pb.tombstones[k] = pb.tombstones[k].add(v[0]) + for k, v := range stones { + pb.tombstones.add(k, v[0]) } if err := writeTombstoneFile(pb.dir, pb.tombstones); err != nil { return err } - pb.meta.NumTombstones = int64(len(pb.tombstones)) + pb.meta.Stats.NumTombstones = uint64(len(pb.tombstones)) return writeMetaFile(pb.dir, &pb.meta) } func chunkDir(dir string) string { return filepath.Join(dir, "chunks") } func walDir(dir string) string { return filepath.Join(dir, "wal") } +func clampInterval(a, b, mint, maxt int64) (int64, int64) { + if a < mint { + a = mint + } + if b > maxt { + b = maxt + } + + return a, b +} + type mmapFile struct { f *os.File b []byte diff --git a/block_test.go b/block_test.go index 35178ff493..e75d4ac3f2 100644 --- a/block_test.go +++ b/block_test.go @@ -1 +1,14 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ package tsdb diff --git a/compact.go b/compact.go index 6bf7988339..46ce52eab0 100644 --- a/compact.go +++ b/compact.go @@ -428,7 +428,7 @@ func (c *compactionSeriesSet) Next() bool { return false } - c.intervals = c.tombstones.At(c.p.At()) + c.intervals = c.tombstones.Get(c.p.At()) c.l, c.c, c.err = c.index.Series(c.p.At()) if c.err != nil { diff --git a/encoding_helpers.go b/encoding_helpers.go index c1ea902a79..25ff32d00b 100644 --- a/encoding_helpers.go +++ b/encoding_helpers.go @@ -154,7 +154,6 @@ func (d *decbuf) byte() byte { x := d.b[0] d.b = d.b[1:] return x - } func (d *decbuf) decbuf(l int) decbuf { diff --git a/head.go b/head.go index 704c0b222b..833f024112 100644 --- a/head.go +++ b/head.go @@ -150,10 +150,10 @@ func (h *HeadBlock) init() error { return nil } - deletesFunc := func(stones []stone) error { + deletesFunc := func(stones []Stone) error { for _, s := range stones { for _, itv := range s.intervals { - h.tombstones[s.ref] = h.tombstones[s.ref].add(itv) + h.tombstones.add(s.ref, itv) } } @@ -230,7 +230,8 @@ func (h *HeadBlock) Delete(mint int64, maxt int64, ms ...labels.Matcher) error { pr := newPostingsReader(ir) p, absent := pr.Select(ms...) - newStones := make(map[uint32]intervals) + var stones []Stone + Outer: for p.Next() { ref := p.At() @@ -242,29 +243,22 @@ Outer: } // Delete only until the current values and not beyond. 
- maxtime := h.series[ref].head().maxTime - if maxtime > maxt { - maxtime = maxt - } - if mint < h.series[ref].chunks[0].minTime { - mint = h.series[ref].chunks[0].minTime - } - - newStones[ref] = intervals{{mint, maxtime}} + mint, maxt = clampInterval(mint, maxt, h.series[ref].chunks[0].minTime, h.series[ref].head().maxTime) + stones = append(stones, Stone{ref, intervals{{mint, maxt}}}) } if p.Err() != nil { return p.Err() } - if err := h.wal.LogDeletes(newTombstoneReader(newStones)); err != nil { + if err := h.wal.LogDeletes(stones); err != nil { return err } - for k, v := range newStones { - h.tombstones[k] = h.tombstones[k].add(v[0]) + for _, s := range stones { + h.tombstones.add(s.ref, s.intervals[0]) } - h.meta.NumTombstones = int64(len(h.tombstones)) + h.meta.Stats.NumTombstones = uint64(len(h.tombstones)) return nil } @@ -510,14 +504,13 @@ func (a *headAppender) Commit() error { } } - var err MultiError - // Write all new series and samples to the WAL and add it to the // in-mem database on success. - err.Add(a.wal.LogSeries(a.newLabels)) - err.Add(a.wal.LogSamples(a.samples)) - if err.Err() != nil { - return err.Err() + if err := a.wal.LogSeries(a.newLabels); err != nil { + return errors.Wrap(err, "WAL log series") + } + if err := a.wal.LogSamples(a.samples); err != nil { + return errors.Wrap(err, "WAL log samples") } total := uint64(len(a.samples)) diff --git a/querier.go b/querier.go index 49cd013f43..601fc74407 100644 --- a/querier.go +++ b/querier.go @@ -412,7 +412,7 @@ Outer: s.lset = lset s.chks = chunks - s.intervals = s.tombstones.At(s.p.At()) + s.intervals = s.tombstones.Get(s.p.At()) if len(s.intervals) > 0 { // Only those chunks that are not entirely deleted. 
diff --git a/tombstones.go b/tombstones.go index 7abb025a1b..612b3029fe 100644 --- a/tombstones.go +++ b/tombstones.go @@ -1,3 +1,16 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package tsdb import ( @@ -66,16 +79,16 @@ func writeTombstoneFile(dir string, tr tombstoneReader) error { return renameFile(tmp, path) } -// stone holds the information on the posting and time-range +// Stone holds the information on the posting and time-range // that is deleted. -type stone struct { +type Stone struct { ref uint32 intervals intervals } // TombstoneReader is the iterator over tombstones. type TombstoneReader interface { - At(ref uint32) intervals + Get(ref uint32) intervals } func readTombstones(dir string) (tombstoneReader, error) { @@ -84,6 +97,10 @@ func readTombstones(dir string) (tombstoneReader, error) { return nil, err } + if len(b) < 5 { + return nil, errors.Wrap(errInvalidSize, "tombstones header") + } + d := &decbuf{b: b[:len(b)-4]} // 4 for the checksum. 
if mg := d.be32(); mg != MagicTombstone { return nil, fmt.Errorf("invalid magic number %x", mg) @@ -92,6 +109,10 @@ func readTombstones(dir string) (tombstoneReader, error) { return nil, fmt.Errorf("invalid tombstone format %x", flag) } + if d.err() != nil { + return nil, d.err() + } + // Verify checksum hash := crc32.New(crc32.MakeTable(crc32.Castagnoli)) if _, err := hash.Write(d.get()); err != nil { @@ -101,7 +122,7 @@ func readTombstones(dir string) (tombstoneReader, error) { return nil, errors.New("checksum did not match") } - stonesMap := make(map[uint32]intervals) + stonesMap := newEmptyTombstoneReader() for d.len() > 0 { k := d.uvarint32() mint := d.varint64() @@ -110,7 +131,7 @@ func readTombstones(dir string) (tombstoneReader, error) { return nil, d.err() } - stonesMap[k] = stonesMap[k].add(interval{mint, maxt}) + stonesMap.add(k, interval{mint, maxt}) } return newTombstoneReader(stonesMap), nil @@ -126,10 +147,14 @@ func newEmptyTombstoneReader() tombstoneReader { return tombstoneReader(make(map[uint32]intervals)) } -func (t tombstoneReader) At(ref uint32) intervals { +func (t tombstoneReader) Get(ref uint32) intervals { return t[ref] } +func (t tombstoneReader) add(ref uint32, itv interval) { + t[ref] = t[ref].add(itv) +} + type interval struct { mint, maxt int64 } diff --git a/tombstones_test.go b/tombstones_test.go index 6469d0fbe2..bc6199f114 100644 --- a/tombstones_test.go +++ b/tombstones_test.go @@ -1,3 +1,16 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + package tsdb import ( diff --git a/wal.go b/wal.go index bf0b1af871..50ddb6e346 100644 --- a/wal.go +++ b/wal.go @@ -56,7 +56,7 @@ type SamplesCB func([]RefSample) error type SeriesCB func([]labels.Labels) error // DeletesCB is the callback after reading deletes. -type DeletesCB func([]stone) error +type DeletesCB func([]Stone) error // SegmentWAL is a write ahead log for series data. type SegmentWAL struct { @@ -83,7 +83,7 @@ type WAL interface { Reader() WALReader LogSeries([]labels.Labels) error LogSamples([]RefSample) error - LogDeletes(tombstoneReader) error + LogDeletes([]Stone) error Close() error } @@ -180,8 +180,8 @@ func (w *SegmentWAL) LogSamples(samples []RefSample) error { } // LogDeletes write a batch of new deletes to the log. -func (w *SegmentWAL) LogDeletes(tr tombstoneReader) error { - if err := w.encodeDeletes(tr); err != nil { +func (w *SegmentWAL) LogDeletes(stones []Stone) error { + if err := w.encodeDeletes(stones); err != nil { return err } @@ -483,14 +483,14 @@ func (w *SegmentWAL) encodeSamples(samples []RefSample) error { return w.entry(WALEntrySamples, walSamplesSimple, buf) } -func (w *SegmentWAL) encodeDeletes(tr tombstoneReader) error { +func (w *SegmentWAL) encodeDeletes(stones []Stone) error { b := make([]byte, 2*binary.MaxVarintLen64) eb := &encbuf{b: b} buf := getWALBuffer() - for k, v := range tr { - for _, itv := range v { + for _, s := range stones { + for _, itv := range s.intervals { eb.reset() - eb.putUvarint32(k) + eb.putUvarint32(s.ref) eb.putVarint64(itv.mint) eb.putVarint64(itv.maxt) buf = append(buf, eb.get()...) 
@@ -509,13 +509,9 @@ type walReader struct { buf []byte crc32 hash.Hash32 - samples []RefSample - series []labels.Labels - stones []stone - - samplesFunc SamplesCB - seriesFunc SeriesCB - deletesFunc DeletesCB + curType WALEntryType + curFlag byte + curBuf []byte err error } @@ -538,11 +534,30 @@ func (r *walReader) Err() error { } func (r *walReader) Read(seriesf SeriesCB, samplesf SamplesCB, deletesf DeletesCB) error { - r.samplesFunc = samplesf - r.seriesFunc = seriesf - r.deletesFunc = deletesf - for r.next() { + et, flag, b := r.at() + // In decoding below we never return a walCorruptionErr for now. + // Those should generally be caught by entry decoding before. + switch et { + case WALEntrySeries: + s, err := r.decodeSeries(flag, b) + if err != nil { + return err + } + seriesf(s) + case WALEntrySamples: + s, err := r.decodeSamples(flag, b) + if err != nil { + return err + } + samplesf(s) + case WALEntryDeletes: + s, err := r.decodeDeletes(flag, b) + if err != nil { + return err + } + deletesf(s) + } } return r.Err() @@ -570,13 +585,13 @@ func (r *walReader) nextEntry() (WALEntryType, byte, []byte, error) { return et, flag, b, err } +func (r *walReader) at() (WALEntryType, byte, []byte) { + return r.curType, r.curFlag, r.curBuf +} + // next returns decodes the next entry pair and returns true // if it was succesful. func (r *walReader) next() bool { - r.series = r.series[:0] - r.samples = r.samples[:0] - r.stones = r.stones[:0] - if r.cur >= len(r.wal.files) { return false } @@ -614,16 +629,9 @@ func (r *walReader) next() bool { return false } - // In decoding below we never return a walCorruptionErr for now. - // Those should generally be catched by entry decoding before.
- switch et { - case WALEntrySeries: - r.err = r.decodeSeries(flag, b) - case WALEntrySamples: - r.err = r.decodeSamples(flag, b) - case WALEntryDeletes: - r.err = r.decodeDeletes(flag, b) - } + r.curType = et + r.curFlag = flag + r.curBuf = b return r.err == nil } @@ -707,11 +715,12 @@ func (r *walReader) entry(cr io.Reader) (WALEntryType, byte, []byte, error) { return etype, flag, buf, nil } -func (r *walReader) decodeSeries(flag byte, b []byte) error { +func (r *walReader) decodeSeries(flag byte, b []byte) ([]labels.Labels, error) { + series := []labels.Labels{} for len(b) > 0 { l, n := binary.Uvarint(b) if n < 1 { - return errors.Wrap(errInvalidSize, "number of labels") + return nil, errors.Wrap(errInvalidSize, "number of labels") } b = b[n:] lset := make(labels.Labels, l) @@ -719,29 +728,29 @@ func (r *walReader) decodeSeries(flag byte, b []byte) error { for i := 0; i < int(l); i++ { nl, n := binary.Uvarint(b) if n < 1 || len(b) < n+int(nl) { - return errors.Wrap(errInvalidSize, "label name") + return nil, errors.Wrap(errInvalidSize, "label name") } lset[i].Name = string(b[n : n+int(nl)]) b = b[n+int(nl):] vl, n := binary.Uvarint(b) if n < 1 || len(b) < n+int(vl) { - return errors.Wrap(errInvalidSize, "label value") + return nil, errors.Wrap(errInvalidSize, "label value") } lset[i].Value = string(b[n : n+int(vl)]) b = b[n+int(vl):] } - r.series = append(r.series, lset) + series = append(series, lset) } - return r.seriesFunc(r.series) + return series, nil } -func (r *walReader) decodeSamples(flag byte, b []byte) error { - r.samples = r.samples[:] +func (r *walReader) decodeSamples(flag byte, b []byte) ([]RefSample, error) { + samples := []RefSample{} if len(b) < 16 { - return errors.Wrap(errInvalidSize, "header length") + return nil, errors.Wrap(errInvalidSize, "header length") } var ( baseRef = binary.BigEndian.Uint64(b) @@ -754,7 +763,7 @@ func (r *walReader) decodeSamples(flag byte, b []byte) error { dref, n := binary.Varint(b) if n < 1 { - return 
errors.Wrap(errInvalidSize, "sample ref delta") + return nil, errors.Wrap(errInvalidSize, "sample ref delta") } b = b[n:] @@ -762,36 +771,36 @@ func (r *walReader) decodeSamples(flag byte, b []byte) error { dtime, n := binary.Varint(b) if n < 1 { - return errors.Wrap(errInvalidSize, "sample timestamp delta") + return nil, errors.Wrap(errInvalidSize, "sample timestamp delta") } b = b[n:] smpl.T = baseTime + dtime if len(b) < 8 { - return errors.Wrapf(errInvalidSize, "sample value bits %d", len(b)) + return nil, errors.Wrapf(errInvalidSize, "sample value bits %d", len(b)) } smpl.V = float64(math.Float64frombits(binary.BigEndian.Uint64(b))) b = b[8:] - r.samples = append(r.samples, smpl) + samples = append(samples, smpl) } - return r.samplesFunc(r.samples) + return samples, nil } -func (r *walReader) decodeDeletes(flag byte, b []byte) error { +func (r *walReader) decodeDeletes(flag byte, b []byte) ([]Stone, error) { db := &decbuf{b: b} - r.samples = r.samples[:] + stones := []Stone{} for db.len() > 0 { - var s stone + var s Stone s.ref = db.uvarint32() s.intervals = intervals{{db.varint64(), db.varint64()}} if db.err() != nil { - return db.err() + return nil, db.err() } - r.stones = append(r.stones, s) + stones = append(stones, s) } - return r.deletesFunc(r.stones) + return stones, nil } diff --git a/wal_test.go b/wal_test.go index f706849d44..23667fb9a0 100644 --- a/wal_test.go +++ b/wal_test.go @@ -149,7 +149,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { var ( recordedSeries [][]labels.Labels recordedSamples [][]RefSample - recordedDeletes []tombstoneReader + recordedDeletes [][]Stone ) var totalSamples int @@ -167,7 +167,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { var ( resultSeries [][]labels.Labels resultSamples [][]RefSample - resultDeletes []tombstoneReader + resultDeletes [][]Stone ) serf := func(lsets []labels.Labels) error { @@ -189,13 +189,9 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { return nil } - delf := func(stones []stone) error 
{ + delf := func(stones []Stone) error { if len(stones) > 0 { - dels := make(map[uint32]intervals) - for _, s := range stones { - dels[s.ref] = s.intervals - } - resultDeletes = append(resultDeletes, newTombstoneReader(dels)) + resultDeletes = append(resultDeletes, stones) } return nil @@ -212,7 +208,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { // Insert in batches and generate different amounts of samples for each. for i := 0; i < len(series); i += stepSize { var samples []RefSample - stones := map[uint32]intervals{} + var stones []Stone for j := 0; j < i*10; j++ { samples = append(samples, RefSample{ @@ -224,14 +220,14 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { for j := 0; j < i*20; j++ { ts := rand.Int63() - stones[rand.Uint32()] = intervals{{ts, ts + rand.Int63n(10000)}} + stones = append(stones, Stone{rand.Uint32(), intervals{{ts, ts + rand.Int63n(10000)}}}) } lbls := series[i : i+stepSize] require.NoError(t, w.LogSeries(lbls)) require.NoError(t, w.LogSamples(samples)) - require.NoError(t, w.LogDeletes(newTombstoneReader(stones))) + require.NoError(t, w.LogDeletes(stones)) if len(lbls) > 0 { recordedSeries = append(recordedSeries, lbls) @@ -241,8 +237,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { totalSamples += len(samples) } if len(stones) > 0 { - tr := newTombstoneReader(stones) - recordedDeletes = append(recordedDeletes, tr) + recordedDeletes = append(recordedDeletes, stones) } } @@ -350,7 +345,7 @@ func TestWALRestoreCorrupted(t *testing.T) { require.Equal(t, 0, len(l)) return nil } - delf := func([]stone) error { return nil } + delf := func([]Stone) error { return nil } // Weird hack to check order of reads. i := 0 From 29c73f05f2954a612b52c1931cc0eb2b8cf68363 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Sat, 27 May 2017 21:59:49 +0530 Subject: [PATCH 25/25] Make sure that mint and maxt are not modified. 
Signed-off-by: Goutham Veeramachaneni --- block.go | 4 ++-- head.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/block.go b/block.go index 6f21164909..1351efae7b 100644 --- a/block.go +++ b/block.go @@ -248,8 +248,8 @@ Outer: for _, chk := range chunks { if intervalOverlap(mint, maxt, chk.MinTime, chk.MaxTime) { // Delete only until the current vlaues and not beyond. - mint, maxt = clampInterval(mint, maxt, chunks[0].MinTime, chunks[len(chunks)-1].MaxTime) - stones[p.At()] = intervals{{mint, maxt}} + tmin, tmax := clampInterval(mint, maxt, chunks[0].MinTime, chunks[len(chunks)-1].MaxTime) + stones[p.At()] = intervals{{tmin, tmax}} continue Outer } } diff --git a/head.go b/head.go index 833f024112..7f93d1658a 100644 --- a/head.go +++ b/head.go @@ -243,8 +243,8 @@ Outer: } // Delete only until the current values and not beyond. - mint, maxt = clampInterval(mint, maxt, h.series[ref].chunks[0].minTime, h.series[ref].head().maxTime) - stones = append(stones, Stone{ref, intervals{{mint, maxt}}}) + tmin, tmax := clampInterval(mint, maxt, h.series[ref].chunks[0].minTime, h.series[ref].head().maxTime) + stones = append(stones, Stone{ref, intervals{{tmin, tmax}}}) } if p.Err() != nil {