From bc506ce959b406be13cbc00edd43eb13fbe8496b Mon Sep 17 00:00:00 2001 From: Fabian Reinartz Date: Mon, 4 Jul 2016 10:08:49 +0200 Subject: [PATCH] vendor: update goleveldb dependencies --- .../syndtr/goleveldb/leveldb/batch.go | 54 +- .../syndtr/goleveldb/leveldb/cache/cache.go | 14 +- .../syndtr/goleveldb/leveldb/comparer.go | 12 +- .../github.com/syndtr/goleveldb/leveldb/db.go | 322 +++++----- .../syndtr/goleveldb/leveldb/db_compaction.go | 217 ++++--- .../syndtr/goleveldb/leveldb/db_iter.go | 52 +- .../syndtr/goleveldb/leveldb/db_snapshot.go | 8 +- .../syndtr/goleveldb/leveldb/db_state.go | 58 +- .../goleveldb/leveldb/db_transaction.go | 289 +++++++++ .../syndtr/goleveldb/leveldb/db_util.go | 62 +- .../syndtr/goleveldb/leveldb/db_write.go | 48 +- .../syndtr/goleveldb/leveldb/errors/errors.go | 20 +- .../syndtr/goleveldb/leveldb/filter.go | 4 +- .../goleveldb/leveldb/journal/journal.go | 3 +- .../syndtr/goleveldb/leveldb/key.go | 95 +-- .../syndtr/goleveldb/leveldb/opt/options.go | 58 +- .../syndtr/goleveldb/leveldb/options.go | 41 +- .../syndtr/goleveldb/leveldb/session.go | 64 +- .../goleveldb/leveldb/session_compaction.go | 125 ++-- .../goleveldb/leveldb/session_record.go | 92 +-- .../syndtr/goleveldb/leveldb/session_util.go | 103 ++-- .../goleveldb/leveldb/storage/file_storage.go | 582 ++++++++++-------- .../leveldb/storage/file_storage_nacl.go | 34 + .../leveldb/storage/file_storage_plan9.go | 17 +- .../leveldb/storage/file_storage_solaris.go | 25 +- .../leveldb/storage/file_storage_unix.go | 25 +- .../leveldb/storage/file_storage_windows.go | 17 +- .../goleveldb/leveldb/storage/mem_storage.go | 243 ++++---- .../goleveldb/leveldb/storage/storage.go | 148 ++--- .../syndtr/goleveldb/leveldb/table.go | 88 +-- .../syndtr/goleveldb/leveldb/table/reader.go | 12 +- .../syndtr/goleveldb/leveldb/util.go | 14 +- .../syndtr/goleveldb/leveldb/util/hash.go | 28 +- .../syndtr/goleveldb/leveldb/util/pool.go | 21 - .../goleveldb/leveldb/util/pool_legacy.go | 33 - .../syndtr/goleveldb/leveldb/version.go | 308 +++++---- vendor/vendor.json | 60 +- 37 files changed, 1991 insertions(+), 1405 deletions(-) create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go delete mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/util/pool.go delete mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/util/pool_legacy.go diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/batch.go b/vendor/github.com/syndtr/goleveldb/leveldb/batch.go index ccf390c9c..501006717 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/batch.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/batch.go @@ -12,8 +12,10 @@ import ( "github.com/syndtr/goleveldb/leveldb/errors" "github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/syndtr/goleveldb/leveldb/storage" ) +// ErrBatchCorrupted records reason of batch corruption. type ErrBatchCorrupted struct { Reason string } @@ -23,7 +25,7 @@ func (e *ErrBatchCorrupted) Error() string { } func newErrBatchCorrupted(reason string) error { - return errors.NewErrCorrupted(nil, &ErrBatchCorrupted{reason}) + return errors.NewErrCorrupted(storage.FileDesc{}, &ErrBatchCorrupted{reason}) } const ( @@ -31,6 +33,7 @@ const ( batchGrowRec = 3000 ) +// BatchReplay wraps basic batch operations. type BatchReplay interface { Put(key, value []byte) Delete(key []byte) @@ -67,20 +70,20 @@ func (b *Batch) grow(n int) { } } -func (b *Batch) appendRec(kt kType, key, value []byte) { +func (b *Batch) appendRec(kt keyType, key, value []byte) { n := 1 + binary.MaxVarintLen32 + len(key) - if kt == ktVal { + if kt == keyTypeVal { n += binary.MaxVarintLen32 + len(value) } b.grow(n) off := len(b.data) data := b.data[:off+n] data[off] = byte(kt) - off += 1 + off++ off += binary.PutUvarint(data[off:], uint64(len(key))) copy(data[off:], key) off += len(key) - if kt == ktVal { + if kt == keyTypeVal { off += binary.PutUvarint(data[off:], uint64(len(value))) copy(data[off:], value) off += len(value) @@ -94,13 +97,13 @@ func (b *Batch) appendRec(kt kType, key, value []byte) { // Put appends 'put operation' of the given key/value pair to the batch. // It is safe to modify the contents of the argument after Put returns. func (b *Batch) Put(key, value []byte) { - b.appendRec(ktVal, key, value) + b.appendRec(keyTypeVal, key, value) } // Delete appends 'delete operation' of the given key to the batch. // It is safe to modify the contents of the argument after Delete returns. func (b *Batch) Delete(key []byte) { - b.appendRec(ktDel, key, nil) + b.appendRec(keyTypeDel, key, nil) } // Dump dumps batch contents. The returned slice can be loaded into the @@ -121,13 +124,14 @@ func (b *Batch) Load(data []byte) error { // Replay replays batch contents. func (b *Batch) Replay(r BatchReplay) error { - return b.decodeRec(func(i int, kt kType, key, value []byte) { + return b.decodeRec(func(i int, kt keyType, key, value []byte) error { switch kt { - case ktVal: + case keyTypeVal: r.Put(key, value) - case ktDel: + case keyTypeDel: r.Delete(key) } + return nil }) } @@ -154,6 +158,7 @@ func (b *Batch) append(p *Batch) { b.grow(len(p.data) - batchHdrLen) b.data = append(b.data, p.data[batchHdrLen:]...) b.rLen += p.rLen + b.bLen += p.bLen } if p.sync { b.sync = true @@ -193,18 +198,19 @@ func (b *Batch) decode(prevSeq uint64, data []byte) error { return nil } -func (b *Batch) decodeRec(f func(i int, kt kType, key, value []byte)) (err error) { +func (b *Batch) decodeRec(f func(i int, kt keyType, key, value []byte) error) error { off := batchHdrLen for i := 0; i < b.rLen; i++ { if off >= len(b.data) { return newErrBatchCorrupted("invalid records length") } - kt := kType(b.data[off]) - if kt > ktVal { + kt := keyType(b.data[off]) + if kt > keyTypeVal { + panic(kt) return newErrBatchCorrupted("bad record: invalid type") } - off += 1 + off++ x, n := binary.Uvarint(b.data[off:]) off += n @@ -214,7 +220,7 @@ func (b *Batch) decodeRec(f func(i int, kt kType, key, value []byte)) (err error key := b.data[off : off+int(x)] off += int(x) var value []byte - if kt == ktVal { + if kt == keyTypeVal { x, n := binary.Uvarint(b.data[off:]) off += n if n <= 0 || off+int(x) > len(b.data) { @@ -224,16 +230,19 @@ func (b *Batch) decodeRec(f func(i int, kt kType, key, value []byte)) (err error off += int(x) } - f(i, kt, key, value) + if err := f(i, kt, key, value); err != nil { + return err + } } return nil } func (b *Batch) memReplay(to *memdb.DB) error { - return b.decodeRec(func(i int, kt kType, key, value []byte) { - ikey := newIkey(key, b.seq+uint64(i), kt) - to.Put(ikey, value) + var ikScratch []byte + return b.decodeRec(func(i int, kt keyType, key, value []byte) error { + ikScratch = makeInternalKey(ikScratch, key, b.seq+uint64(i), kt) + return to.Put(ikScratch, value) }) } @@ -245,8 +254,9 @@ func (b *Batch) memDecodeAndReplay(prevSeq uint64, data []byte, to *memdb.DB) er } func (b *Batch) revertMemReplay(to *memdb.DB) error { - return b.decodeRec(func(i int, kt kType, key, value []byte) { - ikey := newIkey(key, b.seq+uint64(i), kt) - to.Delete(ikey) + var ikScratch []byte + return b.decodeRec(func(i int, kt keyType, key, value []byte) error { + ikScratch := makeInternalKey(ikScratch, key, b.seq+uint64(i), kt) + return to.Delete(ikScratch) }) } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go b/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go index c9670de5d..a287d0e5e 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go @@ -47,17 +47,21 @@ type Cacher interface { // so the the Release method will be called once object is released. type Value interface{} -type CacheGetter struct { +// NamespaceGetter provides convenient wrapper for namespace. +type NamespaceGetter struct { Cache *Cache NS uint64 } -func (g *CacheGetter) Get(key uint64, setFunc func() (size int, value Value)) *Handle { +// Get simply calls Cache.Get() method. +func (g *NamespaceGetter) Get(key uint64, setFunc func() (size int, value Value)) *Handle { return g.Cache.Get(g.NS, key, setFunc) } // The hash tables implementation is based on: -// "Dynamic-Sized Nonblocking Hash Tables", by Yujie Liu, Kunlong Zhang, and Michael Spear. ACM Symposium on Principles of Distributed Computing, Jul 2014. +// "Dynamic-Sized Nonblocking Hash Tables", by Yujie Liu, +// Kunlong Zhang, and Michael Spear. +// ACM Symposium on Principles of Distributed Computing, Jul 2014. const ( mInitialSize = 1 << 4 @@ -610,10 +614,12 @@ func (n *Node) unrefLocked() { } } +// Handle is a 'cache handle' of a 'cache node'. type Handle struct { n unsafe.Pointer // *Node } +// Value returns the value of the 'cache node'. func (h *Handle) Value() Value { n := (*Node)(atomic.LoadPointer(&h.n)) if n != nil { @@ -622,6 +628,8 @@ func (h *Handle) Value() Value { return nil } +// Release releases this 'cache handle'. +// It is safe to call release multiple times. func (h *Handle) Release() { nPtr := atomic.LoadPointer(&h.n) if nPtr != nil && atomic.CompareAndSwapPointer(&h.n, nPtr, nil) { diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go index d33d5e9c7..248bf7c21 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go @@ -33,9 +33,9 @@ func (icmp *iComparer) Name() string { } func (icmp *iComparer) Compare(a, b []byte) int { - x := icmp.ucmp.Compare(iKey(a).ukey(), iKey(b).ukey()) + x := icmp.ucmp.Compare(internalKey(a).ukey(), internalKey(b).ukey()) if x == 0 { - if m, n := iKey(a).num(), iKey(b).num(); m > n { + if m, n := internalKey(a).num(), internalKey(b).num(); m > n { x = -1 } else if m < n { x = 1 @@ -45,13 +45,13 @@ func (icmp *iComparer) Compare(a, b []byte) int { } func (icmp *iComparer) Separator(dst, a, b []byte) []byte { - ua, ub := iKey(a).ukey(), iKey(b).ukey() + ua, ub := internalKey(a).ukey(), internalKey(b).ukey() dst = icmp.ucmp.Separator(dst, ua, ub) if dst == nil { return nil } if len(dst) < len(ua) && icmp.uCompare(ua, dst) < 0 { - dst = append(dst, kMaxNumBytes...) + dst = append(dst, keyMaxNumBytes...) } else { // Did not close possibilities that n maybe longer than len(ub). dst = append(dst, a[len(a)-8:]...) @@ -60,13 +60,13 @@ func (icmp *iComparer) Separator(dst, a, b []byte) []byte { } func (icmp *iComparer) Successor(dst, b []byte) []byte { - ub := iKey(b).ukey() + ub := internalKey(b).ukey() dst = icmp.ucmp.Successor(dst, ub) if dst == nil { return nil } if len(dst) < len(ub) && icmp.uCompare(ub, dst) < 0 { - dst = append(dst, kMaxNumBytes...) + dst = append(dst, keyMaxNumBytes...) } else { // Did not close possibilities that n maybe longer than len(ub). dst = append(dst, b[len(b)-8:]...) diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db.go b/vendor/github.com/syndtr/goleveldb/leveldb/db.go index 88a3e0db3..eb6abd0fb 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/db.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db.go @@ -36,14 +36,14 @@ type DB struct { s *session // MemDB. - memMu sync.RWMutex - memPool chan *memdb.DB - mem, frozenMem *memDB - journal *journal.Writer - journalWriter storage.Writer - journalFile storage.File - frozenJournalFile storage.File - frozenSeq uint64 + memMu sync.RWMutex + memPool chan *memdb.DB + mem, frozenMem *memDB + journal *journal.Writer + journalWriter storage.Writer + journalFd storage.FileDesc + frozenJournalFd storage.FileDesc + frozenSeq uint64 // Snapshot. snapsMu sync.Mutex @@ -61,8 +61,10 @@ type DB struct { writeDelayN int journalC chan *Batch journalAckC chan error + tr *Transaction // Compaction. + compCommitLk sync.Mutex tcompCmdC chan cCmd tcompPauseC chan chan<- struct{} mcompCmdC chan cCmd @@ -70,7 +72,8 @@ type DB struct { compPerErrC chan error compErrSetC chan error compWriteLocking bool - compStats []cStats + compStats cStats + memdbMaxLevel int // For testing. // Close. closeW sync.WaitGroup @@ -104,7 +107,6 @@ func openDB(s *session) (*DB, error) { compErrC: make(chan error), compPerErrC: make(chan error), compErrSetC: make(chan error), - compStats: make([]cStats, s.o.GetNumLevel()), // Close closeC: make(chan struct{}), } @@ -209,7 +211,7 @@ func Open(stor storage.Storage, o *opt.Options) (db *DB, err error) { // The returned DB instance is goroutine-safe. // The DB must be closed after use, by calling Close method. func OpenFile(path string, o *opt.Options) (db *DB, err error) { - stor, err := storage.OpenFile(path) + stor, err := storage.OpenFile(path, o.GetReadOnly()) if err != nil { return } @@ -259,7 +261,7 @@ func Recover(stor storage.Storage, o *opt.Options) (db *DB, err error) { // The returned DB instance is goroutine-safe. // The DB must be closed after use, by calling Close method. func RecoverFile(path string, o *opt.Options) (db *DB, err error) { - stor, err := storage.OpenFile(path) + stor, err := storage.OpenFile(path, false) if err != nil { return } @@ -278,12 +280,11 @@ func recoverTable(s *session, o *opt.Options) error { o.Strict &= ^opt.StrictReader // Get all tables and sort it by file number. - tableFiles_, err := s.getFiles(storage.TypeTable) + fds, err := s.stor.List(storage.TypeTable) if err != nil { return err } - tableFiles := files(tableFiles_) - tableFiles.sort() + sortFds(fds) var ( maxSeq uint64 @@ -296,17 +297,17 @@ func recoverTable(s *session, o *opt.Options) error { rec = &sessionRecord{} bpool = util.NewBufferPool(o.GetBlockSize() + 5) ) - buildTable := func(iter iterator.Iterator) (tmp storage.File, size int64, err error) { - tmp = s.newTemp() - writer, err := tmp.Create() + buildTable := func(iter iterator.Iterator) (tmpFd storage.FileDesc, size int64, err error) { + tmpFd = s.newTemp() + writer, err := s.stor.Create(tmpFd) if err != nil { return } defer func() { writer.Close() if err != nil { - tmp.Remove() - tmp = nil + s.stor.Remove(tmpFd) + tmpFd = storage.FileDesc{} } }() @@ -314,7 +315,7 @@ func recoverTable(s *session, o *opt.Options) error { tw := table.NewWriter(writer, o) for iter.Next() { key := iter.Key() - if validIkey(key) { + if validInternalKey(key) { err = tw.Append(key, iter.Value()) if err != nil { return @@ -338,9 +339,9 @@ func recoverTable(s *session, o *opt.Options) error { size = int64(tw.BytesLen()) return } - recoverTable := func(file storage.File) error { - s.logf("table@recovery recovering @%d", file.Num()) - reader, err := file.Open() + recoverTable := func(fd storage.FileDesc) error { + s.logf("table@recovery recovering @%d", fd.Num) + reader, err := s.stor.Open(fd) if err != nil { return err } @@ -362,7 +363,7 @@ func recoverTable(s *session, o *opt.Options) error { tgoodKey, tcorruptedKey, tcorruptedBlock int imin, imax []byte ) - tr, err := table.NewReader(reader, size, storage.NewFileInfo(file), nil, bpool, o) + tr, err := table.NewReader(reader, size, fd, nil, bpool, o) if err != nil { return err } @@ -370,7 +371,7 @@ func recoverTable(s *session, o *opt.Options) error { if itererr, ok := iter.(iterator.ErrorCallbackSetter); ok { itererr.SetErrorCallback(func(err error) { if errors.IsCorrupted(err) { - s.logf("table@recovery block corruption @%d %q", file.Num(), err) + s.logf("table@recovery block corruption @%d %q", fd.Num, err) tcorruptedBlock++ } }) @@ -379,7 +380,7 @@ func recoverTable(s *session, o *opt.Options) error { // Scan the table. for iter.Next() { key := iter.Key() - _, seq, _, kerr := parseIkey(key) + _, seq, _, kerr := parseInternalKey(key) if kerr != nil { tcorruptedKey++ continue @@ -405,23 +406,23 @@ func recoverTable(s *session, o *opt.Options) error { if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) { droppedTable++ - s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq) + s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq) return nil } if tgoodKey > 0 { if tcorruptedKey > 0 || tcorruptedBlock > 0 { // Rebuild the table. - s.logf("table@recovery rebuilding @%d", file.Num()) + s.logf("table@recovery rebuilding @%d", fd.Num) iter := tr.NewIterator(nil, nil) - tmp, newSize, err := buildTable(iter) + tmpFd, newSize, err := buildTable(iter) iter.Release() if err != nil { return err } closed = true reader.Close() - if err := file.Replace(tmp); err != nil { + if err := s.stor.Rename(tmpFd, fd); err != nil { return err } size = newSize @@ -431,30 +432,30 @@ func recoverTable(s *session, o *opt.Options) error { } recoveredKey += tgoodKey // Add table to level 0. - rec.addTable(0, file.Num(), uint64(size), imin, imax) - s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", file.Num(), tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq) + rec.addTable(0, fd.Num, size, imin, imax) + s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq) } else { droppedTable++ - s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", file.Num(), tcorruptedKey, tcorruptedBlock, size) + s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", fd.Num, tcorruptedKey, tcorruptedBlock, size) } return nil } // Recover all tables. - if len(tableFiles) > 0 { - s.logf("table@recovery F·%d", len(tableFiles)) + if len(fds) > 0 { + s.logf("table@recovery F·%d", len(fds)) // Mark file number as used. - s.markFileNum(tableFiles[len(tableFiles)-1].Num()) + s.markFileNum(fds[len(fds)-1].Num) - for _, file := range tableFiles { - if err := recoverTable(file); err != nil { + for _, fd := range fds { + if err := recoverTable(fd); err != nil { return err } } - s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(tableFiles), recoveredKey, goodKey, corruptedKey, maxSeq) + s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(fds), recoveredKey, goodKey, corruptedKey, maxSeq) } // Set sequence number. @@ -471,31 +472,31 @@ func recoverTable(s *session, o *opt.Options) error { func (db *DB) recoverJournal() error { // Get all journals and sort it by file number. - allJournalFiles, err := db.s.getFiles(storage.TypeJournal) + rawFds, err := db.s.stor.List(storage.TypeJournal) if err != nil { return err } - files(allJournalFiles).sort() + sortFds(rawFds) // Journals that will be recovered. - var recJournalFiles []storage.File - for _, jf := range allJournalFiles { - if jf.Num() >= db.s.stJournalNum || jf.Num() == db.s.stPrevJournalNum { - recJournalFiles = append(recJournalFiles, jf) + var fds []storage.FileDesc + for _, fd := range rawFds { + if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum { + fds = append(fds, fd) } } var ( - of storage.File // Obsolete file. + ofd storage.FileDesc // Obsolete file. rec = &sessionRecord{} ) // Recover journals. - if len(recJournalFiles) > 0 { - db.logf("journal@recovery F·%d", len(recJournalFiles)) + if len(fds) > 0 { + db.logf("journal@recovery F·%d", len(fds)) // Mark file number as used. - db.s.markFileNum(recJournalFiles[len(recJournalFiles)-1].Num()) + db.s.markFileNum(fds[len(fds)-1].Num) var ( // Options. @@ -509,31 +510,31 @@ func (db *DB) recoverJournal() error { batch = &Batch{} ) - for _, jf := range recJournalFiles { - db.logf("journal@recovery recovering @%d", jf.Num()) + for _, fd := range fds { + db.logf("journal@recovery recovering @%d", fd.Num) - fr, err := jf.Open() + fr, err := db.s.stor.Open(fd) if err != nil { return err } // Create or reset journal reader instance. if jr == nil { - jr = journal.NewReader(fr, dropper{db.s, jf}, strict, checksum) + jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum) } else { - jr.Reset(fr, dropper{db.s, jf}, strict, checksum) + jr.Reset(fr, dropper{db.s, fd}, strict, checksum) } // Flush memdb and remove obsolete journal file. - if of != nil { + if !ofd.Nil() { if mdb.Len() > 0 { - if _, err := db.s.flushMemdb(rec, mdb, -1); err != nil { + if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil { fr.Close() return err } } - rec.setJournalNum(jf.Num()) + rec.setJournalNum(fd.Num) rec.setSeqNum(db.seq) if err := db.s.commit(rec); err != nil { fr.Close() @@ -541,8 +542,8 @@ func (db *DB) recoverJournal() error { } rec.resetAddedTables() - of.Remove() - of = nil + db.s.stor.Remove(ofd) + ofd = storage.FileDesc{} } // Replay journal to memdb. @@ -555,7 +556,7 @@ func (db *DB) recoverJournal() error { } fr.Close() - return errors.SetFile(err, jf) + return errors.SetFd(err, fd) } buf.Reset() @@ -566,7 +567,7 @@ func (db *DB) recoverJournal() error { } fr.Close() - return errors.SetFile(err, jf) + return errors.SetFd(err, fd) } if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mdb); err != nil { if !strict && errors.IsCorrupted(err) { @@ -576,7 +577,7 @@ func (db *DB) recoverJournal() error { } fr.Close() - return errors.SetFile(err, jf) + return errors.SetFd(err, fd) } // Save sequence number. @@ -594,7 +595,7 @@ func (db *DB) recoverJournal() error { } fr.Close() - of = jf + ofd = fd } // Flush the last memdb. @@ -611,7 +612,7 @@ func (db *DB) recoverJournal() error { } // Commit. - rec.setJournalNum(db.journalFile.Num()) + rec.setJournalNum(db.journalFd.Num) rec.setSeqNum(db.seq) if err := db.s.commit(rec); err != nil { // Close journal on error. @@ -623,8 +624,8 @@ func (db *DB) recoverJournal() error { } // Remove the last obsolete journal file. - if of != nil { - of.Remove() + if !ofd.Nil() { + db.s.stor.Remove(ofd) } return nil @@ -632,17 +633,17 @@ func (db *DB) recoverJournal() error { func (db *DB) recoverJournalRO() error { // Get all journals and sort it by file number. - allJournalFiles, err := db.s.getFiles(storage.TypeJournal) + rawFds, err := db.s.stor.List(storage.TypeJournal) if err != nil { return err } - files(allJournalFiles).sort() + sortFds(rawFds) // Journals that will be recovered. - var recJournalFiles []storage.File - for _, jf := range allJournalFiles { - if jf.Num() >= db.s.stJournalNum || jf.Num() == db.s.stPrevJournalNum { - recJournalFiles = append(recJournalFiles, jf) + var fds []storage.FileDesc + for _, fd := range rawFds { + if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum { + fds = append(fds, fd) } } @@ -656,8 +657,8 @@ func (db *DB) recoverJournalRO() error { ) // Recover journals. - if len(recJournalFiles) > 0 { - db.logf("journal@recovery RO·Mode F·%d", len(recJournalFiles)) + if len(fds) > 0 { + db.logf("journal@recovery RO·Mode F·%d", len(fds)) var ( jr *journal.Reader @@ -665,19 +666,19 @@ func (db *DB) recoverJournalRO() error { batch = &Batch{} ) - for _, jf := range recJournalFiles { - db.logf("journal@recovery recovering @%d", jf.Num()) + for _, fd := range fds { + db.logf("journal@recovery recovering @%d", fd.Num) - fr, err := jf.Open() + fr, err := db.s.stor.Open(fd) if err != nil { return err } // Create or reset journal reader instance. if jr == nil { - jr = journal.NewReader(fr, dropper{db.s, jf}, strict, checksum) + jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum) } else { - jr.Reset(fr, dropper{db.s, jf}, strict, checksum) + jr.Reset(fr, dropper{db.s, fd}, strict, checksum) } // Replay journal to memdb. @@ -689,7 +690,7 @@ func (db *DB) recoverJournalRO() error { } fr.Close() - return errors.SetFile(err, jf) + return errors.SetFd(err, fd) } buf.Reset() @@ -700,7 +701,7 @@ func (db *DB) recoverJournalRO() error { } fr.Close() - return errors.SetFile(err, jf) + return errors.SetFd(err, fd) } if err := batch.memDecodeAndReplay(db.seq, buf.Bytes(), mdb); err != nil { if !strict && errors.IsCorrupted(err) { @@ -710,7 +711,7 @@ func (db *DB) recoverJournalRO() error { } fr.Close() - return errors.SetFile(err, jf) + return errors.SetFd(err, fd) } // Save sequence number. @@ -727,46 +728,35 @@ func (db *DB) recoverJournalRO() error { return nil } -func (db *DB) get(key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) { - ikey := newIkey(key, seq, ktSeek) - - em, fm := db.getMems() - for _, m := range [...]*memDB{em, fm} { - if m == nil { - continue +func memGet(mdb *memdb.DB, ikey internalKey, icmp *iComparer) (ok bool, mv []byte, err error) { + mk, mv, err := mdb.Find(ikey) + if err == nil { + ukey, _, kt, kerr := parseInternalKey(mk) + if kerr != nil { + // Shouldn't have had happen. + panic(kerr) } - defer m.decref() + if icmp.uCompare(ukey, ikey.ukey()) == 0 { + if kt == keyTypeDel { + return true, nil, ErrNotFound + } + return true, mv, nil - mk, mv, me := m.Find(ikey) - if me == nil { - ukey, _, kt, kerr := parseIkey(mk) - if kerr != nil { - // Shouldn't have had happen. - panic(kerr) - } - if db.s.icmp.uCompare(ukey, key) == 0 { - if kt == ktDel { - return nil, ErrNotFound - } - return append([]byte{}, mv...), nil - } - } else if me != ErrNotFound { - return nil, me } - } - - v := db.s.version() - value, cSched, err := v.get(ikey, ro, false) - v.release() - if cSched { - // Trigger table compaction. - db.compSendTrigger(db.tcompCmdC) + } else if err != ErrNotFound { + return true, nil, err } return } -func (db *DB) has(key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) { - ikey := newIkey(key, seq, ktSeek) +func (db *DB) get(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) { + ikey := makeInternalKey(nil, key, seq, keyTypeSeek) + + if auxm != nil { + if ok, mv, me := memGet(auxm, ikey, db.s.icmp); ok { + return append([]byte{}, mv...), me + } + } em, fm := db.getMems() for _, m := range [...]*memDB{em, fm} { @@ -775,30 +765,55 @@ func (db *DB) has(key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err er } defer m.decref() - mk, _, me := m.Find(ikey) - if me == nil { - ukey, _, kt, kerr := parseIkey(mk) - if kerr != nil { - // Shouldn't have had happen. - panic(kerr) - } - if db.s.icmp.uCompare(ukey, key) == 0 { - if kt == ktDel { - return false, nil - } - return true, nil - } - } else if me != ErrNotFound { - return false, me + if ok, mv, me := memGet(m.DB, ikey, db.s.icmp); ok { + return append([]byte{}, mv...), me } } v := db.s.version() - _, cSched, err := v.get(ikey, ro, true) + value, cSched, err := v.get(auxt, ikey, ro, false) v.release() if cSched { // Trigger table compaction. - db.compSendTrigger(db.tcompCmdC) + db.compTrigger(db.tcompCmdC) + } + return +} + +func nilIfNotFound(err error) error { + if err == ErrNotFound { + return nil + } + return err +} + +func (db *DB) has(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) { + ikey := makeInternalKey(nil, key, seq, keyTypeSeek) + + if auxm != nil { + if ok, _, me := memGet(auxm, ikey, db.s.icmp); ok { + return me == nil, nilIfNotFound(me) + } + } + + em, fm := db.getMems() + for _, m := range [...]*memDB{em, fm} { + if m == nil { + continue + } + defer m.decref() + + if ok, _, me := memGet(m.DB, ikey, db.s.icmp); ok { + return me == nil, nilIfNotFound(me) + } + } + + v := db.s.version() + _, cSched, err := v.get(auxt, ikey, ro, true) + v.release() + if cSched { + // Trigger table compaction. + db.compTrigger(db.tcompCmdC) } if err == nil { ret = true @@ -822,7 +837,7 @@ func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) { se := db.acquireSnapshot() defer db.releaseSnapshot(se) - return db.get(key, se.seq, ro) + return db.get(nil, nil, key, se.seq, ro) } // Has returns true if the DB does contains the given key. @@ -836,11 +851,11 @@ func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) { se := db.acquireSnapshot() defer db.releaseSnapshot(se) - return db.has(key, se.seq, ro) + return db.has(nil, nil, key, se.seq, ro) } // NewIterator returns an iterator for the latest snapshot of the -// uderlying DB. +// underlying DB. // The returned iterator is not goroutine-safe, but it is safe to use // multiple iterators concurrently, with each in a dedicated goroutine. // It is also safe to use an iterator concurrently with modifying its @@ -864,7 +879,7 @@ func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Itera defer db.releaseSnapshot(se) // Iterator holds 'version' lock, 'version' is immutable so snapshot // can be released after iterator created. - return db.newIterator(se.seq, slice, ro) + return db.newIterator(nil, nil, se.seq, slice, ro) } // GetSnapshot returns a latest snapshot of the underlying DB. A snapshot @@ -920,7 +935,7 @@ func (db *DB) GetProperty(name string) (value string, err error) { var level uint var rest string n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest) - if n != 1 || int(level) >= db.s.o.GetNumLevel() { + if n != 1 { err = ErrNotFound } else { value = fmt.Sprint(v.tLen(int(level))) @@ -929,8 +944,8 @@ func (db *DB) GetProperty(name string) (value string, err error) { value = "Compactions\n" + " Level | Tables | Size(MB) | Time(sec) | Read(MB) | Write(MB)\n" + "-------+------------+---------------+---------------+---------------+---------------\n" - for level, tables := range v.tables { - duration, read, write := db.compStats[level].get() + for level, tables := range v.levels { + duration, read, write := db.compStats.getStat(level) if len(tables) == 0 && duration == 0 { continue } @@ -939,10 +954,10 @@ func (db *DB) GetProperty(name string) (value string, err error) { float64(read)/1048576.0, float64(write)/1048576.0) } case p == "sstables": - for level, tables := range v.tables { + for level, tables := range v.levels { value += fmt.Sprintf("--- level %d ---\n", level) for _, t := range tables { - value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.file.Num(), t.size, t.imin, t.imax) + value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.fd.Num, t.size, t.imin, t.imax) } } case p == "blockpool": @@ -982,8 +997,8 @@ func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) { sizes := make(Sizes, 0, len(ranges)) for _, r := range ranges { - imin := newIkey(r.Start, kMaxSeq, ktSeek) - imax := newIkey(r.Limit, kMaxSeq, ktSeek) + imin := makeInternalKey(nil, r.Start, keyMaxSeq, keyTypeSeek) + imax := makeInternalKey(nil, r.Limit, keyMaxSeq, keyTypeSeek) start, err := v.offsetOf(imin) if err != nil { return nil, err @@ -992,7 +1007,7 @@ func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) { if err != nil { return nil, err } - var size uint64 + var size int64 if limit >= start { size = limit - start } @@ -1002,8 +1017,8 @@ func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) { return sizes, nil } -// Close closes the DB. This will also releases any outstanding snapshot and -// abort any in-flight compaction. +// Close closes the DB. This will also releases any outstanding snapshot, +// abort any in-flight compaction and discard open transaction. // // It is not safe to close a DB until all outstanding iterators are released. // It is valid to call Close multiple times. Other methods should not be @@ -1032,11 +1047,18 @@ func (db *DB) Close() error { // Signal all goroutines. close(db.closeC) + // Discard open transaction. + if db.tr != nil { + db.tr.Discard() + } + + // Acquire writer lock. + db.writeLockC <- struct{}{} + // Wait for all gorotines to exit. db.closeW.Wait() - // Lock writer and closes journal. - db.writeLockC <- struct{}{} + // Closes journal. if db.journal != nil { db.journal.Close() db.journalWriter.Close() @@ -1063,8 +1085,6 @@ func (db *DB) Close() error { db.frozenMem = nil db.journal = nil db.journalWriter = nil - db.journalFile = nil - db.frozenJournalFile = nil db.closer = nil return err diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go index 26003106e..659f00dc6 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go @@ -12,55 +12,76 @@ import ( "github.com/syndtr/goleveldb/leveldb/errors" "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" ) var ( errCompactionTransactExiting = errors.New("leveldb: compaction transact exiting") ) -type cStats struct { - sync.Mutex +type cStat struct { duration time.Duration - read uint64 - write uint64 + read int64 + write int64 } -func (p *cStats) add(n *cStatsStaging) { - p.Lock() +func (p *cStat) add(n *cStatStaging) { p.duration += n.duration p.read += n.read p.write += n.write - p.Unlock() } -func (p *cStats) get() (duration time.Duration, read, write uint64) { - p.Lock() - defer p.Unlock() +func (p *cStat) get() (duration time.Duration, read, write int64) { return p.duration, p.read, p.write } -type cStatsStaging struct { +type cStatStaging struct { start time.Time duration time.Duration on bool - read uint64 - write uint64 + read int64 + write int64 } -func (p *cStatsStaging) startTimer() { +func (p *cStatStaging) startTimer() { if !p.on { p.start = time.Now() p.on = true } } -func (p *cStatsStaging) stopTimer() { +func (p *cStatStaging) stopTimer() { if p.on { p.duration += time.Since(p.start) p.on = false } } +type cStats struct { + lk sync.Mutex + stats []cStat +} + +func (p *cStats) addStat(level int, n *cStatStaging) { + p.lk.Lock() + if level >= len(p.stats) { + newStats := make([]cStat, level+1) + copy(newStats, p.stats) + p.stats = newStats + } + p.stats[level].add(n) + p.lk.Unlock() +} + +func (p *cStats) getStat(level int) (duration time.Duration, read, write int64) { + p.lk.Lock() + defer p.lk.Unlock() + if level < len(p.stats) { + return p.stats[level].get() + } + return +} + func (db *DB) compactionError() { var err error noerr: @@ -151,7 +172,7 @@ func (db *DB) compactionTransact(name string, t compactionTransactInterface) { disableBackoff = db.s.o.GetDisableCompactionBackoff() ) for n := 0; ; n++ { - // Check wether the DB is closed. + // Check whether the DB is closed. if db.isClosed() { db.logf("%s exiting", name) db.compactionExitTransact() @@ -235,6 +256,14 @@ func (db *DB) compactionExitTransact() { panic(errCompactionTransactExiting) } +func (db *DB) compactionCommit(name string, rec *sessionRecord) { + db.compCommitLk.Lock() + defer db.compCommitLk.Unlock() // Defer is necessary. + db.compactionTransactFunc(name+"@commit", func(cnt *compactionTransactCounter) error { + return db.s.commit(rec) + }, nil) +} + func (db *DB) memCompaction() { mdb := db.getFrozenMem() if mdb == nil { @@ -265,41 +294,40 @@ func (db *DB) memCompaction() { var ( rec = &sessionRecord{} - stats = &cStatsStaging{} + stats = &cStatStaging{} flushLevel int ) + // Generate tables. db.compactionTransactFunc("memdb@flush", func(cnt *compactionTransactCounter) (err error) { stats.startTimer() - flushLevel, err = db.s.flushMemdb(rec, mdb.DB, -1) + flushLevel, err = db.s.flushMemdb(rec, mdb.DB, db.memdbMaxLevel) stats.stopTimer() return }, func() error { for _, r := range rec.addedTables { db.logf("memdb@flush revert @%d", r.num) - f := db.s.getTableFile(r.num) - if err := f.Remove(); err != nil { + if err := db.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: r.num}); err != nil { return err } } return nil }) - db.compactionTransactFunc("memdb@commit", func(cnt *compactionTransactCounter) (err error) { - stats.startTimer() - rec.setJournalNum(db.journalFile.Num()) - rec.setSeqNum(db.frozenSeq) - err = db.s.commit(rec) - stats.stopTimer() - return - }, nil) + rec.setJournalNum(db.journalFd.Num) + rec.setSeqNum(db.frozenSeq) + + // Commit. + stats.startTimer() + db.compactionCommit("memdb", rec) + stats.stopTimer() db.logf("memdb@flush committed F·%d T·%v", len(rec.addedTables), stats.duration) for _, r := range rec.addedTables { stats.write += r.size } - db.compStats[flushLevel].add(stats) + db.compStats.addStat(flushLevel, stats) // Drop frozen memdb. db.dropFrozenMem() @@ -315,7 +343,7 @@ func (db *DB) memCompaction() { } // Trigger table compaction. - db.compSendTrigger(db.tcompCmdC) + db.compTrigger(db.tcompCmdC) } type tableCompactionBuilder struct { @@ -323,7 +351,7 @@ type tableCompactionBuilder struct { s *session c *compaction rec *sessionRecord - stat0, stat1 *cStatsStaging + stat0, stat1 *cStatStaging snapHasLastUkey bool snapLastUkey []byte @@ -377,9 +405,9 @@ func (b *tableCompactionBuilder) flush() error { if err != nil { return err } - b.rec.addTableFile(b.c.level+1, t) + b.rec.addTableFile(b.c.sourceLevel+1, t) b.stat1.write += t.size - b.s.logf("table@build created L%d@%d N·%d S·%s %q:%q", b.c.level+1, t.file.Num(), b.tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax) + b.s.logf("table@build created L%d@%d N·%d S·%s %q:%q", b.c.sourceLevel+1, t.fd.Num, b.tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax) b.tw = nil return nil } @@ -424,7 +452,7 @@ func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error { } ikey := iter.Key() - ukey, seq, kt, kerr := parseIkey(ikey) + ukey, seq, kt, kerr := parseInternalKey(ikey) if kerr == nil { shouldStop := !resumed && b.c.shouldStopBefore(ikey) @@ -450,14 +478,14 @@ func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error { hasLastUkey = true lastUkey = append(lastUkey[:0], ukey...) - lastSeq = kMaxSeq + lastSeq = keyMaxSeq } switch { case lastSeq <= b.minSeq: // Dropped because newer entry for same user key exist fallthrough // (A) - case kt == ktDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey): + case kt == keyTypeDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey): // For this user key: // (1) there is no data in higher levels // (2) data in lower levels will have larger seq numbers @@ -479,7 +507,7 @@ func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error { // Don't drop corrupted keys. hasLastUkey = false lastUkey = lastUkey[:0] - lastSeq = kMaxSeq + lastSeq = keyMaxSeq b.kerrCnt++ } @@ -502,8 +530,7 @@ func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error { func (b *tableCompactionBuilder) revert() error { for _, at := range b.rec.addedTables { b.s.logf("table@build revert @%d", at.num) - f := b.s.getTableFile(at.num) - if err := f.Remove(); err != nil { + if err := b.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: at.num}); err != nil { return err } } @@ -514,30 +541,28 @@ func (db *DB) tableCompaction(c *compaction, noTrivial bool) { defer c.release() rec := &sessionRecord{} - rec.addCompPtr(c.level, c.imax) + rec.addCompPtr(c.sourceLevel, c.imax) if !noTrivial && c.trivial() { - t := c.tables[0][0] - db.logf("table@move L%d@%d -> L%d", c.level, t.file.Num(), c.level+1) - rec.delTable(c.level, t.file.Num()) - rec.addTableFile(c.level+1, t) - db.compactionTransactFunc("table@move", func(cnt *compactionTransactCounter) (err error) { - return db.s.commit(rec) - }, nil) + t := c.levels[0][0] + db.logf("table@move L%d@%d -> L%d", c.sourceLevel, t.fd.Num, c.sourceLevel+1) + rec.delTable(c.sourceLevel, t.fd.Num) + rec.addTableFile(c.sourceLevel+1, t) + db.compactionCommit("table-move", rec) return } - var stats [2]cStatsStaging - for i, tables := range c.tables { + var stats [2]cStatStaging + for i, tables := range c.levels { for _, t := range tables { stats[i].read += t.size // Insert deleted tables into record - rec.delTable(c.level+i, t.file.Num()) + rec.delTable(c.sourceLevel+i, t.fd.Num) } } sourceSize := int(stats[0].read + stats[1].read) minSeq := db.minSeq() - db.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.level, len(c.tables[0]), c.level+1, len(c.tables[1]), shortenb(sourceSize), minSeq) + db.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.sourceLevel, len(c.levels[0]), c.sourceLevel+1, len(c.levels[1]), shortenb(sourceSize), minSeq) b := &tableCompactionBuilder{ db: db, @@ -547,49 +572,60 @@ func (db *DB) tableCompaction(c *compaction, noTrivial bool) { stat1: &stats[1], minSeq: minSeq, strict: db.s.o.GetStrict(opt.StrictCompaction), - tableSize: db.s.o.GetCompactionTableSize(c.level + 1), + tableSize: db.s.o.GetCompactionTableSize(c.sourceLevel + 1), } db.compactionTransact("table@build", b) - // Commit changes - db.compactionTransactFunc("table@commit", func(cnt *compactionTransactCounter) (err error) { - stats[1].startTimer() - defer stats[1].stopTimer() - return db.s.commit(rec) - }, nil) + // Commit. + stats[1].startTimer() + db.compactionCommit("table", rec) + stats[1].stopTimer() resultSize := int(stats[1].write) db.logf("table@compaction committed F%s S%s Ke·%d D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), b.kerrCnt, b.dropCnt, stats[1].duration) // Save compaction stats for i := range stats { - db.compStats[c.level+1].add(&stats[i]) + db.compStats.addStat(c.sourceLevel+1, &stats[i]) } } -func (db *DB) tableRangeCompaction(level int, umin, umax []byte) { +func (db *DB) tableRangeCompaction(level int, umin, umax []byte) error { db.logf("table@compaction range L%d %q:%q", level, umin, umax) - if level >= 0 { - if c := db.s.getCompactionRange(level, umin, umax); c != nil { + if c := db.s.getCompactionRange(level, umin, umax, true); c != nil { db.tableCompaction(c, true) } } else { - v := db.s.version() - m := 1 - for i, t := range v.tables[1:] { - if t.overlaps(db.s.icmp, umin, umax, false) { - m = i + 1 - } - } - v.release() + // Retry until nothing to compact. + for { + compacted := false - for level := 0; level < m; level++ { - if c := db.s.getCompactionRange(level, umin, umax); c != nil { - db.tableCompaction(c, true) + // Scan for maximum level with overlapped tables. + v := db.s.version() + m := 1 + for i := m; i < len(v.levels); i++ { + tables := v.levels[i] + if tables.overlaps(db.s.icmp, umin, umax, false) { + m = i + } + } + v.release() + + for level := 0; level < m; level++ { + if c := db.s.getCompactionRange(level, umin, umax, false); c != nil { + db.tableCompaction(c, true) + compacted = true + } + } + + if !compacted { + break } } } + + return nil } func (db *DB) tableAutoCompaction() { @@ -616,11 +652,11 @@ type cCmd interface { ack(err error) } -type cIdle struct { +type cAuto struct { ackC chan<- error } -func (r cIdle) ack(err error) { +func (r cAuto) ack(err error) { if r.ackC != nil { defer func() { recover() @@ -644,13 +680,21 @@ func (r cRange) ack(err error) { } } -// This will trigger auto compation and/or wait for all compaction to be done. -func (db *DB) compSendIdle(compC chan<- cCmd) (err error) { +// This will trigger auto compaction but will not wait for it. +func (db *DB) compTrigger(compC chan<- cCmd) { + select { + case compC <- cAuto{}: + default: + } +} + +// This will trigger auto compaction and/or wait for all compaction to be done. +func (db *DB) compTriggerWait(compC chan<- cCmd) (err error) { ch := make(chan error) defer close(ch) // Send cmd. select { - case compC <- cIdle{ch}: + case compC <- cAuto{ch}: case err = <-db.compErrC: return case _, _ = <-db.closeC: @@ -666,16 +710,8 @@ func (db *DB) compSendIdle(compC chan<- cCmd) (err error) { return err } -// This will trigger auto compaction but will not wait for it. -func (db *DB) compSendTrigger(compC chan<- cCmd) { - select { - case compC <- cIdle{}: - default: - } -} - // Send range compaction request. -func (db *DB) compSendRange(compC chan<- cCmd, level int, min, max []byte) (err error) { +func (db *DB) compTriggerRange(compC chan<- cCmd, level int, min, max []byte) (err error) { ch := make(chan error) defer close(ch) // Send cmd. @@ -715,7 +751,7 @@ func (db *DB) mCompaction() { select { case x = <-db.mcompCmdC: switch x.(type) { - case cIdle: + case cAuto: db.memCompaction() x.ack(nil) x = nil @@ -776,11 +812,10 @@ func (db *DB) tCompaction() { } if x != nil { switch cmd := x.(type) { - case cIdle: + case cAuto: ackQ = append(ackQ, x) case cRange: - db.tableRangeCompaction(cmd.level, cmd.min, cmd.max) - x.ack(nil) + x.ack(db.tableRangeCompaction(cmd.level, cmd.min, cmd.max)) default: panic("leveldb: unknown command") } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go index 656ae9856..03c24cdab 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go @@ -19,7 +19,7 @@ import ( ) var ( - errInvalidIkey = errors.New("leveldb: Iterator: invalid internal key") + errInvalidInternalKey = errors.New("leveldb: Iterator: invalid internal key") ) type memdbReleaser struct { @@ -33,40 +33,50 @@ func (mr *memdbReleaser) Release() { }) } -func (db *DB) newRawIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { +func (db *DB) newRawIterator(auxm *memDB, auxt tFiles, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + strict := opt.GetStrict(db.s.o.Options, ro, opt.StrictReader) em, fm := db.getMems() v := db.s.version() - ti := v.getIterators(slice, ro) - n := len(ti) + 2 - i := make([]iterator.Iterator, 0, n) + tableIts := v.getIterators(slice, ro) + n := len(tableIts) + len(auxt) + 3 + its := make([]iterator.Iterator, 0, n) + + if auxm != nil { + ami := auxm.NewIterator(slice) + ami.SetReleaser(&memdbReleaser{m: auxm}) + its = append(its, ami) + } + for _, t := range auxt { + its = append(its, v.s.tops.newIterator(t, slice, ro)) + } + emi := em.NewIterator(slice) emi.SetReleaser(&memdbReleaser{m: em}) - i = append(i, emi) + its = append(its, emi) if fm != nil { fmi := fm.NewIterator(slice) fmi.SetReleaser(&memdbReleaser{m: fm}) - i = append(i, fmi) + its = append(its, fmi) } - i = append(i, ti...) - strict := opt.GetStrict(db.s.o.Options, ro, opt.StrictReader) - mi := iterator.NewMergedIterator(i, db.s.icmp, strict) + its = append(its, tableIts...) + mi := iterator.NewMergedIterator(its, db.s.icmp, strict) mi.SetReleaser(&versionReleaser{v: v}) return mi } -func (db *DB) newIterator(seq uint64, slice *util.Range, ro *opt.ReadOptions) *dbIter { +func (db *DB) newIterator(auxm *memDB, auxt tFiles, seq uint64, slice *util.Range, ro *opt.ReadOptions) *dbIter { var islice *util.Range if slice != nil { islice = &util.Range{} if slice.Start != nil { - islice.Start = newIkey(slice.Start, kMaxSeq, ktSeek) + islice.Start = makeInternalKey(nil, slice.Start, keyMaxSeq, keyTypeSeek) } if slice.Limit != nil { - islice.Limit = newIkey(slice.Limit, kMaxSeq, ktSeek) + islice.Limit = makeInternalKey(nil, slice.Limit, keyMaxSeq, keyTypeSeek) } } - rawIter := db.newRawIterator(islice, ro) + rawIter := db.newRawIterator(auxm, auxt, islice, ro) iter := &dbIter{ db: db, icmp: db.s.icmp, @@ -177,7 +187,7 @@ func (i *dbIter) Seek(key []byte) bool { return false } - ikey := newIkey(key, i.seq, ktSeek) + ikey := makeInternalKey(nil, key, i.seq, keyTypeSeek) if i.iter.Seek(ikey) { i.dir = dirSOI return i.next() @@ -189,15 +199,15 @@ func (i *dbIter) Seek(key []byte) bool { func (i *dbIter) next() bool { for { - if ukey, seq, kt, kerr := parseIkey(i.iter.Key()); kerr == nil { + if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil { i.sampleSeek() if seq <= i.seq { switch kt { - case ktDel: + case keyTypeDel: // Skip deleted key. i.key = append(i.key[:0], ukey...) i.dir = dirForward - case ktVal: + case keyTypeVal: if i.dir == dirSOI || i.icmp.uCompare(ukey, i.key) > 0 { i.key = append(i.key[:0], ukey...) i.value = append(i.value[:0], i.iter.Value()...) @@ -240,13 +250,13 @@ func (i *dbIter) prev() bool { del := true if i.iter.Valid() { for { - if ukey, seq, kt, kerr := parseIkey(i.iter.Key()); kerr == nil { + if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil { i.sampleSeek() if seq <= i.seq { if !del && i.icmp.uCompare(ukey, i.key) < 0 { return true } - del = (kt == ktDel) + del = (kt == keyTypeDel) if !del { i.key = append(i.key[:0], ukey...) i.value = append(i.value[:0], i.iter.Value()...) @@ -282,7 +292,7 @@ func (i *dbIter) Prev() bool { return i.Last() case dirForward: for i.iter.Prev() { - if ukey, _, _, kerr := parseIkey(i.iter.Key()); kerr == nil { + if ukey, _, _, kerr := parseInternalKey(i.iter.Key()); kerr == nil { i.sampleSeek() if i.icmp.uCompare(ukey, i.key) < 0 { goto cont diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go index 0372848ff..977f65ba5 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go @@ -110,7 +110,7 @@ func (snap *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err er err = ErrSnapshotReleased return } - return snap.db.get(key, snap.elem.seq, ro) + return snap.db.get(nil, nil, key, snap.elem.seq, ro) } // Has returns true if the DB does contains the given key. @@ -127,10 +127,10 @@ func (snap *Snapshot) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) err = ErrSnapshotReleased return } - return snap.db.has(key, snap.elem.seq, ro) + return snap.db.has(nil, nil, key, snap.elem.seq, ro) } -// NewIterator returns an iterator for the snapshot of the uderlying DB. +// NewIterator returns an iterator for the snapshot of the underlying DB. // The returned iterator is not goroutine-safe, but it is safe to use // multiple iterators concurrently, with each in a dedicated goroutine. // It is also safe to use an iterator concurrently with modifying its @@ -158,7 +158,7 @@ func (snap *Snapshot) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterat } // Since iterator already hold version ref, it doesn't need to // hold snapshot ref. - return snap.db.newIterator(snap.elem.seq, slice, ro) + return snap.db.newIterator(nil, nil, snap.elem.seq, slice, ro) } // Release releases the snapshot. This will not release any returned diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go index 24671dd39..40f454da1 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go @@ -12,6 +12,7 @@ import ( "github.com/syndtr/goleveldb/leveldb/journal" "github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/syndtr/goleveldb/leveldb/storage" ) type memDB struct { @@ -20,6 +21,10 @@ type memDB struct { ref int32 } +func (m *memDB) getref() int32 { + return atomic.LoadInt32(&m.ref) +} + func (m *memDB) incref() { atomic.AddInt32(&m.ref, 1) } @@ -48,11 +53,15 @@ func (db *DB) addSeq(delta uint64) { atomic.AddUint64(&db.seq, delta) } -func (db *DB) sampleSeek(ikey iKey) { +func (db *DB) setSeq(seq uint64) { + atomic.StoreUint64(&db.seq, seq) +} + +func (db *DB) sampleSeek(ikey internalKey) { v := db.s.version() if v.sampleSeek(ikey) { // Trigger table compaction. - db.compSendTrigger(db.tcompCmdC) + db.compTrigger(db.tcompCmdC) } v.release() } @@ -67,12 +76,18 @@ func (db *DB) mpoolPut(mem *memdb.DB) { } } -func (db *DB) mpoolGet() *memdb.DB { +func (db *DB) mpoolGet(n int) *memDB { + var mdb *memdb.DB select { - case mem := <-db.memPool: - return mem + case mdb = <-db.memPool: default: - return nil + } + if mdb == nil || mdb.Capacity() < n { + mdb = memdb.New(db.s.icmp, maxInt(db.s.o.GetWriteBuffer(), n)) + } + return &memDB{ + db: db, + DB: mdb, } } @@ -95,11 +110,10 @@ func (db *DB) mpoolDrain() { // Create new memdb and froze the old one; need external synchronization. // newMem only called synchronously by the writer. func (db *DB) newMem(n int) (mem *memDB, err error) { - num := db.s.allocFileNum() - file := db.s.getJournalFile(num) - w, err := file.Create() + fd := storage.FileDesc{Type: storage.TypeJournal, Num: db.s.allocFileNum()} + w, err := db.s.stor.Create(fd) if err != nil { - db.s.reuseFileNum(num) + db.s.reuseFileNum(fd.Num) return } @@ -115,20 +129,14 @@ func (db *DB) newMem(n int) (mem *memDB, err error) { } else { db.journal.Reset(w) db.journalWriter.Close() - db.frozenJournalFile = db.journalFile + db.frozenJournalFd = db.journalFd } db.journalWriter = w - db.journalFile = file + db.journalFd = fd db.frozenMem = db.mem - mdb := db.mpoolGet() - if mdb == nil || mdb.Capacity() < n { - mdb = memdb.New(db.s.icmp, maxInt(db.s.o.GetWriteBuffer(), n)) - } - mem = &memDB{ - db: db, - DB: mdb, - ref: 2, - } + mem = db.mpoolGet(n) + mem.incref() // for self + mem.incref() // for caller db.mem = mem // The seq only incremented by the writer. And whoever called newMem // should hold write lock, so no need additional synchronization here. @@ -181,12 +189,12 @@ func (db *DB) getFrozenMem() *memDB { // Drop frozen memdb; assume that frozen memdb isn't nil. func (db *DB) dropFrozenMem() { db.memMu.Lock() - if err := db.frozenJournalFile.Remove(); err != nil { - db.logf("journal@remove removing @%d %q", db.frozenJournalFile.Num(), err) + if err := db.s.stor.Remove(db.frozenJournalFd); err != nil { + db.logf("journal@remove removing @%d %q", db.frozenJournalFd.Num, err) } else { - db.logf("journal@remove removed @%d", db.frozenJournalFile.Num()) + db.logf("journal@remove removed @%d", db.frozenJournalFd.Num) } - db.frozenJournalFile = nil + db.frozenJournalFd = storage.FileDesc{} db.frozenMem.decref() db.frozenMem = nil db.memMu.Unlock() diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go new file mode 100644 index 000000000..fca88037b --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go @@ -0,0 +1,289 @@ +// Copyright (c) 2016, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + "sync" + "time" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +var errTransactionDone = errors.New("leveldb: transaction already closed") + +// Transaction is the transaction handle. +type Transaction struct { + db *DB + lk sync.RWMutex + seq uint64 + mem *memDB + tables tFiles + ikScratch []byte + rec sessionRecord + stats cStatStaging + closed bool +} + +// Get gets the value for the given key. It returns ErrNotFound if the +// DB does not contains the key. +// +// The returned slice is its own copy, it is safe to modify the contents +// of the returned slice. +// It is safe to modify the contents of the argument after Get returns. +func (tr *Transaction) Get(key []byte, ro *opt.ReadOptions) ([]byte, error) { + tr.lk.RLock() + defer tr.lk.RUnlock() + if tr.closed { + return nil, errTransactionDone + } + return tr.db.get(tr.mem.DB, tr.tables, key, tr.seq, ro) +} + +// Has returns true if the DB does contains the given key. +// +// It is safe to modify the contents of the argument after Has returns. +func (tr *Transaction) Has(key []byte, ro *opt.ReadOptions) (bool, error) { + tr.lk.RLock() + defer tr.lk.RUnlock() + if tr.closed { + return false, errTransactionDone + } + return tr.db.has(tr.mem.DB, tr.tables, key, tr.seq, ro) +} + +// NewIterator returns an iterator for the latest snapshot of the transaction. +// The returned iterator is not goroutine-safe, but it is safe to use multiple +// iterators concurrently, with each in a dedicated goroutine. +// It is also safe to use an iterator concurrently while writes to the +// transaction. The resultant key/value pairs are guaranteed to be consistent. +// +// Slice allows slicing the iterator to only contains keys in the given +// range. A nil Range.Start is treated as a key before all keys in the +// DB. And a nil Range.Limit is treated as a key after all keys in +// the DB. +// +// The iterator must be released after use, by calling Release method. +// +// Also read Iterator documentation of the leveldb/iterator package. +func (tr *Transaction) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + tr.lk.RLock() + defer tr.lk.RUnlock() + if tr.closed { + return iterator.NewEmptyIterator(errTransactionDone) + } + tr.mem.incref() + return tr.db.newIterator(tr.mem, tr.tables, tr.seq, slice, ro) +} + +func (tr *Transaction) flush() error { + // Flush memdb. + if tr.mem.Len() != 0 { + tr.stats.startTimer() + iter := tr.mem.NewIterator(nil) + t, n, err := tr.db.s.tops.createFrom(iter) + iter.Release() + tr.stats.stopTimer() + if err != nil { + return err + } + if tr.mem.getref() == 1 { + tr.mem.Reset() + } else { + tr.mem.decref() + tr.mem = tr.db.mpoolGet(0) + tr.mem.incref() + } + tr.tables = append(tr.tables, t) + tr.rec.addTableFile(0, t) + tr.stats.write += t.size + tr.db.logf("transaction@flush created L0@%d N·%d S·%s %q:%q", t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax) + } + return nil +} + +func (tr *Transaction) put(kt keyType, key, value []byte) error { + tr.ikScratch = makeInternalKey(tr.ikScratch, key, tr.seq+1, kt) + if tr.mem.Free() < len(tr.ikScratch)+len(value) { + if err := tr.flush(); err != nil { + return err + } + } + if err := tr.mem.Put(tr.ikScratch, value); err != nil { + return err + } + tr.seq++ + return nil +} + +// Put sets the value for the given key. It overwrites any previous value +// for that key; a DB is not a multi-map. +// Please note that the transaction is not compacted until committed, so if you +// writes 10 same keys, then those 10 same keys are in the transaction. +// +// It is safe to modify the contents of the arguments after Put returns. +func (tr *Transaction) Put(key, value []byte, wo *opt.WriteOptions) error { + tr.lk.Lock() + defer tr.lk.Unlock() + if tr.closed { + return errTransactionDone + } + return tr.put(keyTypeVal, key, value) +} + +// Delete deletes the value for the given key. +// Please note that the transaction is not compacted until committed, so if you +// writes 10 same keys, then those 10 same keys are in the transaction. +// +// It is safe to modify the contents of the arguments after Delete returns. +func (tr *Transaction) Delete(key []byte, wo *opt.WriteOptions) error { + tr.lk.Lock() + defer tr.lk.Unlock() + if tr.closed { + return errTransactionDone + } + return tr.put(keyTypeDel, key, nil) +} + +// Write apply the given batch to the transaction. The batch will be applied +// sequentially. +// Please note that the transaction is not compacted until committed, so if you +// writes 10 same keys, then those 10 same keys are in the transaction. +// +// It is safe to modify the contents of the arguments after Write returns. +func (tr *Transaction) Write(b *Batch, wo *opt.WriteOptions) error { + if b == nil || b.Len() == 0 { + return nil + } + + tr.lk.Lock() + defer tr.lk.Unlock() + if tr.closed { + return errTransactionDone + } + return b.decodeRec(func(i int, kt keyType, key, value []byte) error { + return tr.put(kt, key, value) + }) +} + +func (tr *Transaction) setDone() { + tr.closed = true + tr.db.tr = nil + tr.mem.decref() + <-tr.db.writeLockC +} + +// Commit commits the transaction. +// +// Other methods should not be called after transaction has been committed. +func (tr *Transaction) Commit() error { + if err := tr.db.ok(); err != nil { + return err + } + + tr.lk.Lock() + defer tr.lk.Unlock() + if tr.closed { + return errTransactionDone + } + defer tr.setDone() + if err := tr.flush(); err != nil { + tr.discard() + return err + } + if len(tr.tables) != 0 { + // Committing transaction. + tr.rec.setSeqNum(tr.seq) + tr.db.compCommitLk.Lock() + defer tr.db.compCommitLk.Unlock() + for retry := 0; retry < 3; retry++ { + if err := tr.db.s.commit(&tr.rec); err != nil { + tr.db.logf("transaction@commit error R·%d %q", retry, err) + select { + case <-time.After(time.Second): + case _, _ = <-tr.db.closeC: + tr.db.logf("transaction@commit exiting") + return err + } + } else { + // Success. Set db.seq. + tr.db.setSeq(tr.seq) + break + } + } + // Trigger table auto-compaction. + tr.db.compTrigger(tr.db.tcompCmdC) + } + return nil +} + +func (tr *Transaction) discard() { + // Discard transaction. + for _, t := range tr.tables { + tr.db.logf("transaction@discard @%d", t.fd.Num) + if err1 := tr.db.s.stor.Remove(t.fd); err1 == nil { + tr.db.s.reuseFileNum(t.fd.Num) + } + } +} + +// Discard discards the transaction. +// +// Other methods should not be called after transaction has been discarded. +func (tr *Transaction) Discard() { + tr.lk.Lock() + if !tr.closed { + tr.discard() + tr.setDone() + } + tr.lk.Unlock() +} + +// OpenTransaction opens an atomic DB transaction. Only one transaction can be +// opened at a time. Write will be blocked until the transaction is committed or +// discarded. +// The returned transaction handle is goroutine-safe. +// +// The transaction must be closed once done, either by committing or discarding +// the transaction. +// Closing the DB will discard open transaction. +func (db *DB) OpenTransaction() (*Transaction, error) { + if err := db.ok(); err != nil { + return nil, err + } + + // The write happen synchronously. + select { + case db.writeLockC <- struct{}{}: + case err := <-db.compPerErrC: + return nil, err + case _, _ = <-db.closeC: + return nil, ErrClosed + } + + if db.tr != nil { + panic("leveldb: has open transaction") + } + + // Flush current memdb. + if db.mem != nil && db.mem.Len() != 0 { + if _, err := db.rotateMem(0, true); err != nil { + return nil, err + } + } + + tr := &Transaction{ + db: db, + seq: db.seq, + mem: db.mpoolGet(0), + } + tr.mem.incref() + db.tr = tr + return tr, nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go index a8a2bdf72..7fd386ca4 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go @@ -21,14 +21,16 @@ type Reader interface { NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator } -type Sizes []uint64 +// Sizes is list of size. +type Sizes []int64 // Sum returns sum of the sizes. -func (p Sizes) Sum() (n uint64) { - for _, s := range p { - n += s +func (sizes Sizes) Sum() int64 { + var sum int64 + for _, size := range sizes { + sum += size } - return n + return sum } // Logging. @@ -40,59 +42,59 @@ func (db *DB) checkAndCleanFiles() error { v := db.s.version() defer v.release() - tablesMap := make(map[uint64]bool) - for _, tables := range v.tables { + tmap := make(map[int64]bool) + for _, tables := range v.levels { for _, t := range tables { - tablesMap[t.file.Num()] = false + tmap[t.fd.Num] = false } } - files, err := db.s.getFiles(storage.TypeAll) + fds, err := db.s.stor.List(storage.TypeAll) if err != nil { return err } - var nTables int - var rem []storage.File - for _, f := range files { + var nt int + var rem []storage.FileDesc + for _, fd := range fds { keep := true - switch f.Type() { + switch fd.Type { case storage.TypeManifest: - keep = f.Num() >= db.s.manifestFile.Num() + keep = fd.Num >= db.s.manifestFd.Num case storage.TypeJournal: - if db.frozenJournalFile != nil { - keep = f.Num() >= db.frozenJournalFile.Num() + if !db.frozenJournalFd.Nil() { + keep = fd.Num >= db.frozenJournalFd.Num } else { - keep = f.Num() >= db.journalFile.Num() + keep = fd.Num >= db.journalFd.Num } case storage.TypeTable: - _, keep = tablesMap[f.Num()] + _, keep = tmap[fd.Num] if keep { - tablesMap[f.Num()] = true - nTables++ + tmap[fd.Num] = true + nt++ } } if !keep { - rem = append(rem, f) + rem = append(rem, fd) } } - if nTables != len(tablesMap) { - var missing []*storage.FileInfo - for num, present := range tablesMap { + if nt != len(tmap) { + var mfds []storage.FileDesc + for num, present := range tmap { if !present { - missing = append(missing, &storage.FileInfo{Type: storage.TypeTable, Num: num}) + mfds = append(mfds, storage.FileDesc{storage.TypeTable, num}) db.logf("db@janitor table missing @%d", num) } } - return errors.NewErrCorrupted(nil, &errors.ErrMissingFiles{Files: missing}) + return errors.NewErrCorrupted(storage.FileDesc{}, &errors.ErrMissingFiles{Fds: mfds}) } - db.logf("db@janitor F·%d G·%d", len(files), len(rem)) - for _, f := range rem { - db.logf("db@janitor removing %s-%d", f.Type(), f.Num()) - if err := f.Remove(); err != nil { + db.logf("db@janitor F·%d G·%d", len(fds), len(rem)) + for _, fd := range rem { + db.logf("db@janitor removing %s-%d", fd.Type, fd.Num) + if err := db.s.stor.Remove(fd); err != nil { return err } } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go index 0c3956539..5576761fe 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go @@ -45,9 +45,9 @@ func (db *DB) jWriter() { } } -func (db *DB) rotateMem(n int) (mem *memDB, err error) { +func (db *DB) rotateMem(n int, wait bool) (mem *memDB, err error) { // Wait for pending memdb compaction. - err = db.compSendIdle(db.mcompCmdC) + err = db.compTriggerWait(db.mcompCmdC) if err != nil { return } @@ -59,7 +59,11 @@ func (db *DB) rotateMem(n int) (mem *memDB, err error) { } // Schedule memdb compaction. - db.compSendTrigger(db.mcompCmdC) + if wait { + err = db.compTriggerWait(db.mcompCmdC) + } else { + db.compTrigger(db.mcompCmdC) + } return } @@ -84,7 +88,7 @@ func (db *DB) flush(n int) (mdb *memDB, mdbFree int, err error) { return false case v.tLen(0) >= db.s.o.GetWriteL0PauseTrigger(): delayed = true - err = db.compSendIdle(db.tcompCmdC) + err = db.compTriggerWait(db.tcompCmdC) if err != nil { return false } @@ -94,7 +98,7 @@ func (db *DB) flush(n int) (mdb *memDB, mdbFree int, err error) { mdbFree = n } else { mdb.decref() - mdb, err = db.rotateMem(n) + mdb, err = db.rotateMem(n, false) if err == nil { mdbFree = mdb.Free() } else { @@ -131,12 +135,27 @@ func (db *DB) Write(b *Batch, wo *opt.WriteOptions) (err error) { b.init(wo.GetSync() && !db.s.o.GetNoSync()) + if b.size() > db.s.o.GetWriteBuffer() && !db.s.o.GetDisableLargeBatchTransaction() { + // Writes using transaction. + tr, err1 := db.OpenTransaction() + if err1 != nil { + return err1 + } + if err1 := tr.Write(b, wo); err1 != nil { + tr.Discard() + return err1 + } + return tr.Commit() + } + // The write happen synchronously. select { case db.writeC <- b: if <-db.writeMergedC { return <-db.writeAckC } + // Continue, the write lock already acquired by previous writer + // and handed out to us. case db.writeLockC <- struct{}{}: case err = <-db.compPerErrC: return @@ -147,14 +166,15 @@ func (db *DB) Write(b *Batch, wo *opt.WriteOptions) (err error) { merged := 0 danglingMerge := false defer func() { + for i := 0; i < merged; i++ { + db.writeAckC <- err + } if danglingMerge { + // Only one dangling merge at most, so this is safe. db.writeMergedC <- false } else { <-db.writeLockC } - for i := 0; i < merged; i++ { - db.writeAckC <- err - } }() mdb, mdbFree, err := db.flush(b.size()) @@ -234,7 +254,7 @@ drain: db.addSeq(uint64(b.Len())) if b.size() >= mdbFree { - db.rotateMem(0) + db.rotateMem(0, false) } return } @@ -261,8 +281,8 @@ func (db *DB) Delete(key []byte, wo *opt.WriteOptions) error { func isMemOverlaps(icmp *iComparer, mem *memdb.DB, min, max []byte) bool { iter := mem.NewIterator(nil) defer iter.Release() - return (max == nil || (iter.First() && icmp.uCompare(max, iKey(iter.Key()).ukey()) >= 0)) && - (min == nil || (iter.Last() && icmp.uCompare(min, iKey(iter.Key()).ukey()) <= 0)) + return (max == nil || (iter.First() && icmp.uCompare(max, internalKey(iter.Key()).ukey()) >= 0)) && + (min == nil || (iter.Last() && icmp.uCompare(min, internalKey(iter.Key()).ukey()) <= 0)) } // CompactRange compacts the underlying DB for the given key range. @@ -293,12 +313,12 @@ func (db *DB) CompactRange(r util.Range) error { defer mdb.decref() if isMemOverlaps(db.s.icmp, mdb.DB, r.Start, r.Limit) { // Memdb compaction. - if _, err := db.rotateMem(0); err != nil { + if _, err := db.rotateMem(0, false); err != nil { <-db.writeLockC return err } <-db.writeLockC - if err := db.compSendIdle(db.mcompCmdC); err != nil { + if err := db.compTriggerWait(db.mcompCmdC); err != nil { return err } } else { @@ -306,7 +326,7 @@ func (db *DB) CompactRange(r util.Range) error { } // Table compaction. - return db.compSendRange(db.tcompCmdC, -1, r.Start, r.Limit) + return db.compTriggerRange(db.tcompCmdC, -1, r.Start, r.Limit) } // SetReadOnly makes DB read-only. It will stay read-only until reopened. diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go b/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go index dacbf131c..9a0f6e2c1 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go @@ -29,21 +29,21 @@ func New(text string) error { // ErrCorrupted is the type that wraps errors that indicate corruption in // the database. type ErrCorrupted struct { - File *storage.FileInfo - Err error + Fd storage.FileDesc + Err error } func (e *ErrCorrupted) Error() string { - if e.File != nil { - return fmt.Sprintf("%v [file=%v]", e.Err, e.File) + if !e.Fd.Nil() { + return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd) } else { return e.Err.Error() } } // NewErrCorrupted creates new ErrCorrupted error. -func NewErrCorrupted(f storage.File, err error) error { - return &ErrCorrupted{storage.NewFileInfo(f), err} +func NewErrCorrupted(fd storage.FileDesc, err error) error { + return &ErrCorrupted{fd, err} } // IsCorrupted returns a boolean indicating whether the error is indicating @@ -61,17 +61,17 @@ func IsCorrupted(err error) bool { // ErrMissingFiles is the type that indicating a corruption due to missing // files. ErrMissingFiles always wrapped with ErrCorrupted. type ErrMissingFiles struct { - Files []*storage.FileInfo + Fds []storage.FileDesc } func (e *ErrMissingFiles) Error() string { return "file missing" } -// SetFile sets 'file info' of the given error with the given file. +// SetFd sets 'file info' of the given error with the given file. // Currently only ErrCorrupted is supported, otherwise will do nothing. -func SetFile(err error, f storage.File) error { +func SetFd(err error, fd storage.FileDesc) error { switch x := err.(type) { case *ErrCorrupted: - x.File = storage.NewFileInfo(f) + x.Fd = fd return x } return err diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter.go index 37c1e146b..e961e420d 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/filter.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter.go @@ -15,7 +15,7 @@ type iFilter struct { } func (f iFilter) Contains(filter, key []byte) bool { - return f.Filter.Contains(filter, iKey(key).ukey()) + return f.Filter.Contains(filter, internalKey(key).ukey()) } func (f iFilter) NewGenerator() filter.FilterGenerator { @@ -27,5 +27,5 @@ type iFilterGenerator struct { } func (g iFilterGenerator) Add(key []byte) { - g.FilterGenerator.Add(iKey(key).ukey()) + g.FilterGenerator.Add(internalKey(key).ukey()) } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go b/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go index 6519ec660..891098bb7 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go @@ -83,6 +83,7 @@ import ( "io" "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/storage" "github.com/syndtr/goleveldb/leveldb/util" ) @@ -165,7 +166,7 @@ func (r *Reader) corrupt(n int, reason string, skip bool) error { r.dropper.Drop(&ErrCorrupted{n, reason}) } if r.strict && !skip { - r.err = errors.NewErrCorrupted(nil, &ErrCorrupted{n, reason}) + r.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrCorrupted{n, reason}) return r.err } return errSkip diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/key.go b/vendor/github.com/syndtr/goleveldb/leveldb/key.go index 572ae8150..d0b80aaf9 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/key.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/key.go @@ -11,28 +11,30 @@ import ( "fmt" "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/storage" ) -type ErrIkeyCorrupted struct { +// ErrInternalKeyCorrupted records internal key corruption. +type ErrInternalKeyCorrupted struct { Ikey []byte Reason string } -func (e *ErrIkeyCorrupted) Error() string { - return fmt.Sprintf("leveldb: iKey %q corrupted: %s", e.Ikey, e.Reason) +func (e *ErrInternalKeyCorrupted) Error() string { + return fmt.Sprintf("leveldb: internal key %q corrupted: %s", e.Ikey, e.Reason) } -func newErrIkeyCorrupted(ikey []byte, reason string) error { - return errors.NewErrCorrupted(nil, &ErrIkeyCorrupted{append([]byte{}, ikey...), reason}) +func newErrInternalKeyCorrupted(ikey []byte, reason string) error { + return errors.NewErrCorrupted(storage.FileDesc{}, &ErrInternalKeyCorrupted{append([]byte{}, ikey...), reason}) } -type kType int +type keyType uint -func (kt kType) String() string { +func (kt keyType) String() string { switch kt { - case ktDel: + case keyTypeDel: return "d" - case ktVal: + case keyTypeVal: return "v" } return "x" @@ -41,102 +43,105 @@ func (kt kType) String() string { // Value types encoded as the last component of internal keys. // Don't modify; this value are saved to disk. const ( - ktDel kType = iota - ktVal + keyTypeDel keyType = iota + keyTypeVal ) -// ktSeek defines the kType that should be passed when constructing an +// keyTypeSeek defines the keyType that should be passed when constructing an // internal key for seeking to a particular sequence number (since we // sort sequence numbers in decreasing order and the value type is // embedded as the low 8 bits in the sequence number in internal keys, // we need to use the highest-numbered ValueType, not the lowest). -const ktSeek = ktVal +const keyTypeSeek = keyTypeVal const ( // Maximum value possible for sequence number; the 8-bits are // used by value type, so its can packed together in single // 64-bit integer. - kMaxSeq uint64 = (uint64(1) << 56) - 1 + keyMaxSeq = (uint64(1) << 56) - 1 // Maximum value possible for packed sequence number and type. - kMaxNum uint64 = (kMaxSeq << 8) | uint64(ktSeek) + keyMaxNum = (keyMaxSeq << 8) | uint64(keyTypeSeek) ) // Maximum number encoded in bytes. -var kMaxNumBytes = make([]byte, 8) +var keyMaxNumBytes = make([]byte, 8) func init() { - binary.LittleEndian.PutUint64(kMaxNumBytes, kMaxNum) + binary.LittleEndian.PutUint64(keyMaxNumBytes, keyMaxNum) } -type iKey []byte +type internalKey []byte -func newIkey(ukey []byte, seq uint64, kt kType) iKey { - if seq > kMaxSeq { +func makeInternalKey(dst, ukey []byte, seq uint64, kt keyType) internalKey { + if seq > keyMaxSeq { panic("leveldb: invalid sequence number") - } else if kt > ktVal { + } else if kt > keyTypeVal { panic("leveldb: invalid type") } - ik := make(iKey, len(ukey)+8) - copy(ik, ukey) - binary.LittleEndian.PutUint64(ik[len(ukey):], (seq<<8)|uint64(kt)) - return ik + if n := len(ukey) + 8; cap(dst) < n { + dst = make([]byte, n) + } else { + dst = dst[:n] + } + copy(dst, ukey) + binary.LittleEndian.PutUint64(dst[len(ukey):], (seq<<8)|uint64(kt)) + return internalKey(dst) } -func parseIkey(ik []byte) (ukey []byte, seq uint64, kt kType, err error) { +func parseInternalKey(ik []byte) (ukey []byte, seq uint64, kt keyType, err error) { if len(ik) < 8 { - return nil, 0, 0, newErrIkeyCorrupted(ik, "invalid length") + return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid length") } num := binary.LittleEndian.Uint64(ik[len(ik)-8:]) - seq, kt = uint64(num>>8), kType(num&0xff) - if kt > ktVal { - return nil, 0, 0, newErrIkeyCorrupted(ik, "invalid type") + seq, kt = uint64(num>>8), keyType(num&0xff) + if kt > keyTypeVal { + return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid type") } ukey = ik[:len(ik)-8] return } -func validIkey(ik []byte) bool { - _, _, _, err := parseIkey(ik) +func validInternalKey(ik []byte) bool { + _, _, _, err := parseInternalKey(ik) return err == nil } -func (ik iKey) assert() { +func (ik internalKey) assert() { if ik == nil { - panic("leveldb: nil iKey") + panic("leveldb: nil internalKey") } if len(ik) < 8 { - panic(fmt.Sprintf("leveldb: iKey %q, len=%d: invalid length", []byte(ik), len(ik))) + panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid length", []byte(ik), len(ik))) } } -func (ik iKey) ukey() []byte { +func (ik internalKey) ukey() []byte { ik.assert() return ik[:len(ik)-8] } -func (ik iKey) num() uint64 { +func (ik internalKey) num() uint64 { ik.assert() return binary.LittleEndian.Uint64(ik[len(ik)-8:]) } -func (ik iKey) parseNum() (seq uint64, kt kType) { +func (ik internalKey) parseNum() (seq uint64, kt keyType) { num := ik.num() - seq, kt = uint64(num>>8), kType(num&0xff) - if kt > ktVal { - panic(fmt.Sprintf("leveldb: iKey %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt)) + seq, kt = uint64(num>>8), keyType(num&0xff) + if kt > keyTypeVal { + panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt)) } return } -func (ik iKey) String() string { +func (ik internalKey) String() string { if ik == nil { return "" } - if ukey, seq, kt, err := parseIkey(ik); err == nil { + if ukey, seq, kt, err := parseInternalKey(ik); err == nil { return fmt.Sprintf("%s,%s%d", shorten(string(ukey)), kt, seq) - } else { - return "" } + return "" } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go b/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go index 7b5d8b9b5..3d2bf1c02 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go @@ -8,10 +8,11 @@ package opt import ( + "math" + "github.com/syndtr/goleveldb/leveldb/cache" "github.com/syndtr/goleveldb/leveldb/comparer" "github.com/syndtr/goleveldb/leveldb/filter" - "math" ) const ( @@ -35,8 +36,6 @@ var ( DefaultCompactionTotalSizeMultiplier = 10.0 DefaultCompressionType = SnappyCompression DefaultIteratorSamplingRate = 1 * MiB - DefaultMaxMemCompationLevel = 2 - DefaultNumLevel = 7 DefaultOpenFilesCacher = LRUCacher DefaultOpenFilesCacheCapacity = 500 DefaultWriteBuffer = 4 * MiB @@ -266,6 +265,13 @@ type Options struct { // The default value is false. DisableCompactionBackoff bool + // DisableLargeBatchTransaction allows disabling switch-to-transaction mode + // on large batch write. If enable batch writes large than WriteBuffer will + // use transaction. + // + // The default is false. + DisableLargeBatchTransaction bool + // ErrorIfExist defines whether an error should returned if the DB already // exist. // @@ -301,24 +307,11 @@ type Options struct { // The default is 1MiB. IteratorSamplingRate int - // MaxMemCompationLevel defines maximum level a newly compacted 'memdb' - // will be pushed into if doesn't creates overlap. This should less than - // NumLevel. Use -1 for level-0. - // - // The default is 2. - MaxMemCompationLevel int - // NoSync allows completely disable fsync. // // The default is false. NoSync bool - // NumLevel defines number of database level. The level shouldn't changed - // between opens, or the database will panic. - // - // The default is 7. - NumLevel int - // OpenFilesCacher provides cache algorithm for open files caching. // Specify NoCacher to disable caching algorithm. // @@ -440,7 +433,7 @@ func (o *Options) GetCompactionTableSize(level int) int { if o.CompactionTableSize > 0 { base = o.CompactionTableSize } - if len(o.CompactionTableSizeMultiplierPerLevel) > level && o.CompactionTableSizeMultiplierPerLevel[level] > 0 { + if level < len(o.CompactionTableSizeMultiplierPerLevel) && o.CompactionTableSizeMultiplierPerLevel[level] > 0 { mult = o.CompactionTableSizeMultiplierPerLevel[level] } else if o.CompactionTableSizeMultiplier > 0 { mult = math.Pow(o.CompactionTableSizeMultiplier, float64(level)) @@ -461,7 +454,7 @@ func (o *Options) GetCompactionTotalSize(level int) int64 { if o.CompactionTotalSize > 0 { base = o.CompactionTotalSize } - if len(o.CompactionTotalSizeMultiplierPerLevel) > level && o.CompactionTotalSizeMultiplierPerLevel[level] > 0 { + if level < len(o.CompactionTotalSizeMultiplierPerLevel) && o.CompactionTotalSizeMultiplierPerLevel[level] > 0 { mult = o.CompactionTotalSizeMultiplierPerLevel[level] } else if o.CompactionTotalSizeMultiplier > 0 { mult = math.Pow(o.CompactionTotalSizeMultiplier, float64(level)) @@ -508,6 +501,13 @@ func (o *Options) GetDisableCompactionBackoff() bool { return o.DisableCompactionBackoff } +func (o *Options) GetDisableLargeBatchTransaction() bool { + if o == nil { + return false + } + return o.DisableLargeBatchTransaction +} + func (o *Options) GetErrorIfExist() bool { if o == nil { return false @@ -536,21 +536,6 @@ func (o *Options) GetIteratorSamplingRate() int { return o.IteratorSamplingRate } -func (o *Options) GetMaxMemCompationLevel() int { - level := DefaultMaxMemCompationLevel - if o != nil { - if o.MaxMemCompationLevel > 0 { - level = o.MaxMemCompationLevel - } else if o.MaxMemCompationLevel < 0 { - level = 0 - } - } - if level >= o.GetNumLevel() { - return o.GetNumLevel() - 1 - } - return level -} - func (o *Options) GetNoSync() bool { if o == nil { return false @@ -558,13 +543,6 @@ func (o *Options) GetNoSync() bool { return o.NoSync } -func (o *Options) GetNumLevel() int { - if o == nil || o.NumLevel <= 0 { - return DefaultNumLevel - } - return o.NumLevel -} - func (o *Options) GetOpenFilesCacher() Cacher { if o == nil || o.OpenFilesCacher == nil { return DefaultOpenFilesCacher diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/options.go b/vendor/github.com/syndtr/goleveldb/leveldb/options.go index a3d84ef60..b072b1ac4 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/options.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/options.go @@ -43,6 +43,8 @@ func (s *session) setOptions(o *opt.Options) { s.o.cache() } +const optCachedLevel = 7 + type cachedOptions struct { *opt.Options @@ -54,15 +56,13 @@ type cachedOptions struct { } func (co *cachedOptions) cache() { - numLevel := co.Options.GetNumLevel() + co.compactionExpandLimit = make([]int, optCachedLevel) + co.compactionGPOverlaps = make([]int, optCachedLevel) + co.compactionSourceLimit = make([]int, optCachedLevel) + co.compactionTableSize = make([]int, optCachedLevel) + co.compactionTotalSize = make([]int64, optCachedLevel) - co.compactionExpandLimit = make([]int, numLevel) - co.compactionGPOverlaps = make([]int, numLevel) - co.compactionSourceLimit = make([]int, numLevel) - co.compactionTableSize = make([]int, numLevel) - co.compactionTotalSize = make([]int64, numLevel) - - for level := 0; level < numLevel; level++ { + for level := 0; level < optCachedLevel; level++ { co.compactionExpandLimit[level] = co.Options.GetCompactionExpandLimit(level) co.compactionGPOverlaps[level] = co.Options.GetCompactionGPOverlaps(level) co.compactionSourceLimit[level] = co.Options.GetCompactionSourceLimit(level) @@ -72,21 +72,36 @@ func (co *cachedOptions) cache() { } func (co *cachedOptions) GetCompactionExpandLimit(level int) int { - return co.compactionExpandLimit[level] + if level < optCachedLevel { + return co.compactionExpandLimit[level] + } + return co.Options.GetCompactionExpandLimit(level) } func (co *cachedOptions) GetCompactionGPOverlaps(level int) int { - return co.compactionGPOverlaps[level] + if level < optCachedLevel { + return co.compactionGPOverlaps[level] + } + return co.Options.GetCompactionGPOverlaps(level) } func (co *cachedOptions) GetCompactionSourceLimit(level int) int { - return co.compactionSourceLimit[level] + if level < optCachedLevel { + return co.compactionSourceLimit[level] + } + return co.Options.GetCompactionSourceLimit(level) } func (co *cachedOptions) GetCompactionTableSize(level int) int { - return co.compactionTableSize[level] + if level < optCachedLevel { + return co.compactionTableSize[level] + } + return co.Options.GetCompactionTableSize(level) } func (co *cachedOptions) GetCompactionTotalSize(level int) int64 { - return co.compactionTotalSize[level] + if level < optCachedLevel { + return co.compactionTotalSize[level] + } + return co.Options.GetCompactionTotalSize(level) } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session.go b/vendor/github.com/syndtr/goleveldb/leveldb/session.go index f0bba4602..b0d3fef1d 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/session.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/session.go @@ -16,9 +16,9 @@ import ( "github.com/syndtr/goleveldb/leveldb/journal" "github.com/syndtr/goleveldb/leveldb/opt" "github.com/syndtr/goleveldb/leveldb/storage" - "github.com/syndtr/goleveldb/leveldb/util" ) +// ErrManifestCorrupted records manifest corruption. type ErrManifestCorrupted struct { Field string Reason string @@ -28,31 +28,31 @@ func (e *ErrManifestCorrupted) Error() string { return fmt.Sprintf("leveldb: manifest corrupted (field '%s'): %s", e.Field, e.Reason) } -func newErrManifestCorrupted(f storage.File, field, reason string) error { - return errors.NewErrCorrupted(f, &ErrManifestCorrupted{field, reason}) +func newErrManifestCorrupted(fd storage.FileDesc, field, reason string) error { + return errors.NewErrCorrupted(fd, &ErrManifestCorrupted{field, reason}) } // session represent a persistent database session. type session struct { // Need 64-bit alignment. - stNextFileNum uint64 // current unused file number - stJournalNum uint64 // current journal file number; need external synchronization - stPrevJournalNum uint64 // prev journal file number; no longer used; for compatibility with older version of leveldb + stNextFileNum int64 // current unused file number + stJournalNum int64 // current journal file number; need external synchronization + stPrevJournalNum int64 // prev journal file number; no longer used; for compatibility with older version of leveldb + stTempFileNum int64 stSeqNum uint64 // last mem compacted seq; need external synchronization - stTempFileNum uint64 stor storage.Storage - storLock util.Releaser + storLock storage.Lock o *cachedOptions icmp *iComparer tops *tOps manifest *journal.Writer manifestWriter storage.Writer - manifestFile storage.File + manifestFd storage.FileDesc - stCompPtrs []iKey // compaction pointers; need external synchronization - stVersion *version // current version + stCompPtrs []internalKey // compaction pointers; need external synchronization + stVersion *version // current version vmu sync.Mutex } @@ -66,9 +66,8 @@ func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) { return } s = &session{ - stor: stor, - storLock: storLock, - stCompPtrs: make([]iKey, o.GetNumLevel()), + stor: stor, + storLock: storLock, } s.setOptions(o) s.tops = newTableOps(s) @@ -88,7 +87,6 @@ func (s *session) close() { } s.manifest = nil s.manifestWriter = nil - s.manifestFile = nil s.stVersion = nil } @@ -109,18 +107,18 @@ func (s *session) recover() (err error) { if os.IsNotExist(err) { // Don't return os.ErrNotExist if the underlying storage contains // other files that belong to LevelDB. So the DB won't get trashed. - if files, _ := s.stor.GetFiles(storage.TypeAll); len(files) > 0 { - err = &errors.ErrCorrupted{File: &storage.FileInfo{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}} + if fds, _ := s.stor.List(storage.TypeAll); len(fds) > 0 { + err = &errors.ErrCorrupted{Fd: storage.FileDesc{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}} } } }() - m, err := s.stor.GetManifest() + fd, err := s.stor.GetMeta() if err != nil { return } - reader, err := m.Open() + reader, err := s.stor.Open(fd) if err != nil { return } @@ -128,10 +126,9 @@ func (s *session) recover() (err error) { var ( // Options. - numLevel = s.o.GetNumLevel() - strict = s.o.GetStrict(opt.StrictManifest) + strict = s.o.GetStrict(opt.StrictManifest) - jr = journal.NewReader(reader, dropper{s, m}, strict, true) + jr = journal.NewReader(reader, dropper{s, fd}, strict, true) rec = &sessionRecord{} staging = s.stVersion.newStaging() ) @@ -143,24 +140,23 @@ func (s *session) recover() (err error) { err = nil break } - return errors.SetFile(err, m) + return errors.SetFd(err, fd) } - err = rec.decode(r, numLevel) + err = rec.decode(r) if err == nil { // save compact pointers for _, r := range rec.compPtrs { - s.stCompPtrs[r.level] = iKey(r.ikey) + s.setCompPtr(r.level, internalKey(r.ikey)) } // commit record to version staging staging.commit(rec) } else { - err = errors.SetFile(err, m) + err = errors.SetFd(err, fd) if strict || !errors.IsCorrupted(err) { return - } else { - s.logf("manifest error: %v (skipped)", errors.SetFile(err, m)) } + s.logf("manifest error: %v (skipped)", errors.SetFd(err, fd)) } rec.resetCompPtrs() rec.resetAddedTables() @@ -169,18 +165,18 @@ func (s *session) recover() (err error) { switch { case !rec.has(recComparer): - return newErrManifestCorrupted(m, "comparer", "missing") + return newErrManifestCorrupted(fd, "comparer", "missing") case rec.comparer != s.icmp.uName(): - return newErrManifestCorrupted(m, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer)) + return newErrManifestCorrupted(fd, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer)) case !rec.has(recNextFileNum): - return newErrManifestCorrupted(m, "next-file-num", "missing") + return newErrManifestCorrupted(fd, "next-file-num", "missing") case !rec.has(recJournalNum): - return newErrManifestCorrupted(m, "journal-file-num", "missing") + return newErrManifestCorrupted(fd, "journal-file-num", "missing") case !rec.has(recSeqNum): - return newErrManifestCorrupted(m, "seq-num", "missing") + return newErrManifestCorrupted(fd, "seq-num", "missing") } - s.manifestFile = m + s.manifestFd = fd s.setVersion(staging.finish()) s.setNextFileNum(rec.nextFileNum) s.recordCommited(rec) diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go index 7c5a79418..089cd00b2 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go @@ -14,41 +14,46 @@ import ( "github.com/syndtr/goleveldb/leveldb/opt" ) -func (s *session) pickMemdbLevel(umin, umax []byte) int { +func (s *session) pickMemdbLevel(umin, umax []byte, maxLevel int) int { v := s.version() defer v.release() - return v.pickMemdbLevel(umin, umax) + return v.pickMemdbLevel(umin, umax, maxLevel) } -func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, level int) (level_ int, err error) { +func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, maxLevel int) (int, error) { // Create sorted table. iter := mdb.NewIterator(nil) defer iter.Release() t, n, err := s.tops.createFrom(iter) if err != nil { - return level, err + return 0, err } - // Pick level and add to record. - if level < 0 { - level = s.pickMemdbLevel(t.imin.ukey(), t.imax.ukey()) - } - rec.addTableFile(level, t) + // Pick level other than zero can cause compaction issue with large + // bulk insert and delete on strictly incrementing key-space. The + // problem is that the small deletion markers trapped at lower level, + // while key/value entries keep growing at higher level. Since the + // key-space is strictly incrementing it will not overlaps with + // higher level, thus maximum possible level is always picked, while + // overlapping deletion marker pushed into lower level. + // See: https://github.com/syndtr/goleveldb/issues/127. + flushLevel := s.pickMemdbLevel(t.imin.ukey(), t.imax.ukey(), maxLevel) + rec.addTableFile(flushLevel, t) - s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", level, t.file.Num(), n, shortenb(int(t.size)), t.imin, t.imax) - return level, nil + s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", flushLevel, t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax) + return flushLevel, nil } // Pick a compaction based on current state; need external synchronization. func (s *session) pickCompaction() *compaction { v := s.version() - var level int + var sourceLevel int var t0 tFiles if v.cScore >= 1 { - level = v.cLevel - cptr := s.stCompPtrs[level] - tables := v.tables[level] + sourceLevel = v.cLevel + cptr := s.getCompPtr(sourceLevel) + tables := v.levels[sourceLevel] for _, t := range tables { if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 { t0 = append(t0, t) @@ -61,7 +66,7 @@ func (s *session) pickCompaction() *compaction { } else { if p := atomic.LoadPointer(&v.cSeek); p != nil { ts := (*tSet)(p) - level = ts.level + sourceLevel = ts.level t0 = append(t0, ts.table) } else { v.release() @@ -69,14 +74,19 @@ func (s *session) pickCompaction() *compaction { } } - return newCompaction(s, v, level, t0) + return newCompaction(s, v, sourceLevel, t0) } // Create compaction from given level and range; need external synchronization. -func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction { +func (s *session) getCompactionRange(sourceLevel int, umin, umax []byte, noLimit bool) *compaction { v := s.version() - t0 := v.tables[level].getOverlaps(nil, s.icmp, umin, umax, level == 0) + if sourceLevel >= len(v.levels) { + v.release() + return nil + } + + t0 := v.levels[sourceLevel].getOverlaps(nil, s.icmp, umin, umax, sourceLevel == 0) if len(t0) == 0 { v.release() return nil @@ -86,9 +96,9 @@ func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction { // But we cannot do this for level-0 since level-0 files can overlap // and we must not pick one file and drop another older file if the // two files overlap. - if level > 0 { - limit := uint64(v.s.o.GetCompactionSourceLimit(level)) - total := uint64(0) + if !noLimit && sourceLevel > 0 { + limit := int64(v.s.o.GetCompactionSourceLimit(sourceLevel)) + total := int64(0) for i, t := range t0 { total += t.size if total >= limit { @@ -99,17 +109,17 @@ func (s *session) getCompactionRange(level int, umin, umax []byte) *compaction { } } - return newCompaction(s, v, level, t0) + return newCompaction(s, v, sourceLevel, t0) } -func newCompaction(s *session, v *version, level int, t0 tFiles) *compaction { +func newCompaction(s *session, v *version, sourceLevel int, t0 tFiles) *compaction { c := &compaction{ s: s, v: v, - level: level, - tables: [2]tFiles{t0, nil}, - maxGPOverlaps: uint64(s.o.GetCompactionGPOverlaps(level)), - tPtrs: make([]int, s.o.GetNumLevel()), + sourceLevel: sourceLevel, + levels: [2]tFiles{t0, nil}, + maxGPOverlaps: int64(s.o.GetCompactionGPOverlaps(sourceLevel)), + tPtrs: make([]int, len(v.levels)), } c.expand() c.save() @@ -121,21 +131,21 @@ type compaction struct { s *session v *version - level int - tables [2]tFiles - maxGPOverlaps uint64 + sourceLevel int + levels [2]tFiles + maxGPOverlaps int64 gp tFiles gpi int seenKey bool - gpOverlappedBytes uint64 - imin, imax iKey + gpOverlappedBytes int64 + imin, imax internalKey tPtrs []int released bool snapGPI int snapSeenKey bool - snapGPOverlappedBytes uint64 + snapGPOverlappedBytes int64 snapTPtrs []int } @@ -162,30 +172,34 @@ func (c *compaction) release() { // Expand compacted tables; need external synchronization. func (c *compaction) expand() { - limit := uint64(c.s.o.GetCompactionExpandLimit(c.level)) - vt0, vt1 := c.v.tables[c.level], c.v.tables[c.level+1] + limit := int64(c.s.o.GetCompactionExpandLimit(c.sourceLevel)) + vt0 := c.v.levels[c.sourceLevel] + vt1 := tFiles{} + if level := c.sourceLevel + 1; level < len(c.v.levels) { + vt1 = c.v.levels[level] + } - t0, t1 := c.tables[0], c.tables[1] + t0, t1 := c.levels[0], c.levels[1] imin, imax := t0.getRange(c.s.icmp) // We expand t0 here just incase ukey hop across tables. - t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.level == 0) - if len(t0) != len(c.tables[0]) { + t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.sourceLevel == 0) + if len(t0) != len(c.levels[0]) { imin, imax = t0.getRange(c.s.icmp) } t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false) // Get entire range covered by compaction. amin, amax := append(t0, t1...).getRange(c.s.icmp) - // See if we can grow the number of inputs in "level" without - // changing the number of "level+1" files we pick up. + // See if we can grow the number of inputs in "sourceLevel" without + // changing the number of "sourceLevel+1" files we pick up. if len(t1) > 0 { - exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.level == 0) + exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.sourceLevel == 0) if len(exp0) > len(t0) && t1.size()+exp0.size() < limit { xmin, xmax := exp0.getRange(c.s.icmp) exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false) if len(exp1) == len(t1) { c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)", - c.level, c.level+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())), + c.sourceLevel, c.sourceLevel+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())), len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size()))) imin, imax = xmin, xmax t0, t1 = exp0, exp1 @@ -195,22 +209,23 @@ func (c *compaction) expand() { } // Compute the set of grandparent files that overlap this compaction - // (parent == level+1; grandparent == level+2) - if c.level+2 < c.s.o.GetNumLevel() { - c.gp = c.v.tables[c.level+2].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false) + // (parent == sourceLevel+1; grandparent == sourceLevel+2) + if level := c.sourceLevel + 2; level < len(c.v.levels) { + c.gp = c.v.levels[level].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false) } - c.tables[0], c.tables[1] = t0, t1 + c.levels[0], c.levels[1] = t0, t1 c.imin, c.imax = imin, imax } // Check whether compaction is trivial. func (c *compaction) trivial() bool { - return len(c.tables[0]) == 1 && len(c.tables[1]) == 0 && c.gp.size() <= c.maxGPOverlaps + return len(c.levels[0]) == 1 && len(c.levels[1]) == 0 && c.gp.size() <= c.maxGPOverlaps } func (c *compaction) baseLevelForKey(ukey []byte) bool { - for level, tables := range c.v.tables[c.level+2:] { + for level := c.sourceLevel + 2; level < len(c.v.levels); level++ { + tables := c.v.levels[level] for c.tPtrs[level] < len(tables) { t := tables[c.tPtrs[level]] if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 { @@ -227,7 +242,7 @@ func (c *compaction) baseLevelForKey(ukey []byte) bool { return true } -func (c *compaction) shouldStopBefore(ikey iKey) bool { +func (c *compaction) shouldStopBefore(ikey internalKey) bool { for ; c.gpi < len(c.gp); c.gpi++ { gp := c.gp[c.gpi] if c.s.icmp.Compare(ikey, gp.imax) <= 0 { @@ -250,10 +265,10 @@ func (c *compaction) shouldStopBefore(ikey iKey) bool { // Creates an iterator. func (c *compaction) newIterator() iterator.Iterator { // Creates iterator slice. - icap := len(c.tables) - if c.level == 0 { + icap := len(c.levels) + if c.sourceLevel == 0 { // Special case for level-0. - icap = len(c.tables[0]) + 1 + icap = len(c.levels[0]) + 1 } its := make([]iterator.Iterator, 0, icap) @@ -267,13 +282,13 @@ func (c *compaction) newIterator() iterator.Iterator { ro.Strict |= opt.StrictReader } - for i, tables := range c.tables { + for i, tables := range c.levels { if len(tables) == 0 { continue } // Level-0 is not sorted and may overlaps each other. - if c.level+i == 0 { + if c.sourceLevel+i == 0 { for _, t := range tables { its = append(its, c.s.tops.newIterator(t, nil, ro)) } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go index 405e07bef..854e1aa6f 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go @@ -13,6 +13,7 @@ import ( "strings" "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/storage" ) type byteReader interface { @@ -35,28 +36,28 @@ const ( type cpRecord struct { level int - ikey iKey + ikey internalKey } type atRecord struct { level int - num uint64 - size uint64 - imin iKey - imax iKey + num int64 + size int64 + imin internalKey + imax internalKey } type dtRecord struct { level int - num uint64 + num int64 } type sessionRecord struct { hasRec int comparer string - journalNum uint64 - prevJournalNum uint64 - nextFileNum uint64 + journalNum int64 + prevJournalNum int64 + nextFileNum int64 seqNum uint64 compPtrs []cpRecord addedTables []atRecord @@ -75,17 +76,17 @@ func (p *sessionRecord) setComparer(name string) { p.comparer = name } -func (p *sessionRecord) setJournalNum(num uint64) { +func (p *sessionRecord) setJournalNum(num int64) { p.hasRec |= 1 << recJournalNum p.journalNum = num } -func (p *sessionRecord) setPrevJournalNum(num uint64) { +func (p *sessionRecord) setPrevJournalNum(num int64) { p.hasRec |= 1 << recPrevJournalNum p.prevJournalNum = num } -func (p *sessionRecord) setNextFileNum(num uint64) { +func (p *sessionRecord) setNextFileNum(num int64) { p.hasRec |= 1 << recNextFileNum p.nextFileNum = num } @@ -95,7 +96,7 @@ func (p *sessionRecord) setSeqNum(num uint64) { p.seqNum = num } -func (p *sessionRecord) addCompPtr(level int, ikey iKey) { +func (p *sessionRecord) addCompPtr(level int, ikey internalKey) { p.hasRec |= 1 << recCompPtr p.compPtrs = append(p.compPtrs, cpRecord{level, ikey}) } @@ -105,13 +106,13 @@ func (p *sessionRecord) resetCompPtrs() { p.compPtrs = p.compPtrs[:0] } -func (p *sessionRecord) addTable(level int, num, size uint64, imin, imax iKey) { +func (p *sessionRecord) addTable(level int, num, size int64, imin, imax internalKey) { p.hasRec |= 1 << recAddTable p.addedTables = append(p.addedTables, atRecord{level, num, size, imin, imax}) } func (p *sessionRecord) addTableFile(level int, t *tFile) { - p.addTable(level, t.file.Num(), t.size, t.imin, t.imax) + p.addTable(level, t.fd.Num, t.size, t.imin, t.imax) } func (p *sessionRecord) resetAddedTables() { @@ -119,7 +120,7 @@ func (p *sessionRecord) resetAddedTables() { p.addedTables = p.addedTables[:0] } -func (p *sessionRecord) delTable(level int, num uint64) { +func (p *sessionRecord) delTable(level int, num int64) { p.hasRec |= 1 << recDelTable p.deletedTables = append(p.deletedTables, dtRecord{level, num}) } @@ -137,6 +138,13 @@ func (p *sessionRecord) putUvarint(w io.Writer, x uint64) { _, p.err = w.Write(p.scratch[:n]) } +func (p *sessionRecord) putVarint(w io.Writer, x int64) { + if x < 0 { + panic("invalid negative value") + } + p.putUvarint(w, uint64(x)) +} + func (p *sessionRecord) putBytes(w io.Writer, x []byte) { if p.err != nil { return @@ -156,11 +164,11 @@ func (p *sessionRecord) encode(w io.Writer) error { } if p.has(recJournalNum) { p.putUvarint(w, recJournalNum) - p.putUvarint(w, p.journalNum) + p.putVarint(w, p.journalNum) } if p.has(recNextFileNum) { p.putUvarint(w, recNextFileNum) - p.putUvarint(w, p.nextFileNum) + p.putVarint(w, p.nextFileNum) } if p.has(recSeqNum) { p.putUvarint(w, recSeqNum) @@ -174,13 +182,13 @@ func (p *sessionRecord) encode(w io.Writer) error { for _, r := range p.deletedTables { p.putUvarint(w, recDelTable) p.putUvarint(w, uint64(r.level)) - p.putUvarint(w, r.num) + p.putVarint(w, r.num) } for _, r := range p.addedTables { p.putUvarint(w, recAddTable) p.putUvarint(w, uint64(r.level)) - p.putUvarint(w, r.num) - p.putUvarint(w, r.size) + p.putVarint(w, r.num) + p.putVarint(w, r.size) p.putBytes(w, r.imin) p.putBytes(w, r.imax) } @@ -194,9 +202,9 @@ func (p *sessionRecord) readUvarintMayEOF(field string, r io.ByteReader, mayEOF x, err := binary.ReadUvarint(r) if err != nil { if err == io.ErrUnexpectedEOF || (mayEOF == false && err == io.EOF) { - p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "short read"}) + p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "short read"}) } else if strings.HasPrefix(err.Error(), "binary:") { - p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, err.Error()}) + p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, err.Error()}) } else { p.err = err } @@ -209,6 +217,14 @@ func (p *sessionRecord) readUvarint(field string, r io.ByteReader) uint64 { return p.readUvarintMayEOF(field, r, false) } +func (p *sessionRecord) readVarint(field string, r io.ByteReader) int64 { + x := int64(p.readUvarintMayEOF(field, r, false)) + if x < 0 { + p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "invalid negative value"}) + } + return x +} + func (p *sessionRecord) readBytes(field string, r byteReader) []byte { if p.err != nil { return nil @@ -221,14 +237,14 @@ func (p *sessionRecord) readBytes(field string, r byteReader) []byte { _, p.err = io.ReadFull(r, x) if p.err != nil { if p.err == io.ErrUnexpectedEOF { - p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "short read"}) + p.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrManifestCorrupted{field, "short read"}) } return nil } return x } -func (p *sessionRecord) readLevel(field string, r io.ByteReader, numLevel int) int { +func (p *sessionRecord) readLevel(field string, r io.ByteReader) int { if p.err != nil { return 0 } @@ -236,14 +252,10 @@ func (p *sessionRecord) readLevel(field string, r io.ByteReader, numLevel int) i if p.err != nil { return 0 } - if x >= uint64(numLevel) { - p.err = errors.NewErrCorrupted(nil, &ErrManifestCorrupted{field, "invalid level number"}) - return 0 - } return int(x) } -func (p *sessionRecord) decode(r io.Reader, numLevel int) error { +func (p *sessionRecord) decode(r io.Reader) error { br, ok := r.(byteReader) if !ok { br = bufio.NewReader(r) @@ -264,17 +276,17 @@ func (p *sessionRecord) decode(r io.Reader, numLevel int) error { p.setComparer(string(x)) } case recJournalNum: - x := p.readUvarint("journal-num", br) + x := p.readVarint("journal-num", br) if p.err == nil { p.setJournalNum(x) } case recPrevJournalNum: - x := p.readUvarint("prev-journal-num", br) + x := p.readVarint("prev-journal-num", br) if p.err == nil { p.setPrevJournalNum(x) } case recNextFileNum: - x := p.readUvarint("next-file-num", br) + x := p.readVarint("next-file-num", br) if p.err == nil { p.setNextFileNum(x) } @@ -284,23 +296,23 @@ func (p *sessionRecord) decode(r io.Reader, numLevel int) error { p.setSeqNum(x) } case recCompPtr: - level := p.readLevel("comp-ptr.level", br, numLevel) + level := p.readLevel("comp-ptr.level", br) ikey := p.readBytes("comp-ptr.ikey", br) if p.err == nil { - p.addCompPtr(level, iKey(ikey)) + p.addCompPtr(level, internalKey(ikey)) } case recAddTable: - level := p.readLevel("add-table.level", br, numLevel) - num := p.readUvarint("add-table.num", br) - size := p.readUvarint("add-table.size", br) + level := p.readLevel("add-table.level", br) + num := p.readVarint("add-table.num", br) + size := p.readVarint("add-table.size", br) imin := p.readBytes("add-table.imin", br) imax := p.readBytes("add-table.imax", br) if p.err == nil { p.addTable(level, num, size, imin, imax) } case recDelTable: - level := p.readLevel("del-table.level", br, numLevel) - num := p.readUvarint("del-table.num", br) + level := p.readLevel("del-table.level", br) + num := p.readVarint("del-table.num", br) if p.err == nil { p.delTable(level, num) } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_util.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_util.go index 7ec9f86f7..674182fb2 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/session_util.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_util.go @@ -17,15 +17,15 @@ import ( // Logging. type dropper struct { - s *session - file storage.File + s *session + fd storage.FileDesc } func (d dropper) Drop(err error) { if e, ok := err.(*journal.ErrCorrupted); ok { - d.s.logf("journal@drop %s-%d S·%s %q", d.file.Type(), d.file.Num(), shortenb(e.Size), e.Reason) + d.s.logf("journal@drop %s-%d S·%s %q", d.fd.Type, d.fd.Num, shortenb(e.Size), e.Reason) } else { - d.s.logf("journal@drop %s-%d %q", d.file.Type(), d.file.Num(), err) + d.s.logf("journal@drop %s-%d %q", d.fd.Type, d.fd.Num, err) } } @@ -34,25 +34,9 @@ func (s *session) logf(format string, v ...interface{}) { s.stor.Log(fmt.Sprintf // File utils. -func (s *session) getJournalFile(num uint64) storage.File { - return s.stor.GetFile(num, storage.TypeJournal) -} - -func (s *session) getTableFile(num uint64) storage.File { - return s.stor.GetFile(num, storage.TypeTable) -} - -func (s *session) getFiles(t storage.FileType) ([]storage.File, error) { - return s.stor.GetFiles(t) -} - -func (s *session) newTemp() storage.File { - num := atomic.AddUint64(&s.stTempFileNum, 1) - 1 - return s.stor.GetFile(num, storage.TypeTemp) -} - -func (s *session) tableFileFromRecord(r atRecord) *tFile { - return newTableFile(s.getTableFile(r.num), r.size, r.imin, r.imax) +func (s *session) newTemp() storage.FileDesc { + num := atomic.AddInt64(&s.stTempFileNum, 1) - 1 + return storage.FileDesc{storage.TypeTemp, num} } // Session state. @@ -80,47 +64,65 @@ func (s *session) setVersion(v *version) { } // Get current unused file number. -func (s *session) nextFileNum() uint64 { - return atomic.LoadUint64(&s.stNextFileNum) +func (s *session) nextFileNum() int64 { + return atomic.LoadInt64(&s.stNextFileNum) } // Set current unused file number to num. -func (s *session) setNextFileNum(num uint64) { - atomic.StoreUint64(&s.stNextFileNum, num) +func (s *session) setNextFileNum(num int64) { + atomic.StoreInt64(&s.stNextFileNum, num) } // Mark file number as used. -func (s *session) markFileNum(num uint64) { +func (s *session) markFileNum(num int64) { nextFileNum := num + 1 for { old, x := s.stNextFileNum, nextFileNum if old > x { x = old } - if atomic.CompareAndSwapUint64(&s.stNextFileNum, old, x) { + if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) { break } } } // Allocate a file number. -func (s *session) allocFileNum() uint64 { - return atomic.AddUint64(&s.stNextFileNum, 1) - 1 +func (s *session) allocFileNum() int64 { + return atomic.AddInt64(&s.stNextFileNum, 1) - 1 } // Reuse given file number. -func (s *session) reuseFileNum(num uint64) { +func (s *session) reuseFileNum(num int64) { for { old, x := s.stNextFileNum, num if old != x+1 { x = old } - if atomic.CompareAndSwapUint64(&s.stNextFileNum, old, x) { + if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) { break } } } +// Set compaction ptr at given level; need external synchronization. +func (s *session) setCompPtr(level int, ik internalKey) { + if level >= len(s.stCompPtrs) { + newCompPtrs := make([]internalKey, level+1) + copy(newCompPtrs, s.stCompPtrs) + s.stCompPtrs = newCompPtrs + } + s.stCompPtrs[level] = append(internalKey{}, ik...) +} + +// Get compaction ptr at given level; need external synchronization. +func (s *session) getCompPtr(level int) internalKey { + if level >= len(s.stCompPtrs) { + return nil + } + return s.stCompPtrs[level] +} + // Manifest related utils. // Fill given session record obj with current states; need external @@ -149,29 +151,28 @@ func (s *session) fillRecord(r *sessionRecord, snapshot bool) { // Mark if record has been committed, this will update session state; // need external synchronization. -func (s *session) recordCommited(r *sessionRecord) { - if r.has(recJournalNum) { - s.stJournalNum = r.journalNum +func (s *session) recordCommited(rec *sessionRecord) { + if rec.has(recJournalNum) { + s.stJournalNum = rec.journalNum } - if r.has(recPrevJournalNum) { - s.stPrevJournalNum = r.prevJournalNum + if rec.has(recPrevJournalNum) { + s.stPrevJournalNum = rec.prevJournalNum } - if r.has(recSeqNum) { - s.stSeqNum = r.seqNum + if rec.has(recSeqNum) { + s.stSeqNum = rec.seqNum } - for _, p := range r.compPtrs { - s.stCompPtrs[p.level] = iKey(p.ikey) + for _, r := range rec.compPtrs { + s.setCompPtr(r.level, internalKey(r.ikey)) } } // Create a new manifest file; need external synchronization. func (s *session) newManifest(rec *sessionRecord, v *version) (err error) { - num := s.allocFileNum() - file := s.stor.GetFile(num, storage.TypeManifest) - writer, err := file.Create() + fd := storage.FileDesc{storage.TypeManifest, s.allocFileNum()} + writer, err := s.stor.Create(fd) if err != nil { return } @@ -196,16 +197,16 @@ func (s *session) newManifest(rec *sessionRecord, v *version) (err error) { if s.manifestWriter != nil { s.manifestWriter.Close() } - if s.manifestFile != nil { - s.manifestFile.Remove() + if !s.manifestFd.Nil() { + s.stor.Remove(s.manifestFd) } - s.manifestFile = file + s.manifestFd = fd s.manifestWriter = writer s.manifest = jw } else { writer.Close() - file.Remove() - s.reuseFileNum(num) + s.stor.Remove(fd) + s.reuseFileNum(fd.Num) } }() @@ -221,7 +222,7 @@ func (s *session) newManifest(rec *sessionRecord, v *version) (err error) { if err != nil { return } - err = s.stor.SetManifest(file) + err = s.stor.SetMeta(fd) return } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go index 420b27732..cbe1dc103 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go @@ -17,11 +17,12 @@ import ( "strings" "sync" "time" - - "github.com/syndtr/goleveldb/leveldb/util" ) -var errFileOpen = errors.New("leveldb/storage: file still open") +var ( + errFileOpen = errors.New("leveldb/storage: file still open") + errReadOnly = errors.New("leveldb/storage: storage is read-only") +) type fileLock interface { release() error @@ -32,40 +33,52 @@ type fileStorageLock struct { } func (lock *fileStorageLock) Release() { - fs := lock.fs - fs.mu.Lock() - defer fs.mu.Unlock() - if fs.slock == lock { - fs.slock = nil + if lock.fs != nil { + lock.fs.mu.Lock() + defer lock.fs.mu.Unlock() + if lock.fs.slock == lock { + lock.fs.slock = nil + } } - return } +const logSizeThreshold = 1024 * 1024 // 1 MiB + // fileStorage is a file-system backed storage. type fileStorage struct { - path string + path string + readOnly bool - mu sync.Mutex - flock fileLock - slock *fileStorageLock - logw *os.File - buf []byte + mu sync.Mutex + flock fileLock + slock *fileStorageLock + logw *os.File + logSize int64 + buf []byte // Opened file counter; if open < 0 means closed. open int day int } // OpenFile returns a new filesytem-backed storage implementation with the given -// path. This also hold a file lock, so any subsequent attempt to open the same -// path will fail. +// path. This also acquire a file lock, so any subsequent attempt to open the +// same path will fail. // // The storage must be closed after use, by calling Close method. -func OpenFile(path string) (Storage, error) { - if err := os.MkdirAll(path, 0755); err != nil { +func OpenFile(path string, readOnly bool) (Storage, error) { + if fi, err := os.Stat(path); err == nil { + if !fi.IsDir() { + return nil, fmt.Errorf("leveldb/storage: open %s: not a directory", path) + } + } else if os.IsNotExist(err) && !readOnly { + if err := os.MkdirAll(path, 0755); err != nil { + return nil, err + } + } else { return nil, err } - flock, err := newFileLock(filepath.Join(path, "LOCK")) + flock, err := newFileLock(filepath.Join(path, "LOCK"), readOnly) if err != nil { return nil, err } @@ -76,23 +89,42 @@ func OpenFile(path string) (Storage, error) { } }() - rename(filepath.Join(path, "LOG"), filepath.Join(path, "LOG.old")) - logw, err := os.OpenFile(filepath.Join(path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644) - if err != nil { - return nil, err + var ( + logw *os.File + logSize int64 + ) + if !readOnly { + logw, err = os.OpenFile(filepath.Join(path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return nil, err + } + logSize, err = logw.Seek(0, os.SEEK_END) + if err != nil { + logw.Close() + return nil, err + } } - fs := &fileStorage{path: path, flock: flock, logw: logw} + fs := &fileStorage{ + path: path, + readOnly: readOnly, + flock: flock, + logw: logw, + logSize: logSize, + } runtime.SetFinalizer(fs, (*fileStorage).Close) return fs, nil } -func (fs *fileStorage) Lock() (util.Releaser, error) { +func (fs *fileStorage) Lock() (Lock, error) { fs.mu.Lock() defer fs.mu.Unlock() if fs.open < 0 { return nil, ErrClosed } + if fs.readOnly { + return &fileStorageLock{}, nil + } if fs.slock != nil { return nil, ErrLocked } @@ -101,7 +133,7 @@ func (fs *fileStorage) Lock() (util.Releaser, error) { } func itoa(buf []byte, i int, wid int) []byte { - var u uint = uint(i) + u := uint(i) if u == 0 && wid <= 1 { return append(buf, '0') } @@ -126,6 +158,22 @@ func (fs *fileStorage) printDay(t time.Time) { } func (fs *fileStorage) doLog(t time.Time, str string) { + if fs.logSize > logSizeThreshold { + // Rotate log file. + fs.logw.Close() + fs.logw = nil + fs.logSize = 0 + rename(filepath.Join(fs.path, "LOG"), filepath.Join(fs.path, "LOG.old")) + } + if fs.logw == nil { + var err error + fs.logw, err = os.OpenFile(filepath.Join(fs.path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return + } + // Force printDay on new log file. + fs.day = 0 + } fs.printDay(t) hour, min, sec := t.Clock() msec := t.Nanosecond() / 1e3 @@ -145,65 +193,71 @@ func (fs *fileStorage) doLog(t time.Time, str string) { } func (fs *fileStorage) Log(str string) { - t := time.Now() - fs.mu.Lock() - defer fs.mu.Unlock() - if fs.open < 0 { - return + if !fs.readOnly { + t := time.Now() + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return + } + fs.doLog(t, str) } - fs.doLog(t, str) } func (fs *fileStorage) log(str string) { - fs.doLog(time.Now(), str) + if !fs.readOnly { + fs.doLog(time.Now(), str) + } } -func (fs *fileStorage) GetFile(num uint64, t FileType) File { - return &file{fs: fs, num: num, t: t} -} +func (fs *fileStorage) SetMeta(fd FileDesc) (err error) { + if !FileDescOk(fd) { + return ErrInvalidFile + } + if fs.readOnly { + return errReadOnly + } -func (fs *fileStorage) GetFiles(t FileType) (ff []File, err error) { fs.mu.Lock() defer fs.mu.Unlock() if fs.open < 0 { - return nil, ErrClosed + return ErrClosed } - dir, err := os.Open(fs.path) - if err != nil { - return - } - fnn, err := dir.Readdirnames(0) - // Close the dir first before checking for Readdirnames error. - if err := dir.Close(); err != nil { - fs.log(fmt.Sprintf("close dir: %v", err)) - } - if err != nil { - return - } - f := &file{fs: fs} - for _, fn := range fnn { - if f.parse(fn) && (f.t&t) != 0 { - ff = append(ff, f) - f = &file{fs: fs} + defer func() { + if err != nil { + fs.log(fmt.Sprintf("CURRENT: %v", err)) } + }() + path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), fd.Num) + w, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return } - return + _, err = fmt.Fprintln(w, fsGenName(fd)) + // Close the file first. + if cerr := w.Close(); cerr != nil { + fs.log(fmt.Sprintf("close CURRENT.%d: %v", fd.Num, cerr)) + } + if err != nil { + return + } + return rename(path, filepath.Join(fs.path, "CURRENT")) } -func (fs *fileStorage) GetManifest() (f File, err error) { +func (fs *fileStorage) GetMeta() (fd FileDesc, err error) { fs.mu.Lock() defer fs.mu.Unlock() if fs.open < 0 { - return nil, ErrClosed + return FileDesc{}, ErrClosed } dir, err := os.Open(fs.path) if err != nil { return } - fnn, err := dir.Readdirnames(0) + names, err := dir.Readdirnames(0) // Close the dir first before checking for Readdirnames error. - if err := dir.Close(); err != nil { - fs.log(fmt.Sprintf("close dir: %v", err)) + if ce := dir.Close(); ce != nil { + fs.log(fmt.Sprintf("close dir: %v", ce)) } if err != nil { return @@ -212,58 +266,64 @@ func (fs *fileStorage) GetManifest() (f File, err error) { var rem []string var pend bool var cerr error - for _, fn := range fnn { - if strings.HasPrefix(fn, "CURRENT") { - pend1 := len(fn) > 7 + for _, name := range names { + if strings.HasPrefix(name, "CURRENT") { + pend1 := len(name) > 7 + var pendNum int64 // Make sure it is valid name for a CURRENT file, otherwise skip it. if pend1 { - if fn[7] != '.' || len(fn) < 9 { - fs.log(fmt.Sprintf("skipping %s: invalid file name", fn)) + if name[7] != '.' || len(name) < 9 { + fs.log(fmt.Sprintf("skipping %s: invalid file name", name)) continue } - if _, e1 := strconv.ParseUint(fn[8:], 10, 0); e1 != nil { - fs.log(fmt.Sprintf("skipping %s: invalid file num: %v", fn, e1)) + var e1 error + if pendNum, e1 = strconv.ParseInt(name[8:], 10, 0); e1 != nil { + fs.log(fmt.Sprintf("skipping %s: invalid file num: %v", name, e1)) continue } } - path := filepath.Join(fs.path, fn) + path := filepath.Join(fs.path, name) r, e1 := os.OpenFile(path, os.O_RDONLY, 0) if e1 != nil { - return nil, e1 + return FileDesc{}, e1 } b, e1 := ioutil.ReadAll(r) if e1 != nil { r.Close() - return nil, e1 + return FileDesc{}, e1 } - f1 := &file{fs: fs} - if len(b) < 1 || b[len(b)-1] != '\n' || !f1.parse(string(b[:len(b)-1])) { - fs.log(fmt.Sprintf("skipping %s: corrupted or incomplete", fn)) + var fd1 FileDesc + if len(b) < 1 || b[len(b)-1] != '\n' || !fsParseNamePtr(string(b[:len(b)-1]), &fd1) { + fs.log(fmt.Sprintf("skipping %s: corrupted or incomplete", name)) if pend1 { - rem = append(rem, fn) + rem = append(rem, name) } if !pend1 || cerr == nil { + metaFd, _ := fsParseName(name) cerr = &ErrCorrupted{ - File: fsParseName(filepath.Base(fn)), - Err: errors.New("leveldb/storage: corrupted or incomplete manifest file"), + Fd: metaFd, + Err: errors.New("leveldb/storage: corrupted or incomplete meta file"), } } - } else if f != nil && f1.Num() < f.Num() { - fs.log(fmt.Sprintf("skipping %s: obsolete", fn)) + } else if pend1 && pendNum != fd1.Num { + fs.log(fmt.Sprintf("skipping %s: inconsistent pending-file num: %d vs %d", name, pendNum, fd1.Num)) + rem = append(rem, name) + } else if fd1.Num < fd.Num { + fs.log(fmt.Sprintf("skipping %s: obsolete", name)) if pend1 { - rem = append(rem, fn) + rem = append(rem, name) } } else { - f = f1 + fd = fd1 pend = pend1 } if err := r.Close(); err != nil { - fs.log(fmt.Sprintf("close %s: %v", fn, err)) + fs.log(fmt.Sprintf("close %s: %v", name, err)) } } } // Don't remove any files if there is no valid CURRENT file. - if f == nil { + if fd.Nil() { if cerr != nil { err = cerr } else { @@ -271,52 +331,140 @@ func (fs *fileStorage) GetManifest() (f File, err error) { } return } - // Rename pending CURRENT file to an effective CURRENT. - if pend { - path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), f.Num()) - if err := rename(path, filepath.Join(fs.path, "CURRENT")); err != nil { - fs.log(fmt.Sprintf("CURRENT.%d -> CURRENT: %v", f.Num(), err)) + if !fs.readOnly { + // Rename pending CURRENT file to an effective CURRENT. + if pend { + path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), fd.Num) + if err := rename(path, filepath.Join(fs.path, "CURRENT")); err != nil { + fs.log(fmt.Sprintf("CURRENT.%d -> CURRENT: %v", fd.Num, err)) + } } - } - // Remove obsolete or incomplete pending CURRENT files. - for _, fn := range rem { - path := filepath.Join(fs.path, fn) - if err := os.Remove(path); err != nil { - fs.log(fmt.Sprintf("remove %s: %v", fn, err)) + // Remove obsolete or incomplete pending CURRENT files. + for _, name := range rem { + path := filepath.Join(fs.path, name) + if err := os.Remove(path); err != nil { + fs.log(fmt.Sprintf("remove %s: %v", name, err)) + } } } return } -func (fs *fileStorage) SetManifest(f File) (err error) { +func (fs *fileStorage) List(ft FileType) (fds []FileDesc, err error) { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + dir, err := os.Open(fs.path) + if err != nil { + return + } + names, err := dir.Readdirnames(0) + // Close the dir first before checking for Readdirnames error. + if cerr := dir.Close(); cerr != nil { + fs.log(fmt.Sprintf("close dir: %v", cerr)) + } + if err == nil { + for _, name := range names { + if fd, ok := fsParseName(name); ok && fd.Type&ft != 0 { + fds = append(fds, fd) + } + } + } + return +} + +func (fs *fileStorage) Open(fd FileDesc) (Reader, error) { + if !FileDescOk(fd) { + return nil, ErrInvalidFile + } + + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_RDONLY, 0) + if err != nil { + if fsHasOldName(fd) && os.IsNotExist(err) { + of, err = os.OpenFile(filepath.Join(fs.path, fsGenOldName(fd)), os.O_RDONLY, 0) + if err == nil { + goto ok + } + } + return nil, err + } +ok: + fs.open++ + return &fileWrap{File: of, fs: fs, fd: fd}, nil +} + +func (fs *fileStorage) Create(fd FileDesc) (Writer, error) { + if !FileDescOk(fd) { + return nil, ErrInvalidFile + } + if fs.readOnly { + return nil, errReadOnly + } + + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return nil, err + } + fs.open++ + return &fileWrap{File: of, fs: fs, fd: fd}, nil +} + +func (fs *fileStorage) Remove(fd FileDesc) error { + if !FileDescOk(fd) { + return ErrInvalidFile + } + if fs.readOnly { + return errReadOnly + } + fs.mu.Lock() defer fs.mu.Unlock() if fs.open < 0 { return ErrClosed } - f2, ok := f.(*file) - if !ok || f2.t != TypeManifest { + err := os.Remove(filepath.Join(fs.path, fsGenName(fd))) + if err != nil { + if fsHasOldName(fd) && os.IsNotExist(err) { + if e1 := os.Remove(filepath.Join(fs.path, fsGenOldName(fd))); !os.IsNotExist(e1) { + fs.log(fmt.Sprintf("remove %s: %v (old name)", fd, err)) + err = e1 + } + } else { + fs.log(fmt.Sprintf("remove %s: %v", fd, err)) + } + } + return err +} + +func (fs *fileStorage) Rename(oldfd, newfd FileDesc) error { + if !FileDescOk(oldfd) || !FileDescOk(newfd) { return ErrInvalidFile } - defer func() { - if err != nil { - fs.log(fmt.Sprintf("CURRENT: %v", err)) - } - }() - path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), f2.Num()) - w, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) - if err != nil { - return err + if oldfd == newfd { + return nil } - _, err = fmt.Fprintln(w, f2.name()) - // Close the file first. - if err := w.Close(); err != nil { - fs.log(fmt.Sprintf("close CURRENT.%d: %v", f2.num, err)) + if fs.readOnly { + return errReadOnly } - if err != nil { - return err + + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return ErrClosed } - return rename(path, filepath.Join(fs.path, "CURRENT")) + return rename(filepath.Join(fs.path, fsGenName(oldfd)), filepath.Join(fs.path, fsGenName(newfd))) } func (fs *fileStorage) Close() error { @@ -332,212 +480,104 @@ func (fs *fileStorage) Close() error { fs.log(fmt.Sprintf("close: warning, %d files still open", fs.open)) } fs.open = -1 - e1 := fs.logw.Close() - err := fs.flock.release() - if err == nil { - err = e1 + if fs.logw != nil { + fs.logw.Close() } - return err + return fs.flock.release() } type fileWrap struct { *os.File - f *file + fs *fileStorage + fd FileDesc + closed bool } -func (fw fileWrap) Sync() error { +func (fw *fileWrap) Sync() error { if err := fw.File.Sync(); err != nil { return err } - if fw.f.Type() == TypeManifest { + if fw.fd.Type == TypeManifest { // Also sync parent directory if file type is manifest. // See: https://code.google.com/p/leveldb/issues/detail?id=190. - if err := syncDir(fw.f.fs.path); err != nil { + if err := syncDir(fw.fs.path); err != nil { + fw.fs.log(fmt.Sprintf("syncDir: %v", err)) return err } } return nil } -func (fw fileWrap) Close() error { - f := fw.f - f.fs.mu.Lock() - defer f.fs.mu.Unlock() - if !f.open { +func (fw *fileWrap) Close() error { + fw.fs.mu.Lock() + defer fw.fs.mu.Unlock() + if fw.closed { return ErrClosed } - f.open = false - f.fs.open-- + fw.closed = true + fw.fs.open-- err := fw.File.Close() if err != nil { - f.fs.log(fmt.Sprintf("close %s.%d: %v", f.Type(), f.Num(), err)) + fw.fs.log(fmt.Sprintf("close %s: %v", fw.fd, err)) } return err } -type file struct { - fs *fileStorage - num uint64 - t FileType - open bool -} - -func (f *file) Open() (Reader, error) { - f.fs.mu.Lock() - defer f.fs.mu.Unlock() - if f.fs.open < 0 { - return nil, ErrClosed - } - if f.open { - return nil, errFileOpen - } - of, err := os.OpenFile(f.path(), os.O_RDONLY, 0) - if err != nil { - if f.hasOldName() && os.IsNotExist(err) { - of, err = os.OpenFile(f.oldPath(), os.O_RDONLY, 0) - if err == nil { - goto ok - } - } - return nil, err - } -ok: - f.open = true - f.fs.open++ - return fileWrap{of, f}, nil -} - -func (f *file) Create() (Writer, error) { - f.fs.mu.Lock() - defer f.fs.mu.Unlock() - if f.fs.open < 0 { - return nil, ErrClosed - } - if f.open { - return nil, errFileOpen - } - of, err := os.OpenFile(f.path(), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) - if err != nil { - return nil, err - } - f.open = true - f.fs.open++ - return fileWrap{of, f}, nil -} - -func (f *file) Replace(newfile File) error { - f.fs.mu.Lock() - defer f.fs.mu.Unlock() - if f.fs.open < 0 { - return ErrClosed - } - newfile2, ok := newfile.(*file) - if !ok { - return ErrInvalidFile - } - if f.open || newfile2.open { - return errFileOpen - } - return rename(newfile2.path(), f.path()) -} - -func (f *file) Type() FileType { - return f.t -} - -func (f *file) Num() uint64 { - return f.num -} - -func (f *file) Remove() error { - f.fs.mu.Lock() - defer f.fs.mu.Unlock() - if f.fs.open < 0 { - return ErrClosed - } - if f.open { - return errFileOpen - } - err := os.Remove(f.path()) - if err != nil { - f.fs.log(fmt.Sprintf("remove %s.%d: %v", f.Type(), f.Num(), err)) - } - // Also try remove file with old name, just in case. - if f.hasOldName() { - if e1 := os.Remove(f.oldPath()); !os.IsNotExist(e1) { - f.fs.log(fmt.Sprintf("remove %s.%d: %v (old name)", f.Type(), f.Num(), err)) - err = e1 - } - } - return err -} - -func (f *file) hasOldName() bool { - return f.t == TypeTable -} - -func (f *file) oldName() string { - switch f.t { - case TypeTable: - return fmt.Sprintf("%06d.sst", f.num) - } - return f.name() -} - -func (f *file) oldPath() string { - return filepath.Join(f.fs.path, f.oldName()) -} - -func (f *file) name() string { - switch f.t { +func fsGenName(fd FileDesc) string { + switch fd.Type { case TypeManifest: - return fmt.Sprintf("MANIFEST-%06d", f.num) + return fmt.Sprintf("MANIFEST-%06d", fd.Num) case TypeJournal: - return fmt.Sprintf("%06d.log", f.num) + return fmt.Sprintf("%06d.log", fd.Num) case TypeTable: - return fmt.Sprintf("%06d.ldb", f.num) + return fmt.Sprintf("%06d.ldb", fd.Num) case TypeTemp: - return fmt.Sprintf("%06d.tmp", f.num) + return fmt.Sprintf("%06d.tmp", fd.Num) default: panic("invalid file type") } } -func (f *file) path() string { - return filepath.Join(f.fs.path, f.name()) +func fsHasOldName(fd FileDesc) bool { + return fd.Type == TypeTable } -func fsParseName(name string) *FileInfo { - fi := &FileInfo{} +func fsGenOldName(fd FileDesc) string { + switch fd.Type { + case TypeTable: + return fmt.Sprintf("%06d.sst", fd.Num) + } + return fsGenName(fd) +} + +func fsParseName(name string) (fd FileDesc, ok bool) { var tail string - _, err := fmt.Sscanf(name, "%d.%s", &fi.Num, &tail) + _, err := fmt.Sscanf(name, "%d.%s", &fd.Num, &tail) if err == nil { switch tail { case "log": - fi.Type = TypeJournal + fd.Type = TypeJournal case "ldb", "sst": - fi.Type = TypeTable + fd.Type = TypeTable case "tmp": - fi.Type = TypeTemp + fd.Type = TypeTemp default: - return nil + return } - return fi + return fd, true } - n, _ := fmt.Sscanf(name, "MANIFEST-%d%s", &fi.Num, &tail) + n, _ := fmt.Sscanf(name, "MANIFEST-%d%s", &fd.Num, &tail) if n == 1 { - fi.Type = TypeManifest - return fi + fd.Type = TypeManifest + return fd, true } - return nil + return } -func (f *file) parse(name string) bool { - fi := fsParseName(name) - if fi == nil { - return false +func fsParseNamePtr(name string, fd *FileDesc) bool { + _fd, ok := fsParseName(name) + if fd != nil { + *fd = _fd } - f.t = fi.Type - f.num = fi.Num - return true + return ok } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go new file mode 100644 index 000000000..5545aeef2 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go @@ -0,0 +1,34 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// +build nacl + +package storage + +import ( + "os" + "syscall" +) + +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + return nil, syscall.ENOTSUP +} + +func setFileLock(f *os.File, readOnly, lock bool) error { + return syscall.ENOTSUP +} + +func rename(oldpath, newpath string) error { + return syscall.ENOTSUP +} + +func isErrInvalid(err error) bool { + return false +} + +func syncDir(name string) error { + return syscall.ENOTSUP +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go index 42940d769..bab62bfce 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go @@ -19,8 +19,21 @@ func (fl *plan9FileLock) release() error { return fl.f.Close() } -func newFileLock(path string) (fl fileLock, err error) { - f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, os.ModeExclusive|0644) +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + var ( + flag int + perm os.FileMode + ) + if readOnly { + flag = os.O_RDONLY + } else { + flag = os.O_RDWR + perm = os.ModeExclusive + } + f, err := os.OpenFile(path, flag, perm) + if os.IsNotExist(err) { + f, err = os.OpenFile(path, flag|os.O_CREATE, perm|0644) + } if err != nil { return } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go index 102031bfd..79901ee4a 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go @@ -18,18 +18,27 @@ type unixFileLock struct { } func (fl *unixFileLock) release() error { - if err := setFileLock(fl.f, false); err != nil { + if err := setFileLock(fl.f, false, false); err != nil { return err } return fl.f.Close() } -func newFileLock(path string) (fl fileLock, err error) { - f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0644) +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + var flag int + if readOnly { + flag = os.O_RDONLY + } else { + flag = os.O_RDWR + } + f, err := os.OpenFile(path, flag, 0) + if os.IsNotExist(err) { + f, err = os.OpenFile(path, flag|os.O_CREATE, 0644) + } if err != nil { return } - err = setFileLock(f, true) + err = setFileLock(f, readOnly, true) if err != nil { f.Close() return @@ -38,7 +47,7 @@ func newFileLock(path string) (fl fileLock, err error) { return } -func setFileLock(f *os.File, lock bool) error { +func setFileLock(f *os.File, readOnly, lock bool) error { flock := syscall.Flock_t{ Type: syscall.F_UNLCK, Start: 0, @@ -46,7 +55,11 @@ func setFileLock(f *os.File, lock bool) error { Whence: 1, } if lock { - flock.Type = syscall.F_WRLCK + if readOnly { + flock.Type = syscall.F_RDLCK + } else { + flock.Type = syscall.F_WRLCK + } } return syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &flock) } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go index 6eb327421..7e2991537 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go @@ -18,18 +18,27 @@ type unixFileLock struct { } func (fl *unixFileLock) release() error { - if err := setFileLock(fl.f, false); err != nil { + if err := setFileLock(fl.f, false, false); err != nil { return err } return fl.f.Close() } -func newFileLock(path string) (fl fileLock, err error) { - f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0644) +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + var flag int + if readOnly { + flag = os.O_RDONLY + } else { + flag = os.O_RDWR + } + f, err := os.OpenFile(path, flag, 0) + if os.IsNotExist(err) { + f, err = os.OpenFile(path, flag|os.O_CREATE, 0644) + } if err != nil { return } - err = setFileLock(f, true) + err = setFileLock(f, readOnly, true) if err != nil { f.Close() return @@ -38,10 +47,14 @@ func newFileLock(path string) (fl fileLock, err error) { return } -func setFileLock(f *os.File, lock bool) error { +func setFileLock(f *os.File, readOnly, lock bool) error { how := syscall.LOCK_UN if lock { - how = syscall.LOCK_EX + if readOnly { + how = syscall.LOCK_SH + } else { + how = syscall.LOCK_EX + } } return syscall.Flock(int(f.Fd()), how|syscall.LOCK_NB) } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go index 50c3c454e..899335fd7 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go @@ -29,12 +29,22 @@ func (fl *windowsFileLock) release() error { return syscall.Close(fl.fd) } -func newFileLock(path string) (fl fileLock, err error) { +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { pathp, err := syscall.UTF16PtrFromString(path) if err != nil { return } - fd, err := syscall.CreateFile(pathp, syscall.GENERIC_READ|syscall.GENERIC_WRITE, 0, nil, syscall.CREATE_ALWAYS, syscall.FILE_ATTRIBUTE_NORMAL, 0) + var access, shareMode uint32 + if readOnly { + access = syscall.GENERIC_READ + shareMode = syscall.FILE_SHARE_READ + } else { + access = syscall.GENERIC_READ | syscall.GENERIC_WRITE + } + fd, err := syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_EXISTING, syscall.FILE_ATTRIBUTE_NORMAL, 0) + if err == syscall.ERROR_FILE_NOT_FOUND { + fd, err = syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_ALWAYS, syscall.FILE_ATTRIBUTE_NORMAL, 0) + } if err != nil { return } @@ -47,9 +57,8 @@ func moveFileEx(from *uint16, to *uint16, flags uint32) error { if r1 == 0 { if e1 != 0 { return error(e1) - } else { - return syscall.EINVAL } + return syscall.EINVAL } return nil } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go index fc1c8165d..9b70e1513 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go @@ -10,8 +10,6 @@ import ( "bytes" "os" "sync" - - "github.com/syndtr/goleveldb/leveldb/util" ) const typeShift = 3 @@ -32,10 +30,10 @@ func (lock *memStorageLock) Release() { // memStorage is a memory-backed storage. type memStorage struct { - mu sync.Mutex - slock *memStorageLock - files map[uint64]*memFile - manifest *memFilePtr + mu sync.Mutex + slock *memStorageLock + files map[uint64]*memFile + meta FileDesc } // NewMemStorage returns a new memory-backed storage implementation. @@ -45,7 +43,7 @@ func NewMemStorage() Storage { } } -func (ms *memStorage) Lock() (util.Releaser, error) { +func (ms *memStorage) Lock() (Lock, error) { ms.mu.Lock() defer ms.mu.Unlock() if ms.slock != nil { @@ -57,147 +55,164 @@ func (ms *memStorage) Lock() (util.Releaser, error) { func (*memStorage) Log(str string) {} -func (ms *memStorage) GetFile(num uint64, t FileType) File { - return &memFilePtr{ms: ms, num: num, t: t} -} - -func (ms *memStorage) GetFiles(t FileType) ([]File, error) { - ms.mu.Lock() - var ff []File - for x, _ := range ms.files { - num, mt := x>>typeShift, FileType(x)&TypeAll - if mt&t == 0 { - continue - } - ff = append(ff, &memFilePtr{ms: ms, num: num, t: mt}) - } - ms.mu.Unlock() - return ff, nil -} - -func (ms *memStorage) GetManifest() (File, error) { - ms.mu.Lock() - defer ms.mu.Unlock() - if ms.manifest == nil { - return nil, os.ErrNotExist - } - return ms.manifest, nil -} - -func (ms *memStorage) SetManifest(f File) error { - fm, ok := f.(*memFilePtr) - if !ok || fm.t != TypeManifest { +func (ms *memStorage) SetMeta(fd FileDesc) error { + if !FileDescOk(fd) { return ErrInvalidFile } + ms.mu.Lock() - ms.manifest = fm + ms.meta = fd ms.mu.Unlock() return nil } -func (*memStorage) Close() error { return nil } - -type memReader struct { - *bytes.Reader - m *memFile -} - -func (mr *memReader) Close() error { - return mr.m.Close() -} - -type memFile struct { - bytes.Buffer - ms *memStorage - open bool -} - -func (*memFile) Sync() error { return nil } -func (m *memFile) Close() error { - m.ms.mu.Lock() - m.open = false - m.ms.mu.Unlock() - return nil -} - -type memFilePtr struct { - ms *memStorage - num uint64 - t FileType -} - -func (p *memFilePtr) x() uint64 { - return p.Num()<> typeShift)} +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go index a4e037ca1..9b30b6727 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go @@ -15,7 +15,7 @@ import ( "github.com/syndtr/goleveldb/leveldb/util" ) -type FileType uint32 +type FileType int const ( TypeManifest FileType = 1 << iota @@ -50,13 +50,13 @@ var ( // a file. Package storage has its own type instead of using // errors.ErrCorrupted to prevent circular import. type ErrCorrupted struct { - File *FileInfo - Err error + Fd FileDesc + Err error } func (e *ErrCorrupted) Error() string { - if e.File != nil { - return fmt.Sprintf("%v [file=%v]", e.Err, e.File) + if !e.Fd.Nil() { + return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd) } else { return e.Err.Error() } @@ -83,31 +83,47 @@ type Writer interface { Syncer } -// File is the file. A file instance must be goroutine-safe. -type File interface { - // Open opens the file for read. Returns os.ErrNotExist error - // if the file does not exist. - // Returns ErrClosed if the underlying storage is closed. - Open() (r Reader, err error) +type Lock interface { + util.Releaser +} - // Create creates the file for writting. Truncate the file if - // already exist. - // Returns ErrClosed if the underlying storage is closed. - Create() (w Writer, err error) +// FileDesc is a file descriptor. +type FileDesc struct { + Type FileType + Num int64 +} - // Replace replaces file with newfile. - // Returns ErrClosed if the underlying storage is closed. - Replace(newfile File) error +func (fd FileDesc) String() string { + switch fd.Type { + case TypeManifest: + return fmt.Sprintf("MANIFEST-%06d", fd.Num) + case TypeJournal: + return fmt.Sprintf("%06d.log", fd.Num) + case TypeTable: + return fmt.Sprintf("%06d.ldb", fd.Num) + case TypeTemp: + return fmt.Sprintf("%06d.tmp", fd.Num) + default: + return fmt.Sprintf("%#x-%d", fd.Type, fd.Num) + } +} - // Type returns the file type - Type() FileType +// Nil returns true if fd == (FileDesc{}). +func (fd FileDesc) Nil() bool { + return fd == (FileDesc{}) +} - // Num returns the file number. - Num() uint64 - - // Remove removes the file. - // Returns ErrClosed if the underlying storage is closed. - Remove() error +// FileDescOk returns true if fd is a valid file descriptor. +func FileDescOk(fd FileDesc) bool { + switch fd.Type { + case TypeManifest: + case TypeJournal: + case TypeTable: + case TypeTemp: + default: + return false + } + return fd.Num >= 0 } // Storage is the storage. A storage instance must be goroutine-safe. @@ -115,59 +131,47 @@ type Storage interface { // Lock locks the storage. Any subsequent attempt to call Lock will fail // until the last lock released. // After use the caller should call the Release method. - Lock() (l util.Releaser, err error) + Lock() (Lock, error) - // Log logs a string. This is used for logging. An implementation - // may write to a file, stdout or simply do nothing. + // Log logs a string. This is used for logging. + // An implementation may write to a file, stdout or simply do nothing. Log(str string) - // GetFile returns a file for the given number and type. GetFile will never - // returns nil, even if the underlying storage is closed. - GetFile(num uint64, t FileType) File + // SetMeta sets to point to the given fd, which then can be acquired using + // GetMeta method. + // SetMeta should be implemented in such way that changes should happened + // atomically. + SetMeta(fd FileDesc) error - // GetFiles returns a slice of files that match the given file types. + // GetManifest returns a manifest file. + // Returns os.ErrNotExist if meta doesn't point to any fd, or point to fd + // that doesn't exist. + GetMeta() (FileDesc, error) + + // List returns fds that match the given file types. // The file types may be OR'ed together. - GetFiles(t FileType) ([]File, error) + List(ft FileType) ([]FileDesc, error) - // GetManifest returns a manifest file. Returns os.ErrNotExist if manifest - // file does not exist. - GetManifest() (File, error) + // Open opens file with the given fd read-only. + // Returns os.ErrNotExist error if the file does not exist. + // Returns ErrClosed if the underlying storage is closed. + Open(fd FileDesc) (Reader, error) - // SetManifest sets the given file as manifest file. The given file should - // be a manifest file type or error will be returned. - SetManifest(f File) error + // Create creates file with the given fd, truncate if already exist and + // opens write-only. + // Returns ErrClosed if the underlying storage is closed. + Create(fd FileDesc) (Writer, error) - // Close closes the storage. It is valid to call Close multiple times. - // Other methods should not be called after the storage has been closed. + // Remove removes file with the given fd. + // Returns ErrClosed if the underlying storage is closed. + Remove(fd FileDesc) error + + // Rename renames file from oldfd to newfd. + // Returns ErrClosed if the underlying storage is closed. + Rename(oldfd, newfd FileDesc) error + + // Close closes the storage. + // It is valid to call Close multiple times. Other methods should not be + // called after the storage has been closed. Close() error } - -// FileInfo wraps basic file info. -type FileInfo struct { - Type FileType - Num uint64 -} - -func (fi FileInfo) String() string { - switch fi.Type { - case TypeManifest: - return fmt.Sprintf("MANIFEST-%06d", fi.Num) - case TypeJournal: - return fmt.Sprintf("%06d.log", fi.Num) - case TypeTable: - return fmt.Sprintf("%06d.ldb", fi.Num) - case TypeTemp: - return fmt.Sprintf("%06d.tmp", fi.Num) - default: - return fmt.Sprintf("%#x-%d", fi.Type, fi.Num) - } -} - -// NewFileInfo creates new FileInfo from the given File. It will returns nil -// if File is nil. -func NewFileInfo(f File) *FileInfo { - if f == nil { - return nil - } - return &FileInfo{f.Type(), f.Num()} -} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table.go b/vendor/github.com/syndtr/goleveldb/leveldb/table.go index 37be47aeb..310ba6c22 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/table.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/table.go @@ -21,10 +21,10 @@ import ( // tFile holds basic information about a table. type tFile struct { - file storage.File + fd storage.FileDesc seekLeft int32 - size uint64 - imin, imax iKey + size int64 + imin, imax internalKey } // Returns true if given key is after largest key of this table. @@ -48,9 +48,9 @@ func (t *tFile) consumeSeek() int32 { } // Creates new tFile. -func newTableFile(file storage.File, size uint64, imin, imax iKey) *tFile { +func newTableFile(fd storage.FileDesc, size int64, imin, imax internalKey) *tFile { f := &tFile{ - file: file, + fd: fd, size: size, imin: imin, imax: imax, @@ -77,6 +77,10 @@ func newTableFile(file storage.File, size uint64, imin, imax iKey) *tFile { return f } +func tableFileFromRecord(r atRecord) *tFile { + return newTableFile(storage.FileDesc{storage.TypeTable, r.num}, r.size, r.imin, r.imax) +} + // tFiles hold multiple tFile. type tFiles []*tFile @@ -89,7 +93,7 @@ func (tf tFiles) nums() string { if i != 0 { x += ", " } - x += fmt.Sprint(f.file.Num()) + x += fmt.Sprint(f.fd.Num) } x += " ]" return x @@ -101,7 +105,7 @@ func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool { a, b := tf[i], tf[j] n := icmp.Compare(a.imin, b.imin) if n == 0 { - return a.file.Num() < b.file.Num() + return a.fd.Num < b.fd.Num } return n < 0 } @@ -109,7 +113,7 @@ func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool { // Returns true if i file number is greater than j. // This used for sort by file number in descending order. func (tf tFiles) lessByNum(i, j int) bool { - return tf[i].file.Num() > tf[j].file.Num() + return tf[i].fd.Num > tf[j].fd.Num } // Sorts tables by key in ascending order. @@ -123,7 +127,7 @@ func (tf tFiles) sortByNum() { } // Returns sum of all tables size. -func (tf tFiles) size() (sum uint64) { +func (tf tFiles) size() (sum int64) { for _, t := range tf { sum += t.size } @@ -132,7 +136,7 @@ func (tf tFiles) size() (sum uint64) { // Searches smallest index of tables whose its smallest // key is after or equal with given key. -func (tf tFiles) searchMin(icmp *iComparer, ikey iKey) int { +func (tf tFiles) searchMin(icmp *iComparer, ikey internalKey) int { return sort.Search(len(tf), func(i int) bool { return icmp.Compare(tf[i].imin, ikey) >= 0 }) @@ -140,7 +144,7 @@ func (tf tFiles) searchMin(icmp *iComparer, ikey iKey) int { // Searches smallest index of tables whose its largest // key is after or equal with given key. -func (tf tFiles) searchMax(icmp *iComparer, ikey iKey) int { +func (tf tFiles) searchMax(icmp *iComparer, ikey internalKey) int { return sort.Search(len(tf), func(i int) bool { return icmp.Compare(tf[i].imax, ikey) >= 0 }) @@ -162,7 +166,7 @@ func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) boo i := 0 if len(umin) > 0 { // Find the earliest possible internal key for min. - i = tf.searchMax(icmp, newIkey(umin, kMaxSeq, ktSeek)) + i = tf.searchMax(icmp, makeInternalKey(nil, umin, keyMaxSeq, keyTypeSeek)) } if i >= len(tf) { // Beginning of range is after all files, so no overlap. @@ -205,7 +209,7 @@ func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, ove } // Returns tables key range. -func (tf tFiles) getRange(icmp *iComparer) (imin, imax iKey) { +func (tf tFiles) getRange(icmp *iComparer) (imin, imax internalKey) { for i, t := range tf { if i == 0 { imin, imax = t.imin, t.imax @@ -227,10 +231,10 @@ func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range if slice != nil { var start, limit int if slice.Start != nil { - start = tf.searchMax(icmp, iKey(slice.Start)) + start = tf.searchMax(icmp, internalKey(slice.Start)) } if slice.Limit != nil { - limit = tf.searchMin(icmp, iKey(slice.Limit)) + limit = tf.searchMin(icmp, internalKey(slice.Limit)) } else { limit = tf.Len() } @@ -255,7 +259,7 @@ type tFilesArrayIndexer struct { } func (a *tFilesArrayIndexer) Search(key []byte) int { - return a.searchMax(a.icmp, iKey(key)) + return a.searchMax(a.icmp, internalKey(key)) } func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator { @@ -295,16 +299,16 @@ type tOps struct { // Creates an empty table and returns table writer. func (t *tOps) create() (*tWriter, error) { - file := t.s.getTableFile(t.s.allocFileNum()) - fw, err := file.Create() + fd := storage.FileDesc{storage.TypeTable, t.s.allocFileNum()} + fw, err := t.s.stor.Create(fd) if err != nil { return nil, err } return &tWriter{ - t: t, - file: file, - w: fw, - tw: table.NewWriter(fw, t.s.o.Options), + t: t, + fd: fd, + w: fw, + tw: table.NewWriter(fw, t.s.o.Options), }, nil } @@ -340,21 +344,20 @@ func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) { // Opens table. It returns a cache handle, which should // be released after use. func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) { - num := f.file.Num() - ch = t.cache.Get(0, num, func() (size int, value cache.Value) { + ch = t.cache.Get(0, uint64(f.fd.Num), func() (size int, value cache.Value) { var r storage.Reader - r, err = f.file.Open() + r, err = t.s.stor.Open(f.fd) if err != nil { return 0, nil } - var bcache *cache.CacheGetter + var bcache *cache.NamespaceGetter if t.bcache != nil { - bcache = &cache.CacheGetter{Cache: t.bcache, NS: num} + bcache = &cache.NamespaceGetter{Cache: t.bcache, NS: uint64(f.fd.Num)} } var tr *table.Reader - tr, err = table.NewReader(r, int64(f.size), storage.NewFileInfo(f.file), bcache, t.bpool, t.s.o.Options) + tr, err = table.NewReader(r, f.size, f.fd, bcache, t.bpool, t.s.o.Options) if err != nil { r.Close() return 0, nil @@ -390,14 +393,13 @@ func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, } // Returns approximate offset of the given key. -func (t *tOps) offsetOf(f *tFile, key []byte) (offset uint64, err error) { +func (t *tOps) offsetOf(f *tFile, key []byte) (offset int64, err error) { ch, err := t.open(f) if err != nil { return } defer ch.Release() - offset_, err := ch.Value().(*table.Reader).OffsetOf(key) - return uint64(offset_), err + return ch.Value().(*table.Reader).OffsetOf(key) } // Creates an iterator from the given table. @@ -414,15 +416,14 @@ func (t *tOps) newIterator(f *tFile, slice *util.Range, ro *opt.ReadOptions) ite // Removes table from persistent storage. It waits until // no one use the the table. func (t *tOps) remove(f *tFile) { - num := f.file.Num() - t.cache.Delete(0, num, func() { - if err := f.file.Remove(); err != nil { - t.s.logf("table@remove removing @%d %q", num, err) + t.cache.Delete(0, uint64(f.fd.Num), func() { + if err := t.s.stor.Remove(f.fd); err != nil { + t.s.logf("table@remove removing @%d %q", f.fd.Num, err) } else { - t.s.logf("table@remove removed @%d", num) + t.s.logf("table@remove removed @%d", f.fd.Num) } if t.bcache != nil { - t.bcache.EvictNS(num) + t.bcache.EvictNS(uint64(f.fd.Num)) } }) } @@ -471,9 +472,9 @@ func newTableOps(s *session) *tOps { type tWriter struct { t *tOps - file storage.File - w storage.Writer - tw *table.Writer + fd storage.FileDesc + w storage.Writer + tw *table.Writer first, last []byte } @@ -513,16 +514,15 @@ func (w *tWriter) finish() (f *tFile, err error) { return } } - f = newTableFile(w.file, uint64(w.tw.BytesLen()), iKey(w.first), iKey(w.last)) + f = newTableFile(w.fd, int64(w.tw.BytesLen()), internalKey(w.first), internalKey(w.last)) return } // Drops the table. func (w *tWriter) drop() { w.close() - w.file.Remove() - w.t.s.reuseFileNum(w.file.Num()) - w.file = nil + w.t.s.stor.Remove(w.fd) + w.t.s.reuseFileNum(w.fd.Num) w.tw = nil w.first = nil w.last = nil diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go index 23c7c6129..ae61bece9 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go @@ -507,9 +507,9 @@ func (i *indexIter) Get() iterator.Iterator { // Reader is a table reader. type Reader struct { mu sync.RWMutex - fi *storage.FileInfo + fd storage.FileDesc reader io.ReaderAt - cache *cache.CacheGetter + cache *cache.NamespaceGetter err error bpool *util.BufferPool // Options @@ -539,7 +539,7 @@ func (r *Reader) blockKind(bh blockHandle) string { } func (r *Reader) newErrCorrupted(pos, size int64, kind, reason string) error { - return &errors.ErrCorrupted{File: r.fi, Err: &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}} + return &errors.ErrCorrupted{Fd: r.fd, Err: &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}} } func (r *Reader) newErrCorruptedBH(bh blockHandle, reason string) error { @@ -551,7 +551,7 @@ func (r *Reader) fixErrCorruptedBH(bh blockHandle, err error) error { cerr.Pos = int64(bh.offset) cerr.Size = int64(bh.length) cerr.Kind = r.blockKind(bh) - return &errors.ErrCorrupted{File: r.fi, Err: cerr} + return &errors.ErrCorrupted{Fd: r.fd, Err: cerr} } return err } @@ -988,13 +988,13 @@ func (r *Reader) Release() { // The fi, cache and bpool is optional and can be nil. // // The returned table reader instance is goroutine-safe. -func NewReader(f io.ReaderAt, size int64, fi *storage.FileInfo, cache *cache.CacheGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) { +func NewReader(f io.ReaderAt, size int64, fd storage.FileDesc, cache *cache.NamespaceGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) { if f == nil { return nil, errors.New("leveldb/table: nil file") } r := &Reader{ - fi: fi, + fd: fd, reader: f, cache: cache, bpool: bpool, diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util.go b/vendor/github.com/syndtr/goleveldb/leveldb/util.go index 1a5bf71a3..3b663d1cc 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/util.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util.go @@ -72,20 +72,20 @@ func maxInt(a, b int) int { return b } -type files []storage.File +type fdSorter []storage.FileDesc -func (p files) Len() int { +func (p fdSorter) Len() int { return len(p) } -func (p files) Less(i, j int) bool { - return p[i].Num() < p[j].Num() +func (p fdSorter) Less(i, j int) bool { + return p[i].Num < p[j].Num } -func (p files) Swap(i, j int) { +func (p fdSorter) Swap(i, j int) { p[i], p[j] = p[j], p[i] } -func (p files) sort() { - sort.Sort(p) +func sortFds(fds []storage.FileDesc) { + sort.Sort(fdSorter(fds)) } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go index 54903660f..7f3fa4e2c 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go @@ -7,38 +7,38 @@ package util import ( - "bytes" "encoding/binary" ) // Hash return hash of the given data. func Hash(data []byte, seed uint32) uint32 { // Similar to murmur hash - var m uint32 = 0xc6a4a793 - var r uint32 = 24 - h := seed ^ (uint32(len(data)) * m) + const ( + m = uint32(0xc6a4a793) + r = uint32(24) + ) + var ( + h = seed ^ (uint32(len(data)) * m) + i int + ) - buf := bytes.NewBuffer(data) - for buf.Len() >= 4 { - var w uint32 - binary.Read(buf, binary.LittleEndian, &w) - h += w + for n := len(data) - len(data)%4; i < n; i += 4 { + h += binary.LittleEndian.Uint32(data[i:]) h *= m h ^= (h >> 16) } - rest := buf.Bytes() - switch len(rest) { + switch len(data) - i { default: panic("not reached") case 3: - h += uint32(rest[2]) << 16 + h += uint32(data[i+2]) << 16 fallthrough case 2: - h += uint32(rest[1]) << 8 + h += uint32(data[i+1]) << 8 fallthrough case 1: - h += uint32(rest[0]) + h += uint32(data[i]) h *= m h ^= (h >> r) case 0: diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/pool.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/pool.go deleted file mode 100644 index 1f7fdd41f..000000000 --- a/vendor/github.com/syndtr/goleveldb/leveldb/util/pool.go +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (c) 2014, Suryandaru Triandana -// All rights reserved. -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// +build go1.3 - -package util - -import ( - "sync" -) - -type Pool struct { - sync.Pool -} - -func NewPool(cap int) *Pool { - return &Pool{} -} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/pool_legacy.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/pool_legacy.go deleted file mode 100644 index 27b8d03be..000000000 --- a/vendor/github.com/syndtr/goleveldb/leveldb/util/pool_legacy.go +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2014, Suryandaru Triandana -// All rights reserved. -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// +build !go1.3 - -package util - -type Pool struct { - pool chan interface{} -} - -func (p *Pool) Get() interface{} { - select { - case x := <-p.pool: - return x - default: - return nil - } -} - -func (p *Pool) Put(x interface{}) { - select { - case p.pool <- x: - default: - } -} - -func NewPool(cap int) *Pool { - return &Pool{pool: make(chan interface{}, cap)} -} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/version.go b/vendor/github.com/syndtr/goleveldb/leveldb/version.go index 011d982da..d274eeff2 100644 --- a/vendor/github.com/syndtr/goleveldb/leveldb/version.go +++ b/vendor/github.com/syndtr/goleveldb/leveldb/version.go @@ -7,6 +7,7 @@ package leveldb import ( + "fmt" "sync/atomic" "unsafe" @@ -23,7 +24,7 @@ type tSet struct { type version struct { s *session - tables []tFiles + levels []tFiles // Level that should be compacted next and its compaction score. // Score < 1 means compaction is not strictly needed. These fields @@ -39,7 +40,7 @@ type version struct { } func newVersion(s *session) *version { - return &version{s: s, tables: make([]tFiles, s.o.GetNumLevel())} + return &version{s: s} } func (v *version) releaseNB() { @@ -51,18 +52,18 @@ func (v *version) releaseNB() { panic("negative version ref") } - tables := make(map[uint64]bool) - for _, tt := range v.next.tables { + nextTables := make(map[int64]bool) + for _, tt := range v.next.levels { for _, t := range tt { - num := t.file.Num() - tables[num] = true + num := t.fd.Num + nextTables[num] = true } } - for _, tt := range v.tables { + for _, tt := range v.levels { for _, t := range tt { - num := t.file.Num() - if _, ok := tables[num]; !ok { + num := t.fd.Num + if _, ok := nextTables[num]; !ok { v.s.tops.remove(t) } } @@ -78,11 +79,26 @@ func (v *version) release() { v.s.vmu.Unlock() } -func (v *version) walkOverlapping(ikey iKey, f func(level int, t *tFile) bool, lf func(level int) bool) { +func (v *version) walkOverlapping(aux tFiles, ikey internalKey, f func(level int, t *tFile) bool, lf func(level int) bool) { ukey := ikey.ukey() + // Aux level. + if aux != nil { + for _, t := range aux { + if t.overlaps(v.s.icmp, ukey, ukey) { + if !f(-1, t) { + return + } + } + } + + if lf != nil && !lf(-1) { + return + } + } + // Walk tables level-by-level. - for level, tables := range v.tables { + for level, tables := range v.levels { if len(tables) == 0 { continue } @@ -114,7 +130,7 @@ func (v *version) walkOverlapping(ikey iKey, f func(level int, t *tFile) bool, l } } -func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) { +func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) { ukey := ikey.ukey() var ( @@ -124,16 +140,16 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byt // Level-0. zfound bool zseq uint64 - zkt kType + zkt keyType zval []byte ) err = ErrNotFound - // Since entries never hope across level, finding key/value + // Since entries never hop across level, finding key/value // in smaller level make later levels irrelevant. - v.walkOverlapping(ikey, func(level int, t *tFile) bool { - if !tseek { + v.walkOverlapping(aux, ikey, func(level int, t *tFile) bool { + if level >= 0 && !tseek { if tset == nil { tset = &tSet{level, t} } else { @@ -150,6 +166,7 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byt } else { fikey, fval, ferr = v.s.tops.find(t, ikey, ro) } + switch ferr { case nil: case ErrNotFound: @@ -159,9 +176,10 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byt return false } - if fukey, fseq, fkt, fkerr := parseIkey(fikey); fkerr == nil { + if fukey, fseq, fkt, fkerr := parseInternalKey(fikey); fkerr == nil { if v.s.icmp.uCompare(ukey, fukey) == 0 { - if level == 0 { + // Level <= 0 may overlaps each-other. + if level <= 0 { if fseq >= zseq { zfound = true zseq = fseq @@ -170,12 +188,12 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byt } } else { switch fkt { - case ktVal: + case keyTypeVal: value = fval err = nil - case ktDel: + case keyTypeDel: default: - panic("leveldb: invalid iKey type") + panic("leveldb: invalid internalKey type") } return false } @@ -189,12 +207,12 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byt }, func(level int) bool { if zfound { switch zkt { - case ktVal: + case keyTypeVal: value = zval err = nil - case ktDel: + case keyTypeDel: default: - panic("leveldb: invalid iKey type") + panic("leveldb: invalid internalKey type") } return false } @@ -209,46 +227,40 @@ func (v *version) get(ikey iKey, ro *opt.ReadOptions, noValue bool) (value []byt return } -func (v *version) sampleSeek(ikey iKey) (tcomp bool) { +func (v *version) sampleSeek(ikey internalKey) (tcomp bool) { var tset *tSet - v.walkOverlapping(ikey, func(level int, t *tFile) bool { + v.walkOverlapping(nil, ikey, func(level int, t *tFile) bool { if tset == nil { tset = &tSet{level, t} return true - } else { - if tset.table.consumeSeek() <= 0 { - tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset)) - } - return false } + if tset.table.consumeSeek() <= 0 { + tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset)) + } + return false }, nil) return } func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) { - // Merge all level zero files together since they may overlap - for _, t := range v.tables[0] { - it := v.s.tops.newIterator(t, slice, ro) - its = append(its, it) - } - strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader) - for _, tables := range v.tables[1:] { - if len(tables) == 0 { - continue + for level, tables := range v.levels { + if level == 0 { + // Merge all level zero files together since they may overlap. + for _, t := range tables { + its = append(its, v.s.tops.newIterator(t, slice, ro)) + } + } else if len(tables) != 0 { + its = append(its, iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict)) } - - it := iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict) - its = append(its, it) } - return } func (v *version) newStaging() *versionStaging { - return &versionStaging{base: v, tables: make([]tablesScratch, v.s.o.GetNumLevel())} + return &versionStaging{base: v} } // Spawn a new version based on this version. @@ -259,19 +271,22 @@ func (v *version) spawn(r *sessionRecord) *version { } func (v *version) fillRecord(r *sessionRecord) { - for level, ts := range v.tables { - for _, t := range ts { + for level, tables := range v.levels { + for _, t := range tables { r.addTableFile(level, t) } } } func (v *version) tLen(level int) int { - return len(v.tables[level]) + if level < len(v.levels) { + return len(v.levels[level]) + } + return 0 } -func (v *version) offsetOf(ikey iKey) (n uint64, err error) { - for level, tables := range v.tables { +func (v *version) offsetOf(ikey internalKey) (n int64, err error) { + for level, tables := range v.levels { for _, t := range tables { if v.s.icmp.Compare(t.imax, ikey) <= 0 { // Entire file is before "ikey", so just add the file size @@ -287,12 +302,11 @@ func (v *version) offsetOf(ikey iKey) (n uint64, err error) { } else { // "ikey" falls in the range for this table. Add the // approximate offset of "ikey" within the table. - var nn uint64 - nn, err = v.s.tops.offsetOf(t, ikey) - if err != nil { + if m, err := v.s.tops.offsetOf(t, ikey); err == nil { + n += m + } else { return 0, err } - n += nn } } } @@ -300,37 +314,50 @@ func (v *version) offsetOf(ikey iKey) (n uint64, err error) { return } -func (v *version) pickMemdbLevel(umin, umax []byte) (level int) { - if !v.tables[0].overlaps(v.s.icmp, umin, umax, true) { - var overlaps tFiles - maxLevel := v.s.o.GetMaxMemCompationLevel() - for ; level < maxLevel; level++ { - if v.tables[level+1].overlaps(v.s.icmp, umin, umax, false) { - break - } - overlaps = v.tables[level+2].getOverlaps(overlaps, v.s.icmp, umin, umax, false) - if overlaps.size() > uint64(v.s.o.GetCompactionGPOverlaps(level)) { - break +func (v *version) pickMemdbLevel(umin, umax []byte, maxLevel int) (level int) { + if maxLevel > 0 { + if len(v.levels) == 0 { + return maxLevel + } + if !v.levels[0].overlaps(v.s.icmp, umin, umax, true) { + var overlaps tFiles + for ; level < maxLevel; level++ { + if pLevel := level + 1; pLevel >= len(v.levels) { + return maxLevel + } else if v.levels[pLevel].overlaps(v.s.icmp, umin, umax, false) { + break + } + if gpLevel := level + 2; gpLevel < len(v.levels) { + overlaps = v.levels[gpLevel].getOverlaps(overlaps, v.s.icmp, umin, umax, false) + if overlaps.size() > int64(v.s.o.GetCompactionGPOverlaps(level)) { + break + } + } } } } - return } func (v *version) computeCompaction() { // Precomputed best level for next compaction - var bestLevel int = -1 - var bestScore float64 = -1 + bestLevel := int(-1) + bestScore := float64(-1) - for level, tables := range v.tables { + statFiles := make([]int, len(v.levels)) + statSizes := make([]string, len(v.levels)) + statScore := make([]string, len(v.levels)) + statTotSize := int64(0) + + for level, tables := range v.levels { var score float64 + size := tables.size() if level == 0 { // We treat level-0 specially by bounding the number of files // instead of number of bytes for two reasons: // // (1) With larger write-buffer sizes, it is nice not to do too - // many level-0 compactions. + // many level-0 compaction. // // (2) The files in level-0 are merged on every read and // therefore we wish to avoid too many files when the individual @@ -339,17 +366,24 @@ func (v *version) computeCompaction() { // overwrites/deletions). score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger()) } else { - score = float64(tables.size()) / float64(v.s.o.GetCompactionTotalSize(level)) + score = float64(size) / float64(v.s.o.GetCompactionTotalSize(level)) } if score > bestScore { bestLevel = level bestScore = score } + + statFiles[level] = len(tables) + statSizes[level] = shortenb(int(size)) + statScore[level] = fmt.Sprintf("%.2f", score) + statTotSize += size } v.cLevel = bestLevel v.cScore = bestScore + + v.s.logf("version@stat F·%v S·%s%v Sc·%v", statFiles, shortenb(int(statTotSize)), statSizes, statScore) } func (v *version) needCompaction() bool { @@ -357,43 +391,48 @@ func (v *version) needCompaction() bool { } type tablesScratch struct { - added map[uint64]atRecord - deleted map[uint64]struct{} + added map[int64]atRecord + deleted map[int64]struct{} } type versionStaging struct { base *version - tables []tablesScratch + levels []tablesScratch +} + +func (p *versionStaging) getScratch(level int) *tablesScratch { + if level >= len(p.levels) { + newLevels := make([]tablesScratch, level+1) + copy(newLevels, p.levels) + p.levels = newLevels + } + return &(p.levels[level]) } func (p *versionStaging) commit(r *sessionRecord) { // Deleted tables. for _, r := range r.deletedTables { - tm := &(p.tables[r.level]) - - if len(p.base.tables[r.level]) > 0 { - if tm.deleted == nil { - tm.deleted = make(map[uint64]struct{}) + scratch := p.getScratch(r.level) + if r.level < len(p.base.levels) && len(p.base.levels[r.level]) > 0 { + if scratch.deleted == nil { + scratch.deleted = make(map[int64]struct{}) } - tm.deleted[r.num] = struct{}{} + scratch.deleted[r.num] = struct{}{} } - - if tm.added != nil { - delete(tm.added, r.num) + if scratch.added != nil { + delete(scratch.added, r.num) } } // New tables. for _, r := range r.addedTables { - tm := &(p.tables[r.level]) - - if tm.added == nil { - tm.added = make(map[uint64]atRecord) + scratch := p.getScratch(r.level) + if scratch.added == nil { + scratch.added = make(map[int64]atRecord) } - tm.added[r.num] = r - - if tm.deleted != nil { - delete(tm.deleted, r.num) + scratch.added[r.num] = r + if scratch.deleted != nil { + delete(scratch.deleted, r.num) } } } @@ -401,39 +440,62 @@ func (p *versionStaging) commit(r *sessionRecord) { func (p *versionStaging) finish() *version { // Build new version. nv := newVersion(p.base.s) - for level, tm := range p.tables { - btables := p.base.tables[level] - - n := len(btables) + len(tm.added) - len(tm.deleted) - if n < 0 { - n = 0 - } - nt := make(tFiles, 0, n) - - // Base tables. - for _, t := range btables { - if _, ok := tm.deleted[t.file.Num()]; ok { - continue - } - if _, ok := tm.added[t.file.Num()]; ok { - continue - } - nt = append(nt, t) - } - - // New tables. - for _, r := range tm.added { - nt = append(nt, p.base.s.tableFileFromRecord(r)) - } - - // Sort tables. - if level == 0 { - nt.sortByNum() - } else { - nt.sortByKey(p.base.s.icmp) - } - nv.tables[level] = nt + numLevel := len(p.levels) + if len(p.base.levels) > numLevel { + numLevel = len(p.base.levels) } + nv.levels = make([]tFiles, numLevel) + for level := 0; level < numLevel; level++ { + var baseTabels tFiles + if level < len(p.base.levels) { + baseTabels = p.base.levels[level] + } + + if level < len(p.levels) { + scratch := p.levels[level] + + var nt tFiles + // Prealloc list if possible. + if n := len(baseTabels) + len(scratch.added) - len(scratch.deleted); n > 0 { + nt = make(tFiles, 0, n) + } + + // Base tables. + for _, t := range baseTabels { + if _, ok := scratch.deleted[t.fd.Num]; ok { + continue + } + if _, ok := scratch.added[t.fd.Num]; ok { + continue + } + nt = append(nt, t) + } + + // New tables. + for _, r := range scratch.added { + nt = append(nt, tableFileFromRecord(r)) + } + + if len(nt) != 0 { + // Sort tables. + if level == 0 { + nt.sortByNum() + } else { + nt.sortByKey(p.base.s.icmp) + } + + nv.levels[level] = nt + } + } else { + nv.levels[level] = baseTabels + } + } + + // Trim levels. + n := len(nv.levels) + for ; n > 0 && nv.levels[n-1] == nil; n-- { + } + nv.levels = nv.levels[:n] // Compute compaction score for new version. nv.computeCompaction() diff --git a/vendor/vendor.json b/vendor/vendor.json index 308516ad2..5dbcc927b 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -207,64 +207,76 @@ "revisionTime": "2015-08-17T10:50:50-07:00" }, { + "checksumSHA1": "sUPlrnoPPmYuvjEtw9HUTKPCZa4=", "path": "github.com/syndtr/goleveldb/leveldb", - "revision": "1a9d62f03ea92815b46fcaab357cfd4df264b1a0", - "revisionTime": "2015-08-19T12:16:22+07:00" + "revision": "ab8b5dcf1042e818ab68e770d465112a899b668e", + "revisionTime": "2016-06-29T10:12:33Z" }, { + "checksumSHA1": "BX+u3k6if9kZNYYqbL56gC48BAQ=", "path": "github.com/syndtr/goleveldb/leveldb/cache", - "revision": "1a9d62f03ea92815b46fcaab357cfd4df264b1a0", - "revisionTime": "2015-08-19T12:16:22+07:00" + "revision": "ab8b5dcf1042e818ab68e770d465112a899b668e", + "revisionTime": "2016-06-29T10:12:33Z" }, { + "checksumSHA1": "5KPgnvCPlR0ysDAqo6jApzRQ3tw=", "path": "github.com/syndtr/goleveldb/leveldb/comparer", - "revision": "1a9d62f03ea92815b46fcaab357cfd4df264b1a0", - "revisionTime": "2015-08-19T12:16:22+07:00" + "revision": "ab8b5dcf1042e818ab68e770d465112a899b668e", + "revisionTime": "2016-06-29T10:12:33Z" }, { + "checksumSHA1": "Vpvz4qmbq/kz0SN95yt0tmSI7JE=", "path": "github.com/syndtr/goleveldb/leveldb/errors", - "revision": "1a9d62f03ea92815b46fcaab357cfd4df264b1a0", - "revisionTime": "2015-08-19T12:16:22+07:00" + "revision": "ab8b5dcf1042e818ab68e770d465112a899b668e", + "revisionTime": "2016-06-29T10:12:33Z" }, { + "checksumSHA1": "eqKeD6DS7eNCtxVYZEHHRKkyZrw=", "path": "github.com/syndtr/goleveldb/leveldb/filter", - "revision": "1a9d62f03ea92815b46fcaab357cfd4df264b1a0", - "revisionTime": "2015-08-19T12:16:22+07:00" + "revision": "ab8b5dcf1042e818ab68e770d465112a899b668e", + "revisionTime": "2016-06-29T10:12:33Z" }, { + "checksumSHA1": "cRn09EwfU3k2ZjvClHYmVFlakRY=", "path": "github.com/syndtr/goleveldb/leveldb/iterator", - "revision": "1a9d62f03ea92815b46fcaab357cfd4df264b1a0", - "revisionTime": "2015-08-19T12:16:22+07:00" + "revision": "ab8b5dcf1042e818ab68e770d465112a899b668e", + "revisionTime": "2016-06-29T10:12:33Z" }, { + "checksumSHA1": "CMBbso8ZuG2kBGDL2Blf/wpeheU=", "path": "github.com/syndtr/goleveldb/leveldb/journal", - "revision": "1a9d62f03ea92815b46fcaab357cfd4df264b1a0", - "revisionTime": "2015-08-19T12:16:22+07:00" + "revision": "ab8b5dcf1042e818ab68e770d465112a899b668e", + "revisionTime": "2016-06-29T10:12:33Z" }, { + "checksumSHA1": "LshzRv+3spfwuHLepRxiyjf/3sQ=", "path": "github.com/syndtr/goleveldb/leveldb/memdb", - "revision": "1a9d62f03ea92815b46fcaab357cfd4df264b1a0", - "revisionTime": "2015-08-19T12:16:22+07:00" + "revision": "ab8b5dcf1042e818ab68e770d465112a899b668e", + "revisionTime": "2016-06-29T10:12:33Z" }, { + "checksumSHA1": "MP/sSiEbzIN5M664sO4r9+dwzV4=", "path": "github.com/syndtr/goleveldb/leveldb/opt", - "revision": "1a9d62f03ea92815b46fcaab357cfd4df264b1a0", - "revisionTime": "2015-08-19T12:16:22+07:00" + "revision": "ab8b5dcf1042e818ab68e770d465112a899b668e", + "revisionTime": "2016-06-29T10:12:33Z" }, { + "checksumSHA1": "XO5e4bQsWDdNqoHbFWy2TKoOWrQ=", "path": "github.com/syndtr/goleveldb/leveldb/storage", - "revision": "1a9d62f03ea92815b46fcaab357cfd4df264b1a0", - "revisionTime": "2015-08-19T12:16:22+07:00" + "revision": "ab8b5dcf1042e818ab68e770d465112a899b668e", + "revisionTime": "2016-06-29T10:12:33Z" }, { + "checksumSHA1": "4EGplyU1Q07vIczP2yZgKvjuYVA=", "path": "github.com/syndtr/goleveldb/leveldb/table", - "revision": "1a9d62f03ea92815b46fcaab357cfd4df264b1a0", - "revisionTime": "2015-08-19T12:16:22+07:00" + "revision": "ab8b5dcf1042e818ab68e770d465112a899b668e", + "revisionTime": "2016-06-29T10:12:33Z" }, { + "checksumSHA1": "4zil8Gwg8VPkDn1YzlgCvtukJFU=", "path": "github.com/syndtr/goleveldb/leveldb/util", - "revision": "1a9d62f03ea92815b46fcaab357cfd4df264b1a0", - "revisionTime": "2015-08-19T12:16:22+07:00" + "revision": "ab8b5dcf1042e818ab68e770d465112a899b668e", + "revisionTime": "2016-06-29T10:12:33Z" }, { "path": "github.com/vaughan0/go-ini",